/*
 * Copyright (c) 2007 The DragonFly Project.  All rights reserved.
 *
 * This code is derived from software contributed to The DragonFly Project
 * by Matthew Dillon <dillon@backplane.com>
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in
 *    the documentation and/or other materials provided with the
 *    distribution.
 * 3. Neither the name of The DragonFly Project nor the names of its
 *    contributors may be used to endorse or promote products derived
 *    from this software without specific, prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
 * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING,
 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 * $DragonFly: src/sys/vfs/hammer/hammer_ondisk.c,v 1.27 2008/02/06 08:59:28 dillon Exp $
 */
/*
 * Manage HAMMER's on-disk structures.  These routines are primarily
 * responsible for interfacing with the kernel's I/O subsystem and for
 * managing in-memory structures.
 */
#include "hammer.h"
#include <sys/fcntl.h>
#include <sys/nlookup.h>
#include <sys/buf.h>
#include <sys/buf2.h>
static void hammer_free_volume(hammer_volume_t volume);
static int hammer_load_volume(hammer_volume_t volume);
static int hammer_load_supercl(hammer_supercl_t supercl,
			hammer_alloc_state_t isnew);
static int hammer_load_cluster(hammer_cluster_t cluster, int getflags);
static int hammer_load_buffer(hammer_buffer_t buffer, u_int64_t buf_type);
static int hammer_load_node(hammer_node_t node);
static void alloc_new_buffer(hammer_cluster_t cluster, u_int64_t type,
			hammer_alist_t live,
			int32_t start, int *errorp,
			struct hammer_buffer **bufferp);
static void readhammerbuf(hammer_volume_t vol, void *data,
			int64_t offset);
static void writehammerbuf(hammer_volume_t vol, const void *data,
			int64_t offset);
static int64_t calculate_cluster_offset(hammer_volume_t vol, int32_t clu_no);
static int64_t calculate_supercl_offset(hammer_volume_t vol, int32_t scl_no);
static int32_t hammer_alloc_master(hammer_cluster_t cluster, int nblks,
			int32_t start, int isfwd);
static void hammer_adjust_stats(hammer_cluster_t cluster,
			u_int64_t buf_type, int nblks);
struct hammer_alist_config Buf_alist_config;
struct hammer_alist_config Vol_normal_alist_config;
struct hammer_alist_config Vol_super_alist_config;
struct hammer_alist_config Supercl_alist_config;
struct hammer_alist_config Clu_master_alist_config;
struct hammer_alist_config Clu_slave_alist_config;
/*
 * Red-Black tree support for various structures
 */
static int
hammer_ino_rb_compare(hammer_inode_t ip1, hammer_inode_t ip2)
{
	if (ip1->obj_id < ip2->obj_id)
		return(-1);
	if (ip1->obj_id > ip2->obj_id)
		return(1);
	if (ip1->obj_asof < ip2->obj_asof)
		return(-1);
	if (ip1->obj_asof > ip2->obj_asof)
		return(1);
	return(0);
}

static int
hammer_inode_info_cmp(hammer_inode_info_t info, hammer_inode_t ip)
{
	if (info->obj_id < ip->obj_id)
		return(-1);
	if (info->obj_id > ip->obj_id)
		return(1);
	if (info->obj_asof < ip->obj_asof)
		return(-1);
	if (info->obj_asof > ip->obj_asof)
		return(1);
	return(0);
}
static int
hammer_vol_rb_compare(hammer_volume_t vol1, hammer_volume_t vol2)
{
	if (vol1->vol_no < vol2->vol_no)
		return(-1);
	if (vol1->vol_no > vol2->vol_no)
		return(1);
	return(0);
}

static int
hammer_scl_rb_compare(hammer_supercl_t cl1, hammer_supercl_t cl2)
{
	if (cl1->scl_no < cl2->scl_no)
		return(-1);
	if (cl1->scl_no > cl2->scl_no)
		return(1);
	return(0);
}

static int
hammer_clu_rb_compare(hammer_cluster_t cl1, hammer_cluster_t cl2)
{
	if (cl1->clu_no < cl2->clu_no)
		return(-1);
	if (cl1->clu_no > cl2->clu_no)
		return(1);
	return(0);
}

static int
hammer_buf_rb_compare(hammer_buffer_t buf1, hammer_buffer_t buf2)
{
	if (buf1->buf_no < buf2->buf_no)
		return(-1);
	if (buf1->buf_no > buf2->buf_no)
		return(1);
	return(0);
}

static int
hammer_nod_rb_compare(hammer_node_t node1, hammer_node_t node2)
{
	if (node1->node_offset < node2->node_offset)
		return(-1);
	if (node1->node_offset > node2->node_offset)
		return(1);
	return(0);
}
/*
 * Note: The lookup function for hammer_ino_rb_tree winds up being named
 * hammer_ino_rb_tree_RB_LOOKUP_INFO(root, info).  The other lookup
 * functions are normal, e.g. hammer_clu_rb_tree_RB_LOOKUP(root, clu_no).
 */
RB_GENERATE(hammer_ino_rb_tree, hammer_inode, rb_node, hammer_ino_rb_compare);
RB_GENERATE_XLOOKUP(hammer_ino_rb_tree, INFO, hammer_inode, rb_node,
		hammer_inode_info_cmp, hammer_inode_info_t);
RB_GENERATE2(hammer_vol_rb_tree, hammer_volume, rb_node,
	     hammer_vol_rb_compare, int32_t, vol_no);
RB_GENERATE2(hammer_scl_rb_tree, hammer_supercl, rb_node,
	     hammer_scl_rb_compare, int32_t, scl_no);
RB_GENERATE2(hammer_clu_rb_tree, hammer_cluster, rb_node,
	     hammer_clu_rb_compare, int32_t, clu_no);
RB_GENERATE2(hammer_buf_rb_tree, hammer_buffer, rb_node,
	     hammer_buf_rb_compare, int32_t, buf_no);
RB_GENERATE2(hammer_nod_rb_tree, hammer_node, rb_node,
	     hammer_nod_rb_compare, int32_t, node_offset);
/************************************************************************
 *				VOLUMES					*
 ************************************************************************
 *
 * Load a HAMMER volume by name.  Returns 0 on success or a positive error
 * code on failure.  Volumes must be loaded at mount time, get_volume() will
 * not load a new volume.
 *
 * Calls made to hammer_load_volume() are single-threaded during mount.
 */
int
hammer_install_volume(struct hammer_mount *hmp, const char *volname)
{
	struct mount *mp;
	hammer_volume_t volume;
	struct hammer_volume_ondisk *ondisk;
	struct nlookupdata nd;
	struct buf *bp = NULL;
	int error;
	int ronly;

	mp = hmp->mp;
	ronly = ((mp->mnt_flag & MNT_RDONLY) ? 1 : 0);

	/*
	 * Allocate a volume structure
	 */
	++hammer_count_volumes;
	volume = kmalloc(sizeof(*volume), M_HAMMER, M_WAITOK|M_ZERO);
	volume->vol_name = kstrdup(volname, M_HAMMER);
	volume->hmp = hmp;
	hammer_io_init(&volume->io, HAMMER_STRUCTURE_VOLUME);
	volume->io.offset = 0LL;

	/*
	 * Get the device vnode
	 */
	error = nlookup_init(&nd, volume->vol_name, UIO_SYSSPACE, NLC_FOLLOW);
	if (error == 0)
		error = nlookup(&nd);
	if (error == 0)
		error = cache_vref(&nd.nl_nch, nd.nl_cred, &volume->devvp);
	nlookup_done(&nd);
	if (error == 0) {
		if (vn_isdisk(volume->devvp, &error)) {
			error = vfs_mountedon(volume->devvp);
		}
	}
	if (error == 0 &&
	    count_udev(volume->devvp->v_umajor, volume->devvp->v_uminor) > 0) {
		error = EBUSY;
	}
	if (error == 0) {
		vn_lock(volume->devvp, LK_EXCLUSIVE | LK_RETRY);
		error = vinvalbuf(volume->devvp, V_SAVE, 0, 0);
		if (error == 0) {
			error = VOP_OPEN(volume->devvp,
					 (ronly ? FREAD : FREAD|FWRITE),
					 FSCRED, NULL);
		}
		vn_unlock(volume->devvp);
	}
	if (error) {
		hammer_free_volume(volume);
		return(error);
	}
	volume->devvp->v_rdev->si_mountpoint = mp;

	/*
	 * Extract the volume number from the volume header and do various
	 * sanity checks.
	 */
	error = bread(volume->devvp, 0LL, HAMMER_BUFSIZE, &bp);
	if (error)
		goto late_failure;
	ondisk = (void *)bp->b_data;
	if (ondisk->head.buf_type != HAMMER_FSBUF_VOLUME) {
		kprintf("hammer_mount: volume %s has an invalid header\n",
			volume->vol_name);
		error = EFTYPE;
		goto late_failure;
	}
	volume->vol_no = ondisk->vol_no;
	volume->cluster_base = ondisk->vol_clo_beg;
	volume->vol_clsize = ondisk->vol_clsize;
	volume->vol_flags = ondisk->vol_flags;
	volume->nblocks = ondisk->vol_nblocks;
	RB_INIT(&volume->rb_clus_root);
	RB_INIT(&volume->rb_scls_root);

	hmp->mp->mnt_stat.f_blocks += volume->nblocks;

	if (RB_EMPTY(&hmp->rb_vols_root)) {
		hmp->fsid = ondisk->vol_fsid;
	} else if (bcmp(&hmp->fsid, &ondisk->vol_fsid, sizeof(uuid_t))) {
		kprintf("hammer_mount: volume %s's fsid does not match "
			"other volumes\n", volume->vol_name);
		error = EFTYPE;
		goto late_failure;
	}

	/*
	 * Insert the volume structure into the red-black tree.
	 */
	if (RB_INSERT(hammer_vol_rb_tree, &hmp->rb_vols_root, volume)) {
		kprintf("hammer_mount: volume %s has a duplicate vol_no %d\n",
			volume->vol_name, volume->vol_no);
		error = EEXIST;
	}

	/*
	 * Set the root volume and load the root cluster.  HAMMER special
	 * cases rootvol and rootcl and will not deallocate the structures.
	 * We do not hold a ref because this would prevent related I/O
	 * from being flushed.
	 */
	if (error == 0 && ondisk->vol_rootvol == ondisk->vol_no) {
		hmp->rootvol = volume;
		hammer_ref_volume(volume);
		hmp->rootcl = hammer_get_cluster(volume,
						 ondisk->vol0_root_clu_no,
						 &error, GET_CLUSTER_NORECOVER);
		hammer_rel_cluster(hmp->rootcl, 0);
		hammer_rel_volume(volume, 0);
		hmp->fsid_udev = dev2udev(vn_todev(volume->devvp));
	}
late_failure:
	if (bp)
		brelse(bp);
	if (error) {
		/*vinvalbuf(volume->devvp, V_SAVE, 0, 0);*/
		VOP_CLOSE(volume->devvp, ronly ? FREAD : FREAD|FWRITE);
		hammer_free_volume(volume);
	}
	return (error);
}
/*
 * Unload and free a HAMMER volume.  Must return >= 0 to continue scan
 * so returns -1 on failure.
 */
int
hammer_unload_volume(hammer_volume_t volume, void *data __unused)
{
	struct hammer_mount *hmp = volume->hmp;
	hammer_cluster_t rootcl;
	int ronly = ((hmp->mp->mnt_flag & MNT_RDONLY) ? 1 : 0);

	/*
	 * Sync clusters, sync volume
	 */
	hmp->mp->mnt_stat.f_blocks -= volume->nblocks;

	/*
	 * Clean up the root cluster, which is held unlocked in the root
	 * volume.
	 */
	if (hmp->rootvol == volume) {
		if ((rootcl = hmp->rootcl) != NULL) {
			hmp->rootcl = NULL;
			hammer_unload_cluster(rootcl, NULL);
		}
		hmp->rootvol = NULL;
	}

	/*
	 * Unload clusters and super-clusters.  Unloading a super-cluster
	 * also unloads related clusters, but the filesystem may not be
	 * using super-clusters so unload clusters anyway.
	 */
	RB_SCAN(hammer_clu_rb_tree, &volume->rb_clus_root, NULL,
			hammer_unload_cluster, NULL);
	RB_SCAN(hammer_scl_rb_tree, &volume->rb_scls_root, NULL,
			hammer_unload_supercl, NULL);
	hammer_io_waitdep(&volume->io);

	/*
	 * Release our buffer and flush anything left in the buffer cache.
	 */
	hammer_io_release(&volume->io, 2);

	/*
	 * There should be no references on the volume, no clusters, and
	 * no super-clusters.
	 */
	KKASSERT(volume->io.lock.refs == 0);
	KKASSERT(RB_EMPTY(&volume->rb_clus_root));
	KKASSERT(RB_EMPTY(&volume->rb_scls_root));

	volume->ondisk = NULL;
	if (volume->devvp) {
		if (ronly) {
			vinvalbuf(volume->devvp, 0, 0, 0);
			VOP_CLOSE(volume->devvp, FREAD);
		} else {
			vinvalbuf(volume->devvp, V_SAVE, 0, 0);
			VOP_CLOSE(volume->devvp, FREAD|FWRITE);
		}
	}

	/*
	 * Destroy the structure
	 */
	RB_REMOVE(hammer_vol_rb_tree, &hmp->rb_vols_root, volume);
	hammer_free_volume(volume);
	return(0);
}
static
void
hammer_free_volume(hammer_volume_t volume)
{
	if (volume->vol_name) {
		kfree(volume->vol_name, M_HAMMER);
		volume->vol_name = NULL;
	}
	if (volume->devvp) {
		if (vn_isdisk(volume->devvp, NULL) &&
		    volume->devvp->v_rdev &&
		    volume->devvp->v_rdev->si_mountpoint == volume->hmp->mp
		) {
			volume->devvp->v_rdev->si_mountpoint = NULL;
		}
		vrele(volume->devvp);
		volume->devvp = NULL;
	}
	--hammer_count_volumes;
	kfree(volume, M_HAMMER);
}
/*
 * Get a HAMMER volume.  The volume must already exist.
 */
hammer_volume_t
hammer_get_volume(struct hammer_mount *hmp, int32_t vol_no, int *errorp)
{
	struct hammer_volume *volume;

	/*
	 * Locate the volume structure
	 */
	volume = RB_LOOKUP(hammer_vol_rb_tree, &hmp->rb_vols_root, vol_no);
	if (volume == NULL) {
		*errorp = ENOENT;
		return(NULL);
	}
	hammer_ref(&volume->io.lock);

	/*
	 * Deal with on-disk info
	 */
	if (volume->ondisk == NULL || volume->io.loading) {
		*errorp = hammer_load_volume(volume);
		if (*errorp) {
			hammer_rel_volume(volume, 1);
			volume = NULL;
		}
	} else {
		*errorp = 0;
	}
	return(volume);
}

int
hammer_ref_volume(hammer_volume_t volume)
{
	int error;

	hammer_ref(&volume->io.lock);

	/*
	 * Deal with on-disk info
	 */
	if (volume->ondisk == NULL || volume->io.loading) {
		error = hammer_load_volume(volume);
		if (error)
			hammer_rel_volume(volume, 1);
	} else {
		error = 0;
	}
	return (error);
}
hammer_volume_t
hammer_get_root_volume(struct hammer_mount *hmp, int *errorp)
{
	hammer_volume_t volume;

	volume = hmp->rootvol;
	KKASSERT(volume != NULL);
	hammer_ref(&volume->io.lock);

	/*
	 * Deal with on-disk info
	 */
	if (volume->ondisk == NULL || volume->io.loading) {
		*errorp = hammer_load_volume(volume);
		if (*errorp) {
			hammer_rel_volume(volume, 1);
			volume = NULL;
		}
	} else {
		*errorp = 0;
	}
	return (volume);
}
/*
 * Load a volume's on-disk information.  The volume must be referenced and
 * not locked.  We temporarily acquire an exclusive lock to interlock
 * against releases or multiple get's.
 */
static int
hammer_load_volume(hammer_volume_t volume)
{
	struct hammer_volume_ondisk *ondisk;
	int error;

	hammer_lock_ex(&volume->io.lock);
	KKASSERT(volume->io.loading == 0);
	volume->io.loading = 1;

	if (volume->ondisk == NULL) {
		error = hammer_io_read(volume->devvp, &volume->io);
		if (error) {
			volume->io.loading = 0;
			hammer_unlock(&volume->io.lock);
			return(error);
		}
		volume->ondisk = ondisk = (void *)volume->io.bp->b_data;

		/*
		 * Configure the volume's A-lists.  These are used to
		 * allocate clusters.
		 */
		if (volume->vol_flags & HAMMER_VOLF_USINGSUPERCL) {
			volume->alist.config = &Vol_super_alist_config;
			volume->alist.meta = ondisk->vol_almeta.super;
			volume->alist.info = volume;
		} else {
			volume->alist.config = &Vol_normal_alist_config;
			volume->alist.meta = ondisk->vol_almeta.normal;
			volume->alist.info = NULL;
		}
	} else {
		error = 0;
	}
	volume->io.loading = 0;
	hammer_unlock(&volume->io.lock);
	return(error);
}
/*
 * Release a volume.  Call hammer_io_release on the last reference.  We have
 * to acquire an exclusive lock to interlock against volume->ondisk tests
 * in hammer_load_volume(), and hammer_io_release() also expects an exclusive
 * lock.
 *
 * Volumes are not unloaded from memory during normal operation.
 */
void
hammer_rel_volume(hammer_volume_t volume, int flush)
{
	if (volume->io.lock.refs == 1) {
		hammer_lock_ex(&volume->io.lock);
		if (volume->io.lock.refs == 1) {
			volume->ondisk = NULL;
			hammer_io_release(&volume->io, flush);
		} else if (flush) {
			hammer_io_flush(&volume->io);
		}
		hammer_unlock(&volume->io.lock);
	}
	hammer_unref(&volume->io.lock);
}
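
/*
 * Illustrative sketch (added, not part of the original file): the get/rel
 * reference discipline used throughout this module.  A get function
 * returns a referenced structure with its ondisk data loaded; the caller
 * must balance it with the matching rel call.
 */
#if 0
static void
example_volume_usage(struct hammer_mount *hmp, int32_t vol_no)
{
	hammer_volume_t volume;
	int error;

	volume = hammer_get_volume(hmp, vol_no, &error);
	if (volume) {
		/* ... inspect volume->ondisk while holding the ref ... */
		hammer_rel_volume(volume, 0);
	}
}
#endif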
/************************************************************************
 *				SUPER-CLUSTERS				*
 ************************************************************************
 *
 * Manage super-clusters.  Note that a supercl holds a reference to its
 * associated volume.
 */
static int
hammer_find_supercl(hammer_volume_t volume, int32_t scl_no)
{
	if (RB_LOOKUP(hammer_scl_rb_tree, &volume->rb_scls_root, scl_no))
		return(1);
	return(0);
}
hammer_supercl_t
hammer_get_supercl(hammer_volume_t volume, int32_t scl_no,
		   int *errorp, hammer_alloc_state_t isnew)
{
	hammer_supercl_t supercl;

	/*
	 * Locate and lock the super-cluster structure, creating one
	 * if necessary.
	 */
again:
	supercl = RB_LOOKUP(hammer_scl_rb_tree, &volume->rb_scls_root, scl_no);
	if (supercl == NULL) {
		++hammer_count_supercls;
		supercl = kmalloc(sizeof(*supercl), M_HAMMER, M_WAITOK|M_ZERO);
		supercl->scl_no = scl_no;
		supercl->volume = volume;
		supercl->io.offset = calculate_supercl_offset(volume, scl_no);
		hammer_io_init(&supercl->io, HAMMER_STRUCTURE_SUPERCL);
		hammer_ref(&supercl->io.lock);

		/*
		 * Insert the super-cluster into the RB tree and handle late
		 * collisions.
		 */
		if (RB_INSERT(hammer_scl_rb_tree, &volume->rb_scls_root, supercl)) {
			hammer_unref(&supercl->io.lock);
			--hammer_count_supercls;
			kfree(supercl, M_HAMMER);
			goto again;
		}
		hammer_ref(&volume->io.lock);
	} else {
		hammer_ref(&supercl->io.lock);
	}

	/*
	 * Deal with on-disk info
	 */
	if (supercl->ondisk == NULL || isnew || supercl->io.loading) {
		*errorp = hammer_load_supercl(supercl, isnew);
		if (*errorp) {
			hammer_rel_supercl(supercl, 1);
			supercl = NULL;
		}
	} else {
		*errorp = 0;
	}
	return(supercl);
}
static int
hammer_load_supercl(hammer_supercl_t supercl, hammer_alloc_state_t isnew)
{
	struct hammer_supercl_ondisk *ondisk;
	hammer_volume_t volume = supercl->volume;
	int error;
	int64_t nclusters;

	hammer_lock_ex(&supercl->io.lock);
	KKASSERT(supercl->io.loading == 0);
	supercl->io.loading = 1;

	if (supercl->ondisk == NULL) {
		if (isnew)
			error = hammer_io_new(volume->devvp, &supercl->io);
		else
			error = hammer_io_read(volume->devvp, &supercl->io);
		if (error) {
			supercl->io.loading = 0;
			hammer_unlock(&supercl->io.lock);
			return(error);
		}
		supercl->ondisk = ondisk = (void *)supercl->io.bp->b_data;

		supercl->alist.config = &Supercl_alist_config;
		supercl->alist.meta = ondisk->scl_meta;
		supercl->alist.info = NULL;
	} else if (isnew) {
		error = hammer_io_new(volume->devvp, &supercl->io);
	} else {
		error = 0;
	}
	if (error == 0 && isnew) {
		/*
		 * If this is a new super-cluster we have to initialize
		 * various ondisk structural elements.  The caller is
		 * responsible for the remainder.
		 */
		struct hammer_alist_live dummy;

		hammer_modify_supercl(supercl);

		ondisk = supercl->ondisk;
		dummy.config = &Buf_alist_config;
		dummy.meta = ondisk->head.buf_almeta;
		dummy.info = NULL;
		hammer_initbuffer(&dummy, &ondisk->head, HAMMER_FSBUF_SUPERCL);

		nclusters = volume->ondisk->vol_nclusters -
			    ((int64_t)supercl->scl_no * HAMMER_SCL_MAXCLUSTERS);
		KKASSERT(nclusters > 0);
		if (nclusters > HAMMER_SCL_MAXCLUSTERS)
			nclusters = HAMMER_SCL_MAXCLUSTERS;
		hammer_alist_init(&supercl->alist, 0, (int32_t)nclusters,
				  HAMMER_ASTATE_FREE);
	}
	supercl->io.loading = 0;
	hammer_unlock(&supercl->io.lock);
	return (error);
}
/*
 * NOTE: Called from RB_SCAN, must return >= 0 for scan to continue.
 */
int
hammer_unload_supercl(hammer_supercl_t supercl, void *data __unused)
{
	KKASSERT(supercl->io.lock.refs == 0);
	hammer_ref(&supercl->io.lock);
	hammer_rel_supercl(supercl, 2);
	return(0);
}
/*
 * Release a super-cluster.  We have to deal with several places where
 * another thread can ref the super-cluster.
 *
 * Only destroy the structure itself if the related buffer cache buffer
 * was disassociated from it.  This ties the management of the structure
 * to the buffer cache subsystem.
 */
void
hammer_rel_supercl(hammer_supercl_t supercl, int flush)
{
	hammer_volume_t volume;

	if (supercl->io.lock.refs == 1) {
		hammer_lock_ex(&supercl->io.lock);
		if (supercl->io.lock.refs == 1) {
			hammer_io_release(&supercl->io, flush);
			if (supercl->io.bp == NULL &&
			    supercl->io.lock.refs == 1) {
				volume = supercl->volume;
				RB_REMOVE(hammer_scl_rb_tree,
					  &volume->rb_scls_root, supercl);
				supercl->volume = NULL;	/* sanity */
				--hammer_count_supercls;
				kfree(supercl, M_HAMMER);
				hammer_rel_volume(volume, 0);
				return;
			}
		} else if (flush) {
			hammer_io_flush(&supercl->io);
		}
		hammer_unlock(&supercl->io.lock);
	}
	hammer_unref(&supercl->io.lock);
}
/************************************************************************
 *				CLUSTERS				*
 ************************************************************************
 *
 */
hammer_cluster_t
hammer_get_cluster(hammer_volume_t volume, int32_t clu_no,
		   int *errorp, int getflags)
{
	hammer_cluster_t cluster;

again:
	cluster = RB_LOOKUP(hammer_clu_rb_tree, &volume->rb_clus_root, clu_no);
	if (cluster == NULL) {
		++hammer_count_clusters;
		cluster = kmalloc(sizeof(*cluster), M_HAMMER, M_WAITOK|M_ZERO);
		cluster->clu_no = clu_no;
		cluster->volume = volume;
		RB_INIT(&cluster->rb_bufs_root);
		RB_INIT(&cluster->rb_nods_root);
		hammer_io_init(&cluster->io, HAMMER_STRUCTURE_CLUSTER);
		cluster->io.offset = calculate_cluster_offset(volume, clu_no);
		hammer_ref(&cluster->io.lock);
		/* NOTE: cluster->io.validated expected to be 0 */

		/*
		 * Insert the cluster into the RB tree and handle late
		 * collisions.
		 */
		if (RB_INSERT(hammer_clu_rb_tree, &volume->rb_clus_root, cluster)) {
			hammer_unref(&cluster->io.lock);
			--hammer_count_clusters;
			kfree(cluster, M_HAMMER);
			goto again;
		}
		hammer_ref(&volume->io.lock);
	} else {
		hammer_ref(&cluster->io.lock);
	}

	/*
	 * Deal with on-disk info
	 */
	if (cluster->ondisk == NULL || getflags || cluster->io.validated == 0) {
		*errorp = hammer_load_cluster(cluster, getflags);
		if (*errorp) {
			hammer_rel_cluster(cluster, 1);
			cluster = NULL;
		}
	} else {
		*errorp = 0;
	}
	return (cluster);
}
hammer_cluster_t
hammer_get_root_cluster(struct hammer_mount *hmp, int *errorp)
{
	hammer_cluster_t cluster;

	cluster = hmp->rootcl;
	KKASSERT(cluster != NULL);
	hammer_ref(&cluster->io.lock);

	/*
	 * Deal with on-disk info
	 */
	if (cluster->ondisk == NULL || cluster->io.validated == 0) {
		*errorp = hammer_load_cluster(cluster, 0);
		if (*errorp) {
			hammer_rel_cluster(cluster, 1);
			cluster = NULL;
		}
	} else {
		*errorp = 0;
	}
	return (cluster);
}
static
int
hammer_load_cluster(hammer_cluster_t cluster, int getflags)
{
	hammer_volume_t volume = cluster->volume;
	struct hammer_cluster_ondisk *ondisk;
	int error;

	hammer_lock_ex(&cluster->io.lock);
	KKASSERT(cluster->io.loading == 0);
	cluster->io.loading = 1;

	if (cluster->ondisk == NULL) {
		KKASSERT(TAILQ_EMPTY(&cluster->io.deplist));

		/*
		 * Unmodified buffers may be present, indicating that we
		 * had already validated the cluster even though we no longer
		 * have its ondisk info.
		 */
		if (!RB_EMPTY(&cluster->rb_bufs_root))
			KKASSERT(cluster->io.validated);
		if (getflags & GET_CLUSTER_NEW)
			error = hammer_io_new(volume->devvp, &cluster->io);
		else
			error = hammer_io_read(volume->devvp, &cluster->io);
		if (error) {
			cluster->io.loading = 0;
			hammer_unlock(&cluster->io.lock);
			return(error);
		}
		cluster->ondisk = ondisk = (void *)cluster->io.bp->b_data;

		cluster->alist_master.config = &Clu_master_alist_config;
		cluster->alist_master.meta = ondisk->clu_master_meta;
		cluster->alist_btree.config = &Clu_slave_alist_config;
		cluster->alist_btree.meta = ondisk->clu_btree_meta;
		cluster->alist_btree.info = cluster;
		cluster->alist_record.config = &Clu_slave_alist_config;
		cluster->alist_record.meta = ondisk->clu_record_meta;
		cluster->alist_record.info = cluster;
		cluster->alist_mdata.config = &Clu_slave_alist_config;
		cluster->alist_mdata.meta = ondisk->clu_mdata_meta;
		cluster->alist_mdata.info = cluster;

		if ((getflags & GET_CLUSTER_NEW) == 0) {
			/*
			 * Load cluster range info for easy access
			 */
			cluster->clu_btree_beg = ondisk->clu_btree_beg;
			cluster->clu_btree_end = ondisk->clu_btree_end;
		}
	} else if (getflags & GET_CLUSTER_NEW) {
		error = hammer_io_new(volume->devvp, &cluster->io);
	} else {
		error = 0;
	}
	if (error == 0 && (getflags & GET_CLUSTER_NEW)) {
		/*
		 * If this is a new cluster we have to initialize
		 * various ondisk structural elements.  The caller is
		 * responsible for the remainder.
		 */
		struct hammer_alist_live dummy;
		hammer_node_t croot;
		hammer_volume_ondisk_t voldisk;
		int32_t nbuffers;

		cluster->flags &= ~HAMMER_CLUSTER_DELETED;

		hammer_modify_cluster(cluster);
		ondisk = cluster->ondisk;
		voldisk = volume->ondisk;

		dummy.config = &Buf_alist_config;
		dummy.meta = ondisk->head.buf_almeta;
		dummy.info = NULL;
		hammer_initbuffer(&dummy, &ondisk->head, HAMMER_FSBUF_CLUSTER);

		ondisk->vol_fsid = voldisk->vol_fsid;
		ondisk->vol_fstype = voldisk->vol_fstype;

		ondisk->clu_id = 0;	/* XXX */
		ondisk->clu_no = cluster->clu_no;
		ondisk->clu_flags = 0;
		ondisk->clu_start = HAMMER_BUFSIZE;
		ondisk->synchronized_rec_id = 1;	/* XXX timestamp */
		KKASSERT(voldisk->vol_clo_end > cluster->io.offset);
		if (voldisk->vol_clo_end - cluster->io.offset >
		    voldisk->vol_clsize) {
			ondisk->clu_limit = voldisk->vol_clsize;
		} else {
			ondisk->clu_limit = (int32_t)(voldisk->vol_clo_end -
						      cluster->io.offset);
		}
		nbuffers = ondisk->clu_limit / HAMMER_BUFSIZE;
		hammer_alist_init(&cluster->alist_master, 1, nbuffers - 1,
				  HAMMER_ASTATE_FREE);
		hammer_alist_init(&cluster->alist_btree,
				  HAMMER_FSBUF_MAXBLKS,
				  (nbuffers - 1) * HAMMER_FSBUF_MAXBLKS,
				  HAMMER_ASTATE_ALLOC);
		hammer_alist_init(&cluster->alist_record,
				  HAMMER_FSBUF_MAXBLKS,
				  (nbuffers - 1) * HAMMER_FSBUF_MAXBLKS,
				  HAMMER_ASTATE_ALLOC);
		hammer_alist_init(&cluster->alist_mdata,
				  HAMMER_FSBUF_MAXBLKS,
				  (nbuffers - 1) * HAMMER_FSBUF_MAXBLKS,
				  HAMMER_ASTATE_ALLOC);

		ondisk->idx_data = 1 * HAMMER_FSBUF_MAXBLKS;
		ondisk->idx_index = 0 * HAMMER_FSBUF_MAXBLKS;
		ondisk->idx_record = nbuffers * HAMMER_FSBUF_MAXBLKS;

		/*
		 * Initialize the B-Tree.  We don't know what the caller
		 * intends to do with the cluster so make sure it causes
		 * an assertion if the caller makes no changes.
		 */
		ondisk->clu_btree_parent_vol_no = -2;
		ondisk->clu_btree_parent_clu_no = -2;
		ondisk->clu_btree_parent_offset = -2;
		ondisk->clu_btree_parent_clu_gen = -2;

		croot = hammer_alloc_btree(cluster, &error);
		if (error == 0) {
			hammer_modify_node(croot);
			bzero(croot->ondisk, sizeof(*croot->ondisk));
			croot->ondisk->count = 0;
			croot->ondisk->type = HAMMER_BTREE_TYPE_LEAF;
			hammer_modify_cluster(cluster);
			ondisk->clu_btree_root = croot->node_offset;
			hammer_rel_node(croot);
		}

		/*
		 * We just formatted this cluster, don't try to recover it!
		 */
		cluster->io.validated = 1;
	}

	/*
	 * If no error occurred handle automatic cluster recovery unless
	 * the NORECOVER flag is passed (prevents recovery recursions) or
	 * the cluster has been flagged for deletion (prevents an attempt
	 * to recover a cluster which is no longer hooked into the tree).
	 *
	 * Setting hammer_debug_recover to 1 will force recovery on load
	 * whether or not the cluster is marked open.
	 *
	 * Setting hammer_debug_recover to -1 will force NO recovery
	 * regardless of state.
	 *
	 * io.validated can only be cleared if the buffer RB list is empty,
	 * preventing us from trying to recover an actively referenced
	 * cluster (which would blow the filesystem to smithereens).
	 */
	if (error == 0 && cluster->io.validated == 0) {
		if ((getflags & GET_CLUSTER_NORECOVER) == 0 &&
		    (cluster->flags & HAMMER_CLUSTER_DELETED) == 0) {
			if ((cluster->ondisk->clu_flags & HAMMER_CLUF_OPEN) ||
			    hammer_debug_recover > 0) {
				if (hammer_debug_recover >= 0)
					hammer_recover(cluster);
			}
			cluster->io.validated = 1;
		} else if ((cluster->ondisk->clu_flags & HAMMER_CLUF_OPEN) == 0) {
			cluster->io.validated = 1;
		}
	}
	cluster->io.loading = 0;
	hammer_unlock(&cluster->io.lock);
	return (error);
}
/*
 * NOTE: Called from RB_SCAN, must return >= 0 for scan to continue.
 */
int
hammer_unload_cluster(hammer_cluster_t cluster, void *data __unused)
{
	hammer_ref(&cluster->io.lock);
	RB_SCAN(hammer_buf_rb_tree, &cluster->rb_bufs_root, NULL,
		hammer_unload_buffer, NULL);
	hammer_io_waitdep(&cluster->io);
	KKASSERT(cluster->io.lock.refs == 1);
	hammer_rel_cluster(cluster, 2);
	return(0);
}
/*
 * Update the cluster's synchronization TID, which is used during cluster
 * recovery.  NOTE: The cluster header is not written out until all related
 * records have been written out.
 */
u_int64_t
hammer_alloc_recid(hammer_cluster_t cluster)
{
	u_int64_t recid;

	hammer_modify_cluster(cluster);
	recid = cluster->ondisk->synchronized_rec_id++;
	return(recid);
}

void
hammer_update_syncid(hammer_cluster_t cluster, hammer_tid_t tid)
{
	hammer_modify_cluster(cluster);
	if (cluster->ondisk->synchronized_tid < tid)
		cluster->ondisk->synchronized_tid = tid;
}
/*
 * Reference a cluster that is either already referenced or via a specially
 * handled pointer (aka rootcl).
 */
int
hammer_ref_cluster(hammer_cluster_t cluster)
{
	int error;

	KKASSERT(cluster != NULL);
	hammer_ref(&cluster->io.lock);

	/*
	 * Deal with on-disk info
	 */
	if (cluster->ondisk == NULL || cluster->io.validated == 0) {
		error = hammer_load_cluster(cluster, 0);
		if (error)
			hammer_rel_cluster(cluster, 1);
	} else {
		error = 0;
	}
	return(error);
}
/*
 * Release a cluster.  We have to deal with several places where
 * another thread can ref the cluster.
 *
 * Only destroy the structure itself if we no longer have an IO or any
 * hammer buffers associated with the structure.
 */
void
hammer_rel_cluster(hammer_cluster_t cluster, int flush)
{
	hammer_volume_t volume;

	/*
	 * Free a deleted cluster back to the pool when its last
	 * active reference is released.  This prevents the cluster
	 * from being reallocated until all its prior references go away.
	 *
	 * XXX implement a discard dependancy list which holds references
	 * on clusters, preventing their deletion, until their parent cluster
	 * has been flushed to disk.
	 */
	if (cluster->io.lock.refs == 1) {
		if (cluster->flags & HAMMER_CLUSTER_DELETED) {
			cluster->flags &= ~HAMMER_CLUSTER_DELETED;
			if (hammer_debug_general & 0x80)
				kprintf("FREE CLUSTER %d\n", cluster->clu_no);
			if (cluster->ondisk->stat_records) {
				struct hammer_sync_info info;

				info.error = 0;
				info.waitfor = MNT_WAIT;
				kprintf(" (still has %d records!)\n",
					cluster->ondisk->stat_records);
				Debugger("continue to recover cluster");
				hammer_recover(cluster);
				Debugger("continue to sync cluster");
				hammer_sync_cluster(cluster, &info);
				Debugger("now debug it");
			}

			/*
			 * Clean up any statistics we left hanging in the
			 * cluster.
			 */
			hammer_adjust_stats(cluster, HAMMER_FSBUF_BTREE,
					    -cluster->ondisk->stat_idx_bufs);
			hammer_adjust_stats(cluster, HAMMER_FSBUF_DATA,
					    -cluster->ondisk->stat_data_bufs);
			hammer_adjust_stats(cluster, HAMMER_FSBUF_RECORDS,
					    -cluster->ondisk->stat_rec_bufs);
			/*
			 * hammer_discard_cluster(cluster) - throw away
			 * dirty backing store, recurse to any underlying
			 * buffers.  XXX
			 */
			hammer_free_cluster(cluster);
		}
	}

	if (cluster->io.lock.refs == 1) {
		hammer_lock_ex(&cluster->io.lock);
		if (cluster->io.lock.refs == 1) {
			/*
			 * Release the I/O.  If we or the kernel wants to
			 * flush, this will release the bp.  Otherwise the
			 * bp may be written and flushed passively by the
			 * kernel later on.
			 */
			hammer_io_release(&cluster->io, flush);

			/*
			 * Final cleanup
			 */
			if (cluster != cluster->volume->hmp->rootcl &&
			    cluster->io.bp == NULL &&
			    cluster->io.lock.refs == 1 &&
			    RB_EMPTY(&cluster->rb_bufs_root)) {
				KKASSERT(RB_EMPTY(&cluster->rb_nods_root));
				volume = cluster->volume;
				RB_REMOVE(hammer_clu_rb_tree,
					  &volume->rb_clus_root, cluster);
				cluster->volume = NULL;	/* sanity */
				--hammer_count_clusters;
				kfree(cluster, M_HAMMER);
				hammer_rel_volume(volume, 0);
				return;
			}
		} else if (flush) {
			hammer_io_flush(&cluster->io);
		}
		hammer_unlock(&cluster->io.lock);
	}
	hammer_unref(&cluster->io.lock);
}
/************************************************************************
 *				BUFFERS					*
 ************************************************************************
 *
 * Manage buffers.  Note that a buffer holds a reference to its associated
 * cluster, and its cluster will hold a reference to the cluster's volume.
 *
 * A non-zero buf_type indicates that a new buffer should be created and
 * zeroed.
 */
hammer_buffer_t
hammer_get_buffer(hammer_cluster_t cluster, int32_t buf_no,
		  u_int64_t buf_type, int *errorp)
{
	hammer_buffer_t buffer;

	/*
	 * Find the buffer.  Note that buffer 0 corresponds to the cluster
	 * header and should never be requested.
	 */
	KKASSERT(buf_no >= cluster->ondisk->clu_start / HAMMER_BUFSIZE &&
		 buf_no < cluster->ondisk->clu_limit / HAMMER_BUFSIZE);

	/*
	 * Locate and lock the buffer structure, creating one if necessary.
	 */
again:
	buffer = RB_LOOKUP(hammer_buf_rb_tree, &cluster->rb_bufs_root, buf_no);
	if (buffer == NULL) {
		++hammer_count_buffers;
		buffer = kmalloc(sizeof(*buffer), M_HAMMER, M_WAITOK|M_ZERO);
		buffer->buf_no = buf_no;
		buffer->cluster = cluster;
		buffer->volume = cluster->volume;
		hammer_io_init(&buffer->io, HAMMER_STRUCTURE_BUFFER);
		buffer->io.offset = cluster->io.offset +
				    (buf_no * HAMMER_BUFSIZE);
		TAILQ_INIT(&buffer->clist);
		hammer_ref(&buffer->io.lock);

		/*
		 * Insert the buffer into the RB tree and handle late
		 * collisions.
		 */
		if (RB_INSERT(hammer_buf_rb_tree, &cluster->rb_bufs_root, buffer)) {
			hammer_unref(&buffer->io.lock);
			--hammer_count_buffers;
			kfree(buffer, M_HAMMER);
			goto again;
		}
		hammer_ref(&cluster->io.lock);
	} else {
		hammer_ref(&buffer->io.lock);
	}

	/*
	 * Deal with on-disk info
	 */
	if (buffer->ondisk == NULL || buf_type || buffer->io.loading) {
		*errorp = hammer_load_buffer(buffer, buf_type);
		if (*errorp) {
			hammer_rel_buffer(buffer, 1);
			buffer = NULL;
		}
	} else {
		*errorp = 0;
	}
	return(buffer);
}
static int
hammer_load_buffer(hammer_buffer_t buffer, u_int64_t buf_type)
{
	hammer_volume_t volume;
	hammer_fsbuf_ondisk_t ondisk;
	int error;

	/*
	 * Load the buffer's on-disk info
	 */
	volume = buffer->volume;
	hammer_lock_ex(&buffer->io.lock);
	KKASSERT(buffer->io.loading == 0);
	buffer->io.loading = 1;

	if (buffer->ondisk == NULL) {
		if (buf_type) {
			error = hammer_io_new(volume->devvp, &buffer->io);
		} else {
			error = hammer_io_read(volume->devvp, &buffer->io);
		}
		if (error) {
			buffer->io.loading = 0;
			hammer_unlock(&buffer->io.lock);
			return(error);
		}
		buffer->ondisk = ondisk = (void *)buffer->io.bp->b_data;
		buffer->alist.config = &Buf_alist_config;
		buffer->alist.meta = ondisk->head.buf_almeta;
		buffer->buf_type = ondisk->head.buf_type;
	} else if (buf_type) {
		error = hammer_io_new(volume->devvp, &buffer->io);
	} else {
		error = 0;
	}
	if (error == 0 && buf_type) {
		hammer_modify_buffer(buffer);
		ondisk = buffer->ondisk;
		hammer_initbuffer(&buffer->alist, &ondisk->head, buf_type);
		buffer->buf_type = ondisk->head.buf_type;
	}
	buffer->io.loading = 0;
	hammer_unlock(&buffer->io.lock);
	return (error);
}
/*
 * NOTE: Called from RB_SCAN, must return >= 0 for scan to continue.
 */
int
hammer_unload_buffer(hammer_buffer_t buffer, void *data __unused)
{
	hammer_ref(&buffer->io.lock);
	hammer_flush_buffer_nodes(buffer);
	KKASSERT(buffer->io.lock.refs == 1);
	hammer_rel_buffer(buffer, 2);
	return(0);
}
/*
 * Reference a buffer that is either already referenced or via a specially
 * handled pointer (aka cursor->buffer).
 */
int
hammer_ref_buffer(hammer_buffer_t buffer)
{
	int error;

	hammer_ref(&buffer->io.lock);
	if (buffer->ondisk == NULL || buffer->io.loading) {
		error = hammer_load_buffer(buffer, 0);
		if (error)
			hammer_rel_buffer(buffer, 1);
		/*
		 * NOTE: buffer pointer can become stale after
		 * the above release.
		 */
	} else {
		KKASSERT(buffer->buf_type ==
			 buffer->ondisk->head.buf_type);
		error = 0;
	}
	return(error);
}
/*
 * Release a buffer.  We have to deal with several places where
 * another thread can ref the buffer.
 *
 * Only destroy the structure itself if the related buffer cache buffer
 * was disassociated from it.  This ties the management of the structure
 * to the buffer cache subsystem.  buffer->ondisk determines whether the
 * embedded io is referenced or not.
 */
void
hammer_rel_buffer(hammer_buffer_t buffer, int flush)
{
	hammer_cluster_t cluster;

	if (buffer->io.lock.refs == 1) {
		hammer_lock_ex(&buffer->io.lock);
		if (buffer->io.lock.refs == 1) {
			hammer_io_release(&buffer->io, flush);

			if (buffer->io.bp == NULL &&
			    buffer->io.lock.refs == 1) {
				hammer_flush_buffer_nodes(buffer);
				KKASSERT(TAILQ_EMPTY(&buffer->clist));
				cluster = buffer->cluster;
				RB_REMOVE(hammer_buf_rb_tree,
					  &cluster->rb_bufs_root, buffer);
				buffer->cluster = NULL; /* sanity */
				--hammer_count_buffers;
				kfree(buffer, M_HAMMER);
				hammer_rel_cluster(cluster, 0);
				return;
			}
		} else if (flush) {
			hammer_io_flush(&buffer->io);
		}
		hammer_unlock(&buffer->io.lock);
	}
	hammer_unref(&buffer->io.lock);
}
/************************************************************************
 *				NODES					*
 ************************************************************************
 *
 * Manage B-Tree nodes.  B-Tree nodes represent the primary indexing
 * method used by the HAMMER filesystem.
 *
 * Unlike other HAMMER structures, a hammer_node can be PASSIVELY
 * associated with its buffer, and will only reference the buffer while
 * the node itself is referenced.
 *
 * A hammer_node can also be passively associated with other HAMMER
 * structures, such as inodes, while retaining 0 references.  These
 * associations can be cleared backwards using a pointer-to-pointer in
 * the hammer_node.
 *
 * This allows the HAMMER implementation to cache hammer_nodes long-term
 * and short-cut a great deal of the infrastructure's complexity.  In
 * most cases a cached node can be reacquired without having to dip into
 * either the buffer or cluster management code.
 *
 * The caller must pass a referenced cluster on call and will retain
 * ownership of the reference on return.  The node will acquire its own
 * additional references, if necessary.
 */
hammer_node_t
hammer_get_node(hammer_cluster_t cluster, int32_t node_offset, int *errorp)
{
	hammer_node_t node;

	/*
	 * Locate the structure, allocating one if necessary.
	 */
again:
	node = RB_LOOKUP(hammer_nod_rb_tree, &cluster->rb_nods_root,
			 node_offset);
	if (node == NULL) {
		++hammer_count_nodes;
		node = kmalloc(sizeof(*node), M_HAMMER, M_WAITOK|M_ZERO);
		node->node_offset = node_offset;
		node->cluster = cluster;
		if (RB_INSERT(hammer_nod_rb_tree, &cluster->rb_nods_root,
			      node)) {
			--hammer_count_nodes;
			kfree(node, M_HAMMER);
			goto again;
		}
	}
	hammer_ref(&node->lock);
	*errorp = hammer_load_node(node);
	if (*errorp) {
		hammer_rel_node(node);
		node = NULL;
	}
	return(node);
}
/*
 * Reference an already-referenced node.
 */
int
hammer_ref_node(hammer_node_t node)
{
	int error;

	KKASSERT(node->lock.refs > 0);
	hammer_ref(&node->lock);
	if ((error = hammer_load_node(node)) != 0)
		hammer_rel_node(node);
	return(error);
}
/*
 * Load a node's on-disk data reference.
 */
static int
hammer_load_node(hammer_node_t node)
{
	hammer_buffer_t buffer;
	int32_t buf_no;
	int error;

	error = 0;
	hammer_lock_ex(&node->lock);
	if (node->ondisk == NULL) {
		/*
		 * This is a little confusing but the gist is that
		 * node->buffer determines whether the node is on
		 * the buffer's clist and node->ondisk determines
		 * whether the buffer is referenced.
		 */
		if ((buffer = node->buffer) != NULL) {
			error = hammer_ref_buffer(buffer);
		} else {
			buf_no = node->node_offset / HAMMER_BUFSIZE;
			buffer = hammer_get_buffer(node->cluster,
						   buf_no, 0, &error);
			if (buffer) {
				KKASSERT(error == 0);
				TAILQ_INSERT_TAIL(&buffer->clist,
						  node, entry);
				node->buffer = buffer;
			}
		}
		if (error == 0) {
			node->ondisk = (void *)((char *)buffer->ondisk +
			       (node->node_offset & HAMMER_BUFMASK));
		}
	}
	hammer_unlock(&node->lock);
	return (error);
}
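
/*
 * Worked example (added for clarity, not in the original file): with
 * HAMMER_BUFSIZE = 16384, a node_offset of 49312 selects buffer
 * 49312 / 16384 = 3 within the cluster, and the in-buffer byte offset is
 * 49312 & HAMMER_BUFMASK = 160, so the node's ondisk pointer resolves to
 * buffer->ondisk + 160.
 */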
/*
 * Safely reference a node, interlock against flushes via the IO subsystem.
 */
hammer_node_t
hammer_ref_node_safe(struct hammer_mount *hmp, struct hammer_node **cache,
		     int *errorp)
{
	hammer_node_t node;

	if ((node = *cache) != NULL)
		hammer_ref(&node->lock);
	if (node) {
		*errorp = hammer_load_node(node);
		if (*errorp) {
			hammer_rel_node(node);
			node = NULL;
		}
	} else {
		*errorp = ENOENT;
	}
	return(node);
}
/*
 * Release a hammer_node.  On the last release the node dereferences
 * its underlying buffer and may or may not be destroyed.
 */
void
hammer_rel_node(hammer_node_t node)
{
	hammer_cluster_t cluster;
	hammer_buffer_t buffer;
	int32_t node_offset;
	int flags;

	/*
	 * If this isn't the last ref just decrement the ref count and
	 * return.
	 */
	if (node->lock.refs > 1) {
		hammer_unref(&node->lock);
		return;
	}

	/*
	 * If there is no ondisk info or no buffer the node failed to load,
	 * remove the last reference and destroy the node.
	 */
	if (node->ondisk == NULL) {
		hammer_unref(&node->lock);
		hammer_flush_node(node);
		/* node is stale now */
		return;
	}

	/*
	 * Do final cleanups and then either destroy the node or leave it
	 * passively cached.  The buffer reference is removed regardless.
	 */
	buffer = node->buffer;
	node->ondisk = NULL;

	if ((node->flags & (HAMMER_NODE_DELETED|HAMMER_NODE_FLUSH)) == 0) {
		hammer_unref(&node->lock);
		hammer_rel_buffer(buffer, 0);
		return;
	}

	/*
	 * Destroy the node.  Record pertinent data because the node
	 * becomes stale the instant we flush it.
	 */
	flags = node->flags;
	node_offset = node->node_offset;
	hammer_unref(&node->lock);
	hammer_flush_node(node);
	/* node is stale */

	cluster = buffer->cluster;
	if (flags & HAMMER_NODE_DELETED) {
		if (node_offset == cluster->ondisk->clu_btree_root)
			KKASSERT(cluster->flags & HAMMER_CLUSTER_DELETED);
		hammer_free_btree(cluster, node_offset);
	}
	hammer_rel_buffer(buffer, 0);
}
/*
 * Passively cache a referenced hammer_node in *cache.  The caller may
 * release the node on return.
 */
void
hammer_cache_node(hammer_node_t node, struct hammer_node **cache)
{
	hammer_node_t old;

	/*
	 * If the node is being deleted, don't cache it!
	 */
	if (node->flags & HAMMER_NODE_DELETED)
		return;

	/*
	 * Cache the node.  If we previously cached a different node we
	 * have to give HAMMER a chance to destroy it.
	 */
	if (node->cache1 != cache) {
		if (node->cache2 != cache) {
			if ((old = *cache) != NULL) {
				KKASSERT(node->lock.refs != 0);
				hammer_uncache_node(cache);
			}
			if (node->cache2)
				*node->cache2 = NULL;
			node->cache2 = node->cache1;
			node->cache1 = cache;
			*cache = node;
		} else {
			struct hammer_node **tmp;

			tmp = node->cache1;
			node->cache1 = node->cache2;
			node->cache2 = tmp;
		}
	}
}
void
hammer_uncache_node(struct hammer_node **cache)
{
	hammer_node_t node;

	if ((node = *cache) != NULL) {
		*cache = NULL;
		if (node->cache1 == cache) {
			node->cache1 = node->cache2;
			node->cache2 = NULL;
		} else if (node->cache2 == cache) {
			node->cache2 = NULL;
		} else {
			panic("hammer_uncache_node: missing cache linkage");
		}
		if (node->cache1 == NULL && node->cache2 == NULL)
			hammer_flush_node(node);
	}
}
/*
 * Remove a node's cache references and destroy the node if it has no
 * other references or backing store.
 */
void
hammer_flush_node(hammer_node_t node)
{
	hammer_buffer_t buffer;

	if (node->cache1)
		*node->cache1 = NULL;
	if (node->cache2)
		*node->cache2 = NULL;
	if (node->lock.refs == 0 && node->ondisk == NULL) {
		RB_REMOVE(hammer_nod_rb_tree, &node->cluster->rb_nods_root,
			  node);
		if ((buffer = node->buffer) != NULL) {
			node->buffer = NULL;
			TAILQ_REMOVE(&buffer->clist, node, entry);
			/* buffer is unreferenced because ondisk is NULL */
		}
		--hammer_count_nodes;
		kfree(node, M_HAMMER);
	}
}
/*
 * Flush passively cached B-Tree nodes associated with this buffer.
 * This is only called when the buffer is about to be destroyed, so
 * none of the nodes should have any references.
 */
void
hammer_flush_buffer_nodes(hammer_buffer_t buffer)
{
	hammer_node_t node;

	while ((node = TAILQ_FIRST(&buffer->clist)) != NULL) {
		KKASSERT(node->lock.refs == 0 && node->ondisk == NULL);
		hammer_ref(&node->lock);
		node->flags |= HAMMER_NODE_FLUSH;
		hammer_rel_node(node);
	}
}
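
/*
 * Illustrative sketch (added, not part of the original file): how a
 * consumer uses a passive node cache slot.  The struct and function
 * names here are hypothetical.
 */
#if 0
struct example_consumer {
	struct hammer_node *cache;	/* passive cache slot, may be NULL */
};

static void
example_remember(struct example_consumer *ec, hammer_node_t node)
{
	/* records the association without adding a node reference */
	hammer_cache_node(node, &ec->cache);
}

static void
example_forget(struct example_consumer *ec)
{
	/* clears the slot and may destroy an otherwise idle node */
	hammer_uncache_node(&ec->cache);
}
#endif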
/************************************************************************
 *			A-LIST ALLOCATORS				*
 ************************************************************************/

/*
 * Allocate HAMMER clusters
 */
hammer_cluster_t
hammer_alloc_cluster(hammer_mount_t hmp, hammer_cluster_t cluster_hint,
		     int *errorp)
{
	hammer_volume_t volume;
	hammer_cluster_t cluster;
	int32_t clu_no;
	int32_t clu_hint;
	int32_t vol_beg;
	int32_t vol_no;

	/*
	 * Figure out our starting volume and hint.
	 */
	if (cluster_hint) {
		vol_beg = cluster_hint->volume->vol_no;
		clu_hint = cluster_hint->clu_no;
	} else {
		vol_beg = hmp->volume_iterator;
		clu_hint = -1;
	}

	/*
	 * Loop through volumes looking for a free cluster.  If allocating
	 * a new cluster relative to an existing cluster try to find a free
	 * cluster on either side (clu_hint >= 0), otherwise just do a
	 * forwards iteration.
	 */
	vol_no = vol_beg;
	do {
		volume = hammer_get_volume(hmp, vol_no, errorp);
		if (*errorp) {
			clu_no = HAMMER_ALIST_BLOCK_NONE;
			break;
		}
		hammer_modify_volume(volume);
		if (clu_hint == -1) {
			clu_hint = volume->clu_iterator;
			clu_no = hammer_alist_alloc_fwd(&volume->alist, 1,
							clu_hint);
			if (clu_no == HAMMER_ALIST_BLOCK_NONE) {
				clu_no = hammer_alist_alloc_fwd(&volume->alist,
								1, 0);
			}
		} else {
			clu_no = hammer_alist_alloc_fwd(&volume->alist, 1,
							clu_hint);
			if (clu_no == HAMMER_ALIST_BLOCK_NONE) {
				clu_no = hammer_alist_alloc_rev(&volume->alist,
								1, clu_hint);
			}
		}
		if (clu_no != HAMMER_ALIST_BLOCK_NONE)
			break;
		hammer_rel_volume(volume, 0);
		volume = NULL;
		*errorp = ENOSPC;
		vol_no = (vol_no + 1) % hmp->nvolumes;
		clu_hint = -1;
	} while (vol_no != vol_beg);

	/*
	 * Acquire the cluster.  On success this will force *errorp to 0.
	 */
	if (clu_no != HAMMER_ALIST_BLOCK_NONE) {
		if (hammer_debug_general & 0x40) {
			kprintf("ALLOC CLUSTER %d:%d\n",
				volume->vol_no, clu_no);
		}
		cluster = hammer_get_cluster(volume, clu_no, errorp,
					     GET_CLUSTER_NEW);
		volume->clu_iterator = clu_no;
		hammer_rel_volume(volume, 0);
	} else {
		cluster = NULL;
	}
	if (cluster)
		hammer_lock_ex(&cluster->io.lock);
	return(cluster);
}
void
hammer_init_cluster(hammer_cluster_t cluster, hammer_base_elm_t left_bound,
		    hammer_base_elm_t right_bound)
{
	hammer_cluster_ondisk_t ondisk = cluster->ondisk;

	hammer_modify_cluster(cluster);
	ondisk->clu_btree_beg = *left_bound;
	ondisk->clu_btree_end = *right_bound;
	cluster->clu_btree_beg = ondisk->clu_btree_beg;
	cluster->clu_btree_end = ondisk->clu_btree_end;
}
/*
 * Deallocate a cluster
 */
void
hammer_free_cluster(hammer_cluster_t cluster)
{
	hammer_modify_volume(cluster->volume);
	hammer_alist_free(&cluster->volume->alist, cluster->clu_no, 1);
}
/*
 * Allocate HAMMER elements - btree nodes, data storage, and record elements
 *
 * The passed *bufferp should be initialized to NULL.  On successive calls
 * *bufferp caches the most recent buffer used until put away by the caller.
 * Note that previously returned pointers using the cached buffer become
 * invalid on successive calls which reuse *bufferp.
 *
 * All allocations first attempt to use the block found at the specified
 * iterator.  If that fails the first available block is used.  If that
 * fails a new buffer is allocated and associated with the buffer type
 * A-list and the element is allocated out of the new buffer.
 *
 * This function also ensures that the required minimum number of buffers is
 * reserved to guarantee that recovery operations succeed.
 */
hammer_node_t
hammer_alloc_btree(hammer_cluster_t cluster, int *errorp)
{
	hammer_buffer_t buffer;
	hammer_alist_t live;
	hammer_node_t node;
	int32_t elm_no;
	int32_t buf_no;
	int32_t node_offset;
	int32_t n;

	hammer_modify_cluster(cluster);
	buffer = NULL;
	live = &cluster->alist_btree;

	/*
	 * If we aren't recovering then ensure the required minimum
	 * reservation is met. XXX if the recovery code packs the B-Tree
	 * we don't have to do this.
	 *
	 * Calculate the number of buffers needed to hold the B-Tree.
	 */
	if (cluster->io.validated) {
		n = (cluster->ondisk->stat_records * 3 /
		     HAMMER_BTREE_INT_ELMS / HAMMER_BTREE_NODES) + 1;
		if (hammer_debug_general &&
		    cluster->ondisk->stat_idx_bufs < n) {
			kprintf("hammer_alloc_btree: %d/%d buffers\n",
				cluster->ondisk->stat_idx_bufs, n);
		}
		while (cluster->ondisk->stat_idx_bufs < n) {
			alloc_new_buffer(cluster, HAMMER_FSBUF_BTREE, live,
					 cluster->ondisk->idx_index, errorp,
					 &buffer);
			if (*errorp) {
				if (buffer)
					hammer_rel_buffer(buffer, 0);
				return(NULL);
			}
		}
	}

	/*
	 * Allocate a B-Tree element
	 */
	elm_no = hammer_alist_alloc_fwd(live, 1, cluster->ondisk->idx_index);
	if (elm_no == HAMMER_ALIST_BLOCK_NONE)
		elm_no = hammer_alist_alloc_fwd(live, 1, 0);
	if (elm_no == HAMMER_ALIST_BLOCK_NONE) {
		alloc_new_buffer(cluster, HAMMER_FSBUF_BTREE, live,
				 cluster->ondisk->idx_index, errorp, &buffer);
		elm_no = hammer_alist_alloc(live, 1);
		if (elm_no == HAMMER_ALIST_BLOCK_NONE) {
			*errorp = ENOSPC;
			if (buffer)
				hammer_rel_buffer(buffer, 0);
			return(NULL);
		}
	}
	cluster->ondisk->idx_index = elm_no;
	KKASSERT((elm_no & HAMMER_FSBUF_BLKMASK) < HAMMER_BTREE_NODES);

	/*
	 * Load and return the B-Tree element
	 */
	buf_no = elm_no / HAMMER_FSBUF_MAXBLKS;
	node_offset = buf_no * HAMMER_BUFSIZE +
		      offsetof(union hammer_fsbuf_ondisk,
			       btree.nodes[elm_no & HAMMER_FSBUF_BLKMASK]);
	node = hammer_get_node(cluster, node_offset, errorp);
	if (*errorp == 0) {
		hammer_modify_node(node);
		bzero(node->ondisk, sizeof(*node->ondisk));
		KKASSERT((node->flags & (HAMMER_NODE_DELETED)) == 0);
	} else {
		hammer_alist_free(live, elm_no, 1);
		if (node)
			hammer_rel_node(node);
		node = NULL;
	}
	if (buffer)
		hammer_rel_buffer(buffer, 0);
	return(node);
}
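
/*
 * Illustrative sketch (added, not part of the original file): the typical
 * allocate/initialize/release flow for a fresh B-Tree node.  Error
 * handling is abbreviated.
 */
#if 0
static void
example_new_leaf(hammer_cluster_t cluster)
{
	hammer_node_t node;
	int error;

	node = hammer_alloc_btree(cluster, &error);
	if (node) {
		hammer_modify_node(node);
		node->ondisk->type = HAMMER_BTREE_TYPE_LEAF;
		hammer_rel_node(node);
	}
}
#endif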
void *
hammer_alloc_data(hammer_cluster_t cluster, int32_t bytes,
		  int *errorp, struct hammer_buffer **bufferp)
{
	hammer_buffer_t buffer;
	hammer_alist_t live;
	int32_t elm_no;
	int32_t buf_no;
	int32_t nblks;
	void *item;

	/*
	 * Deal with large data blocks.  The blocksize is HAMMER_BUFSIZE
	 * for these allocations.
	 */
	hammer_modify_cluster(cluster);
	if ((bytes & HAMMER_BUFMASK) == 0) {
		nblks = bytes / HAMMER_BUFSIZE;
		/* only one block allowed for now (so buffer can hold it) */
		KKASSERT(nblks == 1);

		buf_no = hammer_alloc_master(cluster, nblks,
					     cluster->ondisk->idx_ldata, 1);
		if (buf_no == HAMMER_ALIST_BLOCK_NONE) {
			*errorp = ENOSPC;
			return(NULL);
		}
		hammer_adjust_stats(cluster, HAMMER_FSBUF_DATA, nblks);
		cluster->ondisk->idx_ldata = buf_no;
		buffer = *bufferp;
		*bufferp = hammer_get_buffer(cluster, buf_no, -1, errorp);
		if (buffer)
			hammer_rel_buffer(buffer, 0);
		buffer = *bufferp;
		return(buffer->ondisk);
	}

	/*
	 * Allocate a data element.  The block size is HAMMER_DATA_BLKSIZE
	 * (64 bytes) for these allocations.
	 */
	nblks = (bytes + HAMMER_DATA_BLKMASK) & ~HAMMER_DATA_BLKMASK;
	nblks /= HAMMER_DATA_BLKSIZE;
	live = &cluster->alist_mdata;
	elm_no = hammer_alist_alloc_fwd(live, nblks, cluster->ondisk->idx_data);
	if (elm_no == HAMMER_ALIST_BLOCK_NONE)
		elm_no = hammer_alist_alloc_fwd(live, nblks, 0);
	if (elm_no == HAMMER_ALIST_BLOCK_NONE) {
		alloc_new_buffer(cluster, HAMMER_FSBUF_DATA, live,
				 cluster->ondisk->idx_data, errorp, bufferp);
		elm_no = hammer_alist_alloc(live, nblks);
		if (elm_no == HAMMER_ALIST_BLOCK_NONE) {
			*errorp = ENOSPC;
			return(NULL);
		}
	}
	cluster->ondisk->idx_data = elm_no;

	/*
	 * Load and return the data element
	 */
	buf_no = elm_no / HAMMER_FSBUF_MAXBLKS;
	buffer = *bufferp;
	if (buffer == NULL || buffer->cluster != cluster ||
	    buffer->buf_no != buf_no) {
		if (buffer)
			hammer_rel_buffer(buffer, 0);
		buffer = hammer_get_buffer(cluster, buf_no, 0, errorp);
		*bufferp = buffer;
	}
	KKASSERT(buffer->ondisk->head.buf_type == HAMMER_FSBUF_DATA);
	KKASSERT((elm_no & HAMMER_FSBUF_BLKMASK) < HAMMER_DATA_NODES);
	hammer_modify_buffer(buffer);
	item = &buffer->ondisk->data.data[elm_no & HAMMER_FSBUF_BLKMASK];
	bzero(item, nblks * HAMMER_DATA_BLKSIZE);
	*errorp = 0;
	return(item);
}
void *
hammer_alloc_record(hammer_cluster_t cluster, int *errorp,
		    u_int8_t rec_type, struct hammer_buffer **bufferp)
{
	hammer_buffer_t buffer;
	hammer_alist_t live;
	int32_t elm_no;
	int32_t buf_no;
	void *item;

	/*
	 * Allocate a record element
	 */
	hammer_modify_cluster(cluster);
	live = &cluster->alist_record;
	elm_no = hammer_alist_alloc_rev(live, 1, cluster->ondisk->idx_record);
	if (elm_no == HAMMER_ALIST_BLOCK_NONE)
		elm_no = hammer_alist_alloc_rev(live, 1, HAMMER_ALIST_BLOCK_MAX);
	if (elm_no == HAMMER_ALIST_BLOCK_NONE) {
		alloc_new_buffer(cluster, HAMMER_FSBUF_RECORDS, live,
				 cluster->ondisk->idx_record, errorp, bufferp);
		elm_no = hammer_alist_alloc_rev(live, 1, HAMMER_ALIST_BLOCK_MAX);
		if (elm_no == HAMMER_ALIST_BLOCK_NONE) {
			*errorp = ENOSPC;
			return(NULL);
		}
	}
	cluster->ondisk->idx_record = elm_no;

	/*
	 * Load and return the record element
	 */
	buf_no = elm_no / HAMMER_FSBUF_MAXBLKS;
	buffer = *bufferp;
	if (buffer == NULL || buffer->cluster != cluster ||
	    buffer->buf_no != buf_no) {
		if (buffer)
			hammer_rel_buffer(buffer, 0);
		buffer = hammer_get_buffer(cluster, buf_no, 0, errorp);
		*bufferp = buffer;
	}
	KKASSERT(buffer->ondisk->head.buf_type == HAMMER_FSBUF_RECORDS);
	KASSERT((elm_no & HAMMER_FSBUF_BLKMASK) < HAMMER_RECORD_NODES,
		("elm_no %d (%d) out of bounds", elm_no,
		 elm_no & HAMMER_FSBUF_BLKMASK));
	hammer_modify_buffer(buffer);
	item = &buffer->ondisk->record.recs[elm_no & HAMMER_FSBUF_BLKMASK];
	bzero(item, sizeof(union hammer_record_ondisk));

	++cluster->ondisk->stat_records;
	if (rec_type == HAMMER_RECTYPE_CLUSTER)
		++cluster->ondisk->stat_records;
	*errorp = 0;
	return(item);
}
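
/*
 * Design note (added for clarity, not in the original file): a-list
 * element numbers encode (buffer, slot) pairs.  elm_no /
 * HAMMER_FSBUF_MAXBLKS recovers the buffer number within the cluster and
 * elm_no & HAMMER_FSBUF_BLKMASK the slot within that buffer.  Records
 * allocate in reverse (alloc_rev from idx_record downward) while B-Tree
 * nodes and data allocate forward, so the two element classes grow
 * toward each other and new record buffers come from the top of the
 * master a-list.
 */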
void
hammer_free_data_ptr(hammer_buffer_t buffer, void *data, int bytes)
{
	int32_t elm_no;
	int32_t nblks;
	hammer_alist_t live;

	hammer_modify_cluster(buffer->cluster);
	if ((bytes & HAMMER_BUFMASK) == 0) {
		nblks = bytes / HAMMER_BUFSIZE;
		KKASSERT(nblks == 1 && data == (void *)buffer->ondisk);
		hammer_alist_free(&buffer->cluster->alist_master,
				  buffer->buf_no, nblks);
		hammer_adjust_stats(buffer->cluster, HAMMER_FSBUF_DATA, -nblks);
		return;
	}

	elm_no = ((char *)data - (char *)buffer->ondisk->data.data) /
		 HAMMER_DATA_BLKSIZE;
	KKASSERT(elm_no >= 0 && elm_no < HAMMER_DATA_NODES);
	elm_no += buffer->buf_no * HAMMER_FSBUF_MAXBLKS;
	nblks = (bytes + HAMMER_DATA_BLKMASK) & ~HAMMER_DATA_BLKMASK;
	nblks /= HAMMER_DATA_BLKSIZE;
	live = &buffer->cluster->alist_mdata;
	hammer_alist_free(live, elm_no, nblks);
}
void
hammer_free_record_ptr(hammer_buffer_t buffer, union hammer_record_ondisk *rec,
		       u_int8_t rec_type)
{
	int32_t elm_no;
	hammer_alist_t live;

	hammer_modify_cluster(buffer->cluster);
	elm_no = rec - &buffer->ondisk->record.recs[0];
	KKASSERT(elm_no >= 0 && elm_no < HAMMER_RECORD_NODES);
	elm_no += buffer->buf_no * HAMMER_FSBUF_MAXBLKS;
	live = &buffer->cluster->alist_record;
	hammer_alist_free(live, elm_no, 1);
	--buffer->cluster->ondisk->stat_records;
	if (rec_type == HAMMER_RECTYPE_CLUSTER)
		--buffer->cluster->ondisk->stat_records;
}
void
hammer_free_btree(hammer_cluster_t cluster, int32_t bclu_offset)
{
	const int32_t blksize = sizeof(struct hammer_node_ondisk);
	int32_t fsbuf_offset = bclu_offset & HAMMER_BUFMASK;
	hammer_alist_t live;
	int32_t elm_no;

	hammer_modify_cluster(cluster);
	elm_no = bclu_offset / HAMMER_BUFSIZE * HAMMER_FSBUF_MAXBLKS;
	fsbuf_offset -= offsetof(union hammer_fsbuf_ondisk, btree.nodes[0]);
	live = &cluster->alist_btree;
	KKASSERT(fsbuf_offset >= 0 && fsbuf_offset % blksize == 0);
	elm_no += fsbuf_offset / blksize;
	hammer_alist_free(live, elm_no, 1);
}
void
hammer_free_data(hammer_cluster_t cluster, int32_t bclu_offset, int32_t bytes)
{
	const int32_t blksize = HAMMER_DATA_BLKSIZE;
	int32_t fsbuf_offset = bclu_offset & HAMMER_BUFMASK;
	hammer_alist_t live;
	int32_t elm_no;
	int32_t buf_no;
	int32_t nblks;

	hammer_modify_cluster(cluster);
	if ((bytes & HAMMER_BUFMASK) == 0) {
		nblks = bytes / HAMMER_BUFSIZE;
		KKASSERT(nblks == 1 && (bclu_offset & HAMMER_BUFMASK) == 0);
		buf_no = bclu_offset / HAMMER_BUFSIZE;
		hammer_alist_free(&cluster->alist_master, buf_no, nblks);
		hammer_adjust_stats(cluster, HAMMER_FSBUF_DATA, -nblks);
		return;
	}

	elm_no = bclu_offset / HAMMER_BUFSIZE * HAMMER_FSBUF_MAXBLKS;
	fsbuf_offset -= offsetof(union hammer_fsbuf_ondisk, data.data[0][0]);
	live = &cluster->alist_mdata;
	nblks = (bytes + HAMMER_DATA_BLKMASK) & ~HAMMER_DATA_BLKMASK;
	nblks /= HAMMER_DATA_BLKSIZE;
	KKASSERT(fsbuf_offset >= 0 && fsbuf_offset % blksize == 0);
	elm_no += fsbuf_offset / blksize;
	hammer_alist_free(live, elm_no, nblks);
}
void
hammer_free_record(hammer_cluster_t cluster, int32_t bclu_offset,
		   u_int8_t rec_type)
{
	const int32_t blksize = sizeof(union hammer_record_ondisk);
	int32_t fsbuf_offset = bclu_offset & HAMMER_BUFMASK;
	hammer_alist_t live;
	int32_t elm_no;

	hammer_modify_cluster(cluster);
	elm_no = bclu_offset / HAMMER_BUFSIZE * HAMMER_FSBUF_MAXBLKS;
	fsbuf_offset -= offsetof(union hammer_fsbuf_ondisk, record.recs[0]);
	live = &cluster->alist_record;
	KKASSERT(fsbuf_offset >= 0 && fsbuf_offset % blksize == 0);
	elm_no += fsbuf_offset / blksize;
	hammer_alist_free(live, elm_no, 1);
	--cluster->ondisk->stat_records;
	if (rec_type == HAMMER_RECTYPE_CLUSTER)
		--cluster->ondisk->stat_records;
}
/*
 * Allocate a new filesystem buffer and assign it to the specified
 * filesystem buffer type.  The new buffer will be added to the
 * type-specific A-list and initialized.
 */
static void
alloc_new_buffer(hammer_cluster_t cluster, u_int64_t type, hammer_alist_t live,
		 int32_t start, int *errorp, struct hammer_buffer **bufferp)
{
	hammer_buffer_t buffer;
	int32_t buf_no;
	int32_t base_blk;
	int isfwd;

	if (*bufferp)
		hammer_rel_buffer(*bufferp, 0);
	*bufferp = NULL;

	start = start / HAMMER_FSBUF_MAXBLKS;	/* convert to buf_no */
	isfwd = (type != HAMMER_FSBUF_RECORDS);
	buf_no = hammer_alloc_master(cluster, 1, start, isfwd);
	if (buf_no == HAMMER_ALIST_BLOCK_NONE) {
		*errorp = ENOSPC;
		return;
	}

	/*
	 * The new buffer must be initialized (type != 0) regardless of
	 * whether we already have it cached or not, so don't try to
	 * optimize the cached buffer check.  Just call hammer_get_buffer().
	 */
	buffer = hammer_get_buffer(cluster, buf_no, type, errorp);
	*bufferp = buffer;

	/*
	 * Do a meta-free of the buffer's elements into the type-specific
	 * A-list and update our statistics to reflect the allocation.
	 */
	if (buffer) {
		hammer_modify_buffer(buffer);  /*XXX*/
		hammer_adjust_stats(cluster, type, 1);

		/*
		 * Free the buffer to the appropriate slave list so the
		 * cluster-based allocator sees it.
		 */
		base_blk = buf_no * HAMMER_FSBUF_MAXBLKS;

		switch(type) {
		case HAMMER_FSBUF_BTREE:
			hammer_alist_free(live, base_blk, HAMMER_BTREE_NODES);
			break;
		case HAMMER_FSBUF_DATA:
			hammer_alist_free(live, base_blk, HAMMER_DATA_NODES);
			break;
		case HAMMER_FSBUF_RECORDS:
			hammer_alist_free(live, base_blk, HAMMER_RECORD_NODES);
			break;
		}
	}
}
/*
 * Sync dirty buffers to the media
 */

static int hammer_sync_scan1(struct mount *mp, struct vnode *vp, void *data);
static int hammer_sync_scan2(struct mount *mp, struct vnode *vp, void *data);
int
hammer_sync_hmp(hammer_mount_t hmp, int waitfor)
{
	struct hammer_sync_info info;

	info.error = 0;
	info.waitfor = waitfor;

	vmntvnodescan(hmp->mp, VMSC_GETVP|VMSC_NOWAIT,
		      hammer_sync_scan1, hammer_sync_scan2, &info);

	RB_SCAN(hammer_vol_rb_tree, &hmp->rb_vols_root, NULL,
		hammer_sync_volume, &info);
	return(info.error);
}
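
/*
 * Illustrative sketch (added, not part of the original file): a
 * synchronous flush cascades from the mount through volumes, clusters,
 * and buffers via the scans above.
 */
#if 0
static void
example_sync(hammer_mount_t hmp)
{
	/* MNT_WAIT requests a synchronous flush of all dirty state */
	hammer_sync_hmp(hmp, MNT_WAIT);
}
#endif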
static int
hammer_sync_scan1(struct mount *mp, struct vnode *vp, void *data)
{
	struct hammer_inode *ip;

	ip = VTOI(vp);
	if (vp->v_type == VNON || ip == NULL ||
	    ((ip->flags & HAMMER_INODE_MODMASK) == 0 &&
	     RB_EMPTY(&vp->v_rbdirty_tree))) {
		return(-1);
	}
	return(0);
}
static int
hammer_sync_scan2(struct mount *mp, struct vnode *vp, void *data)
{
	struct hammer_sync_info *info = data;
	struct hammer_inode *ip;
	int error;

	ip = VTOI(vp);
	if (vp->v_type == VNON || vp->v_type == VBAD ||
	    ((ip->flags & HAMMER_INODE_MODMASK) == 0 &&
	     RB_EMPTY(&vp->v_rbdirty_tree))) {
		return(0);
	}
	error = VOP_FSYNC(vp, info->waitfor);
	if (error)
		info->error = error;
	return(0);
}
int
hammer_sync_volume(hammer_volume_t volume, void *data)
{
	struct hammer_sync_info *info = data;

	hammer_ref(&volume->io.lock);
	RB_SCAN(hammer_clu_rb_tree, &volume->rb_clus_root, NULL,
		hammer_sync_cluster, info);
	hammer_rel_volume(volume, 1);
	return(0);
}
int
hammer_sync_cluster(hammer_cluster_t cluster, void *data)
{
	struct hammer_sync_info *info = data;

	/*
	 * XXX check if cluster deleted and don't bother to sync it?
	 */
	hammer_ref(&cluster->io.lock);
	RB_SCAN(hammer_buf_rb_tree, &cluster->rb_bufs_root, NULL,
		hammer_sync_buffer, info);
	/*hammer_io_waitdep(&cluster->io);*/
	hammer_rel_cluster(cluster, 1);
	return(0);
}
int
hammer_sync_buffer(hammer_buffer_t buffer, void *data __unused)
{
	hammer_ref(&buffer->io.lock);
	hammer_rel_buffer(buffer, 1);
	return(0);
}
/*
 * Generic buffer initialization.  Initialize the A-list into an all-allocated
 * state with the free block limit properly set.
 *
 * Note that alloc_new_buffer() will free the appropriate block range via
 * the appropriate cluster alist, so the free count is properly propagated.
 */
2274 hammer_initbuffer(hammer_alist_t live, hammer_fsbuf_head_t head, u_int64_t type)
2276 head->buf_type = type;
2279 case HAMMER_FSBUF_BTREE:
2280 hammer_alist_init(live, 0, HAMMER_BTREE_NODES,
2281 HAMMER_ASTATE_ALLOC);
2283 case HAMMER_FSBUF_DATA:
2284 hammer_alist_init(live, 0, HAMMER_DATA_NODES,
2285 HAMMER_ASTATE_ALLOC);
2287 case HAMMER_FSBUF_RECORDS:
2288 hammer_alist_init(live, 0, HAMMER_RECORD_NODES,
2289 HAMMER_ASTATE_ALLOC);
2292 hammer_alist_init(live, 0, 0, HAMMER_ASTATE_ALLOC);
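/*
 * Note: initializing the A-list to all-allocated rather than all-free
 * is deliberate.  The caller (alloc_new_buffer()) then meta-frees only
 * the usable element range into the cluster's slave A-list, so elements
 * beyond the type's node count can never be handed out and the free
 * count stays accurate.
 */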
/*
 * Calculate the cluster's offset in the volume.  This calculation is
 * slightly more complex when using superclusters because superclusters
 * are grouped in blocks of 16, followed by 16 x N clusters where N
 * is the number of clusters a supercluster can manage.
 */
static int64_t
calculate_cluster_offset(hammer_volume_t volume, int32_t clu_no)
{
	int32_t scl_group;
	int64_t scl_group_size;
	int64_t off;

	if (volume->vol_flags & HAMMER_VOLF_USINGSUPERCL) {
		scl_group = clu_no / HAMMER_VOL_SUPERCLUSTER_GROUP /
			    HAMMER_SCL_MAXCLUSTERS;
		scl_group_size =
			    ((int64_t)HAMMER_BUFSIZE *
			     HAMMER_VOL_SUPERCLUSTER_GROUP) +
			    ((int64_t)HAMMER_VOL_SUPERCLUSTER_GROUP *
			     volume->vol_clsize * HAMMER_SCL_MAXCLUSTERS);
		scl_group_size +=
			    HAMMER_VOL_SUPERCLUSTER_GROUP * HAMMER_BUFSIZE;
		off = volume->cluster_base +
		      scl_group * scl_group_size +
		      (HAMMER_BUFSIZE * HAMMER_VOL_SUPERCLUSTER_GROUP) +
		      ((int64_t)clu_no % ((int64_t)HAMMER_SCL_MAXCLUSTERS *
		       HAMMER_VOL_SUPERCLUSTER_GROUP)) *
		      volume->vol_clsize;
	} else {
		off = volume->cluster_base +
		      (int64_t)clu_no * volume->vol_clsize;
	}
	return(off);
}
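/*
 * Worked example (hypothetical numbers): with
 * HAMMER_VOL_SUPERCLUSTER_GROUP = 16, N = HAMMER_SCL_MAXCLUSTERS
 * clusters per supercluster, and cluster size C, each scl_group covers
 * 16 supercluster headers (16 * HAMMER_BUFSIZE bytes, plus the extra
 * group padding added above) followed by the 16 * N clusters they
 * manage (16 * N * C bytes).  Cluster clu_no therefore falls in group
 * clu_no / (16 * N), at slot clu_no % (16 * N) past that group's
 * header area, which is exactly the offset expression computed above.
 */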
/*
 * Calculate a super-cluster's offset in the volume.
 */
static int64_t
calculate_supercl_offset(hammer_volume_t volume, int32_t scl_no)
{
	int64_t off;
	int32_t scl_group;
	int64_t scl_group_size;

	KKASSERT(volume->vol_flags & HAMMER_VOLF_USINGSUPERCL);
	scl_group = scl_no / HAMMER_VOL_SUPERCLUSTER_GROUP;
	if (scl_group) {
		scl_group_size =
			    ((int64_t)HAMMER_BUFSIZE *
			     HAMMER_VOL_SUPERCLUSTER_GROUP) +
			    ((int64_t)HAMMER_VOL_SUPERCLUSTER_GROUP *
			     volume->vol_clsize * HAMMER_SCL_MAXCLUSTERS);
		scl_group_size +=
			    HAMMER_VOL_SUPERCLUSTER_GROUP * HAMMER_BUFSIZE;
		off = volume->cluster_base + (scl_group * scl_group_size) +
		      (scl_no % HAMMER_VOL_SUPERCLUSTER_GROUP) * HAMMER_BUFSIZE;
	} else {
		off = volume->cluster_base + (scl_no * HAMMER_BUFSIZE);
	}
	return(off);
}
/*
 * Allocate nblks buffers from the cluster's master alist.
 */
static int32_t
hammer_alloc_master(hammer_cluster_t cluster, int nblks,
		    int32_t start, int isfwd)
{
	int32_t buf_no;

	hammer_modify_cluster(cluster);
	if (isfwd) {
		buf_no = hammer_alist_alloc_fwd(&cluster->alist_master,
						nblks, start);
		if (buf_no == HAMMER_ALIST_BLOCK_NONE) {
			buf_no = hammer_alist_alloc_fwd(&cluster->alist_master,
							nblks, 0);
		}
	} else {
		buf_no = hammer_alist_alloc_rev(&cluster->alist_master,
						nblks, start);
		if (buf_no == HAMMER_ALIST_BLOCK_NONE) {
			buf_no = hammer_alist_alloc_rev(&cluster->alist_master,
							nblks,
							HAMMER_ALIST_BLOCK_MAX);
		}
	}

	/*
	 * Recover space from empty record, b-tree, and data a-lists.
	 */

	return(buf_no);
}
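/*
 * Note on the allocation pattern above: both branches implement a simple
 * wrap-around search.  The first attempt honors the caller's placement
 * hint (start); if no run of nblks free buffers exists from the hint
 * onward, the second attempt retries from the far end (block 0 for
 * forward scans, HAMMER_ALIST_BLOCK_MAX for reverse scans) so a poor
 * hint can never hide free space elsewhere in the cluster.
 */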
/*
 * Adjust allocation statistics
 */
static void
hammer_adjust_stats(hammer_cluster_t cluster, u_int64_t buf_type, int nblks)
{
	hammer_modify_cluster(cluster);
	hammer_modify_volume(cluster->volume);
	hammer_modify_volume(cluster->volume->hmp->rootvol);

	switch(buf_type) {
	case HAMMER_FSBUF_BTREE:
		cluster->ondisk->stat_idx_bufs += nblks;
		cluster->volume->ondisk->vol_stat_idx_bufs += nblks;
		cluster->volume->hmp->rootvol->ondisk->vol0_stat_idx_bufs += nblks;
		break;
	case HAMMER_FSBUF_DATA:
		cluster->ondisk->stat_data_bufs += nblks;
		cluster->volume->ondisk->vol_stat_data_bufs += nblks;
		cluster->volume->hmp->rootvol->ondisk->vol0_stat_data_bufs += nblks;
		break;
	case HAMMER_FSBUF_RECORDS:
		cluster->ondisk->stat_rec_bufs += nblks;
		cluster->volume->ondisk->vol_stat_rec_bufs += nblks;
		cluster->volume->hmp->rootvol->ondisk->vol0_stat_rec_bufs += nblks;
		break;
	}
}
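/*
 * Note: the statistics are intentionally rolled up three levels deep
 * (the cluster, its volume, and the root volume), so each level's
 * ondisk header carries aggregate buffer counts without having to walk
 * its children.
 */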
/*
 * Setup the parameters for the various A-lists we use in hammer.  The
 * supercluster A-list must be chained to the cluster A-list and cluster
 * slave A-lists are chained to buffer A-lists.
 *
 * See hammer_init_alist_config() below.
 */
/*
 * A-LIST - cluster recursion into a filesystem buffer
 *
 * In the init case the buffer has already been initialized by
 * alloc_new_buffer() when it allocated the buffer out of the master
 * alist and marked it as free in the slave alist.
 *
 * Because we use a somewhat odd mechanism to assign buffers to slave
 * pools we can't actually free the buffer back to the master alist in
 * buffer_alist_destroy(), but instead must deal with that logic somewhere
 * else.
 */
static int
buffer_alist_init(void *info, int32_t blk, int32_t radix,
		  hammer_alloc_state_t state)
{
	return(0);
}
/*
 * Note: This routine is only called when freeing the last elements of
 * an initialized buffer.  Freeing all elements of the buffer when the
 * buffer was not previously initialized does not call this routine.
 */
static int
buffer_alist_destroy(void *info, int32_t blk, int32_t radix)
{
	hammer_cluster_t cluster = info;
	int32_t buf_no;

	buf_no = blk / HAMMER_FSBUF_MAXBLKS;
	if (hammer_debug_general & 0x80) {
		kprintf("destroy buffer %d:%d:%d\n",
			cluster->volume->vol_no, cluster->clu_no, buf_no);
	}
	return(0);
}
/*
 * Note: atblk can be negative and atblk - blk can go negative.
 */
static int32_t
buffer_alist_alloc_fwd(void *info, int32_t blk, int32_t radix,
		       int32_t count, int32_t atblk, int32_t *fullp)
{
	hammer_cluster_t cluster = info;
	hammer_buffer_t buffer;
	int32_t buf_no, r;
	int error = 0;

	buf_no = blk / HAMMER_FSBUF_MAXBLKS;
	buffer = hammer_get_buffer(cluster, buf_no, 0, &error);
	if (buffer) {
		KKASSERT(buffer->ondisk->head.buf_type != 0);
		hammer_modify_buffer(buffer);
		r = hammer_alist_alloc_fwd(&buffer->alist, count, atblk - blk);
		if (r != HAMMER_ALIST_BLOCK_NONE)
			r += blk;
		*fullp = hammer_alist_isfull(&buffer->alist);
		hammer_rel_buffer(buffer, 0);
	} else {
		r = HAMMER_ALIST_BLOCK_NONE;
		*fullp = 1;
	}
	return(r);
}
static int32_t
buffer_alist_alloc_rev(void *info, int32_t blk, int32_t radix,
		       int32_t count, int32_t atblk, int32_t *fullp)
{
	hammer_cluster_t cluster = info;
	hammer_buffer_t buffer;
	int32_t buf_no, r;
	int error = 0;

	buf_no = blk / HAMMER_FSBUF_MAXBLKS;
	buffer = hammer_get_buffer(cluster, buf_no, 0, &error);
	if (buffer) {
		KKASSERT(buffer->ondisk->head.buf_type != 0);
		hammer_modify_buffer(buffer);
		r = hammer_alist_alloc_rev(&buffer->alist, count, atblk - blk);
		if (r != HAMMER_ALIST_BLOCK_NONE)
			r += blk;
		*fullp = hammer_alist_isfull(&buffer->alist);
		hammer_rel_buffer(buffer, 0);
	} else {
		r = HAMMER_ALIST_BLOCK_NONE;
		*fullp = 1;
	}
	return(r);
}
static void
buffer_alist_free(void *info, int32_t blk, int32_t radix,
		  int32_t base_blk, int32_t count, int32_t *emptyp)
{
	hammer_cluster_t cluster = info;
	hammer_buffer_t buffer;
	int32_t buf_no;
	int error = 0;

	buf_no = blk / HAMMER_FSBUF_MAXBLKS;
	buffer = hammer_get_buffer(cluster, buf_no, 0, &error);
	if (buffer) {
		KKASSERT(buffer->ondisk->head.buf_type != 0);
		hammer_modify_buffer(buffer);
		hammer_alist_free(&buffer->alist, base_blk, count);
		*emptyp = hammer_alist_isempty(&buffer->alist);
		hammer_rel_buffer(buffer, 0);
	} else {
		*emptyp = 0;
	}
}
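/*
 * Note on the *fullp / *emptyp out-parameters used by the callbacks
 * above: they are how buffer-level state propagates back up to the
 * cluster-layer A-list.  When an allocation leaves a buffer's A-list
 * full, the parent layer can mark the corresponding subtree
 * all-allocated and skip it; when a free leaves it empty, the parent
 * may tear the buffer down via buffer_alist_destroy().
 */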
static int32_t
buffer_alist_find(void *info, int32_t blk, int32_t radix, int32_t atblk,
		  int flags)
{
	hammer_cluster_t cluster = info;
	hammer_buffer_t buffer;
	int32_t buf_no, maxblks;
	int error = 0;

	buf_no = blk / HAMMER_FSBUF_MAXBLKS;
	buffer = hammer_get_buffer(cluster, buf_no, 0, &error);
	if (buffer) {
		KKASSERT(buffer->ondisk->head.buf_type != 0);
		switch(buffer->ondisk->head.buf_type) {
		case HAMMER_FSBUF_RECORDS:
			maxblks = HAMMER_RECORD_NODES;
			break;
		case HAMMER_FSBUF_BTREE:
			maxblks = HAMMER_BTREE_NODES;
			break;
		case HAMMER_FSBUF_DATA:
			maxblks = HAMMER_DATA_NODES;
			break;
		default:
			panic("buffer_alist_find: unknown buffer type");
			maxblks = 0;
			break;
		}
		blk = hammer_alist_find(&buffer->alist, atblk - blk, maxblks,
					flags);
		hammer_rel_buffer(buffer, 0);
	} else {
		blk = HAMMER_ALIST_BLOCK_NONE;
	}
	return(blk);
}

static void
buffer_alist_print(void *info, int32_t blk, int32_t radix, int tab)
{
}
/*
 * A-LIST - super-cluster recursion into a cluster and cluster recursion
 * into a filesystem buffer.  A-Lists are mostly self-contained entities,
 * but callbacks must be installed to recurse from one A-List to another.
 *
 * Implementing these callbacks allows us to operate a multi-layered
 * A-List as a single entity.
 */
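/*
 * Example (illustrative, not part of the driver): with the callbacks
 * installed by hammer_init_alist_config(), a single allocation call on
 * the volume's two-layer A-list, e.g.
 *
 *	clu_no = hammer_alist_alloc_fwd(&volume->alist, 1, atblk);
 *
 * may recurse through Vol_super_alist_config.bl_radix_alloc_fwd (that
 * is, super_alist_alloc_fwd() below) into the supercluster's own
 * A-list, and a cluster-level allocation may likewise recurse through
 * Clu_slave_alist_config into a filesystem buffer's A-list.  The caller
 * only ever sees the top-level A-list.
 */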
/*
 * This occurs when allocating a cluster via the volume a-list and the
 * entry in the volume a-list indicated all-free.  The underlying supercl
 * has not yet been initialized.
 */
static int
super_alist_init(void *info, int32_t blk, int32_t radix,
		 hammer_alloc_state_t state)
{
	hammer_volume_t volume = info;
	hammer_supercl_t supercl;
	int32_t scl_no;
	int error = 0;

	/*
	 * Calculate the super-cluster number containing the cluster (blk)
	 * and obtain the super-cluster buffer.
	 */
	scl_no = blk / HAMMER_SCL_MAXCLUSTERS;
	supercl = hammer_get_supercl(volume, scl_no, &error, state);
	if (supercl)
		hammer_rel_supercl(supercl, 0);
	return(error);
}
static int
super_alist_recover(void *info, int32_t blk, int32_t radix, int32_t count)
{
	hammer_volume_t volume = info;
	hammer_supercl_t supercl;
	int32_t scl_no;
	int error = 0;

	/*
	 * Calculate the super-cluster number containing the cluster (blk)
	 * and obtain the super-cluster buffer.
	 */
	scl_no = blk / HAMMER_SCL_MAXCLUSTERS;
	supercl = hammer_get_supercl(volume, scl_no, &error,
				     HAMMER_ASTATE_NONE);
	if (supercl) {
		hammer_modify_supercl(supercl);
		error = hammer_alist_recover(&supercl->alist, blk, 0, count);
		/* free block count is returned if >= 0 */
		hammer_rel_supercl(supercl, 0);
	} else {
		error = -error;
	}
	return(error);
}
/*
 * This occurs when freeing a cluster via the volume a-list and the
 * supercl is now 100% free.  We can destroy the supercl.
 *
 * What we actually do is just unset the modify bit so it doesn't get
 * written out.
 */
static int
super_alist_destroy(void *info, int32_t blk, int32_t radix)
{
	hammer_volume_t volume = info;
	hammer_supercl_t supercl;
	int32_t scl_no;
	int error = 0;

	/*
	 * Calculate the super-cluster number containing the cluster (blk)
	 * and obtain the super-cluster buffer.
	 */
	scl_no = blk / HAMMER_SCL_MAXCLUSTERS;
	if (hammer_find_supercl(volume, scl_no)) {
		supercl = hammer_get_supercl(volume, scl_no, &error,
					     HAMMER_ASTATE_FREE);
		if (supercl) {
			hammer_io_clear_modify(&supercl->io);
			hammer_rel_supercl(supercl, 0);
		}
	}
	return(error);
}
static int32_t
super_alist_alloc_fwd(void *info, int32_t blk, int32_t radix,
		      int32_t count, int32_t atblk, int32_t *fullp)
{
	hammer_volume_t volume = info;
	hammer_supercl_t supercl;
	int32_t scl_no, r;
	int error = 0;

	scl_no = blk / HAMMER_SCL_MAXCLUSTERS;
	supercl = hammer_get_supercl(volume, scl_no, &error, 0);
	if (supercl) {
		hammer_modify_supercl(supercl);
		r = hammer_alist_alloc_fwd(&supercl->alist, count, atblk - blk);
		if (r != HAMMER_ALIST_BLOCK_NONE)
			r += blk;
		*fullp = hammer_alist_isfull(&supercl->alist);
		hammer_rel_supercl(supercl, 0);
	} else {
		r = HAMMER_ALIST_BLOCK_NONE;
		*fullp = 1;
	}
	return(r);
}
static int32_t
super_alist_alloc_rev(void *info, int32_t blk, int32_t radix,
		      int32_t count, int32_t atblk, int32_t *fullp)
{
	hammer_volume_t volume = info;
	hammer_supercl_t supercl;
	int32_t scl_no, r;
	int error = 0;

	scl_no = blk / HAMMER_SCL_MAXCLUSTERS;
	supercl = hammer_get_supercl(volume, scl_no, &error, 0);
	if (supercl) {
		hammer_modify_supercl(supercl);
		r = hammer_alist_alloc_rev(&supercl->alist, count, atblk - blk);
		if (r != HAMMER_ALIST_BLOCK_NONE)
			r += blk;
		*fullp = hammer_alist_isfull(&supercl->alist);
		hammer_rel_supercl(supercl, 0);
	} else {
		r = HAMMER_ALIST_BLOCK_NONE;
		*fullp = 1;
	}
	return(r);
}
static void
super_alist_free(void *info, int32_t blk, int32_t radix,
		 int32_t base_blk, int32_t count, int32_t *emptyp)
{
	hammer_volume_t volume = info;
	hammer_supercl_t supercl;
	int32_t scl_no;
	int error = 0;

	scl_no = blk / HAMMER_SCL_MAXCLUSTERS;
	supercl = hammer_get_supercl(volume, scl_no, &error, 0);
	if (supercl) {
		hammer_modify_supercl(supercl);
		hammer_alist_free(&supercl->alist, base_blk, count);
		*emptyp = hammer_alist_isempty(&supercl->alist);
		hammer_rel_supercl(supercl, 0);
	} else {
		*emptyp = 0;
	}
}
static int32_t
super_alist_find(void *info, int32_t blk, int32_t radix, int32_t atblk,
		 int flags)
{
	hammer_volume_t volume = info;
	hammer_supercl_t supercl;
	int32_t scl_no, nclusters;
	int error = 0;

	scl_no = blk / HAMMER_SCL_MAXCLUSTERS;
	supercl = hammer_get_supercl(volume, scl_no, &error, 0);
	if (supercl) {
		nclusters = supercl->volume->ondisk->vol_nclusters -
			    ((int64_t)supercl->scl_no * HAMMER_SCL_MAXCLUSTERS);
		KKASSERT(nclusters > 0);
		if (nclusters > HAMMER_SCL_MAXCLUSTERS)
			nclusters = HAMMER_SCL_MAXCLUSTERS;
		blk = hammer_alist_find(&supercl->alist, atblk - blk,
					nclusters, flags);
		hammer_rel_supercl(supercl, 0);
	} else {
		blk = HAMMER_ALIST_BLOCK_NONE;
	}
	return(blk);
}

static void
super_alist_print(void *info, int32_t blk, int32_t radix, int tab)
{
}
void
hammer_init_alist_config(void)
{
	hammer_alist_config_t config;

	hammer_alist_template(&Buf_alist_config, HAMMER_FSBUF_MAXBLKS,
			      1, HAMMER_FSBUF_METAELMS, 0);
	hammer_alist_template(&Vol_normal_alist_config, HAMMER_VOL_MAXCLUSTERS,
			      1, HAMMER_VOL_METAELMS_1LYR, 0);
	hammer_alist_template(&Vol_super_alist_config,
			      HAMMER_VOL_MAXSUPERCLUSTERS * HAMMER_SCL_MAXCLUSTERS,
			      HAMMER_SCL_MAXCLUSTERS, HAMMER_VOL_METAELMS_2LYR,
			      0);
	hammer_alist_template(&Supercl_alist_config, HAMMER_VOL_MAXCLUSTERS,
			      1, HAMMER_SUPERCL_METAELMS, 0);
	hammer_alist_template(&Clu_master_alist_config, HAMMER_CLU_MAXBUFFERS,
			      1, HAMMER_CLU_MASTER_METAELMS, 0);
	hammer_alist_template(&Clu_slave_alist_config,
			      HAMMER_CLU_MAXBUFFERS * HAMMER_FSBUF_MAXBLKS,
			      HAMMER_FSBUF_MAXBLKS, HAMMER_CLU_SLAVE_METAELMS,
			      1);

	config = &Vol_super_alist_config;
	config->bl_radix_init = super_alist_init;
	config->bl_radix_recover = super_alist_recover;
	config->bl_radix_destroy = super_alist_destroy;
	config->bl_radix_alloc_fwd = super_alist_alloc_fwd;
	config->bl_radix_alloc_rev = super_alist_alloc_rev;
	config->bl_radix_free = super_alist_free;
	config->bl_radix_find = super_alist_find;
	config->bl_radix_print = super_alist_print;

	config = &Clu_slave_alist_config;
	config->bl_radix_init = buffer_alist_init;
	config->bl_radix_recover = buffer_alist_recover;
	config->bl_radix_destroy = buffer_alist_destroy;
	config->bl_radix_alloc_fwd = buffer_alist_alloc_fwd;
	config->bl_radix_alloc_rev = buffer_alist_alloc_rev;
	config->bl_radix_free = buffer_alist_free;
	config->bl_radix_find = buffer_alist_find;
	config->bl_radix_print = buffer_alist_print;
}
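/*
 * Design note: the templates above must be initialized before any
 * volume is mounted, since every live A-list binds to one of them.
 * The two-layer templates pass the layer's base radix
 * (HAMMER_SCL_MAXCLUSTERS for the volume's supercluster layout,
 * HAMMER_FSBUF_MAXBLKS for the cluster slave layout) so the generic
 * A-list code knows where one layer ends and the callbacks installed
 * here take over; the single-layer templates pass a base radix of 1
 * and need no callbacks.
 */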