/*
 * Copyright (c) 2007 The DragonFly Project.  All rights reserved.
 *
 * This code is derived from software contributed to The DragonFly Project
 * by Matthew Dillon <dillon@backplane.com>
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in
 *    the documentation and/or other materials provided with the
 *    distribution.
 * 3. Neither the name of The DragonFly Project nor the names of its
 *    contributors may be used to endorse or promote products derived
 *    from this software without specific, prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE
 * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING,
 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 * $DragonFly: src/sys/vfs/hammer/hammer_ondisk.c,v 1.10 2007/12/14 08:05:39 dillon Exp $
 */
/*
 * Manage HAMMER's on-disk structures.  These routines are primarily
 * responsible for interfacing with the kernel's I/O subsystem and for
 * managing in-memory structures.
 */
#include "hammer.h"
#include <sys/fcntl.h>
#include <sys/nlookup.h>
#include <sys/buf.h>
#include <sys/buf2.h>
static void hammer_free_volume(hammer_volume_t volume);
static int hammer_load_volume(hammer_volume_t volume);
static int hammer_load_supercl(hammer_supercl_t supercl, int isnew);
static int hammer_load_cluster(hammer_cluster_t cluster, int isnew);
static int hammer_load_buffer(hammer_buffer_t buffer, u_int64_t buf_type);
static void hammer_remove_node_clist(hammer_buffer_t buffer,
			hammer_node_t node);
static void initbuffer(hammer_alist_t live, hammer_fsbuf_head_t head,
			u_int64_t type);
static void alloc_new_buffer(hammer_cluster_t cluster,
			hammer_alist_t live, u_int64_t type, int32_t nelements,
			int start, int *errorp, struct hammer_buffer **bufferp);
static void readhammerbuf(hammer_volume_t vol, void *data,
			int64_t offset);
static void writehammerbuf(hammer_volume_t vol, const void *data,
			int64_t offset);
static int64_t calculate_cluster_offset(hammer_volume_t vol, int32_t clu_no);
static int64_t calculate_supercl_offset(hammer_volume_t vol, int32_t scl_no);
static int32_t hammer_alloc_master(hammer_cluster_t cluster, int nblks,
			int32_t start, int isfwd);
static void hammer_adjust_stats(hammer_cluster_t cluster,
			u_int64_t buf_type, int nblks);
struct hammer_alist_config Buf_alist_config;
struct hammer_alist_config Vol_normal_alist_config;
struct hammer_alist_config Vol_super_alist_config;
struct hammer_alist_config Supercl_alist_config;
struct hammer_alist_config Clu_master_alist_config;
struct hammer_alist_config Clu_slave_alist_config;
/*
 * Red-Black tree support for various structures
 */
static int
hammer_ino_rb_compare(hammer_inode_t ip1, hammer_inode_t ip2)
{
	if (ip1->obj_id < ip2->obj_id)
		return(-1);
	if (ip1->obj_id > ip2->obj_id)
		return(1);
	if (ip1->obj_asof < ip2->obj_asof)
		return(-1);
	if (ip1->obj_asof > ip2->obj_asof)
		return(1);
	return(0);
}

static int
hammer_inode_info_cmp(hammer_inode_info_t info, hammer_inode_t ip)
{
	if (info->obj_id < ip->obj_id)
		return(-1);
	if (info->obj_id > ip->obj_id)
		return(1);
	if (info->obj_asof < ip->obj_asof)
		return(-1);
	if (info->obj_asof > ip->obj_asof)
		return(1);
	return(0);
}

static int
hammer_vol_rb_compare(hammer_volume_t vol1, hammer_volume_t vol2)
{
	if (vol1->vol_no < vol2->vol_no)
		return(-1);
	if (vol1->vol_no > vol2->vol_no)
		return(1);
	return(0);
}

static int
hammer_scl_rb_compare(hammer_supercl_t cl1, hammer_supercl_t cl2)
{
	if (cl1->scl_no < cl2->scl_no)
		return(-1);
	if (cl1->scl_no > cl2->scl_no)
		return(1);
	return(0);
}

static int
hammer_clu_rb_compare(hammer_cluster_t cl1, hammer_cluster_t cl2)
{
	if (cl1->clu_no < cl2->clu_no)
		return(-1);
	if (cl1->clu_no > cl2->clu_no)
		return(1);
	return(0);
}

static int
hammer_buf_rb_compare(hammer_buffer_t buf1, hammer_buffer_t buf2)
{
	if (buf1->buf_no < buf2->buf_no)
		return(-1);
	if (buf1->buf_no > buf2->buf_no)
		return(1);
	return(0);
}

static int
hammer_nod_rb_compare(hammer_node_t node1, hammer_node_t node2)
{
	if (node1->node_offset < node2->node_offset)
		return(-1);
	if (node1->node_offset > node2->node_offset)
		return(1);
	return(0);
}
/*
 * Note: The lookup function for hammer_ino_rb_tree winds up being named
 * hammer_ino_rb_tree_RB_LOOKUP_INFO(root, info).  The other lookup
 * functions are normal, e.g. hammer_clu_rb_tree_RB_LOOKUP(root, clu_no).
 */
RB_GENERATE(hammer_ino_rb_tree, hammer_inode, rb_node, hammer_ino_rb_compare);
RB_GENERATE_XLOOKUP(hammer_ino_rb_tree, INFO, hammer_inode, rb_node,
		hammer_inode_info_cmp, hammer_inode_info_t);
RB_GENERATE2(hammer_vol_rb_tree, hammer_volume, rb_node,
	     hammer_vol_rb_compare, int32_t, vol_no);
RB_GENERATE2(hammer_scl_rb_tree, hammer_supercl, rb_node,
	     hammer_scl_rb_compare, int32_t, scl_no);
RB_GENERATE2(hammer_clu_rb_tree, hammer_cluster, rb_node,
	     hammer_clu_rb_compare, int32_t, clu_no);
RB_GENERATE2(hammer_buf_rb_tree, hammer_buffer, rb_node,
	     hammer_buf_rb_compare, int32_t, buf_no);
RB_GENERATE2(hammer_nod_rb_tree, hammer_node, rb_node,
	     hammer_nod_rb_compare, int32_t, node_offset);
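/*
 * Illustrative sketch (not part of the original source): the macros
 * above generate type-safe lookup functions.  Assuming the mount's
 * inode tree is rooted at hmp->rb_inos_root (field name assumed for
 * illustration), lookups look like:
 *
 *	cluster = hammer_clu_rb_tree_RB_LOOKUP(&volume->rb_clus_root,
 *					       clu_no);
 *	ip = hammer_ino_rb_tree_RB_LOOKUP_INFO(&hmp->rb_inos_root, &info);
 */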
/************************************************************************
 *				VOLUMES					*
 ************************************************************************
 *
 * Load a HAMMER volume by name.  Returns 0 on success or a positive error
 * code on failure.  Volumes must be loaded at mount time; get_volume() will
 * not load a new volume.
 *
 * Calls made to hammer_load_volume() are single-threaded.
 */
int
hammer_install_volume(struct hammer_mount *hmp, const char *volname)
{
	hammer_volume_t volume;
	struct hammer_volume_ondisk *ondisk;
	struct nlookupdata nd;
	struct buf *bp = NULL;
	int error;
	int ronly;

	ronly = ((hmp->mp->mnt_flag & MNT_RDONLY) ? 1 : 0);

	/*
	 * Allocate a volume structure
	 */
	volume = kmalloc(sizeof(*volume), M_HAMMER, M_WAITOK|M_ZERO);
	volume->vol_name = kstrdup(volname, M_HAMMER);
	volume->hmp = hmp;
	volume->io.type = HAMMER_STRUCTURE_VOLUME;
	volume->io.offset = 0LL;

	/*
	 * Get the device vnode
	 */
	error = nlookup_init(&nd, volume->vol_name, UIO_SYSSPACE, NLC_FOLLOW);
	if (error == 0)
		error = nlookup(&nd);
	if (error == 0)
		error = cache_vref(&nd.nl_nch, nd.nl_cred, &volume->devvp);
	nlookup_done(&nd);
	if (error == 0) {
		vn_isdisk(volume->devvp, &error);
	}
	if (error == 0) {
		vn_lock(volume->devvp, LK_EXCLUSIVE | LK_RETRY);
		error = VOP_OPEN(volume->devvp, (ronly ? FREAD : FREAD|FWRITE),
				 FSCRED, NULL);
		vn_unlock(volume->devvp);
	}
	if (error) {
		hammer_free_volume(volume);
		return(error);
	}

	/*
	 * Extract the volume number from the volume header and do various
	 * sanity checks.
	 */
	error = bread(volume->devvp, 0LL, HAMMER_BUFSIZE, &bp);
	if (error)
		goto late_failure;
	ondisk = (void *)bp->b_data;
	if (ondisk->head.buf_type != HAMMER_FSBUF_VOLUME) {
		kprintf("hammer_mount: volume %s has an invalid header\n",
			volume->vol_name);
		error = EFTYPE;
		goto late_failure;
	}
	volume->vol_no = ondisk->vol_no;
	volume->cluster_base = ondisk->vol_clo_beg;
	volume->vol_clsize = ondisk->vol_clsize;
	volume->vol_flags = ondisk->vol_flags;
	volume->nblocks = ondisk->vol_nblocks;
	RB_INIT(&volume->rb_clus_root);
	RB_INIT(&volume->rb_scls_root);

	hmp->mp->mnt_stat.f_blocks += volume->nblocks;

	if (RB_EMPTY(&hmp->rb_vols_root)) {
		hmp->fsid = ondisk->vol_fsid;
	} else if (bcmp(&hmp->fsid, &ondisk->vol_fsid, sizeof(uuid_t))) {
		kprintf("hammer_mount: volume %s's fsid does not match "
			"other volumes\n", volume->vol_name);
		error = EFTYPE;
		goto late_failure;
	}

	/*
	 * Insert the volume structure into the red-black tree.
	 */
	if (RB_INSERT(hammer_vol_rb_tree, &hmp->rb_vols_root, volume)) {
		kprintf("hammer_mount: volume %s has a duplicate vol_no %d\n",
			volume->vol_name, volume->vol_no);
		error = EEXIST;
	}

	/*
	 * Set the root volume and load the root cluster.  HAMMER special
	 * cases rootvol and rootcl and will not deallocate the structures.
	 * We do not hold a ref because this would prevent related I/O
	 * from being flushed.
	 */
	if (error == 0 && ondisk->vol_rootvol == ondisk->vol_no) {
		hmp->rootvol = volume;
		hmp->rootcl = hammer_get_cluster(volume,
						 ondisk->vol0_root_clu_no,
						 &error, 0);
		hammer_rel_cluster(hmp->rootcl, 0);
		hmp->fsid_udev = dev2udev(vn_todev(volume->devvp));
	}
late_failure:
	if (bp)
		brelse(bp);
	if (error) {
		/*vinvalbuf(volume->devvp, V_SAVE, 0, 0);*/
		VOP_CLOSE(volume->devvp, ronly ? FREAD : FREAD|FWRITE);
		hammer_free_volume(volume);
	}
	return(error);
}
/*
 * Unload and free a HAMMER volume.  Must return >= 0 for the scan to
 * continue, so -1 is returned on failure (which terminates the scan).
 */
int
hammer_unload_volume(hammer_volume_t volume, void *data __unused)
{
	struct hammer_mount *hmp = volume->hmp;
	hammer_cluster_t rootcl;
	int ronly = ((hmp->mp->mnt_flag & MNT_RDONLY) ? 1 : 0);

	/*
	 * Sync clusters, sync volume
	 */
	hmp->mp->mnt_stat.f_blocks -= volume->nblocks;

	/*
	 * Clean up the root cluster, which is held unlocked in the mount
	 * structure.
	 */
	if (hmp->rootvol == volume) {
		if ((rootcl = hmp->rootcl) != NULL)
			hmp->rootcl = NULL;
		hmp->rootvol = NULL;
	}

	/*
	 * Unload clusters and super-clusters.  Unloading a super-cluster
	 * also unloads related clusters, but the filesystem may not be
	 * using super-clusters so unload clusters anyway.
	 */
	RB_SCAN(hammer_clu_rb_tree, &volume->rb_clus_root, NULL,
			hammer_unload_cluster, NULL);
	RB_SCAN(hammer_scl_rb_tree, &volume->rb_scls_root, NULL,
			hammer_unload_supercl, NULL);

	/*
	 * Release our buffer and flush anything left in the buffer cache.
	 */
	hammer_io_release(&volume->io, 1);

	/*
	 * There should be no references on the volume, no clusters, and
	 * no super-clusters.
	 */
	KKASSERT(volume->io.lock.refs == 0);
	KKASSERT(RB_EMPTY(&volume->rb_clus_root));
	KKASSERT(RB_EMPTY(&volume->rb_scls_root));

	volume->ondisk = NULL;
	if (volume->devvp) {
		if (ronly) {
			vinvalbuf(volume->devvp, 0, 0, 0);
			VOP_CLOSE(volume->devvp, FREAD);
		} else {
			vinvalbuf(volume->devvp, V_SAVE, 0, 0);
			VOP_CLOSE(volume->devvp, FREAD|FWRITE);
		}
	}

	/*
	 * Destroy the structure
	 */
	RB_REMOVE(hammer_vol_rb_tree, &hmp->rb_vols_root, volume);
	hammer_free_volume(volume);
	return(0);
}
static
void
hammer_free_volume(hammer_volume_t volume)
{
	if (volume->vol_name) {
		kfree(volume->vol_name, M_HAMMER);
		volume->vol_name = NULL;
	}
	if (volume->devvp) {
		vrele(volume->devvp);
		volume->devvp = NULL;
	}
	kfree(volume, M_HAMMER);
}
/*
 * Get a HAMMER volume.  The volume must already exist.
 */
hammer_volume_t
hammer_get_volume(struct hammer_mount *hmp, int32_t vol_no, int *errorp)
{
	struct hammer_volume *volume;

	/*
	 * Locate the volume structure
	 */
	volume = RB_LOOKUP(hammer_vol_rb_tree, &hmp->rb_vols_root, vol_no);
	if (volume == NULL) {
		*errorp = ENOENT;
		return(NULL);
	}
	hammer_ref(&volume->io.lock);

	/*
	 * Deal with on-disk info
	 */
	if (volume->ondisk == NULL) {
		*errorp = hammer_load_volume(volume);
		if (*errorp) {
			hammer_rel_volume(volume, 1);
			volume = NULL;
		}
	} else {
		*errorp = 0;
	}
	return(volume);
}

int
hammer_ref_volume(hammer_volume_t volume)
{
	int error;

	hammer_ref(&volume->io.lock);

	/*
	 * Deal with on-disk info
	 */
	if (volume->ondisk == NULL) {
		error = hammer_load_volume(volume);
		if (error)
			hammer_rel_volume(volume, 1);
	} else {
		error = 0;
	}
	return(error);
}

hammer_volume_t
hammer_get_root_volume(struct hammer_mount *hmp, int *errorp)
{
	hammer_volume_t volume;

	volume = hmp->rootvol;
	KKASSERT(volume != NULL);
	hammer_ref(&volume->io.lock);

	/*
	 * Deal with on-disk info
	 */
	if (volume->ondisk == NULL) {
		*errorp = hammer_load_volume(volume);
		if (*errorp) {
			hammer_rel_volume(volume, 1);
			volume = NULL;
		}
	} else {
		*errorp = 0;
	}
	return(volume);
}
/*
 * Load a volume's on-disk information.  The volume must be referenced and
 * not locked.  We temporarily acquire an exclusive lock to interlock
 * against releases or multiple get's.
 */
static int
hammer_load_volume(hammer_volume_t volume)
{
	struct hammer_volume_ondisk *ondisk;
	int error;

	hammer_lock_ex(&volume->io.lock);
	if (volume->ondisk == NULL) {
		error = hammer_io_read(volume->devvp, &volume->io);
		if (error) {
			hammer_unlock(&volume->io.lock);
			return(error);
		}
		volume->ondisk = ondisk = (void *)volume->io.bp->b_data;

		/*
		 * Configure the volume's A-lists.  These are used to
		 * allocate clusters.
		 */
		if (volume->vol_flags & HAMMER_VOLF_USINGSUPERCL) {
			volume->alist.config = &Vol_super_alist_config;
			volume->alist.meta = ondisk->vol_almeta.super;
			volume->alist.info = volume;
		} else {
			volume->alist.config = &Vol_normal_alist_config;
			volume->alist.meta = ondisk->vol_almeta.normal;
			volume->alist.info = NULL;
		}
		hammer_alist_init(&volume->alist);
	} else {
		error = 0;
	}
	hammer_unlock(&volume->io.lock);
	return(error);
}
/*
 * Release a volume.  Call hammer_io_release on the last reference.  We have
 * to acquire an exclusive lock to interlock against volume->ondisk tests
 * in hammer_load_volume(), and hammer_io_release() also expects an exclusive
 * lock.
 *
 * Volumes are not unloaded from memory during normal operation.
 */
void
hammer_rel_volume(hammer_volume_t volume, int flush)
{
	if (volume->io.lock.refs == 1) {
		hammer_lock_ex(&volume->io.lock);
		if (volume->io.lock.refs == 1) {
			volume->ondisk = NULL;
			hammer_io_release(&volume->io, flush);
		}
		hammer_unlock(&volume->io.lock);
	}
	hammer_unref(&volume->io.lock);
}
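/*
 * Illustrative sketch (not part of the original source): a typical
 * consumer pairs hammer_get_volume() with hammer_rel_volume() and only
 * dereferences volume->ondisk while the reference is held:
 *
 *	volume = hammer_get_volume(hmp, vol_no, &error);
 *	if (volume) {
 *		... use volume->ondisk ...
 *		hammer_rel_volume(volume, 0);
 *	}
 */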
/************************************************************************
 *				SUPER-CLUSTERS				*
 ************************************************************************
 *
 * Manage super-clusters.  Note that a supercl holds a reference to its
 * associated volume.
 */
hammer_supercl_t
hammer_get_supercl(hammer_volume_t volume, int32_t scl_no,
		   int *errorp, int isnew)
{
	hammer_supercl_t supercl;

	/*
	 * Locate and lock the super-cluster structure, creating one
	 * if necessary.
	 */
again:
	supercl = RB_LOOKUP(hammer_scl_rb_tree, &volume->rb_scls_root, scl_no);
	if (supercl == NULL) {
		supercl = kmalloc(sizeof(*supercl), M_HAMMER, M_WAITOK|M_ZERO);
		supercl->scl_no = scl_no;
		supercl->volume = volume;
		supercl->io.offset = calculate_supercl_offset(volume, scl_no);
		supercl->io.type = HAMMER_STRUCTURE_SUPERCL;
		hammer_ref(&supercl->io.lock);

		/*
		 * Insert the super-cluster into the RB tree and handle late
		 * collisions.
		 */
		if (RB_INSERT(hammer_scl_rb_tree, &volume->rb_scls_root, supercl)) {
			hammer_unref(&supercl->io.lock);
			kfree(supercl, M_HAMMER);
			goto again;
		}
		hammer_ref(&volume->io.lock);
	} else {
		hammer_ref(&supercl->io.lock);
	}

	/*
	 * Deal with on-disk info
	 */
	if (supercl->ondisk == NULL || isnew) {
		*errorp = hammer_load_supercl(supercl, isnew);
		if (*errorp) {
			hammer_rel_supercl(supercl, 1);
			supercl = NULL;
		}
	} else {
		*errorp = 0;
	}
	return(supercl);
}

static int
hammer_load_supercl(hammer_supercl_t supercl, int isnew)
{
	struct hammer_supercl_ondisk *ondisk;
	hammer_volume_t volume = supercl->volume;
	int error;

	hammer_lock_ex(&supercl->io.lock);
	if (supercl->ondisk == NULL) {
		if (isnew)
			error = hammer_io_new(volume->devvp, &supercl->io);
		else
			error = hammer_io_read(volume->devvp, &supercl->io);
		if (error) {
			hammer_unlock(&supercl->io.lock);
			return(error);
		}
		supercl->ondisk = ondisk = (void *)supercl->io.bp->b_data;

		supercl->alist.config = &Supercl_alist_config;
		supercl->alist.meta = ondisk->scl_meta;
		supercl->alist.info = NULL;
	} else if (isnew) {
		error = hammer_io_new(volume->devvp, &supercl->io);
	} else {
		error = 0;
	}
	if (error == 0 && isnew) {
		/*
		 * If this is a new super-cluster we have to initialize
		 * various ondisk structural elements.  The caller is
		 * responsible for the remainder.
		 */
		struct hammer_alist_live dummy;

		ondisk = supercl->ondisk;
		dummy.config = &Buf_alist_config;
		dummy.meta = ondisk->head.buf_almeta;
		dummy.info = NULL;
		initbuffer(&dummy, &ondisk->head, HAMMER_FSBUF_SUPERCL);
		hammer_alist_init(&supercl->alist);
	}
	hammer_unlock(&supercl->io.lock);
	return(error);
}
/*
 * NOTE: Called from RB_SCAN, must return >= 0 for scan to continue.
 */
int
hammer_unload_supercl(hammer_supercl_t supercl, void *data __unused)
{
	KKASSERT(supercl->io.lock.refs == 0);
	hammer_ref(&supercl->io.lock);
	hammer_rel_supercl(supercl, 1);
	return(0);
}
/*
 * Release a super-cluster.  We have to deal with several places where
 * another thread can ref the super-cluster.
 *
 * Only destroy the structure itself if the related buffer cache buffer
 * was disassociated from it.  This ties the management of the structure
 * to the buffer cache subsystem.
 */
void
hammer_rel_supercl(hammer_supercl_t supercl, int flush)
{
	hammer_volume_t volume;

	if (supercl->io.lock.refs == 1) {
		hammer_lock_ex(&supercl->io.lock);
		if (supercl->io.lock.refs == 1) {
			hammer_io_release(&supercl->io, flush);
			if (supercl->io.bp == NULL &&
			    supercl->io.lock.refs == 1) {
				volume = supercl->volume;
				RB_REMOVE(hammer_scl_rb_tree,
					  &volume->rb_scls_root, supercl);
				supercl->volume = NULL;	/* sanity */
				kfree(supercl, M_HAMMER);
				hammer_rel_volume(volume, 0);
				return;
			}
		}
		hammer_unlock(&supercl->io.lock);
	}
	hammer_unref(&supercl->io.lock);
}
/************************************************************************
 *				CLUSTERS				*
 ************************************************************************
 *
 * Manage clusters.  Note that a cluster holds a reference to its
 * associated volume.
 */
hammer_cluster_t
hammer_get_cluster(hammer_volume_t volume, int32_t clu_no,
		   int *errorp, int isnew)
{
	hammer_cluster_t cluster;

again:
	cluster = RB_LOOKUP(hammer_clu_rb_tree, &volume->rb_clus_root, clu_no);
	if (cluster == NULL) {
		cluster = kmalloc(sizeof(*cluster), M_HAMMER, M_WAITOK|M_ZERO);
		cluster->clu_no = clu_no;
		cluster->volume = volume;
		cluster->io.offset = calculate_cluster_offset(volume, clu_no);
		cluster->state = HAMMER_CLUSTER_IDLE;
		RB_INIT(&cluster->rb_bufs_root);
		RB_INIT(&cluster->rb_nods_root);
		cluster->io.type = HAMMER_STRUCTURE_CLUSTER;
		hammer_ref(&cluster->io.lock);

		/*
		 * Insert the cluster into the RB tree and handle late
		 * collisions.
		 */
		if (RB_INSERT(hammer_clu_rb_tree, &volume->rb_clus_root, cluster)) {
			hammer_unref(&cluster->io.lock);
			kfree(cluster, M_HAMMER);
			goto again;
		}
		hammer_ref(&volume->io.lock);
	} else {
		hammer_ref(&cluster->io.lock);
	}

	/*
	 * Deal with on-disk info
	 */
	if (cluster->ondisk == NULL || isnew) {
		*errorp = hammer_load_cluster(cluster, isnew);
		if (*errorp) {
			hammer_rel_cluster(cluster, 1);
			cluster = NULL;
		}
	} else {
		*errorp = 0;
	}
	return(cluster);
}

hammer_cluster_t
hammer_get_root_cluster(struct hammer_mount *hmp, int *errorp)
{
	hammer_cluster_t cluster;

	cluster = hmp->rootcl;
	KKASSERT(cluster != NULL);
	hammer_ref(&cluster->io.lock);

	/*
	 * Deal with on-disk info
	 */
	if (cluster->ondisk == NULL) {
		*errorp = hammer_load_cluster(cluster, 0);
		if (*errorp) {
			hammer_rel_cluster(cluster, 1);
			cluster = NULL;
		}
	} else {
		*errorp = 0;
	}
	return(cluster);
}
static int
hammer_load_cluster(hammer_cluster_t cluster, int isnew)
{
	hammer_volume_t volume = cluster->volume;
	struct hammer_cluster_ondisk *ondisk;
	int error;

	/*
	 * Load the cluster's on-disk info
	 */
	hammer_lock_ex(&cluster->io.lock);
	if (cluster->ondisk == NULL) {
		if (isnew)
			error = hammer_io_new(volume->devvp, &cluster->io);
		else
			error = hammer_io_read(volume->devvp, &cluster->io);
		if (error) {
			hammer_unlock(&cluster->io.lock);
			return(error);
		}
		cluster->ondisk = ondisk = (void *)cluster->io.bp->b_data;

		cluster->alist_master.config = &Clu_master_alist_config;
		cluster->alist_master.meta = ondisk->clu_master_meta;
		cluster->alist_btree.config = &Clu_slave_alist_config;
		cluster->alist_btree.meta = ondisk->clu_btree_meta;
		cluster->alist_btree.info = cluster;
		cluster->alist_record.config = &Clu_slave_alist_config;
		cluster->alist_record.meta = ondisk->clu_record_meta;
		cluster->alist_record.info = cluster;
		cluster->alist_mdata.config = &Clu_slave_alist_config;
		cluster->alist_mdata.meta = ondisk->clu_mdata_meta;
		cluster->alist_mdata.info = cluster;

		cluster->clu_btree_beg = ondisk->clu_btree_beg;
		cluster->clu_btree_end = ondisk->clu_btree_end;
	} else if (isnew) {
		error = hammer_io_new(volume->devvp, &cluster->io);
	} else {
		error = 0;
	}
	if (error == 0 && isnew) {
		/*
		 * If this is a new cluster we have to initialize
		 * various ondisk structural elements.  The caller is
		 * responsible for the remainder.
		 */
		struct hammer_alist_live dummy;

		ondisk = cluster->ondisk;

		dummy.config = &Buf_alist_config;
		dummy.meta = ondisk->head.buf_almeta;
		dummy.info = NULL;
		initbuffer(&dummy, &ondisk->head, HAMMER_FSBUF_CLUSTER);

		hammer_alist_init(&cluster->alist_master);
		hammer_alist_init(&cluster->alist_btree);
		hammer_alist_init(&cluster->alist_record);
		hammer_alist_init(&cluster->alist_mdata);
	}
	hammer_unlock(&cluster->io.lock);
	return(error);
}
/*
 * NOTE: Called from RB_SCAN, must return >= 0 for scan to continue.
 */
int
hammer_unload_cluster(hammer_cluster_t cluster, void *data __unused)
{
	hammer_ref(&cluster->io.lock);
	RB_SCAN(hammer_buf_rb_tree, &cluster->rb_bufs_root, NULL,
		hammer_unload_buffer, NULL);
	KKASSERT(cluster->io.lock.refs == 1);
	hammer_rel_cluster(cluster, 1);
	return(0);
}
/*
 * Reference a cluster that is either already referenced or via a specially
 * handled pointer (aka rootcl).
 */
int
hammer_ref_cluster(hammer_cluster_t cluster)
{
	int error;

	KKASSERT(cluster != NULL);
	hammer_ref(&cluster->io.lock);

	/*
	 * Deal with on-disk info
	 */
	if (cluster->ondisk == NULL) {
		error = hammer_load_cluster(cluster, 0);
		if (error)
			hammer_rel_cluster(cluster, 1);
	} else {
		error = 0;
	}
	return(error);
}
/*
 * Release a cluster.  We have to deal with several places where
 * another thread can ref the cluster.
 *
 * Only destroy the structure itself if the related buffer cache buffer
 * was disassociated from it.  This ties the management of the structure
 * to the buffer cache subsystem.
 */
void
hammer_rel_cluster(hammer_cluster_t cluster, int flush)
{
	hammer_node_t node;
	hammer_volume_t volume;

	if (cluster->io.lock.refs == 1) {
		hammer_lock_ex(&cluster->io.lock);
		if (cluster->io.lock.refs == 1) {
			/*
			 * Release the I/O.  If we or the kernel wants to
			 * flush, this will release the bp.  Otherwise the
			 * bp may be written and flushed passively by the
			 * kernel later on.
			 */
			hammer_io_release(&cluster->io, flush);

			/*
			 * The B-Tree node cache is not counted in the
			 * cluster's reference count.  Clean out the
			 * cache.
			 *
			 * If the cluster acquires a new reference while we
			 * are trying to clean it out, abort the cleaning.
			 *
			 * Any actively referenced nodes will reference the
			 * related buffer and cluster, so a ref count check
			 * should be sufficient.
			 */
			while (cluster->io.bp == NULL &&
			       cluster->io.lock.refs == 1 &&
			       (node = RB_ROOT(&cluster->rb_nods_root)) != NULL
			) {
				KKASSERT(node->lock.refs == 0);
				hammer_flush_node(node);
			}
			if (cluster->io.bp == NULL &&
			    cluster->io.lock.refs == 1 &&
			    RB_EMPTY(&cluster->rb_nods_root)) {
				KKASSERT(RB_EMPTY(&cluster->rb_bufs_root));
				volume = cluster->volume;
				RB_REMOVE(hammer_clu_rb_tree,
					  &volume->rb_clus_root, cluster);
				cluster->volume = NULL;	/* sanity */
				kfree(cluster, M_HAMMER);
				hammer_rel_volume(volume, 0);
				return;
			}
		}
		hammer_unlock(&cluster->io.lock);
	}
	hammer_unref(&cluster->io.lock);
}
/************************************************************************
 *				BUFFERS					*
 ************************************************************************
 *
 * Manage buffers.  Note that a buffer holds a reference to its associated
 * cluster, and its cluster will hold a reference to the cluster's volume.
 *
 * A non-zero buf_type indicates that a new buffer should be created and
 * zeroed.
 */
hammer_buffer_t
hammer_get_buffer(hammer_cluster_t cluster, int32_t buf_no,
		  u_int64_t buf_type, int *errorp)
{
	hammer_buffer_t buffer;

	/*
	 * Find the buffer.  Note that buffer 0 corresponds to the cluster
	 * header and should never be requested.
	 */
	KKASSERT(buf_no >= cluster->ondisk->clu_start / HAMMER_BUFSIZE &&
		 buf_no < cluster->ondisk->clu_limit / HAMMER_BUFSIZE);

	/*
	 * Locate and lock the buffer structure, creating one if necessary.
	 */
again:
	buffer = RB_LOOKUP(hammer_buf_rb_tree, &cluster->rb_bufs_root, buf_no);
	if (buffer == NULL) {
		buffer = kmalloc(sizeof(*buffer), M_HAMMER, M_WAITOK|M_ZERO);
		buffer->buf_no = buf_no;
		buffer->cluster = cluster;
		buffer->volume = cluster->volume;
		buffer->io.offset = cluster->io.offset +
				    (buf_no * HAMMER_BUFSIZE);
		buffer->io.type = HAMMER_STRUCTURE_BUFFER;
		TAILQ_INIT(&buffer->clist);
		hammer_ref(&buffer->io.lock);

		/*
		 * Insert the buffer into the RB tree and handle late
		 * collisions.
		 */
		if (RB_INSERT(hammer_buf_rb_tree, &cluster->rb_bufs_root, buffer)) {
			hammer_unref(&buffer->io.lock);
			kfree(buffer, M_HAMMER);
			goto again;
		}
		hammer_ref(&cluster->io.lock);
	} else {
		hammer_ref(&buffer->io.lock);
	}

	/*
	 * Deal with on-disk info
	 */
	if (buffer->ondisk == NULL || buf_type) {
		*errorp = hammer_load_buffer(buffer, buf_type);
		if (*errorp) {
			hammer_rel_buffer(buffer, 1);
			buffer = NULL;
		}
	} else {
		*errorp = 0;
	}
	return(buffer);
}
static int
hammer_load_buffer(hammer_buffer_t buffer, u_int64_t buf_type)
{
	hammer_volume_t volume;
	hammer_fsbuf_ondisk_t ondisk;
	int error;

	/*
	 * Load the buffer's on-disk info
	 */
	volume = buffer->volume;
	hammer_lock_ex(&buffer->io.lock);
	if (buffer->ondisk == NULL) {
		if (buf_type)
			error = hammer_io_new(volume->devvp, &buffer->io);
		else
			error = hammer_io_read(volume->devvp, &buffer->io);
		if (error) {
			hammer_unlock(&buffer->io.lock);
			return(error);
		}
		buffer->ondisk = ondisk = (void *)buffer->io.bp->b_data;
		buffer->alist.config = &Buf_alist_config;
		buffer->alist.meta = ondisk->head.buf_almeta;
		buffer->buf_type = ondisk->head.buf_type;
	} else if (buf_type) {
		error = hammer_io_new(volume->devvp, &buffer->io);
	} else {
		error = 0;
	}
	if (error == 0 && buf_type) {
		ondisk = buffer->ondisk;
		initbuffer(&buffer->alist, &ondisk->head, buf_type);
		buffer->buf_type = ondisk->head.buf_type;
	}
	hammer_unlock(&buffer->io.lock);
	return(error);
}
/*
 * NOTE: Called from RB_SCAN, must return >= 0 for scan to continue.
 */
int
hammer_unload_buffer(hammer_buffer_t buffer, void *data __unused)
{
	hammer_ref(&buffer->io.lock);
	hammer_flush_buffer_nodes(buffer);
	KKASSERT(buffer->io.lock.refs == 1);
	hammer_rel_buffer(buffer, 1);
	return(0);
}
/*
 * Reference a buffer that is either already referenced or via a specially
 * handled pointer (aka cursor->buffer).
 */
int
hammer_ref_buffer(hammer_buffer_t buffer)
{
	int error;

	hammer_ref(&buffer->io.lock);
	if (buffer->ondisk == NULL) {
		error = hammer_load_buffer(buffer, 0);
		if (error) {
			hammer_rel_buffer(buffer, 1);
			/*
			 * NOTE: buffer pointer can become stale after
			 * the above release.
			 */
		} else {
			KKASSERT(buffer->buf_type ==
				 buffer->ondisk->head.buf_type);
		}
	} else {
		error = 0;
	}
	return(error);
}
/*
 * Release a buffer.  We have to deal with several places where
 * another thread can ref the buffer.
 *
 * Only destroy the structure itself if the related buffer cache buffer
 * was disassociated from it.  This ties the management of the structure
 * to the buffer cache subsystem.  buffer->ondisk determines whether the
 * embedded io is referenced or not.
 */
void
hammer_rel_buffer(hammer_buffer_t buffer, int flush)
{
	hammer_cluster_t cluster;
	hammer_node_t node;

	if (buffer->io.lock.refs == 1) {
		hammer_lock_ex(&buffer->io.lock);
		if (buffer->io.lock.refs == 1) {
			hammer_io_release(&buffer->io, flush);

			/*
			 * Clean out the B-Tree node cache, if any, then
			 * clean up the cluster ref and free the buffer.
			 *
			 * If the buffer acquires a new reference while we
			 * are trying to clean it out, abort the cleaning.
			 */
			while (buffer->io.bp == NULL &&
			       buffer->io.lock.refs == 1 &&
			       (node = TAILQ_FIRST(&buffer->clist)) != NULL
			) {
				KKASSERT(node->lock.refs == 0);
				hammer_flush_node(node);
			}
			if (buffer->io.bp == NULL &&
			    hammer_islastref(&buffer->io.lock)) {
				cluster = buffer->cluster;
				RB_REMOVE(hammer_buf_rb_tree,
					  &cluster->rb_bufs_root, buffer);
				buffer->cluster = NULL; /* sanity */
				kfree(buffer, M_HAMMER);
				hammer_rel_cluster(cluster, 0);
				return;
			}
		}
		hammer_unlock(&buffer->io.lock);
	}
	hammer_unref(&buffer->io.lock);
}
/*
 * Flush passively cached B-Tree nodes associated with this buffer.
 *
 * NOTE: The buffer is referenced and locked.
 */
void
hammer_flush_buffer_nodes(hammer_buffer_t buffer)
{
	hammer_node_t node;

	node = TAILQ_FIRST(&buffer->clist);
	while (node) {
		buffer->save_scan = TAILQ_NEXT(node, entry);
		if (node->lock.refs == 0)
			hammer_flush_node(node);
		node = buffer->save_scan;
	}
}
/************************************************************************
 *				NODES					*
 ************************************************************************
 *
 * Manage B-Tree nodes.  B-Tree nodes represent the primary indexing
 * method used by the HAMMER filesystem.
 *
 * Unlike other HAMMER structures, a hammer_node can be PASSIVELY
 * associated with its buffer.  It can have an active buffer reference
 * even when the node itself has no references.  The node also passively
 * associates itself with its cluster without holding any cluster refs.
 * The cluster ref is indirectly maintained by the active buffer ref when
 * a node is acquired.
 *
 * A hammer_node can also be passively associated with other HAMMER
 * structures, such as inodes, while retaining 0 references.  These
 * associations can be cleared backwards using a pointer-to-pointer in
 * the hammer_node.
 *
 * This allows the HAMMER implementation to cache hammer_node's long-term
 * and short-cut a great deal of the infrastructure's complexity.  In
 * most cases a cached node can be reacquired without having to dip into
 * either the buffer or cluster management code.
 *
 * The caller must pass a referenced cluster on call and will retain
 * ownership of the reference on return.  The node will acquire its own
 * additional references, if necessary.
 */
hammer_node_t
hammer_get_node(hammer_cluster_t cluster, int32_t node_offset, int *errorp)
{
	hammer_node_t node;

	/*
	 * Locate the structure, allocating one if necessary.
	 */
again:
	node = RB_LOOKUP(hammer_nod_rb_tree, &cluster->rb_nods_root,
			 node_offset);
	if (node == NULL) {
		node = kmalloc(sizeof(*node), M_HAMMER, M_WAITOK|M_ZERO);
		node->node_offset = node_offset;
		node->cluster = cluster;
		if (RB_INSERT(hammer_nod_rb_tree, &cluster->rb_nods_root,
			      node)) {
			kfree(node, M_HAMMER);
			goto again;
		}
	}
	*errorp = hammer_ref_node(node);
	if (*errorp) {
		/*
		 * NOTE: The node pointer may be stale on error return.
		 * In fact, it's probably been destroyed.
		 */
		node = NULL;
	}
	return(node);
}
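/*
 * Illustrative sketch (not part of the original source): acquiring,
 * using and releasing a B-Tree node.  The node pointer must not be
 * touched after hammer_rel_node() since the structure may have been
 * destroyed:
 *
 *	node = hammer_get_node(cluster, node_offset, &error);
 *	if (node) {
 *		... examine node->ondisk ...
 *		hammer_rel_node(node);
 *	}
 */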
/*
 * Reference the node to prevent disassociations, then associate and
 * load the related buffer.  This routine can also be called to reference
 * a node from a cache pointer.
 *
 * NOTE: Because the caller does not have a ref on the node, the caller's
 * node pointer will be stale if an error is returned.  We may also wind
 * up clearing the related cache pointers.
 *
 * NOTE: The cluster is indirectly referenced by our buffer ref.
 */
int
hammer_ref_node(hammer_node_t node)
{
	hammer_buffer_t buffer;
	int32_t buf_no;
	int error;

	hammer_ref(&node->lock);
	error = 0;
	if (node->ondisk == NULL) {
		hammer_lock_ex(&node->lock);
		if (node->ondisk == NULL) {
			/*
			 * This is a little confusing but the gist is that
			 * node->buffer determines whether the node is on
			 * the buffer's clist and node->ondisk determines
			 * whether the buffer is referenced.
			 */
			if ((buffer = node->buffer) != NULL) {
				error = hammer_ref_buffer(buffer);
			} else {
				buf_no = node->node_offset / HAMMER_BUFSIZE;
				buffer = hammer_get_buffer(node->cluster,
							   buf_no, 0, &error);
				KKASSERT(error == 0);
				TAILQ_INSERT_TAIL(&buffer->clist,
						  node, entry);
				node->buffer = buffer;
			}
			if (error == 0) {
				node->ondisk = (void *)((char *)buffer->ondisk +
				    (node->node_offset & HAMMER_BUFMASK));
			}
		}
		hammer_unlock(&node->lock);
	}
	if (error)
		hammer_rel_node(node);
	return(error);
}
/*
 * Release a hammer_node.  The node retains a passive association with
 * its cluster, buffer and caches.
 *
 * However, to avoid cluttering up kernel memory with tons of B-Tree
 * node cache structures we destroy the node if no passive cache or
 * (instantiated) buffer references exist.
 */
void
hammer_rel_node(hammer_node_t node)
{
	hammer_cluster_t cluster;
	hammer_buffer_t buffer;

	if (hammer_islastref(&node->lock)) {
		cluster = node->cluster;

		/*
		 * Clutter control, this case only occurs after a failed
		 * load since otherwise ondisk will be non-NULL.
		 */
		if (node->cache1 == NULL && node->cache2 == NULL &&
		    node->ondisk == NULL) {
			RB_REMOVE(hammer_nod_rb_tree, &cluster->rb_nods_root,
				  node);
			if ((buffer = node->buffer) != NULL) {
				node->buffer = NULL;
				hammer_remove_node_clist(buffer, node);
			}
			kfree(node, M_HAMMER);
			return;
		}

		/*
		 * node->ondisk determines whether we have a buffer reference
		 * to get rid of or not.  Only get rid of the reference if
		 * the kernel tried to flush the buffer.
		 *
		 * NOTE: Once unref'd the node can be physically destroyed,
		 * so our node is stale afterwards.
		 *
		 * This case occurs if the node still has cache references.
		 * We could remove the references and free the structure
		 * but for now we allow them (and the node structure) to
		 * remain intact.
		 */
		if (node->ondisk && hammer_io_checkflush(&node->buffer->io)) {
			buffer = node->buffer;
			node->buffer = NULL;
			node->ondisk = NULL;
			hammer_remove_node_clist(buffer, node);
			hammer_unref(&node->lock);
			hammer_rel_buffer(buffer, 0);
		} else {
			hammer_unref(&node->lock);
		}
	} else {
		hammer_unref(&node->lock);
	}
}
/*
 * Cache-and-release a hammer_node.  Kinda like catching and releasing a
 * fish, but keeping an eye on him.  The node is passively cached in *cache.
 *
 * NOTE!  HAMMER may NULL *cache at any time, even after you have
 * referenced the node!
 */
void
hammer_cache_node(hammer_node_t node, struct hammer_node **cache)
{
	if (node->cache1 != cache) {
		if (node->cache2 == cache) {
			struct hammer_node **tmp;

			tmp = node->cache1;
			node->cache1 = node->cache2;
			node->cache2 = tmp;
		} else {
			if (node->cache2)
				*node->cache2 = NULL;
			node->cache2 = node->cache1;
			node->cache1 = cache;
			*cache = node;
		}
	}
}
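/*
 * Illustrative sketch (not part of the original source): a structure
 * such as an inode can keep a passive pointer to a node it recently
 * used (ip->cache_node is an assumed field name for illustration):
 *
 *	hammer_cache_node(node, &ip->cache_node);
 *	hammer_rel_node(node);
 *	...
 *	hammer_uncache_node(&ip->cache_node);	-- when done with it
 */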
void
hammer_uncache_node(struct hammer_node **cache)
{
	hammer_node_t node;

	if ((node = *cache) != NULL) {
		*cache = NULL;
		if (node->cache1 == cache) {
			node->cache1 = node->cache2;
			node->cache2 = NULL;
		} else if (node->cache2 == cache) {
			node->cache2 = NULL;
		} else {
			panic("hammer_uncache_node: missing cache linkage");
		}
		if (node->cache1 == NULL && node->cache2 == NULL &&
		    node->lock.refs == 0) {
			hammer_flush_node(node);
		}
	}
}
/*
 * Remove a node's cache references and destroy the node if it has no
 * references.  This is typically called from the buffer handling code.
 *
 * The node may have an active buffer reference (ondisk != NULL) even
 * if the node itself has no references.
 *
 * Note that a caller iterating through nodes via a buffer must have its
 * own reference on the buffer or our hammer_rel_buffer() call below may
 * rip it out from under the caller.
 */
void
hammer_flush_node(hammer_node_t node)
{
	hammer_buffer_t buffer;

	if (node->cache1)
		*node->cache1 = NULL;
	if (node->cache2)
		*node->cache2 = NULL;
	if (node->lock.refs == 0) {
		RB_REMOVE(hammer_nod_rb_tree, &node->cluster->rb_nods_root,
			  node);
		if ((buffer = node->buffer) != NULL) {
			node->buffer = NULL;
			hammer_remove_node_clist(buffer, node);
			if (node->ondisk) {
				node->ondisk = NULL;
				hammer_rel_buffer(buffer, 0);
			}
		}
		kfree(node, M_HAMMER);
	}
}
/*
 * Remove a node from the buffer's clist.  Adjust save_scan as appropriate.
 * This is in its own little routine to properly handle interactions with
 * save_scan, so it is possible to block while scanning a buffer's node list.
 */
static
void
hammer_remove_node_clist(hammer_buffer_t buffer, hammer_node_t node)
{
	if (buffer->save_scan == node)
		buffer->save_scan = TAILQ_NEXT(node, entry);
	TAILQ_REMOVE(&buffer->clist, node, entry);
}
/************************************************************************
 *			A-LIST ALLOCATORS				*
 ************************************************************************/
/*
 * Allocate HAMMER elements - btree nodes, data storage, and record elements
 *
 * The passed *bufferp should be initialized to NULL.  On successive calls
 * *bufferp caches the most recent buffer used until put away by the caller.
 * Note that previously returned pointers using the cached buffer become
 * invalid on successive calls which reuse *bufferp.
 *
 * All allocations first attempt to use the block found at the specified
 * iterator.  If that fails the first available block is used.  If that
 * fails a new buffer is allocated and associated with the buffer type
 * A-list and the element is allocated out of the new buffer.
 */
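/*
 * Illustrative sketch (not part of the original source): the *bufferp
 * convention used by the allocators below.  The cached buffer survives
 * across calls and the caller releases it once at the end:
 *
 *	struct hammer_buffer *buffer = NULL;
 *	void *data1, *data2;
 *	int error;
 *
 *	data1 = hammer_alloc_data(cluster, 64, &error, &buffer);
 *	data2 = hammer_alloc_data(cluster, 64, &error, &buffer);
 *	-- data1 may be stale here if the second call switched buffers --
 *	if (buffer)
 *		hammer_rel_buffer(buffer, 0);
 */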
hammer_node_t
hammer_alloc_btree(hammer_cluster_t cluster, int *errorp)
{
	hammer_buffer_t buffer;
	hammer_alist_t live;
	hammer_node_t node;
	int32_t elm_no;
	int32_t buf_no;
	int32_t node_offset;

	/*
	 * Allocate a B-Tree element
	 */
	buffer = NULL;
	live = &cluster->alist_btree;
	elm_no = hammer_alist_alloc_fwd(live, 1, cluster->ondisk->idx_index);
	if (elm_no == HAMMER_ALIST_BLOCK_NONE)
		elm_no = hammer_alist_alloc_fwd(live, 1, 0);
	if (elm_no == HAMMER_ALIST_BLOCK_NONE) {
		alloc_new_buffer(cluster, live,
				 HAMMER_FSBUF_BTREE, HAMMER_BTREE_NODES,
				 cluster->ondisk->idx_index, errorp, &buffer);
		elm_no = hammer_alist_alloc(live, 1);
		if (elm_no == HAMMER_ALIST_BLOCK_NONE) {
			*errorp = ENOSPC;
			if (buffer)
				hammer_rel_buffer(buffer, 0);
			hammer_modify_cluster(cluster);
			return(NULL);
		}
	}
	cluster->ondisk->idx_index = elm_no;
	KKASSERT((elm_no & HAMMER_FSBUF_BLKMASK) < HAMMER_BTREE_NODES);
	hammer_modify_cluster(cluster);

	/*
	 * Load and return the B-Tree element
	 */
	buf_no = elm_no / HAMMER_FSBUF_MAXBLKS;
	node_offset = buf_no * HAMMER_BUFSIZE +
		      offsetof(union hammer_fsbuf_ondisk,
			       btree.nodes[elm_no & HAMMER_FSBUF_BLKMASK]);
	node = hammer_get_node(cluster, node_offset, errorp);
	if (node) {
		bzero(node->ondisk, sizeof(*node->ondisk));
	} else {
		hammer_alist_free(live, elm_no, 1);
	}
	if (buffer)
		hammer_rel_buffer(buffer, 0);
	return(node);
}
void *
hammer_alloc_data(hammer_cluster_t cluster, int32_t bytes,
		  int *errorp, struct hammer_buffer **bufferp)
{
	hammer_buffer_t buffer;
	hammer_alist_t live;
	int32_t elm_no;
	int32_t buf_no;
	int32_t nblks;
	void *item;

	/*
	 * Deal with large data blocks.  The blocksize is HAMMER_BUFSIZE
	 * for these allocations.
	 */
	if ((bytes & HAMMER_BUFMASK) == 0) {
		nblks = bytes / HAMMER_BUFSIZE;
		/* only one block allowed for now (so buffer can hold it) */
		KKASSERT(nblks == 1);

		buf_no = hammer_alloc_master(cluster, nblks,
					     cluster->ondisk->idx_ldata, 1);
		if (buf_no == HAMMER_ALIST_BLOCK_NONE) {
			*errorp = ENOSPC;
			return(NULL);
		}
		hammer_adjust_stats(cluster, HAMMER_FSBUF_DATA, nblks);
		cluster->ondisk->idx_ldata = buf_no;
		buffer = *bufferp;
		*bufferp = hammer_get_buffer(cluster, buf_no, -1, errorp);
		if (buffer)
			hammer_rel_buffer(buffer, 0);
		buffer = *bufferp;
		return(buffer->ondisk);
	}

	/*
	 * Allocate a data element.  The block size is HAMMER_DATA_BLKSIZE
	 * (64 bytes) for these allocations.
	 */
	nblks = (bytes + HAMMER_DATA_BLKMASK) & ~HAMMER_DATA_BLKMASK;
	nblks /= HAMMER_DATA_BLKSIZE;
	live = &cluster->alist_mdata;
	elm_no = hammer_alist_alloc_fwd(live, nblks, cluster->ondisk->idx_data);
	if (elm_no == HAMMER_ALIST_BLOCK_NONE)
		elm_no = hammer_alist_alloc_fwd(live, 1, 0);
	if (elm_no == HAMMER_ALIST_BLOCK_NONE) {
		alloc_new_buffer(cluster, live,
				 HAMMER_FSBUF_DATA, HAMMER_DATA_NODES,
				 cluster->ondisk->idx_data, errorp, bufferp);
		elm_no = hammer_alist_alloc(live, nblks);
		if (elm_no == HAMMER_ALIST_BLOCK_NONE) {
			*errorp = ENOSPC;
			hammer_modify_cluster(cluster);
			return(NULL);
		}
	}
	cluster->ondisk->idx_data = elm_no;
	hammer_modify_cluster(cluster);

	/*
	 * Load and return the data element
	 */
	buf_no = elm_no / HAMMER_FSBUF_MAXBLKS;
	buffer = *bufferp;
	if (buffer == NULL || buffer->cluster != cluster ||
	    buffer->buf_no != buf_no) {
		if (buffer)
			hammer_rel_buffer(buffer, 0);
		buffer = hammer_get_buffer(cluster, buf_no, 0, errorp);
		*bufferp = buffer;
	}
	KKASSERT(buffer->ondisk->head.buf_type == HAMMER_FSBUF_DATA);
	KKASSERT((elm_no & HAMMER_FSBUF_BLKMASK) < HAMMER_DATA_NODES);
	item = &buffer->ondisk->data.data[elm_no & HAMMER_FSBUF_BLKMASK];
	bzero(item, nblks * HAMMER_DATA_BLKSIZE);
	*errorp = 0;
	return(item);
}
void *
hammer_alloc_record(hammer_cluster_t cluster,
		    int *errorp, struct hammer_buffer **bufferp)
{
	hammer_buffer_t buffer;
	hammer_alist_t live;
	int32_t elm_no;
	int32_t buf_no;
	void *item;

	/*
	 * Allocate a record element
	 */
	live = &cluster->alist_record;
	elm_no = hammer_alist_alloc_rev(live, 1, cluster->ondisk->idx_record);
	if (elm_no == HAMMER_ALIST_BLOCK_NONE)
		elm_no = hammer_alist_alloc_rev(live, 1,HAMMER_ALIST_BLOCK_MAX);
	if (elm_no == HAMMER_ALIST_BLOCK_NONE) {
		alloc_new_buffer(cluster, live,
				 HAMMER_FSBUF_RECORDS, HAMMER_RECORD_NODES,
				 cluster->ondisk->idx_record, errorp, bufferp);
		elm_no = hammer_alist_alloc_rev(live, 1,HAMMER_ALIST_BLOCK_MAX);
		kprintf("hammer_alloc_record elm again %08x\n", elm_no);
		if (elm_no == HAMMER_ALIST_BLOCK_NONE) {
			*errorp = ENOSPC;
			hammer_modify_cluster(cluster);
			return(NULL);
		}
	}
	cluster->ondisk->idx_record = elm_no;
	hammer_modify_cluster(cluster);

	/*
	 * Load and return the record element
	 */
	buf_no = elm_no / HAMMER_FSBUF_MAXBLKS;
	buffer = *bufferp;
	if (buffer == NULL || buffer->cluster != cluster ||
	    buffer->buf_no != buf_no) {
		if (buffer)
			hammer_rel_buffer(buffer, 0);
		buffer = hammer_get_buffer(cluster, buf_no, 0, errorp);
		*bufferp = buffer;
	}
	KKASSERT(buffer->ondisk->head.buf_type == HAMMER_FSBUF_RECORDS);
	KKASSERT((elm_no & HAMMER_FSBUF_BLKMASK) < HAMMER_RECORD_NODES);
	item = &buffer->ondisk->record.recs[elm_no & HAMMER_FSBUF_BLKMASK];
	bzero(item, sizeof(union hammer_record_ondisk));
	*errorp = 0;
	return(item);
}
void
hammer_free_data_ptr(hammer_buffer_t buffer, void *data, int bytes)
{
	int32_t elm_no;
	int32_t nblks;
	hammer_alist_t live;

	if ((bytes & HAMMER_BUFMASK) == 0) {
		nblks = bytes / HAMMER_BUFSIZE;
		KKASSERT(nblks == 1 && data == (void *)buffer->ondisk);
		hammer_alist_free(&buffer->cluster->alist_master,
				  buffer->buf_no, nblks);
		hammer_adjust_stats(buffer->cluster, HAMMER_FSBUF_DATA, -nblks);
		hammer_modify_cluster(buffer->cluster);
		return;
	}

	elm_no = ((char *)data - (char *)buffer->ondisk->data.data) /
		 HAMMER_DATA_BLKSIZE;
	KKASSERT(elm_no >= 0 && elm_no < HAMMER_DATA_NODES);
	elm_no += buffer->buf_no * HAMMER_FSBUF_MAXBLKS;
	nblks = (bytes + HAMMER_DATA_BLKMASK) & ~HAMMER_DATA_BLKMASK;
	nblks /= HAMMER_DATA_BLKSIZE;
	live = &buffer->cluster->alist_mdata;
	hammer_alist_free(live, elm_no, nblks);
	hammer_modify_cluster(buffer->cluster);
}
void
hammer_free_record_ptr(hammer_buffer_t buffer, union hammer_record_ondisk *rec)
{
	int32_t elm_no;
	hammer_alist_t live;

	elm_no = rec - &buffer->ondisk->record.recs[0];
	KKASSERT(elm_no >= 0 && elm_no < HAMMER_RECORD_NODES);
	elm_no += buffer->buf_no * HAMMER_FSBUF_MAXBLKS;
	live = &buffer->cluster->alist_record;
	hammer_alist_free(live, elm_no, 1);
	hammer_modify_cluster(buffer->cluster);
}
void
hammer_free_btree(hammer_cluster_t cluster, int32_t bclu_offset)
{
	const int32_t blksize = sizeof(struct hammer_node_ondisk);
	int32_t fsbuf_offset = bclu_offset & HAMMER_BUFMASK;
	hammer_alist_t live;
	int32_t elm_no;

	elm_no = bclu_offset / HAMMER_BUFSIZE * HAMMER_FSBUF_MAXBLKS;
	fsbuf_offset -= offsetof(union hammer_fsbuf_ondisk, btree.nodes[0]);
	live = &cluster->alist_btree;
	KKASSERT(fsbuf_offset >= 0 && fsbuf_offset % blksize == 0);
	elm_no += fsbuf_offset / blksize;
	hammer_alist_free(live, elm_no, 1);
	hammer_modify_cluster(cluster);
}
void
hammer_free_data(hammer_cluster_t cluster, int32_t bclu_offset, int32_t bytes)
{
	const int32_t blksize = HAMMER_DATA_BLKSIZE;
	int32_t fsbuf_offset = bclu_offset & HAMMER_BUFMASK;
	hammer_alist_t live;
	int32_t elm_no;
	int32_t buf_no;
	int32_t nblks;

	if ((bytes & HAMMER_BUFMASK) == 0) {
		nblks = bytes / HAMMER_BUFSIZE;
		KKASSERT(nblks == 1 && (bclu_offset & HAMMER_BUFMASK) == 0);
		buf_no = bclu_offset / HAMMER_BUFSIZE;
		hammer_alist_free(&cluster->alist_master, buf_no, nblks);
		hammer_adjust_stats(cluster, HAMMER_FSBUF_DATA, -nblks);
		hammer_modify_cluster(cluster);
		return;
	}

	elm_no = bclu_offset / HAMMER_BUFSIZE * HAMMER_FSBUF_MAXBLKS;
	fsbuf_offset -= offsetof(union hammer_fsbuf_ondisk, data.data[0][0]);
	live = &cluster->alist_mdata;
	nblks = (bytes + HAMMER_DATA_BLKMASK) & ~HAMMER_DATA_BLKMASK;
	nblks /= HAMMER_DATA_BLKSIZE;
	KKASSERT(fsbuf_offset >= 0 && fsbuf_offset % blksize == 0);
	elm_no += fsbuf_offset / blksize;
	hammer_alist_free(live, elm_no, nblks);
	hammer_modify_cluster(cluster);
}
void
hammer_free_record(hammer_cluster_t cluster, int32_t bclu_offset)
{
	const int32_t blksize = sizeof(union hammer_record_ondisk);
	int32_t fsbuf_offset = bclu_offset & HAMMER_BUFMASK;
	hammer_alist_t live;
	int32_t elm_no;

	elm_no = bclu_offset / HAMMER_BUFSIZE * HAMMER_FSBUF_MAXBLKS;
	fsbuf_offset -= offsetof(union hammer_fsbuf_ondisk, record.recs[0]);
	live = &cluster->alist_record;
	KKASSERT(fsbuf_offset >= 0 && fsbuf_offset % blksize == 0);
	elm_no += fsbuf_offset / blksize;
	hammer_alist_free(live, elm_no, 1);
	hammer_modify_cluster(cluster);
}
/*
 * Allocate a new filesystem buffer and assign it to the specified
 * filesystem buffer type.  The new buffer will be added to the
 * type-specific A-list and initialized.
 */
static void
alloc_new_buffer(hammer_cluster_t cluster, hammer_alist_t live,
		 u_int64_t type, int32_t nelements,
		 int start, int *errorp, struct hammer_buffer **bufferp)
{
	hammer_buffer_t buffer;
	int32_t buf_no;
	int isfwd;

	start = start / HAMMER_FSBUF_MAXBLKS;	/* convert to buf_no */

	isfwd = (type != HAMMER_FSBUF_RECORDS);
	buf_no = hammer_alloc_master(cluster, 1, start, isfwd);
	KKASSERT(buf_no != HAMMER_ALIST_BLOCK_NONE); /* XXX */
	hammer_modify_cluster(cluster);

	/*
	 * The new buffer must be initialized (type != 0) regardless of
	 * whether we already have it cached or not, so don't try to
	 * optimize the cached buffer check.  Just call hammer_get_buffer().
	 */
	buffer = hammer_get_buffer(cluster, buf_no, type, errorp);
	if (*bufferp)
		hammer_rel_buffer(*bufferp, 0);
	*bufferp = buffer;

	/*
	 * Finally, do a meta-free of the buffer's elements into the
	 * type-specific A-list and update our statistics to reflect
	 * the allocation.
	 */
	kprintf("alloc_new_buffer buf_no %d type %016llx nelms %d\n",
		buf_no, type, nelements);
	hammer_alist_free(live, buf_no * HAMMER_FSBUF_MAXBLKS,
			  nelements);
	hammer_adjust_stats(cluster, type, 1);
}
/*
 * Sync dirty buffers to the media
 */

/*
 * Sync the entire filesystem.
 */
int
hammer_sync_hmp(hammer_mount_t hmp, int waitfor)
{
	struct hammer_sync_info info;

	info.error = 0;
	info.waitfor = waitfor;

	RB_SCAN(hammer_vol_rb_tree, &hmp->rb_vols_root, NULL,
		hammer_sync_volume, &info);
	return(info.error);
}
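/*
 * Illustrative sketch (not part of the original source): the VFS sync
 * entry point would drive this with the caller's waitfor policy, using
 * the standard kernel constants MNT_WAIT / MNT_NOWAIT, e.g.:
 *
 *	error = hammer_sync_hmp(hmp, MNT_NOWAIT);
 */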
int
hammer_sync_volume(hammer_volume_t volume, void *data)
{
	struct hammer_sync_info *info = data;

	RB_SCAN(hammer_clu_rb_tree, &volume->rb_clus_root, NULL,
		hammer_sync_cluster, info);
	if (hammer_ref_volume(volume) == 0) {
		hammer_io_flush(&volume->io, info);
		hammer_rel_volume(volume, 0);
	}
	return(0);
}

int
hammer_sync_cluster(hammer_cluster_t cluster, void *data)
{
	struct hammer_sync_info *info = data;

	if (cluster->state != HAMMER_CLUSTER_IDLE) {
		RB_SCAN(hammer_buf_rb_tree, &cluster->rb_bufs_root, NULL,
			hammer_sync_buffer, info);
		if (hammer_ref_cluster(cluster) == 0) {
			hammer_io_flush(&cluster->io, info);
			hammer_rel_cluster(cluster, 0);
		}
	}
	return(0);
}

int
hammer_sync_buffer(hammer_buffer_t buffer, void *data)
{
	struct hammer_sync_info *info = data;

	if (hammer_ref_buffer(buffer) == 0) {
		hammer_lock_ex(&buffer->io.lock);
		hammer_flush_buffer_nodes(buffer);
		hammer_unlock(&buffer->io.lock);
		hammer_io_flush(&buffer->io, info);
		hammer_rel_buffer(buffer, 0);
	}
	return(0);
}
#if 0
/*
 * Flush various tracking structures to disk
 */
static void
flush_all_volumes(void)
{
	hammer_volume_t vol;

	for (vol = VolBase; vol; vol = vol->next)
		flush_volume(vol);
}

static void
flush_volume(hammer_volume_t vol)
{
	hammer_supercl_t supercl;
	hammer_cluster_t cl;

	for (supercl = vol->supercl_base; supercl; supercl = supercl->next)
		flush_supercl(supercl);
	for (cl = vol->cluster_base; cl; cl = cl->next)
		flush_cluster(cl);
	writehammerbuf(vol, vol->ondisk, 0);
}

static void
flush_supercl(hammer_supercl_t supercl)
{
	int64_t supercl_offset;

	supercl_offset = supercl->scl_offset;
	writehammerbuf(supercl->volume, supercl->ondisk, supercl_offset);
}

static void
flush_cluster(hammer_cluster_t cl)
{
	hammer_buffer_t buf;
	int64_t cluster_offset;

	for (buf = cl->buffer_base; buf; buf = buf->next)
		flush_buffer(buf);
	cluster_offset = cl->clu_offset;
	writehammerbuf(cl->volume, cl->ondisk, cluster_offset);
}

static void
flush_buffer(hammer_buffer_t buf)
{
	int64_t buffer_offset;

	buffer_offset = buf->buf_offset + buf->cluster->clu_offset;
	writehammerbuf(buf->volume, buf->ondisk, buffer_offset);
}
#endif
/*
 * Generic buffer initialization
 */
static void
initbuffer(hammer_alist_t live, hammer_fsbuf_head_t head, u_int64_t type)
{
	head->buf_type = type;
	hammer_alist_init(live);
}
/*
 * Calculate the cluster's offset in the volume.  This calculation is
 * slightly more complex when using superclusters because superclusters
 * are grouped in blocks of 16, followed by 16 x N clusters where N
 * is the number of clusters a supercluster can manage.
 */
static int64_t
calculate_cluster_offset(hammer_volume_t volume, int32_t clu_no)
{
	int32_t scl_group;
	int64_t scl_group_size;
	int64_t off;

	if (volume->vol_flags & HAMMER_VOLF_USINGSUPERCL) {
		scl_group = clu_no / HAMMER_VOL_SUPERCLUSTER_GROUP /
			    HAMMER_SCL_MAXCLUSTERS;
		scl_group_size =
			    ((int64_t)HAMMER_BUFSIZE *
			     HAMMER_VOL_SUPERCLUSTER_GROUP) +
			    ((int64_t)HAMMER_VOL_SUPERCLUSTER_GROUP *
			     volume->vol_clsize * HAMMER_SCL_MAXCLUSTERS);
		scl_group_size +=
			    HAMMER_VOL_SUPERCLUSTER_GROUP * HAMMER_BUFSIZE;

		off = volume->cluster_base +
		      scl_group * scl_group_size +
		      (HAMMER_BUFSIZE * HAMMER_VOL_SUPERCLUSTER_GROUP) +
		      ((int64_t)clu_no % ((int64_t)HAMMER_SCL_MAXCLUSTERS *
		       HAMMER_VOL_SUPERCLUSTER_GROUP)) *
		      volume->vol_clsize;
	} else {
		off = volume->cluster_base +
		      (int64_t)clu_no * volume->vol_clsize;
	}
	return(off);
}
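/*
 * Illustrative sketch (not part of the original source), derived from
 * the code above.  With G = HAMMER_VOL_SUPERCLUSTER_GROUP and
 * N = HAMMER_SCL_MAXCLUSTERS, the layout repeats in groups of
 * (G supercluster buffers + G*N clusters), so for cluster clu_no:
 *
 *	group = clu_no / (G * N)
 *	off   = cluster_base
 *	      + group * scl_group_size
 *	      + G * HAMMER_BUFSIZE		-- skip the group's headers
 *	      + (clu_no % (G * N)) * vol_clsize
 */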
/*
 * Calculate a super-cluster's offset in the volume.
 */
static int64_t
calculate_supercl_offset(hammer_volume_t volume, int32_t scl_no)
{
	int64_t off;
	int32_t scl_group;
	int64_t scl_group_size;

	KKASSERT (volume->vol_flags & HAMMER_VOLF_USINGSUPERCL);
	scl_group = scl_no / HAMMER_VOL_SUPERCLUSTER_GROUP;
	if (scl_group) {
		scl_group_size =
			    ((int64_t)HAMMER_BUFSIZE *
			     HAMMER_VOL_SUPERCLUSTER_GROUP) +
			    ((int64_t)HAMMER_VOL_SUPERCLUSTER_GROUP *
			     volume->vol_clsize * HAMMER_SCL_MAXCLUSTERS);
		scl_group_size +=
			    HAMMER_VOL_SUPERCLUSTER_GROUP * HAMMER_BUFSIZE;
		off = volume->cluster_base + (scl_group * scl_group_size) +
		      (scl_no % HAMMER_VOL_SUPERCLUSTER_GROUP) * HAMMER_BUFSIZE;
	} else {
		off = volume->cluster_base + (scl_no * HAMMER_BUFSIZE);
	}
	return(off);
}
/*
 * Allocate nblks buffers from the cluster's master A-list, starting the
 * scan at the specified iterator and in the specified direction.
 */
static int32_t
hammer_alloc_master(hammer_cluster_t cluster, int nblks,
		    int32_t start, int isfwd)
{
	int32_t buf_no;

	if (isfwd) {
		buf_no = hammer_alist_alloc_fwd(&cluster->alist_master,
						nblks, start);
		if (buf_no == HAMMER_ALIST_BLOCK_NONE) {
			buf_no = hammer_alist_alloc_fwd(&cluster->alist_master,
						nblks, 0);
		}
	} else {
		buf_no = hammer_alist_alloc_rev(&cluster->alist_master,
						nblks, start);
		if (buf_no == HAMMER_ALIST_BLOCK_NONE) {
			buf_no = hammer_alist_alloc_rev(&cluster->alist_master,
						nblks, HAMMER_ALIST_BLOCK_MAX);
		}
	}

	/*
	 * Recover space from empty record, b-tree, and data a-lists.
	 */
	return(buf_no);
}
/*
 * Adjust allocation statistics
 */
static void
hammer_adjust_stats(hammer_cluster_t cluster, u_int64_t buf_type, int nblks)
{
	switch(buf_type) {
	case HAMMER_FSBUF_BTREE:
		cluster->ondisk->stat_idx_bufs += nblks;
		cluster->volume->ondisk->vol_stat_idx_bufs += nblks;
		cluster->volume->hmp->rootvol->ondisk->vol0_stat_idx_bufs += nblks;
		break;
	case HAMMER_FSBUF_DATA:
		cluster->ondisk->stat_data_bufs += nblks;
		cluster->volume->ondisk->vol_stat_data_bufs += nblks;
		cluster->volume->hmp->rootvol->ondisk->vol0_stat_data_bufs += nblks;
		break;
	case HAMMER_FSBUF_RECORDS:
		cluster->ondisk->stat_rec_bufs += nblks;
		cluster->volume->ondisk->vol_stat_rec_bufs += nblks;
		cluster->volume->hmp->rootvol->ondisk->vol0_stat_rec_bufs += nblks;
		break;
	}
	hammer_modify_cluster(cluster);
	hammer_modify_volume(cluster->volume);
	hammer_modify_volume(cluster->volume->hmp->rootvol);
}
/*
 * Setup the parameters for the various A-lists we use in hammer.  The
 * supercluster A-list must be chained to the cluster A-list and cluster
 * slave A-lists are chained to buffer A-lists.
 *
 * See hammer_init_alist_config() below.
 */

/*
 * A-LIST - cluster recursion into a filesystem buffer
 */
static int
buffer_alist_init(void *info, int32_t blk, int32_t radix)
{
	hammer_cluster_t cluster = info;
	hammer_buffer_t buffer;
	int32_t buf_no;
	int error = 0;

	/*
	 * Calculate the buffer number, initialize based on the buffer type.
	 * The buffer has already been allocated so assert that it has been
	 * initialized.
	 */
	buf_no = blk / HAMMER_FSBUF_MAXBLKS;
	buffer = hammer_get_buffer(cluster, buf_no, 0, &error);
	if (buffer) {
		hammer_adjust_stats(cluster, buffer->ondisk->head.buf_type, 1);
		hammer_rel_buffer(buffer, 0);
	}
	return(error);
}
static int
buffer_alist_destroy(void *info, int32_t blk, int32_t radix)
{
	hammer_cluster_t cluster = info;
	hammer_buffer_t buffer;
	int32_t buf_no;
	int error = 0;

	/*
	 * Calculate the buffer number, initialize based on the buffer type.
	 * The buffer has already been allocated so assert that it has been
	 * initialized.
	 */
	buf_no = blk / HAMMER_FSBUF_MAXBLKS;
	buffer = hammer_get_buffer(cluster, buf_no, 0, &error);
	if (buffer) {
		hammer_adjust_stats(cluster, buffer->ondisk->head.buf_type, -1);
		hammer_rel_buffer(buffer, 0);
	}
	return(error);
}
/*
 * Note: atblk can be negative and atblk - blk can go negative.
 */
static int32_t
buffer_alist_alloc_fwd(void *info, int32_t blk, int32_t radix,
		      int32_t count, int32_t atblk, int32_t *fullp)
{
	hammer_cluster_t cluster = info;
	hammer_buffer_t buffer;
	int32_t buf_no;
	int32_t r;
	int error = 0;

	buf_no = blk / HAMMER_FSBUF_MAXBLKS;
	buffer = hammer_get_buffer(cluster, buf_no, 0, &error);
	if (buffer) {
		KKASSERT(buffer->ondisk->head.buf_type != 0);

		r = hammer_alist_alloc_fwd(&buffer->alist, count, atblk - blk);
		if (r != HAMMER_ALIST_BLOCK_NONE)
			r += blk;
		*fullp = hammer_alist_isfull(&buffer->alist);
		hammer_modify_buffer(buffer);
		hammer_rel_buffer(buffer, 0);
	} else {
		r = HAMMER_ALIST_BLOCK_NONE;
		*fullp = 0;
	}
	return(r);
}
static int32_t
buffer_alist_alloc_rev(void *info, int32_t blk, int32_t radix,
		      int32_t count, int32_t atblk, int32_t *fullp)
{
	hammer_cluster_t cluster = info;
	hammer_buffer_t buffer;
	int32_t buf_no;
	int32_t r;
	int error = 0;

	buf_no = blk / HAMMER_FSBUF_MAXBLKS;
	buffer = hammer_get_buffer(cluster, buf_no, 0, &error);
	if (buffer) {
		KKASSERT(buffer->ondisk->head.buf_type != 0);

		r = hammer_alist_alloc_rev(&buffer->alist, count, atblk - blk);
		if (r != HAMMER_ALIST_BLOCK_NONE)
			r += blk;
		*fullp = hammer_alist_isfull(&buffer->alist);
		hammer_modify_buffer(buffer);
		hammer_rel_buffer(buffer, 0);
	} else {
		r = HAMMER_ALIST_BLOCK_NONE;
		*fullp = 0;
	}
	return(r);
}
static void
buffer_alist_free(void *info, int32_t blk, int32_t radix,
		 int32_t base_blk, int32_t count, int32_t *emptyp)
{
	hammer_cluster_t cluster = info;
	hammer_buffer_t buffer;
	int32_t buf_no;
	int error = 0;

	buf_no = blk / HAMMER_FSBUF_MAXBLKS;
	buffer = hammer_get_buffer(cluster, buf_no, 0, &error);
	if (buffer) {
		KKASSERT(buffer->ondisk->head.buf_type != 0);
		hammer_alist_free(&buffer->alist, base_blk, count);
		*emptyp = hammer_alist_isempty(&buffer->alist);
		/* XXX don't bother updating the buffer if it is completely empty? */
		hammer_modify_buffer(buffer);
		hammer_rel_buffer(buffer, 0);
	} else {
		*emptyp = 0;
	}
}
static void
buffer_alist_print(void *info, int32_t blk, int32_t radix, int tab)
{
}
/*
 * A-LIST - super-cluster recursion into a cluster and cluster recursion
 * into a filesystem buffer.  A-List's are mostly self-contained entities,
 * but callbacks must be installed to recurse from one A-List to another.
 *
 * Implementing these callbacks allows us to operate a multi-layered A-List
 * as a single entity.
 */
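/*
 * Illustrative sketch (not part of the original source): a single
 * top-level allocation against a cluster's slave A-list, e.g.
 *
 *	elm_no = hammer_alist_alloc_fwd(&cluster->alist_btree, 1, iter);
 *
 * may invoke buffer_alist_alloc_fwd() above with 'info' set to the
 * cluster; that callback resolves the underlying filesystem buffer and
 * allocates from the buffer's own A-list, returning the result to the
 * caller as one logical block number.
 */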
static int
super_alist_init(void *info, int32_t blk, int32_t radix)
{
	hammer_volume_t volume = info;
	hammer_supercl_t supercl;
	int32_t scl_no;
	int error = 0;

	/*
	 * Calculate the super-cluster number containing the cluster (blk)
	 * and obtain the super-cluster buffer.
	 */
	scl_no = blk / HAMMER_SCL_MAXCLUSTERS;
	supercl = hammer_get_supercl(volume, scl_no, &error, 1);
	if (supercl)
		hammer_rel_supercl(supercl, 0);
	return(error);
}
static int
super_alist_destroy(void *info, int32_t blk, int32_t radix)
{
	return(0);
}
static int32_t
super_alist_alloc_fwd(void *info, int32_t blk, int32_t radix,
		      int32_t count, int32_t atblk, int32_t *fullp)
{
	hammer_volume_t volume = info;
	hammer_supercl_t supercl;
	int32_t scl_no;
	int32_t r;
	int error = 0;

	scl_no = blk / HAMMER_SCL_MAXCLUSTERS;
	supercl = hammer_get_supercl(volume, scl_no, &error, 1);
	if (supercl) {
		r = hammer_alist_alloc_fwd(&supercl->alist, count, atblk - blk);
		if (r != HAMMER_ALIST_BLOCK_NONE)
			r += blk;
		*fullp = hammer_alist_isfull(&supercl->alist);
		hammer_modify_supercl(supercl);
		hammer_rel_supercl(supercl, 0);
	} else {
		r = HAMMER_ALIST_BLOCK_NONE;
		*fullp = 0;
	}
	return(r);
}
static int32_t
super_alist_alloc_rev(void *info, int32_t blk, int32_t radix,
		      int32_t count, int32_t atblk, int32_t *fullp)
{
	hammer_volume_t volume = info;
	hammer_supercl_t supercl;
	int32_t scl_no;
	int32_t r;
	int error = 0;

	scl_no = blk / HAMMER_SCL_MAXCLUSTERS;
	supercl = hammer_get_supercl(volume, scl_no, &error, 1);
	if (supercl) {
		r = hammer_alist_alloc_rev(&supercl->alist, count, atblk - blk);
		if (r != HAMMER_ALIST_BLOCK_NONE)
			r += blk;
		*fullp = hammer_alist_isfull(&supercl->alist);
		hammer_modify_supercl(supercl);
		hammer_rel_supercl(supercl, 0);
	} else {
		r = HAMMER_ALIST_BLOCK_NONE;
		*fullp = 0;
	}
	return(r);
}
static void
super_alist_free(void *info, int32_t blk, int32_t radix,
		 int32_t base_blk, int32_t count, int32_t *emptyp)
{
	hammer_volume_t volume = info;
	hammer_supercl_t supercl;
	int32_t scl_no;
	int error = 0;

	scl_no = blk / HAMMER_SCL_MAXCLUSTERS;
	supercl = hammer_get_supercl(volume, scl_no, &error, 1);
	if (supercl) {
		hammer_alist_free(&supercl->alist, base_blk, count);
		*emptyp = hammer_alist_isempty(&supercl->alist);
		hammer_modify_supercl(supercl);
		hammer_rel_supercl(supercl, 0);
	} else {
		*emptyp = 0;
	}
}
static void
super_alist_print(void *info, int32_t blk, int32_t radix, int tab)
{
}
void
hammer_init_alist_config(void)
{
	hammer_alist_config_t config;

	hammer_alist_template(&Buf_alist_config, HAMMER_FSBUF_MAXBLKS,
			      1, HAMMER_FSBUF_METAELMS);
	hammer_alist_template(&Vol_normal_alist_config, HAMMER_VOL_MAXCLUSTERS,
			      1, HAMMER_VOL_METAELMS_1LYR);
	hammer_alist_template(&Vol_super_alist_config,
			  HAMMER_VOL_MAXSUPERCLUSTERS * HAMMER_SCL_MAXCLUSTERS,
			      HAMMER_SCL_MAXCLUSTERS, HAMMER_VOL_METAELMS_2LYR);
	hammer_alist_template(&Supercl_alist_config, HAMMER_VOL_MAXCLUSTERS,
			      1, HAMMER_SUPERCL_METAELMS);
	hammer_alist_template(&Clu_master_alist_config, HAMMER_CLU_MAXBUFFERS,
			      1, HAMMER_CLU_MASTER_METAELMS);
	hammer_alist_template(&Clu_slave_alist_config,
			      HAMMER_CLU_MAXBUFFERS * HAMMER_FSBUF_MAXBLKS,
			      HAMMER_FSBUF_MAXBLKS, HAMMER_CLU_SLAVE_METAELMS);

	config = &Vol_super_alist_config;
	config->bl_radix_init = super_alist_init;
	config->bl_radix_destroy = super_alist_destroy;
	config->bl_radix_alloc_fwd = super_alist_alloc_fwd;
	config->bl_radix_alloc_rev = super_alist_alloc_rev;
	config->bl_radix_free = super_alist_free;
	config->bl_radix_print = super_alist_print;

	config = &Clu_slave_alist_config;
	config->bl_radix_init = buffer_alist_init;
	config->bl_radix_destroy = buffer_alist_destroy;
	config->bl_radix_alloc_fwd = buffer_alist_alloc_fwd;
	config->bl_radix_alloc_rev = buffer_alist_alloc_rev;
	config->bl_radix_free = buffer_alist_free;
	config->bl_radix_print = buffer_alist_print;
}