/*
 * Copyright (c) 2007 The DragonFly Project.  All rights reserved.
 *
 * This code is derived from software contributed to The DragonFly Project
 * by Matthew Dillon <dillon@backplane.com>
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in
 *    the documentation and/or other materials provided with the
 *    distribution.
 * 3. Neither the name of The DragonFly Project nor the names of its
 *    contributors may be used to endorse or promote products derived
 *    from this software without specific, prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
 * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING,
 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 * $DragonFly: src/sys/vfs/hammer/hammer_ondisk.c,v 1.8 2007/11/27 07:48:52 dillon Exp $
 */
/*
 * Manage HAMMER's on-disk structures.  These routines are primarily
 * responsible for interfacing with the kernel's I/O subsystem and for
 * managing in-memory structures.
 */
#include <sys/fcntl.h>
#include <sys/nlookup.h>
#include <sys/buf.h>

#include "hammer.h"

static void hammer_free_volume(hammer_volume_t volume);
static int hammer_load_volume(hammer_volume_t volume);
static int hammer_load_supercl(hammer_supercl_t supercl, int isnew);
static int hammer_load_cluster(hammer_cluster_t cluster, int isnew);
static int hammer_load_buffer(hammer_buffer_t buffer, u_int64_t buf_type);
static void hammer_remove_node_clist(hammer_buffer_t buffer,
			hammer_node_t node);
static void initbuffer(hammer_alist_t live, hammer_fsbuf_head_t head,
			u_int64_t type);
static void alloc_new_buffer(hammer_cluster_t cluster,
			hammer_alist_t live, u_int64_t type, int32_t nelements,
			int start, int *errorp, struct hammer_buffer **bufferp);
static void readhammerbuf(hammer_volume_t vol, void *data,
			int64_t offset);
static void writehammerbuf(hammer_volume_t vol, const void *data,
			int64_t offset);
static int64_t calculate_cluster_offset(hammer_volume_t vol, int32_t clu_no);
static int64_t calculate_supercl_offset(hammer_volume_t vol, int32_t scl_no);
70 struct hammer_alist_config Buf_alist_config;
71 struct hammer_alist_config Vol_normal_alist_config;
72 struct hammer_alist_config Vol_super_alist_config;
73 struct hammer_alist_config Supercl_alist_config;
74 struct hammer_alist_config Clu_master_alist_config;
75 struct hammer_alist_config Clu_slave_alist_config;
78 * Red-Black tree support for various structures
81 hammer_ino_rb_compare(hammer_inode_t ip1, hammer_inode_t ip2)
83 if (ip1->obj_id < ip2->obj_id)
85 if (ip1->obj_id > ip2->obj_id)
87 if (ip1->obj_asof < ip2->obj_asof)
89 if (ip1->obj_asof > ip2->obj_asof)
95 hammer_inode_info_cmp(hammer_inode_info_t info, hammer_inode_t ip)
97 if (info->obj_id < ip->obj_id)
99 if (info->obj_id > ip->obj_id)
101 if (info->obj_asof < ip->obj_asof)
103 if (info->obj_asof > ip->obj_asof)
109 hammer_vol_rb_compare(hammer_volume_t vol1, hammer_volume_t vol2)
111 if (vol1->vol_no < vol2->vol_no)
113 if (vol1->vol_no > vol2->vol_no)
119 hammer_scl_rb_compare(hammer_supercl_t cl1, hammer_supercl_t cl2)
121 if (cl1->scl_no < cl2->scl_no)
123 if (cl1->scl_no > cl2->scl_no)
129 hammer_clu_rb_compare(hammer_cluster_t cl1, hammer_cluster_t cl2)
131 if (cl1->clu_no < cl2->clu_no)
133 if (cl1->clu_no > cl2->clu_no)
139 hammer_buf_rb_compare(hammer_buffer_t buf1, hammer_buffer_t buf2)
141 if (buf1->buf_no < buf2->buf_no)
143 if (buf1->buf_no > buf2->buf_no)
149 hammer_nod_rb_compare(hammer_node_t node1, hammer_node_t node2)
151 if (node1->node_offset < node2->node_offset)
153 if (node1->node_offset > node2->node_offset)
159 * Note: The lookup function for hammer_ino_rb_tree winds up being named
160 * hammer_ino_rb_tree_RB_LOOKUP_INFO(root, info). The other lookup
161 * functions are normal, e.g. hammer_clu_rb_tree_RB_LOOKUP(root, clu_no).
163 RB_GENERATE(hammer_ino_rb_tree, hammer_inode, rb_node, hammer_ino_rb_compare);
164 RB_GENERATE_XLOOKUP(hammer_ino_rb_tree, INFO, hammer_inode, rb_node,
165 hammer_inode_info_cmp, hammer_inode_info_t);
166 RB_GENERATE2(hammer_vol_rb_tree, hammer_volume, rb_node,
167 hammer_vol_rb_compare, int32_t, vol_no);
168 RB_GENERATE2(hammer_scl_rb_tree, hammer_supercl, rb_node,
169 hammer_scl_rb_compare, int32_t, scl_no);
170 RB_GENERATE2(hammer_clu_rb_tree, hammer_cluster, rb_node,
171 hammer_clu_rb_compare, int32_t, clu_no);
172 RB_GENERATE2(hammer_buf_rb_tree, hammer_buffer, rb_node,
173 hammer_buf_rb_compare, int32_t, buf_no);
174 RB_GENERATE2(hammer_nod_rb_tree, hammer_node, rb_node,
175 hammer_nod_rb_compare, int32_t, node_offset);
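
/*
 * Usage sketch (illustrative only, not part of this file's logic): the
 * lookup functions generated above are typically invoked as follows.
 * The locals and the rb_inos_root field name are assumptions made for
 * the example.
 *
 *	hammer_volume_t vol;
 *	hammer_inode_t ip;
 *	struct hammer_inode_info info;
 *
 *	vol = hammer_vol_rb_tree_RB_LOOKUP(&hmp->rb_vols_root, vol_no);
 *	ip = hammer_ino_rb_tree_RB_LOOKUP_INFO(&hmp->rb_inos_root, &info);
 */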
177 /************************************************************************
179 ************************************************************************
181 * Load a HAMMER volume by name. Returns 0 on success or a positive error
182 * code on failure. Volumes must be loaded at mount time, get_volume() will
183 * not load a new volume.
 * Calls made to hammer_load_volume() are single-threaded.
188 hammer_install_volume(struct hammer_mount *hmp, const char *volname)
191 hammer_volume_t volume;
192 struct hammer_volume_ondisk *ondisk;
193 struct nlookupdata nd;
194 struct buf *bp = NULL;
199 ronly = ((mp->mnt_flag & MNT_RDONLY) ? 1 : 0);
202 * Allocate a volume structure
204 volume = kmalloc(sizeof(*volume), M_HAMMER, M_WAITOK|M_ZERO);
205 volume->vol_name = kstrdup(volname, M_HAMMER);
207 volume->io.type = HAMMER_STRUCTURE_VOLUME;
208 volume->io.offset = 0LL;
211 * Get the device vnode
213 error = nlookup_init(&nd, volume->vol_name, UIO_SYSSPACE, NLC_FOLLOW);
215 error = nlookup(&nd);
217 error = cache_vref(&nd.nl_nch, nd.nl_cred, &volume->devvp);
220 vn_isdisk(volume->devvp, &error);
223 vn_lock(volume->devvp, LK_EXCLUSIVE | LK_RETRY);
224 error = VOP_OPEN(volume->devvp, (ronly ? FREAD : FREAD|FWRITE),
226 vn_unlock(volume->devvp);
229 hammer_free_volume(volume);
234 * Extract the volume number from the volume header and do various
237 error = bread(volume->devvp, 0LL, HAMMER_BUFSIZE, &bp);
240 ondisk = (void *)bp->b_data;
241 if (ondisk->head.buf_type != HAMMER_FSBUF_VOLUME) {
242 kprintf("hammer_mount: volume %s has an invalid header\n",
247 volume->vol_no = ondisk->vol_no;
248 volume->cluster_base = ondisk->vol_clo_beg;
249 volume->vol_clsize = ondisk->vol_clsize;
250 volume->vol_flags = ondisk->vol_flags;
251 RB_INIT(&volume->rb_clus_root);
252 RB_INIT(&volume->rb_scls_root);
254 if (RB_EMPTY(&hmp->rb_vols_root)) {
255 hmp->fsid = ondisk->vol_fsid;
256 } else if (bcmp(&hmp->fsid, &ondisk->vol_fsid, sizeof(uuid_t))) {
257 kprintf("hammer_mount: volume %s's fsid does not match "
258 "other volumes\n", volume->vol_name);
264 * Insert the volume structure into the red-black tree.
266 if (RB_INSERT(hammer_vol_rb_tree, &hmp->rb_vols_root, volume)) {
267 kprintf("hammer_mount: volume %s has a duplicate vol_no %d\n",
268 volume->vol_name, volume->vol_no);
273 * Set the root volume and load the root cluster. HAMMER special
274 * cases rootvol and rootcl and will not deallocate the structures.
275 * We do not hold a ref because this would prevent related I/O
276 * from being flushed.
278 if (error == 0 && ondisk->vol_rootvol == ondisk->vol_no) {
279 hmp->rootvol = volume;
280 hmp->rootcl = hammer_get_cluster(volume,
281 ondisk->vol0_root_clu_no,
283 hammer_rel_cluster(hmp->rootcl, 0);
284 hmp->fsid_udev = dev2udev(vn_todev(volume->devvp));
290 /*vinvalbuf(volume->devvp, V_SAVE, 0, 0);*/
291 VOP_CLOSE(volume->devvp, ronly ? FREAD : FREAD|FWRITE);
292 hammer_free_volume(volume);
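
/*
 * Usage sketch (illustrative only): the mount path is expected to call
 * hammer_install_volume() once for each volume before any other volume
 * access.  The loop and variable names below are hypothetical.
 *
 *	for (i = 0; error == 0 && i < nvolumes; ++i)
 *		error = hammer_install_volume(hmp, volpaths[i]);
 */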
298 * Unload and free a HAMMER volume. Must return >= 0 to continue scan
299 * so returns -1 on failure.
302 hammer_unload_volume(hammer_volume_t volume, void *data __unused)
304 struct hammer_mount *hmp = volume->hmp;
305 hammer_cluster_t rootcl;
306 int ronly = ((hmp->mp->mnt_flag & MNT_RDONLY) ? 1 : 0);
309 * Sync clusters, sync volume
314 * Clean up the root cluster, which is held unlocked in the root
317 if (hmp->rootvol == volume) {
318 if ((rootcl = hmp->rootcl) != NULL)
324 * Unload clusters and super-clusters. Unloading a super-cluster
325 * also unloads related clusters, but the filesystem may not be
326 * using super-clusters so unload clusters anyway.
328 RB_SCAN(hammer_clu_rb_tree, &volume->rb_clus_root, NULL,
329 hammer_unload_cluster, NULL);
330 RB_SCAN(hammer_scl_rb_tree, &volume->rb_scls_root, NULL,
331 hammer_unload_supercl, NULL);
334 * Release our buffer and flush anything left in the buffer cache.
336 hammer_io_release(&volume->io, 1);
339 * There should be no references on the volume.
341 KKASSERT(volume->io.lock.refs == 0);
343 volume->ondisk = NULL;
346 vinvalbuf(volume->devvp, 0, 0, 0);
347 VOP_CLOSE(volume->devvp, FREAD);
349 vinvalbuf(volume->devvp, V_SAVE, 0, 0);
350 VOP_CLOSE(volume->devvp, FREAD|FWRITE);
355 * Destroy the structure
357 RB_REMOVE(hammer_vol_rb_tree, &hmp->rb_vols_root, volume);
358 hammer_free_volume(volume);
364 hammer_free_volume(hammer_volume_t volume)
366 if (volume->vol_name) {
367 kfree(volume->vol_name, M_HAMMER);
368 volume->vol_name = NULL;
371 vrele(volume->devvp);
372 volume->devvp = NULL;
374 kfree(volume, M_HAMMER);
378 * Get a HAMMER volume. The volume must already exist.
381 hammer_get_volume(struct hammer_mount *hmp, int32_t vol_no, int *errorp)
383 struct hammer_volume *volume;
386 * Locate the volume structure
388 volume = RB_LOOKUP(hammer_vol_rb_tree, &hmp->rb_vols_root, vol_no);
389 if (volume == NULL) {
393 hammer_ref(&volume->io.lock);
396 * Deal with on-disk info
398 if (volume->ondisk == NULL) {
399 *errorp = hammer_load_volume(volume);
401 hammer_rel_volume(volume, 1);
411 hammer_get_root_volume(struct hammer_mount *hmp, int *errorp)
413 hammer_volume_t volume;
415 volume = hmp->rootvol;
416 KKASSERT(volume != NULL);
417 hammer_ref(&volume->io.lock);
420 * Deal with on-disk info
422 if (volume->ondisk == NULL) {
423 *errorp = hammer_load_volume(volume);
425 hammer_rel_volume(volume, 1);
435 * Load a volume's on-disk information. The volume must be referenced and
436 * not locked. We temporarily acquire an exclusive lock to interlock
 * against releases or multiple gets.
440 hammer_load_volume(hammer_volume_t volume)
442 struct hammer_volume_ondisk *ondisk;
445 hammer_lock_ex(&volume->io.lock);
446 if (volume->ondisk == NULL) {
447 error = hammer_io_read(volume->devvp, &volume->io);
449 hammer_unlock(&volume->io.lock);
452 volume->ondisk = ondisk = (void *)volume->io.bp->b_data;
455 * Configure the volume's A-lists. These are used to
458 if (volume->vol_flags & HAMMER_VOLF_USINGSUPERCL) {
459 volume->alist.config = &Vol_super_alist_config;
460 volume->alist.meta = ondisk->vol_almeta.super;
461 volume->alist.info = volume;
463 volume->alist.config = &Vol_normal_alist_config;
464 volume->alist.meta = ondisk->vol_almeta.normal;
465 volume->alist.info = NULL;
467 hammer_alist_init(&volume->alist);
471 hammer_unlock(&volume->io.lock);
476 * Release a volume. Call hammer_io_release on the last reference. We have
477 * to acquire an exclusive lock to interlock against volume->ondisk tests
478 * in hammer_load_volume().
481 hammer_rel_volume(hammer_volume_t volume, int flush)
483 if (hammer_islastref(&volume->io.lock)) {
484 hammer_lock_ex(&volume->io.lock);
485 if (hammer_islastref(&volume->io.lock)) {
486 volume->ondisk = NULL;
487 hammer_io_release(&volume->io, flush);
489 hammer_unlock(&volume->io.lock);
491 hammer_unref(&volume->io.lock);
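
/*
 * Usage sketch (illustrative only): the typical reference pattern for a
 * volume is get/use/release.  Error handling is abbreviated and the
 * caller shown here is hypothetical.
 *
 *	int error;
 *	hammer_volume_t volume;
 *
 *	volume = hammer_get_volume(hmp, vol_no, &error);
 *	if (volume) {
 *		... use volume->ondisk, which is loaded at this point ...
 *		hammer_rel_volume(volume, 0);
 *	}
 */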
494 /************************************************************************
496 ************************************************************************
 * Manage super-clusters.  Note that a super-cluster (supercl) holds a
 * reference to its volume.
502 hammer_get_supercl(hammer_volume_t volume, int32_t scl_no,
503 int *errorp, int isnew)
505 hammer_supercl_t supercl;
508 * Locate and lock the super-cluster structure, creating one
512 supercl = RB_LOOKUP(hammer_scl_rb_tree, &volume->rb_scls_root, scl_no);
513 if (supercl == NULL) {
514 supercl = kmalloc(sizeof(*supercl), M_HAMMER, M_WAITOK|M_ZERO);
515 supercl->scl_no = scl_no;
516 supercl->volume = volume;
517 supercl->io.offset = calculate_supercl_offset(volume, scl_no);
518 supercl->io.type = HAMMER_STRUCTURE_SUPERCL;
519 hammer_ref(&supercl->io.lock);
522 * Insert the cluster into the RB tree and handle late
525 if (RB_INSERT(hammer_scl_rb_tree, &volume->rb_scls_root, supercl)) {
526 hammer_unref(&supercl->io.lock);
527 kfree(supercl, M_HAMMER);
530 hammer_ref(&volume->io.lock);
532 hammer_ref(&supercl->io.lock);
536 * Deal with on-disk info
538 if (supercl->ondisk == NULL || isnew) {
539 *errorp = hammer_load_supercl(supercl, isnew);
541 hammer_rel_supercl(supercl, 1);
551 hammer_load_supercl(hammer_supercl_t supercl, int isnew)
553 struct hammer_supercl_ondisk *ondisk;
554 hammer_volume_t volume = supercl->volume;
557 hammer_lock_ex(&supercl->io.lock);
558 if (supercl->ondisk == NULL) {
560 error = hammer_io_new(volume->devvp, &supercl->io);
562 error = hammer_io_read(volume->devvp, &supercl->io);
564 hammer_unlock(&supercl->io.lock);
567 supercl->ondisk = ondisk = (void *)supercl->io.bp->b_data;
569 supercl->alist.config = &Supercl_alist_config;
570 supercl->alist.meta = ondisk->scl_meta;
571 supercl->alist.info = NULL;
573 error = hammer_io_new(volume->devvp, &supercl->io);
577 if (error == 0 && isnew) {
579 * If this is a new super-cluster we have to initialize
580 * various ondisk structural elements. The caller is
581 * responsible for the remainder.
583 struct hammer_alist_live dummy;
585 ondisk = supercl->ondisk;
586 dummy.config = &Buf_alist_config;
587 dummy.meta = ondisk->head.buf_almeta;
589 initbuffer(&dummy, &ondisk->head, HAMMER_FSBUF_SUPERCL);
590 hammer_alist_init(&supercl->alist);
592 hammer_unlock(&supercl->io.lock);
597 * NOTE: Called from RB_SCAN, must return >= 0 for scan to continue.
600 hammer_unload_supercl(hammer_supercl_t supercl, void *data __unused)
602 KKASSERT(supercl->io.lock.refs == 0);
603 hammer_ref(&supercl->io.lock);
604 hammer_io_release(&supercl->io, 1);
605 hammer_rel_supercl(supercl, 1);
610 * Release a super-cluster. We have to deal with several places where
611 * another thread can ref the super-cluster.
613 * Only destroy the structure itself if the related buffer cache buffer
614 * was disassociated from it. This ties the management of the structure
615 * to the buffer cache subsystem.
618 hammer_rel_supercl(hammer_supercl_t supercl, int flush)
620 hammer_volume_t volume;
622 if (hammer_islastref(&supercl->io.lock)) {
623 hammer_lock_ex(&supercl->io.lock);
624 if (hammer_islastref(&supercl->io.lock)) {
625 hammer_io_release(&supercl->io, flush);
626 if (supercl->io.bp == NULL &&
627 hammer_islastref(&supercl->io.lock)) {
628 volume = supercl->volume;
629 RB_REMOVE(hammer_scl_rb_tree,
630 &volume->rb_scls_root, supercl);
631 supercl->volume = NULL; /* sanity */
632 kfree(supercl, M_HAMMER);
633 hammer_rel_volume(volume, 0);
637 hammer_unlock(&supercl->io.lock);
639 hammer_unref(&supercl->io.lock);
642 /************************************************************************
644 ************************************************************************
 * Manage clusters.  Note that a cluster holds a reference to its volume.
650 hammer_get_cluster(hammer_volume_t volume, int32_t clu_no,
651 int *errorp, int isnew)
653 hammer_cluster_t cluster;
656 cluster = RB_LOOKUP(hammer_clu_rb_tree, &volume->rb_clus_root, clu_no);
657 if (cluster == NULL) {
658 cluster = kmalloc(sizeof(*cluster), M_HAMMER, M_WAITOK|M_ZERO);
659 cluster->clu_no = clu_no;
660 cluster->volume = volume;
661 cluster->io.offset = calculate_cluster_offset(volume, clu_no);
662 RB_INIT(&cluster->rb_bufs_root);
663 RB_INIT(&cluster->rb_nods_root);
664 cluster->io.type = HAMMER_STRUCTURE_CLUSTER;
665 hammer_ref(&cluster->io.lock);
668 * Insert the cluster into the RB tree and handle late
671 if (RB_INSERT(hammer_clu_rb_tree, &volume->rb_clus_root, cluster)) {
672 hammer_unref(&cluster->io.lock);
673 kfree(cluster, M_HAMMER);
676 hammer_ref(&volume->io.lock);
678 hammer_ref(&cluster->io.lock);
682 * Deal with on-disk info
684 if (cluster->ondisk == NULL || isnew) {
685 *errorp = hammer_load_cluster(cluster, isnew);
687 hammer_rel_cluster(cluster, 1);
697 hammer_get_root_cluster(struct hammer_mount *hmp, int *errorp)
699 hammer_cluster_t cluster;
701 cluster = hmp->rootcl;
702 KKASSERT(cluster != NULL);
703 hammer_ref(&cluster->io.lock);
706 * Deal with on-disk info
708 if (cluster->ondisk == NULL) {
709 *errorp = hammer_load_cluster(cluster, 0);
711 hammer_rel_cluster(cluster, 1);
722 hammer_load_cluster(hammer_cluster_t cluster, int isnew)
724 hammer_volume_t volume = cluster->volume;
725 struct hammer_cluster_ondisk *ondisk;
729 * Load the cluster's on-disk info
731 hammer_lock_ex(&cluster->io.lock);
732 if (cluster->ondisk == NULL) {
734 error = hammer_io_new(volume->devvp, &cluster->io);
736 error = hammer_io_read(volume->devvp, &cluster->io);
738 hammer_unlock(&cluster->io.lock);
741 cluster->ondisk = ondisk = (void *)cluster->io.bp->b_data;
743 cluster->alist_master.config = &Clu_master_alist_config;
744 cluster->alist_master.meta = ondisk->clu_master_meta;
745 cluster->alist_btree.config = &Clu_slave_alist_config;
746 cluster->alist_btree.meta = ondisk->clu_btree_meta;
747 cluster->alist_btree.info = cluster;
748 cluster->alist_record.config = &Clu_slave_alist_config;
749 cluster->alist_record.meta = ondisk->clu_record_meta;
750 cluster->alist_record.info = cluster;
751 cluster->alist_mdata.config = &Clu_slave_alist_config;
752 cluster->alist_mdata.meta = ondisk->clu_mdata_meta;
753 cluster->alist_mdata.info = cluster;
755 cluster->clu_btree_beg = ondisk->clu_btree_beg;
756 cluster->clu_btree_end = ondisk->clu_btree_end;
758 error = hammer_io_new(volume->devvp, &cluster->io);
762 if (error == 0 && isnew) {
764 * If this is a new cluster we have to initialize
765 * various ondisk structural elements. The caller is
766 * responsible for the remainder.
768 struct hammer_alist_live dummy;
770 ondisk = cluster->ondisk;
772 dummy.config = &Buf_alist_config;
773 dummy.meta = ondisk->head.buf_almeta;
775 initbuffer(&dummy, &ondisk->head, HAMMER_FSBUF_CLUSTER);
777 hammer_alist_init(&cluster->alist_master);
778 hammer_alist_init(&cluster->alist_btree);
779 hammer_alist_init(&cluster->alist_record);
780 hammer_alist_init(&cluster->alist_mdata);
782 hammer_unlock(&cluster->io.lock);
787 * NOTE: Called from RB_SCAN, must return >= 0 for scan to continue.
790 hammer_unload_cluster(hammer_cluster_t cluster, void *data __unused)
792 hammer_ref(&cluster->io.lock);
793 RB_SCAN(hammer_buf_rb_tree, &cluster->rb_bufs_root, NULL,
794 hammer_unload_buffer, NULL);
795 KKASSERT(cluster->io.lock.refs == 1);
796 hammer_io_release(&cluster->io, 1);
797 hammer_rel_cluster(cluster, 1);
802 * Reference a cluster that is either already referenced or via a specially
803 * handled pointer (aka rootcl).
806 hammer_ref_cluster(hammer_cluster_t cluster)
810 KKASSERT(cluster != NULL);
811 hammer_ref(&cluster->io.lock);
814 * Deal with on-disk info
816 if (cluster->ondisk == NULL) {
817 error = hammer_load_cluster(cluster, 0);
819 hammer_rel_cluster(cluster, 1);
827 * Release a cluster. We have to deal with several places where
828 * another thread can ref the cluster.
830 * Only destroy the structure itself if the related buffer cache buffer
831 * was disassociated from it. This ties the management of the structure
832 * to the buffer cache subsystem.
835 hammer_rel_cluster(hammer_cluster_t cluster, int flush)
838 hammer_volume_t volume;
840 if (hammer_islastref(&cluster->io.lock)) {
841 hammer_lock_ex(&cluster->io.lock);
842 if (hammer_islastref(&cluster->io.lock)) {
843 hammer_io_release(&cluster->io, flush);
846 * Clean out the B-Tree node cache, if any, then
847 * clean up the volume ref and free the cluster.
849 * If the cluster acquires a new reference while we
850 * are trying to clean it out, abort the cleaning.
852 * There really shouldn't be any nodes at this point
853 * but we allow a node with no buffer association
854 * so handle the case.
856 while (cluster->io.bp == NULL &&
857 hammer_islastref(&cluster->io.lock) &&
858 (node = RB_ROOT(&cluster->rb_nods_root)) != NULL
860 KKASSERT(node->lock.refs == 0);
861 hammer_flush_node(node);
863 if (cluster->io.bp == NULL &&
864 hammer_islastref(&cluster->io.lock)) {
865 volume = cluster->volume;
866 RB_REMOVE(hammer_clu_rb_tree,
867 &volume->rb_clus_root, cluster);
868 cluster->volume = NULL; /* sanity */
869 kfree(cluster, M_HAMMER);
870 hammer_rel_volume(volume, 0);
874 hammer_unlock(&cluster->io.lock);
876 hammer_unref(&cluster->io.lock);
879 /************************************************************************
881 ************************************************************************
883 * Manage buffers. Note that a buffer holds a reference to its associated
884 * cluster, and its cluster will hold a reference to the cluster's volume.
 * A non-zero buf_type indicates that a new buffer should be created and
 * initialized.
890 hammer_get_buffer(hammer_cluster_t cluster, int32_t buf_no,
891 u_int64_t buf_type, int *errorp)
893 hammer_buffer_t buffer;
896 * Find the buffer. Note that buffer 0 corresponds to the cluster
897 * header and should never be requested.
899 KKASSERT(buf_no >= cluster->ondisk->clu_start / HAMMER_BUFSIZE &&
900 buf_no < cluster->ondisk->clu_limit / HAMMER_BUFSIZE);
903 * Locate and lock the buffer structure, creating one if necessary.
906 buffer = RB_LOOKUP(hammer_buf_rb_tree, &cluster->rb_bufs_root, buf_no);
907 if (buffer == NULL) {
		buffer = kmalloc(sizeof(*buffer), M_HAMMER, M_WAITOK|M_ZERO);
909 buffer->buf_no = buf_no;
910 buffer->cluster = cluster;
911 buffer->volume = cluster->volume;
912 buffer->io.offset = cluster->io.offset +
913 (buf_no * HAMMER_BUFSIZE);
914 buffer->io.type = HAMMER_STRUCTURE_BUFFER;
915 TAILQ_INIT(&buffer->clist);
916 hammer_ref(&buffer->io.lock);
919 * Insert the cluster into the RB tree and handle late
922 if (RB_INSERT(hammer_buf_rb_tree, &cluster->rb_bufs_root, buffer)) {
923 hammer_unref(&buffer->io.lock);
924 kfree(buffer, M_HAMMER);
927 hammer_ref(&cluster->io.lock);
929 hammer_ref(&buffer->io.lock);
933 * Deal with on-disk info
935 if (buffer->ondisk == NULL || buf_type) {
936 *errorp = hammer_load_buffer(buffer, buf_type);
938 hammer_rel_buffer(buffer, 1);
948 hammer_load_buffer(hammer_buffer_t buffer, u_int64_t buf_type)
950 hammer_volume_t volume;
951 hammer_fsbuf_ondisk_t ondisk;
955 * Load the buffer's on-disk info
957 volume = buffer->volume;
958 hammer_lock_ex(&buffer->io.lock);
959 if (buffer->ondisk == NULL) {
961 error = hammer_io_new(volume->devvp, &buffer->io);
963 error = hammer_io_read(volume->devvp, &buffer->io);
966 hammer_unlock(&buffer->io.lock);
969 buffer->ondisk = ondisk = (void *)buffer->io.bp->b_data;
970 buffer->alist.config = &Buf_alist_config;
971 buffer->alist.meta = ondisk->head.buf_almeta;
972 buffer->buf_type = ondisk->head.buf_type;
973 } else if (buf_type) {
974 error = hammer_io_new(volume->devvp, &buffer->io);
978 if (error == 0 && buf_type) {
979 ondisk = buffer->ondisk;
980 initbuffer(&buffer->alist, &ondisk->head, buf_type);
981 buffer->buf_type = ondisk->head.buf_type;
983 hammer_unlock(&buffer->io.lock);
988 * NOTE: Called from RB_SCAN, must return >= 0 for scan to continue.
991 hammer_unload_buffer(hammer_buffer_t buffer, void *data __unused)
993 hammer_ref(&buffer->io.lock);
994 hammer_flush_buffer_nodes(buffer);
995 hammer_io_release(&buffer->io, 1);
996 KKASSERT(buffer->io.lock.refs == 1);
997 hammer_rel_buffer(buffer, 1);
1002 * Reference a buffer that is either already referenced or via a specially
1003 * handled pointer (aka cursor->buffer).
1006 hammer_ref_buffer(hammer_buffer_t buffer)
1010 hammer_ref(&buffer->io.lock);
1011 if (buffer->ondisk == NULL) {
1012 error = hammer_load_buffer(buffer, 0);
1014 hammer_rel_buffer(buffer, 1);
1016 * NOTE: buffer pointer can become stale after
1017 * the above release.
1020 KKASSERT(buffer->buf_type ==
1021 buffer->ondisk->head.buf_type);
1030 * Release a buffer. We have to deal with several places where
1031 * another thread can ref the buffer.
1033 * Only destroy the structure itself if the related buffer cache buffer
1034 * was disassociated from it. This ties the management of the structure
1035 * to the buffer cache subsystem. buffer->ondisk determines whether the
1036 * embedded io is referenced or not.
1039 hammer_rel_buffer(hammer_buffer_t buffer, int flush)
1041 hammer_cluster_t cluster;
1044 if (hammer_islastref(&buffer->io.lock)) {
1045 hammer_lock_ex(&buffer->io.lock);
1046 if (hammer_islastref(&buffer->io.lock)) {
1047 hammer_io_release(&buffer->io, flush);
1050 * Clean out the B-Tree node cache, if any, then
1051 * clean up the cluster ref and free the buffer.
1053 * If the buffer acquires a new reference while we
1054 * are trying to clean it out, abort the cleaning.
1056 while (buffer->io.bp == NULL &&
1057 hammer_islastref(&buffer->io.lock) &&
1058 (node = TAILQ_FIRST(&buffer->clist)) != NULL
1060 KKASSERT(node->lock.refs == 0);
1061 hammer_flush_node(node);
1063 if (buffer->io.bp == NULL &&
1064 hammer_islastref(&buffer->io.lock)) {
1065 cluster = buffer->cluster;
1066 RB_REMOVE(hammer_buf_rb_tree,
1067 &cluster->rb_bufs_root, buffer);
1068 buffer->cluster = NULL; /* sanity */
1069 kfree(buffer, M_HAMMER);
1070 hammer_rel_cluster(cluster, 0);
1074 hammer_unlock(&buffer->io.lock);
1076 hammer_unref(&buffer->io.lock);
1080 * Flush passively cached B-Tree nodes associated with this buffer.
1082 * NOTE: The buffer is referenced and locked.
1085 hammer_flush_buffer_nodes(hammer_buffer_t buffer)
1089 node = TAILQ_FIRST(&buffer->clist);
1091 buffer->save_scan = TAILQ_NEXT(node, entry);
1092 if (node->lock.refs == 0)
1093 hammer_flush_node(node);
1094 node = buffer->save_scan;
1098 /************************************************************************
1100 ************************************************************************
1102 * Manage B-Tree nodes. B-Tree nodes represent the primary indexing
1103 * method used by the HAMMER filesystem.
1105 * Unlike other HAMMER structures, a hammer_node can be PASSIVELY
1106 * associated with its buffer. It can have an active buffer reference
1107 * even when the node itself has no references. The node also passively
1108 * associates itself with its cluster without holding any cluster refs.
1109 * The cluster ref is indirectly maintained by the active buffer ref when
1110 * a node is acquired.
1112 * A hammer_node can also be passively associated with other HAMMER
1113 * structures, such as inodes, while retaining 0 references. These
1114 * associations can be cleared backwards using a pointer-to-pointer in
1117 * This allows the HAMMER implementation to cache hammer_node's long-term
1118 * and short-cut a great deal of the infrastructure's complexity. In
1119 * most cases a cached node can be reacquired without having to dip into
1120 * either the buffer or cluster management code.
1122 * The caller must pass a referenced cluster on call and will retain
1123 * ownership of the reference on return. The node will acquire its own
1124 * additional references, if necessary.
1127 hammer_get_node(hammer_cluster_t cluster, int32_t node_offset, int *errorp)
1132 * Locate the structure, allocating one if necessary.
1135 node = RB_LOOKUP(hammer_nod_rb_tree, &cluster->rb_nods_root,
1138 node = kmalloc(sizeof(*node), M_HAMMER, M_WAITOK|M_ZERO);
1139 node->node_offset = node_offset;
1140 node->cluster = cluster;
1141 if (RB_INSERT(hammer_nod_rb_tree, &cluster->rb_nods_root,
1143 kfree(node, M_HAMMER);
1147 *errorp = hammer_ref_node(node);
1150 * NOTE: The node pointer may be stale on error return.
 * In fact, it's probably been destroyed.
1159 * Reference the node to prevent disassociations, then associate and
1160 * load the related buffer. This routine can also be called to reference
1161 * a node from a cache pointer.
1163 * NOTE: Because the caller does not have a ref on the node, the caller's
1164 * node pointer will be stale if an error is returned. We may also wind
1165 * up clearing the related cache pointers.
1167 * NOTE: The cluster is indirectly referenced by our buffer ref.
1170 hammer_ref_node(hammer_node_t node)
1172 hammer_buffer_t buffer;
1176 hammer_ref(&node->lock);
1178 if (node->ondisk == NULL) {
1179 hammer_lock_ex(&node->lock);
1180 if (node->ondisk == NULL) {
 * This is a little confusing but the gist is that
1183 * node->buffer determines whether the node is on
1184 * the buffer's clist and node->ondisk determines
1185 * whether the buffer is referenced.
1187 if ((buffer = node->buffer) != NULL) {
1188 error = hammer_ref_buffer(buffer);
1190 buf_no = node->node_offset / HAMMER_BUFSIZE;
1191 buffer = hammer_get_buffer(node->cluster,
1194 KKASSERT(error == 0);
1195 TAILQ_INSERT_TAIL(&buffer->clist,
1197 node->buffer = buffer;
1201 node->ondisk = (void *)((char *)buffer->ondisk +
1202 (node->node_offset & HAMMER_BUFMASK));
1205 hammer_unlock(&node->lock);
1208 hammer_rel_node(node);
1213 * Release a hammer_node. The node retains a passive association with
1214 * its cluster, buffer and caches.
1216 * However, to avoid cluttering up kernel memory with tons of B-Tree
1217 * node cache structures we destroy the node if no passive cache or
1218 * (instantiated) buffer references exist.
1221 hammer_rel_node(hammer_node_t node)
1223 hammer_cluster_t cluster;
1224 hammer_buffer_t buffer;
1226 if (hammer_islastref(&node->lock)) {
1227 cluster = node->cluster;
1229 * Clutter control, this case only occurs after a failed
1230 * load since otherwise ondisk will be non-NULL.
1232 if (node->cache1 == NULL && node->cache2 == NULL &&
1233 node->ondisk == NULL) {
1234 RB_REMOVE(hammer_nod_rb_tree, &cluster->rb_nods_root,
1236 if ((buffer = node->buffer) != NULL) {
1237 node->buffer = NULL;
1238 hammer_remove_node_clist(buffer, node);
1240 kfree(node, M_HAMMER);
1245 * node->ondisk determines whether we have a buffer reference
1246 * to get rid of or not. Only get rid of the reference if
1247 * the kernel tried to flush the buffer.
1249 * NOTE: Once unref'd the node can be physically destroyed,
 * so our node is stale afterwards.
1252 * This case occurs if the node still has cache references.
1253 * We could remove the references and free the structure
1254 * but for now we allow them (and the node structure) to
1257 if (node->ondisk && hammer_io_checkflush(&node->buffer->io)) {
1258 buffer = node->buffer;
1259 node->buffer = NULL;
1260 node->ondisk = NULL;
1261 hammer_remove_node_clist(buffer, node);
1262 hammer_unref(&node->lock);
1263 hammer_rel_buffer(buffer, 0);
1265 hammer_unref(&node->lock);
1268 hammer_unref(&node->lock);
1273 * Cache-and-release a hammer_node. Kinda like catching and releasing a
1274 * fish, but keeping an eye on him. The node is passively cached in *cache.
1276 * NOTE! HAMMER may NULL *cache at any time, even after you have
1277 * referenced the node!
1280 hammer_cache_node(hammer_node_t node, struct hammer_node **cache)
1282 if (node->cache1 != cache) {
1283 if (node->cache2 == cache) {
1284 struct hammer_node **tmp;
1286 node->cache1 = node->cache2;
1290 *node->cache2 = NULL;
1291 node->cache2 = node->cache1;
1292 node->cache1 = cache;
1299 hammer_uncache_node(struct hammer_node **cache)
1303 if ((node = *cache) != NULL) {
1305 if (node->cache1 == cache) {
1306 node->cache1 = node->cache2;
1307 node->cache2 = NULL;
1308 } else if (node->cache2 == cache) {
1309 node->cache2 = NULL;
1311 panic("hammer_uncache_node: missing cache linkage");
1313 if (node->cache1 == NULL && node->cache2 == NULL &&
1314 node->lock.refs == 0) {
1315 hammer_flush_node(node);
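
/*
 * Usage sketch (illustrative only): a structure such as an inode can
 * passively cache a B-Tree node and drop the cache link before it is
 * destroyed.  The 'cache' pointer and the surrounding caller are
 * assumptions made for the example.
 *
 *	struct hammer_node *cache = NULL;
 *	hammer_node_t node;
 *	int error;
 *
 *	node = hammer_get_node(cluster, node_offset, &error);
 *	if (error == 0) {
 *		hammer_cache_node(node, &cache);
 *		hammer_rel_node(node);
 *	}
 *	...
 *	hammer_uncache_node(&cache);
 */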
1321 * Remove a node's cache references and destroy the node if it has no
1322 * references. This is typically called from the buffer handling code.
1324 * The node may have an active buffer reference (ondisk != NULL) even
1325 * if the node itself has no references.
1327 * Note that a caller iterating through nodes via a buffer must have its
1328 * own reference on the buffer or our hammer_rel_buffer() call below may
1329 * rip it out from under the caller.
1332 hammer_flush_node(hammer_node_t node)
1334 hammer_buffer_t buffer;
1337 *node->cache1 = NULL;
1339 *node->cache2 = NULL;
1340 if (node->lock.refs == 0) {
1341 RB_REMOVE(hammer_nod_rb_tree, &node->cluster->rb_nods_root,
1343 if ((buffer = node->buffer) != NULL) {
1344 node->buffer = NULL;
1345 hammer_remove_node_clist(buffer, node);
1347 node->ondisk = NULL;
1348 hammer_rel_buffer(buffer, 0);
1351 kfree(node, M_HAMMER);
1356 * Remove a node from the buffer's clist. Adjust save_scan as appropriate.
1357 * This is in its own little routine to properly handle interactions with
1358 * save_scan, so it is possible to block while scanning a buffer's node list.
1362 hammer_remove_node_clist(hammer_buffer_t buffer, hammer_node_t node)
1364 if (buffer->save_scan == node)
1365 buffer->save_scan = TAILQ_NEXT(node, entry);
1366 TAILQ_REMOVE(&buffer->clist, node, entry);
1369 /************************************************************************
1370 * A-LIST ALLOCATORS *
1371 ************************************************************************/
1374 * Allocate HAMMER elements - btree nodes, data storage, and record elements
1376 * The passed *bufferp should be initialized to NULL. On successive calls
1377 * *bufferp caches the most recent buffer used until put away by the caller.
1378 * Note that previously returned pointers using the cached buffer become
1379 * invalid on successive calls which reuse *bufferp.
1381 * All allocations first attempt to use the block found at the specified
1382 * iterator. If that fails the first available block is used. If that
1383 * fails a new buffer is allocated and associated with the buffer type
1384 * A-list and the element is allocated out of the new buffer.
1388 hammer_alloc_btree(hammer_cluster_t cluster, int *errorp)
1390 hammer_buffer_t buffer;
1391 hammer_alist_t live;
1395 int32_t node_offset;
1398 * Allocate a B-Tree element
1401 live = &cluster->alist_btree;
1402 elm_no = hammer_alist_alloc_fwd(live, 1, cluster->ondisk->idx_index);
1403 if (elm_no == HAMMER_ALIST_BLOCK_NONE)
1404 elm_no = hammer_alist_alloc_fwd(live, 1, 0);
1405 if (elm_no == HAMMER_ALIST_BLOCK_NONE) {
1406 alloc_new_buffer(cluster, live,
1407 HAMMER_FSBUF_BTREE, HAMMER_BTREE_NODES,
1408 cluster->ondisk->idx_index, errorp, &buffer);
1409 elm_no = hammer_alist_alloc(live, 1);
1410 if (elm_no == HAMMER_ALIST_BLOCK_NONE) {
1413 hammer_rel_buffer(buffer, 0);
1414 hammer_modify_cluster(cluster);
1418 cluster->ondisk->idx_index = elm_no;
1419 KKASSERT((elm_no & HAMMER_FSBUF_BLKMASK) < HAMMER_BTREE_NODES);
1420 hammer_modify_cluster(cluster);
1423 * Load and return the B-Tree element
1425 buf_no = elm_no / HAMMER_FSBUF_MAXBLKS;
1426 node_offset = buf_no * HAMMER_BUFSIZE +
1427 offsetof(union hammer_fsbuf_ondisk,
1428 btree.nodes[elm_no & HAMMER_FSBUF_BLKMASK]);
1429 node = hammer_get_node(cluster, node_offset, errorp);
1431 bzero(node->ondisk, sizeof(*node->ondisk));
1433 hammer_alist_free(live, elm_no, 1);
1434 hammer_rel_node(node);
1438 hammer_rel_buffer(buffer, 0);
1443 hammer_alloc_data(hammer_cluster_t cluster, int32_t bytes,
1444 int *errorp, struct hammer_buffer **bufferp)
1446 hammer_buffer_t buffer;
1447 hammer_alist_t live;
1454 * Deal with large data blocks. The blocksize is HAMMER_BUFSIZE
1455 * for these allocations.
1457 if ((bytes & HAMMER_BUFMASK) == 0) {
1458 nblks = bytes / HAMMER_BUFSIZE;
1459 /* only one block allowed for now (so buffer can hold it) */
1460 KKASSERT(nblks == 1);
1462 buf_no = hammer_alist_alloc_fwd(&cluster->alist_master,
1464 cluster->ondisk->idx_ldata);
1465 if (buf_no == HAMMER_ALIST_BLOCK_NONE) {
1466 buf_no = hammer_alist_alloc_fwd(&cluster->alist_master,
1470 hammer_modify_cluster(cluster);
1471 if (buf_no == HAMMER_ALIST_BLOCK_NONE) {
1475 cluster->ondisk->idx_ldata = buf_no;
1477 *bufferp = hammer_get_buffer(cluster, buf_no, -1, errorp);
1479 hammer_rel_buffer(buffer, 0);
1481 return(buffer->ondisk);
1485 * Allocate a data element. The block size is HAMMER_DATA_BLKSIZE
1486 * (64 bytes) for these allocations.
1488 nblks = (bytes + HAMMER_DATA_BLKMASK) & ~HAMMER_DATA_BLKMASK;
1489 nblks /= HAMMER_DATA_BLKSIZE;
1490 live = &cluster->alist_mdata;
1491 elm_no = hammer_alist_alloc_fwd(live, nblks, cluster->ondisk->idx_data);
1492 if (elm_no == HAMMER_ALIST_BLOCK_NONE)
		elm_no = hammer_alist_alloc_fwd(live, nblks, 0);
1494 if (elm_no == HAMMER_ALIST_BLOCK_NONE) {
1495 alloc_new_buffer(cluster, live,
1496 HAMMER_FSBUF_DATA, HAMMER_DATA_NODES,
1497 cluster->ondisk->idx_data, errorp, bufferp);
1498 elm_no = hammer_alist_alloc(live, nblks);
1499 if (elm_no == HAMMER_ALIST_BLOCK_NONE) {
1501 hammer_modify_cluster(cluster);
	cluster->ondisk->idx_data = elm_no;
1506 hammer_modify_cluster(cluster);
1509 * Load and return the B-Tree element
1511 buf_no = elm_no / HAMMER_FSBUF_MAXBLKS;
1513 if (buffer == NULL || buffer->cluster != cluster ||
1514 buffer->buf_no != buf_no) {
1516 hammer_rel_buffer(buffer, 0);
1517 buffer = hammer_get_buffer(cluster, buf_no, 0, errorp);
1520 KKASSERT(buffer->ondisk->head.buf_type == HAMMER_FSBUF_DATA);
1521 KKASSERT((elm_no & HAMMER_FSBUF_BLKMASK) < HAMMER_DATA_NODES);
1522 item = &buffer->ondisk->data.data[elm_no & HAMMER_FSBUF_BLKMASK];
1523 bzero(item, nblks * HAMMER_DATA_BLKSIZE);
1529 hammer_alloc_record(hammer_cluster_t cluster,
1530 int *errorp, struct hammer_buffer **bufferp)
1532 hammer_buffer_t buffer;
1533 hammer_alist_t live;
1539 * Allocate a record element
1541 live = &cluster->alist_record;
1542 kprintf("IDX_RECORD %d\n", cluster->ondisk->idx_record);
1543 elm_no = hammer_alist_alloc_rev(live, 1, cluster->ondisk->idx_record);
1544 if (elm_no == HAMMER_ALIST_BLOCK_NONE)
1545 elm_no = hammer_alist_alloc_rev(live, 1,HAMMER_ALIST_BLOCK_MAX);
1546 kprintf("hammer_alloc_record elm %08x\n", elm_no);
1547 if (elm_no == HAMMER_ALIST_BLOCK_NONE) {
1548 alloc_new_buffer(cluster, live,
1549 HAMMER_FSBUF_RECORDS, HAMMER_RECORD_NODES,
1550 cluster->ondisk->idx_record, errorp, bufferp);
1551 elm_no = hammer_alist_alloc_rev(live, 1,HAMMER_ALIST_BLOCK_MAX);
1552 kprintf("hammer_alloc_record elm again %08x\n", elm_no);
1553 if (elm_no == HAMMER_ALIST_BLOCK_NONE) {
1555 hammer_modify_cluster(cluster);
1559 cluster->ondisk->idx_record = elm_no;
1560 hammer_modify_cluster(cluster);
1563 * Load and return the B-Tree element
1565 buf_no = elm_no / HAMMER_FSBUF_MAXBLKS;
1567 if (buffer == NULL || buffer->cluster != cluster ||
1568 buffer->buf_no != buf_no) {
1570 hammer_rel_buffer(buffer, 0);
1571 buffer = hammer_get_buffer(cluster, buf_no, 0, errorp);
1574 KKASSERT(buffer->ondisk->head.buf_type == HAMMER_FSBUF_RECORDS);
1575 KKASSERT((elm_no & HAMMER_FSBUF_BLKMASK) < HAMMER_RECORD_NODES);
1576 item = &buffer->ondisk->record.recs[elm_no & HAMMER_FSBUF_BLKMASK];
1577 bzero(item, sizeof(union hammer_record_ondisk));
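
/*
 * Usage sketch (illustrative only): the allocators above cache the most
 * recently used buffer in *bufferp across calls, so a caller making
 * several allocations passes the same pointer each time and releases it
 * once at the end.  The caller below is hypothetical.
 *
 *	struct hammer_buffer *buffer = NULL;
 *	void *data;
 *	int error;
 *
 *	data = hammer_alloc_data(cluster, bytes, &error, &buffer);
 *	... additional hammer_alloc_data()/hammer_alloc_record() calls may
 *	    reuse 'buffer'; pointers returned earlier become invalid when
 *	    *bufferp is replaced ...
 *	if (buffer)
 *		hammer_rel_buffer(buffer, 0);
 */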
1583 * Free HAMMER elements based on either a hammer_buffer and element pointer
1584 * or a cluster-relative byte offset.
1587 hammer_free_btree_ptr(hammer_buffer_t buffer, hammer_node_ondisk_t node)
1590 hammer_alist_t live;
1592 elm_no = node - &buffer->ondisk->btree.nodes[0];
1593 KKASSERT(elm_no >= 0 && elm_no < HAMMER_BTREE_NODES);
1594 elm_no += buffer->buf_no * HAMMER_FSBUF_MAXBLKS;
1595 live = &buffer->cluster->alist_btree;
1596 hammer_alist_free(live, elm_no, 1);
1597 hammer_modify_cluster(buffer->cluster);
1601 hammer_free_data_ptr(hammer_buffer_t buffer, void *data, int bytes)
1605 hammer_alist_t live;
1607 if ((bytes & HAMMER_BUFMASK) == 0) {
1608 nblks = bytes / HAMMER_BUFSIZE;
1609 KKASSERT(nblks == 1 && data == (void *)buffer->ondisk);
1610 hammer_alist_free(&buffer->cluster->alist_master,
1611 buffer->buf_no, nblks);
1612 hammer_modify_cluster(buffer->cluster);
1616 elm_no = ((char *)data - (char *)buffer->ondisk->data.data) /
1617 HAMMER_DATA_BLKSIZE;
1618 KKASSERT(elm_no >= 0 && elm_no < HAMMER_DATA_NODES);
1619 elm_no += buffer->buf_no * HAMMER_FSBUF_MAXBLKS;
1620 nblks = (bytes + HAMMER_DATA_BLKMASK) & ~HAMMER_DATA_BLKMASK;
1621 nblks /= HAMMER_DATA_BLKSIZE;
1622 live = &buffer->cluster->alist_mdata;
1623 hammer_alist_free(live, elm_no, nblks);
1624 hammer_modify_cluster(buffer->cluster);
1628 hammer_free_record_ptr(hammer_buffer_t buffer, union hammer_record_ondisk *rec)
1631 hammer_alist_t live;
1633 elm_no = rec - &buffer->ondisk->record.recs[0];
	KKASSERT(elm_no >= 0 && elm_no < HAMMER_RECORD_NODES);
1635 elm_no += buffer->buf_no * HAMMER_FSBUF_MAXBLKS;
1636 live = &buffer->cluster->alist_record;
1637 hammer_alist_free(live, elm_no, 1);
1638 hammer_modify_cluster(buffer->cluster);
1642 hammer_free_btree(hammer_cluster_t cluster, int32_t bclu_offset)
1644 const int32_t blksize = sizeof(struct hammer_node_ondisk);
1645 int32_t fsbuf_offset = bclu_offset & HAMMER_BUFMASK;
1646 hammer_alist_t live;
1649 elm_no = bclu_offset / HAMMER_BUFSIZE * HAMMER_FSBUF_MAXBLKS;
1650 fsbuf_offset -= offsetof(union hammer_fsbuf_ondisk, btree.nodes[0]);
1651 live = &cluster->alist_btree;
1652 KKASSERT(fsbuf_offset >= 0 && fsbuf_offset % blksize == 0);
1653 elm_no += fsbuf_offset / blksize;
1654 hammer_alist_free(live, elm_no, 1);
1655 hammer_modify_cluster(cluster);
1659 hammer_free_data(hammer_cluster_t cluster, int32_t bclu_offset, int32_t bytes)
1661 const int32_t blksize = HAMMER_DATA_BLKSIZE;
1662 int32_t fsbuf_offset = bclu_offset & HAMMER_BUFMASK;
1663 hammer_alist_t live;
1668 if ((bytes & HAMMER_BUFMASK) == 0) {
1669 nblks = bytes / HAMMER_BUFSIZE;
1670 KKASSERT(nblks == 1 && (bclu_offset & HAMMER_BUFMASK) == 0);
1671 buf_no = bclu_offset / HAMMER_BUFSIZE;
1672 hammer_alist_free(&cluster->alist_master, buf_no, nblks);
1673 hammer_modify_cluster(cluster);
1677 elm_no = bclu_offset / HAMMER_BUFSIZE * HAMMER_FSBUF_MAXBLKS;
1678 fsbuf_offset -= offsetof(union hammer_fsbuf_ondisk, data.data[0][0]);
1679 live = &cluster->alist_mdata;
1680 nblks = (bytes + HAMMER_DATA_BLKMASK) & ~HAMMER_DATA_BLKMASK;
1681 nblks /= HAMMER_DATA_BLKSIZE;
1682 KKASSERT(fsbuf_offset >= 0 && fsbuf_offset % blksize == 0);
1683 elm_no += fsbuf_offset / blksize;
1684 hammer_alist_free(live, elm_no, nblks);
1685 hammer_modify_cluster(cluster);
1689 hammer_free_record(hammer_cluster_t cluster, int32_t bclu_offset)
1691 const int32_t blksize = sizeof(union hammer_record_ondisk);
1692 int32_t fsbuf_offset = bclu_offset & HAMMER_BUFMASK;
1693 hammer_alist_t live;
1696 elm_no = bclu_offset / HAMMER_BUFSIZE * HAMMER_FSBUF_MAXBLKS;
1697 fsbuf_offset -= offsetof(union hammer_fsbuf_ondisk, record.recs[0]);
1698 live = &cluster->alist_record;
1699 KKASSERT(fsbuf_offset >= 0 && fsbuf_offset % blksize == 0);
1700 elm_no += fsbuf_offset / blksize;
1701 hammer_alist_free(live, elm_no, 1);
1702 hammer_modify_cluster(cluster);
1707 * Allocate a new filesystem buffer and assign it to the specified
1708 * filesystem buffer type. The new buffer will be added to the
1709 * type-specific A-list and initialized.
1712 alloc_new_buffer(hammer_cluster_t cluster, hammer_alist_t live,
1713 u_int64_t type, int32_t nelements,
1714 int start, int *errorp, struct hammer_buffer **bufferp)
1716 hammer_buffer_t buffer;
1719 start = start / HAMMER_FSBUF_MAXBLKS; /* convert to buf_no */
1721 if (type == HAMMER_FSBUF_RECORDS) {
1722 buf_no = hammer_alist_alloc_rev(&cluster->alist_master,
1724 if (buf_no == HAMMER_ALIST_BLOCK_NONE) {
1725 buf_no = hammer_alist_alloc_rev(&cluster->alist_master,
1726 1, HAMMER_ALIST_BLOCK_MAX);
1729 buf_no = hammer_alist_alloc_fwd(&cluster->alist_master,
1731 if (buf_no == HAMMER_ALIST_BLOCK_NONE) {
1732 buf_no = hammer_alist_alloc_fwd(&cluster->alist_master,
1736 KKASSERT(buf_no != HAMMER_ALIST_BLOCK_NONE); /* XXX */
1737 hammer_modify_cluster(cluster);
1740 * The new buffer must be initialized (type != 0) regardless of
1741 * whether we already have it cached or not, so don't try to
1742 * optimize the cached buffer check. Just call hammer_get_buffer().
1744 buffer = hammer_get_buffer(cluster, buf_no, type, errorp);
1746 hammer_rel_buffer(*bufferp, 0);
1750 * Finally, do a meta-free of the buffer's elements.
1753 kprintf("alloc_new_buffer buf_no %d type %016llx nelms %d\n",
1754 buf_no, type, nelements);
1755 hammer_alist_free(live, buf_no * HAMMER_FSBUF_MAXBLKS,
 * Flush various tracking structures to disk
1770 flush_all_volumes(void)
1772 hammer_volume_t vol;
1774 for (vol = VolBase; vol; vol = vol->next)
1779 flush_volume(hammer_volume_t vol)
1781 hammer_supercl_t supercl;
1782 hammer_cluster_t cl;
1784 for (supercl = vol->supercl_base; supercl; supercl = supercl->next)
1785 flush_supercl(supercl);
1786 for (cl = vol->cluster_base; cl; cl = cl->next)
1788 writehammerbuf(vol, vol->ondisk, 0);
1792 flush_supercl(hammer_supercl_t supercl)
1794 int64_t supercl_offset;
1796 supercl_offset = supercl->scl_offset;
1797 writehammerbuf(supercl->volume, supercl->ondisk, supercl_offset);
1801 flush_cluster(hammer_cluster_t cl)
1803 hammer_buffer_t buf;
1804 int64_t cluster_offset;
1806 for (buf = cl->buffer_base; buf; buf = buf->next)
1808 cluster_offset = cl->clu_offset;
1809 writehammerbuf(cl->volume, cl->ondisk, cluster_offset);
1813 flush_buffer(hammer_buffer_t buf)
1815 int64_t buffer_offset;
1817 buffer_offset = buf->buf_offset + buf->cluster->clu_offset;
1818 writehammerbuf(buf->volume, buf->ondisk, buffer_offset);
1824 * Generic buffer initialization
1827 initbuffer(hammer_alist_t live, hammer_fsbuf_head_t head, u_int64_t type)
1829 head->buf_type = type;
1830 hammer_alist_init(live);
1834 * Calculate the cluster's offset in the volume. This calculation is
1835 * slightly more complex when using superclusters because superclusters
1836 * are grouped in blocks of 16, followed by 16 x N clusters where N
1837 * is the number of clusters a supercluster can manage.
1840 calculate_cluster_offset(hammer_volume_t volume, int32_t clu_no)
1843 int64_t scl_group_size;
1846 if (volume->vol_flags & HAMMER_VOLF_USINGSUPERCL) {
1847 scl_group = clu_no / HAMMER_VOL_SUPERCLUSTER_GROUP /
1848 HAMMER_SCL_MAXCLUSTERS;
1850 ((int64_t)HAMMER_BUFSIZE *
1851 HAMMER_VOL_SUPERCLUSTER_GROUP) +
1852 ((int64_t)HAMMER_VOL_SUPERCLUSTER_GROUP *
1853 volume->vol_clsize * HAMMER_SCL_MAXCLUSTERS);
1855 HAMMER_VOL_SUPERCLUSTER_GROUP * HAMMER_BUFSIZE;
1857 off = volume->cluster_base +
1858 scl_group * scl_group_size +
1859 (HAMMER_BUFSIZE * HAMMER_VOL_SUPERCLUSTER_GROUP) +
1860 ((int64_t)clu_no % ((int64_t)HAMMER_SCL_MAXCLUSTERS *
1861 HAMMER_VOL_SUPERCLUSTER_GROUP))
1864 off = volume->cluster_base +
1865 (int64_t)clu_no * volume->vol_clsize;
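
/*
 * Informal worked example for the comment above, assuming a group factor
 * of HAMMER_VOL_SUPERCLUSTER_GROUP (16) and N = HAMMER_SCL_MAXCLUSTERS:
 * each on-disk group consists of 16 super-cluster headers (16 *
 * HAMMER_BUFSIZE bytes) followed by 16 * N clusters (16 * N * vol_clsize
 * bytes).  A cluster number is therefore decomposed into a group index,
 * clu_no / (16 * N), plus an index within the group, which is what the
 * arithmetic above computes.  Without super-clusters the offset is simply
 * cluster_base + clu_no * vol_clsize.
 */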
1871 * Calculate a super-cluster's offset in the volume.
1874 calculate_supercl_offset(hammer_volume_t volume, int32_t scl_no)
1878 int64_t scl_group_size;
1880 KKASSERT (volume->vol_flags & HAMMER_VOLF_USINGSUPERCL);
1881 scl_group = scl_no / HAMMER_VOL_SUPERCLUSTER_GROUP;
1884 ((int64_t)HAMMER_BUFSIZE *
1885 HAMMER_VOL_SUPERCLUSTER_GROUP) +
1886 ((int64_t)HAMMER_VOL_SUPERCLUSTER_GROUP *
1887 volume->vol_clsize * HAMMER_SCL_MAXCLUSTERS);
1889 HAMMER_VOL_SUPERCLUSTER_GROUP * HAMMER_BUFSIZE;
1890 off = volume->cluster_base + (scl_group * scl_group_size) +
1891 (scl_no % HAMMER_VOL_SUPERCLUSTER_GROUP) * HAMMER_BUFSIZE;
1893 off = volume->cluster_base + (scl_no * HAMMER_BUFSIZE);
1901 * Setup the parameters for the various A-lists we use in hammer. The
1902 * supercluster A-list must be chained to the cluster A-list and cluster
1903 * slave A-lists are chained to buffer A-lists.
1905 * See hammer_init_alist_config() below.
1909 * A-LIST - cluster recursion into a filesystem buffer
1912 buffer_alist_init(void *info, int32_t blk, int32_t radix)
1914 hammer_cluster_t cluster = info;
1915 hammer_buffer_t buffer;
1920 * Calculate the buffer number, initialize based on the buffer type.
1921 * The buffer has already been allocated so assert that it has been
1924 buf_no = blk / HAMMER_FSBUF_MAXBLKS;
1925 buffer = hammer_get_buffer(cluster, buf_no, 0, &error);
1927 hammer_rel_buffer(buffer, 0);
1932 buffer_alist_destroy(void *info, int32_t blk, int32_t radix)
1938 * Note: atblk can be negative and atblk - blk can go negative.
1941 buffer_alist_alloc_fwd(void *info, int32_t blk, int32_t radix,
1942 int32_t count, int32_t atblk, int32_t *fullp)
1944 hammer_cluster_t cluster = info;
1945 hammer_buffer_t buffer;
1950 buf_no = blk / HAMMER_FSBUF_MAXBLKS;
1951 buffer = hammer_get_buffer(cluster, buf_no, 0, &error);
1953 KKASSERT(buffer->ondisk->head.buf_type != 0);
1955 r = hammer_alist_alloc_fwd(&buffer->alist, count, atblk - blk);
1956 if (r != HAMMER_ALIST_BLOCK_NONE)
1958 *fullp = hammer_alist_isfull(&buffer->alist);
1959 hammer_modify_buffer(buffer);
1960 hammer_rel_buffer(buffer, 0);
1962 r = HAMMER_ALIST_BLOCK_NONE;
1968 buffer_alist_alloc_rev(void *info, int32_t blk, int32_t radix,
1969 int32_t count, int32_t atblk, int32_t *fullp)
1971 hammer_cluster_t cluster = info;
1972 hammer_buffer_t buffer;
1977 buf_no = blk / HAMMER_FSBUF_MAXBLKS;
1978 buffer = hammer_get_buffer(cluster, buf_no, 0, &error);
1980 KKASSERT(buffer->ondisk->head.buf_type != 0);
1982 r = hammer_alist_alloc_rev(&buffer->alist, count, atblk - blk);
1983 if (r != HAMMER_ALIST_BLOCK_NONE)
1985 *fullp = hammer_alist_isfull(&buffer->alist);
1986 hammer_modify_buffer(buffer);
1987 hammer_rel_buffer(buffer, 0);
1989 r = HAMMER_ALIST_BLOCK_NONE;
1996 buffer_alist_free(void *info, int32_t blk, int32_t radix,
1997 int32_t base_blk, int32_t count, int32_t *emptyp)
1999 hammer_cluster_t cluster = info;
2000 hammer_buffer_t buffer;
2004 buf_no = blk / HAMMER_FSBUF_MAXBLKS;
2005 buffer = hammer_get_buffer(cluster, buf_no, 0, &error);
2007 KKASSERT(buffer->ondisk->head.buf_type != 0);
2008 hammer_alist_free(&buffer->alist, base_blk, count);
2009 *emptyp = hammer_alist_isempty(&buffer->alist);
2010 hammer_modify_buffer(buffer);
2011 hammer_rel_buffer(buffer, 0);
2018 buffer_alist_print(void *info, int32_t blk, int32_t radix, int tab)
2023 * A-LIST - super-cluster recursion into a cluster and cluster recursion
2024 * into a filesystem buffer. A-List's are mostly self-contained entities,
2025 * but callbacks must be installed to recurse from one A-List to another.
2027 * Implementing these callbacks allows us to operate a multi-layered A-List
2028 * as a single entity.
2031 super_alist_init(void *info, int32_t blk, int32_t radix)
2033 hammer_volume_t volume = info;
2034 hammer_supercl_t supercl;
2039 * Calculate the super-cluster number containing the cluster (blk)
2040 * and obtain the super-cluster buffer.
2042 scl_no = blk / HAMMER_SCL_MAXCLUSTERS;
2043 supercl = hammer_get_supercl(volume, scl_no, &error, 1);
2045 hammer_rel_supercl(supercl, 0);
2050 super_alist_destroy(void *info, int32_t blk, int32_t radix)
2056 super_alist_alloc_fwd(void *info, int32_t blk, int32_t radix,
2057 int32_t count, int32_t atblk, int32_t *fullp)
2059 hammer_volume_t volume = info;
2060 hammer_supercl_t supercl;
2065 scl_no = blk / HAMMER_SCL_MAXCLUSTERS;
2066 supercl = hammer_get_supercl(volume, scl_no, &error, 1);
2068 r = hammer_alist_alloc_fwd(&supercl->alist, count, atblk - blk);
2069 if (r != HAMMER_ALIST_BLOCK_NONE)
2071 *fullp = hammer_alist_isfull(&supercl->alist);
2072 hammer_modify_supercl(supercl);
2073 hammer_rel_supercl(supercl, 0);
2075 r = HAMMER_ALIST_BLOCK_NONE;
2082 super_alist_alloc_rev(void *info, int32_t blk, int32_t radix,
2083 int32_t count, int32_t atblk, int32_t *fullp)
2085 hammer_volume_t volume = info;
2086 hammer_supercl_t supercl;
2091 scl_no = blk / HAMMER_SCL_MAXCLUSTERS;
2092 supercl = hammer_get_supercl(volume, scl_no, &error, 1);
2094 r = hammer_alist_alloc_rev(&supercl->alist, count, atblk - blk);
2095 if (r != HAMMER_ALIST_BLOCK_NONE)
2097 *fullp = hammer_alist_isfull(&supercl->alist);
2098 hammer_modify_supercl(supercl);
2099 hammer_rel_supercl(supercl, 0);
2101 r = HAMMER_ALIST_BLOCK_NONE;
2108 super_alist_free(void *info, int32_t blk, int32_t radix,
2109 int32_t base_blk, int32_t count, int32_t *emptyp)
2111 hammer_volume_t volume = info;
2112 hammer_supercl_t supercl;
2116 scl_no = blk / HAMMER_SCL_MAXCLUSTERS;
2117 supercl = hammer_get_supercl(volume, scl_no, &error, 1);
2119 hammer_alist_free(&supercl->alist, base_blk, count);
2120 *emptyp = hammer_alist_isempty(&supercl->alist);
2121 hammer_modify_supercl(supercl);
2122 hammer_rel_supercl(supercl, 0);
2129 super_alist_print(void *info, int32_t blk, int32_t radix, int tab)
2134 hammer_init_alist_config(void)
2136 hammer_alist_config_t config;
2138 hammer_alist_template(&Buf_alist_config, HAMMER_FSBUF_MAXBLKS,
2139 1, HAMMER_FSBUF_METAELMS);
2140 hammer_alist_template(&Vol_normal_alist_config, HAMMER_VOL_MAXCLUSTERS,
2141 1, HAMMER_VOL_METAELMS_1LYR);
2142 hammer_alist_template(&Vol_super_alist_config,
2143 HAMMER_VOL_MAXSUPERCLUSTERS * HAMMER_SCL_MAXCLUSTERS,
2144 HAMMER_SCL_MAXCLUSTERS, HAMMER_VOL_METAELMS_2LYR);
2145 hammer_alist_template(&Supercl_alist_config, HAMMER_VOL_MAXCLUSTERS,
2146 1, HAMMER_SUPERCL_METAELMS);
2147 hammer_alist_template(&Clu_master_alist_config, HAMMER_CLU_MAXBUFFERS,
2148 1, HAMMER_CLU_MASTER_METAELMS);
2149 hammer_alist_template(&Clu_slave_alist_config,
2150 HAMMER_CLU_MAXBUFFERS * HAMMER_FSBUF_MAXBLKS,
2151 HAMMER_FSBUF_MAXBLKS, HAMMER_CLU_SLAVE_METAELMS);
2153 config = &Vol_super_alist_config;
2154 config->bl_radix_init = super_alist_init;
2155 config->bl_radix_destroy = super_alist_destroy;
2156 config->bl_radix_alloc_fwd = super_alist_alloc_fwd;
2157 config->bl_radix_alloc_rev = super_alist_alloc_rev;
2158 config->bl_radix_free = super_alist_free;
2159 config->bl_radix_print = super_alist_print;
2161 config = &Clu_slave_alist_config;
2162 config->bl_radix_init = buffer_alist_init;
2163 config->bl_radix_destroy = buffer_alist_destroy;
2164 config->bl_radix_alloc_fwd = buffer_alist_alloc_fwd;
2165 config->bl_radix_alloc_rev = buffer_alist_alloc_rev;
2166 config->bl_radix_free = buffer_alist_free;
2167 config->bl_radix_print = buffer_alist_print;
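
/*
 * Informal walk-through of the layered A-list configuration above:
 * allocating an element from a cluster-level A-list (e.g. via
 * hammer_alloc_btree()) operates on an A-list templated from
 * Clu_slave_alist_config.  When the allocation descends into a radix
 * covering a single filesystem buffer, the bl_radix_alloc_fwd callback
 * (buffer_alist_alloc_fwd) obtains that buffer with hammer_get_buffer()
 * and allocates from the buffer's own A-list.  The volume-level
 * super-cluster A-list recurses into super-cluster buffers the same way
 * through the super_alist_* callbacks, allowing the multi-layered
 * A-lists to be operated as single entities.
 */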