/*
 * Copyright (c) 2007 The DragonFly Project.  All rights reserved.
 *
 * This code is derived from software contributed to The DragonFly Project
 * by Matthew Dillon <dillon@backplane.com>
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in
 *    the documentation and/or other materials provided with the
 *    distribution.
 * 3. Neither the name of The DragonFly Project nor the names of its
 *    contributors may be used to endorse or promote products derived
 *    from this software without specific, prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
 * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING,
 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 * $DragonFly: src/sys/vfs/hammer/hammer_ondisk.c,v 1.24 2008/01/24 02:14:45 dillon Exp $
 */
/*
 * Manage HAMMER's on-disk structures.  These routines are primarily
 * responsible for interfacing with the kernel's I/O subsystem and for
 * managing in-memory structures.
 */

#include "hammer.h"
#include <sys/fcntl.h>
#include <sys/nlookup.h>
#include <sys/buf.h>
#include <sys/buf2.h>
static void hammer_free_volume(hammer_volume_t volume);
static int hammer_load_volume(hammer_volume_t volume);
static int hammer_load_supercl(hammer_supercl_t supercl,
			hammer_alloc_state_t isnew);
static int hammer_load_cluster(hammer_cluster_t cluster, int getflags);
static int hammer_load_buffer(hammer_buffer_t buffer, u_int64_t buf_type);
static int hammer_load_node(hammer_node_t node);
static void alloc_new_buffer(hammer_cluster_t cluster, u_int64_t type,
			hammer_alist_t live,
			int32_t start, int *errorp,
			struct hammer_buffer **bufferp);
static void readhammerbuf(hammer_volume_t vol, void *data,
			int64_t offset);
static void writehammerbuf(hammer_volume_t vol, const void *data,
			int64_t offset);
static int64_t calculate_cluster_offset(hammer_volume_t vol, int32_t clu_no);
static int64_t calculate_supercl_offset(hammer_volume_t vol, int32_t scl_no);
static int32_t hammer_alloc_master(hammer_cluster_t cluster, int nblks,
			int32_t start, int isfwd);
static void hammer_adjust_stats(hammer_cluster_t cluster,
			u_int64_t buf_type, int nblks);
struct hammer_alist_config Buf_alist_config;
struct hammer_alist_config Vol_normal_alist_config;
struct hammer_alist_config Vol_super_alist_config;
struct hammer_alist_config Supercl_alist_config;
struct hammer_alist_config Clu_master_alist_config;
struct hammer_alist_config Clu_slave_alist_config;

/*
 * Red-Black tree support for various structures
 */
static int
hammer_ino_rb_compare(hammer_inode_t ip1, hammer_inode_t ip2)
{
	if (ip1->obj_id < ip2->obj_id)
		return(-1);
	if (ip1->obj_id > ip2->obj_id)
		return(1);
	if (ip1->obj_asof < ip2->obj_asof)
		return(-1);
	if (ip1->obj_asof > ip2->obj_asof)
		return(1);
	return(0);
}

static int
hammer_inode_info_cmp(hammer_inode_info_t info, hammer_inode_t ip)
{
	if (info->obj_id < ip->obj_id)
		return(-1);
	if (info->obj_id > ip->obj_id)
		return(1);
	if (info->obj_asof < ip->obj_asof)
		return(-1);
	if (info->obj_asof > ip->obj_asof)
		return(1);
	return(0);
}

static int
hammer_vol_rb_compare(hammer_volume_t vol1, hammer_volume_t vol2)
{
	if (vol1->vol_no < vol2->vol_no)
		return(-1);
	if (vol1->vol_no > vol2->vol_no)
		return(1);
	return(0);
}

static int
hammer_scl_rb_compare(hammer_supercl_t cl1, hammer_supercl_t cl2)
{
	if (cl1->scl_no < cl2->scl_no)
		return(-1);
	if (cl1->scl_no > cl2->scl_no)
		return(1);
	return(0);
}

static int
hammer_clu_rb_compare(hammer_cluster_t cl1, hammer_cluster_t cl2)
{
	if (cl1->clu_no < cl2->clu_no)
		return(-1);
	if (cl1->clu_no > cl2->clu_no)
		return(1);
	return(0);
}

static int
hammer_buf_rb_compare(hammer_buffer_t buf1, hammer_buffer_t buf2)
{
	if (buf1->buf_no < buf2->buf_no)
		return(-1);
	if (buf1->buf_no > buf2->buf_no)
		return(1);
	return(0);
}

static int
hammer_nod_rb_compare(hammer_node_t node1, hammer_node_t node2)
{
	if (node1->node_offset < node2->node_offset)
		return(-1);
	if (node1->node_offset > node2->node_offset)
		return(1);
	return(0);
}

/*
 * Note: The lookup function for hammer_ino_rb_tree winds up being named
 * hammer_ino_rb_tree_RB_LOOKUP_INFO(root, info).  The other lookup
 * functions are normal, e.g. hammer_clu_rb_tree_RB_LOOKUP(root, clu_no).
 */
RB_GENERATE(hammer_ino_rb_tree, hammer_inode, rb_node, hammer_ino_rb_compare);
RB_GENERATE_XLOOKUP(hammer_ino_rb_tree, INFO, hammer_inode, rb_node,
		hammer_inode_info_cmp, hammer_inode_info_t);
RB_GENERATE2(hammer_vol_rb_tree, hammer_volume, rb_node,
	     hammer_vol_rb_compare, int32_t, vol_no);
RB_GENERATE2(hammer_scl_rb_tree, hammer_supercl, rb_node,
	     hammer_scl_rb_compare, int32_t, scl_no);
RB_GENERATE2(hammer_clu_rb_tree, hammer_cluster, rb_node,
	     hammer_clu_rb_compare, int32_t, clu_no);
RB_GENERATE2(hammer_buf_rb_tree, hammer_buffer, rb_node,
	     hammer_buf_rb_compare, int32_t, buf_no);
RB_GENERATE2(hammer_nod_rb_tree, hammer_node, rb_node,
	     hammer_nod_rb_compare, int32_t, node_offset);
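
/*
 * Usage sketch (illustrative, not from the original file): the INFO
 * lookup generated above takes a hammer_inode_info instead of a plain
 * key, while the other trees get the normal two-argument form, e.g.
 *
 *	ip = hammer_ino_rb_tree_RB_LOOKUP_INFO(&hmp->rb_inos_root, &info);
 *	cluster = hammer_clu_rb_tree_RB_LOOKUP(&volume->rb_clus_root, clu_no);
 *
 * The rb_inos_root field name is an assumption about the hammer_mount
 * layout; the cluster lookup mirrors the calls made later in this file.
 */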
/************************************************************************
 *				VOLUMES					*
 ************************************************************************
 *
 * Load a HAMMER volume by name.  Returns 0 on success or a positive error
 * code on failure.  Volumes must be loaded at mount time, get_volume() will
 * not load a new volume.
 *
 * Calls made to hammer_load_volume() are single-threaded.
 */
int
hammer_install_volume(struct hammer_mount *hmp, const char *volname)
{
	struct mount *mp = hmp->mp;
	hammer_volume_t volume;
	struct hammer_volume_ondisk *ondisk;
	struct nlookupdata nd;
	struct buf *bp = NULL;
	int error;
	int ronly;

	ronly = ((mp->mnt_flag & MNT_RDONLY) ? 1 : 0);

	/*
	 * Allocate a volume structure
	 */
	++hammer_count_volumes;
	volume = kmalloc(sizeof(*volume), M_HAMMER, M_WAITOK|M_ZERO);
	volume->vol_name = kstrdup(volname, M_HAMMER);
	volume->hmp = hmp;
	hammer_io_init(&volume->io, HAMMER_STRUCTURE_VOLUME);
	volume->io.offset = 0LL;

	/*
	 * Get the device vnode
	 */
	error = nlookup_init(&nd, volume->vol_name, UIO_SYSSPACE, NLC_FOLLOW);
	if (error == 0)
		error = nlookup(&nd);
	if (error == 0)
		error = cache_vref(&nd.nl_nch, nd.nl_cred, &volume->devvp);
	nlookup_done(&nd);
	if (error == 0) {
		vn_isdisk(volume->devvp, &error);
	}
	if (error == 0) {
		vn_lock(volume->devvp, LK_EXCLUSIVE | LK_RETRY);
		error = VOP_OPEN(volume->devvp, (ronly ? FREAD : FREAD|FWRITE),
				 FSCRED, NULL);
		vn_unlock(volume->devvp);
	}
	if (error) {
		hammer_free_volume(volume);
		return(error);
	}

	/*
	 * Extract the volume number from the volume header and do various
	 * sanity checks.
	 */
	error = bread(volume->devvp, 0LL, HAMMER_BUFSIZE, &bp);
	if (error)
		goto late_failure;
	ondisk = (void *)bp->b_data;
	if (ondisk->head.buf_type != HAMMER_FSBUF_VOLUME) {
		kprintf("hammer_mount: volume %s has an invalid header\n",
			volume->vol_name);
		error = EFTYPE;
		goto late_failure;
	}
	volume->vol_no = ondisk->vol_no;
	volume->cluster_base = ondisk->vol_clo_beg;
	volume->vol_clsize = ondisk->vol_clsize;
	volume->vol_flags = ondisk->vol_flags;
	volume->nblocks = ondisk->vol_nblocks;
	RB_INIT(&volume->rb_clus_root);
	RB_INIT(&volume->rb_scls_root);

	hmp->mp->mnt_stat.f_blocks += volume->nblocks;

	if (RB_EMPTY(&hmp->rb_vols_root)) {
		hmp->fsid = ondisk->vol_fsid;
	} else if (bcmp(&hmp->fsid, &ondisk->vol_fsid, sizeof(uuid_t))) {
		kprintf("hammer_mount: volume %s's fsid does not match "
			"other volumes\n", volume->vol_name);
		error = EFTYPE;
		goto late_failure;
	}

	/*
	 * Insert the volume structure into the red-black tree.
	 */
	if (RB_INSERT(hammer_vol_rb_tree, &hmp->rb_vols_root, volume)) {
		kprintf("hammer_mount: volume %s has a duplicate vol_no %d\n",
			volume->vol_name, volume->vol_no);
		error = EEXIST;
	}

	/*
	 * Set the root volume and load the root cluster.  HAMMER special
	 * cases rootvol and rootcl and will not deallocate the structures.
	 * We do not hold a ref because this would prevent related I/O
	 * from being flushed.
	 */
	if (error == 0 && ondisk->vol_rootvol == ondisk->vol_no) {
		hmp->rootvol = volume;
		if (bp) {
			brelse(bp);
			bp = NULL;
		}
		hammer_ref_volume(volume);
		hmp->rootcl = hammer_get_cluster(volume,
						 ondisk->vol0_root_clu_no,
						 &error, 0);
		hammer_rel_cluster(hmp->rootcl, 0);
		hammer_rel_volume(volume, 0);
		hmp->fsid_udev = dev2udev(vn_todev(volume->devvp));
	}
late_failure:
	if (bp)
		brelse(bp);
	if (error) {
		/*vinvalbuf(volume->devvp, V_SAVE, 0, 0);*/
		VOP_CLOSE(volume->devvp, ronly ? FREAD : FREAD|FWRITE);
		hammer_free_volume(volume);
	}
	return (error);
}

/*
 * Unload and free a HAMMER volume.  Must return >= 0 to continue scan
 * so returns -1 on failure.
 */
int
hammer_unload_volume(hammer_volume_t volume, void *data __unused)
{
	struct hammer_mount *hmp = volume->hmp;
	hammer_cluster_t rootcl;
	int ronly = ((hmp->mp->mnt_flag & MNT_RDONLY) ? 1 : 0);

	/*
	 * Sync clusters, sync volume
	 */
	hmp->mp->mnt_stat.f_blocks -= volume->nblocks;

	/*
	 * Clean up the root cluster, which is held unlocked in the root
	 * volume.
	 */
	if (hmp->rootvol == volume) {
		if ((rootcl = hmp->rootcl) != NULL)
			hmp->rootcl = NULL;
		hmp->rootvol = NULL;
	}

	/*
	 * Unload clusters and super-clusters.  Unloading a super-cluster
	 * also unloads related clusters, but the filesystem may not be
	 * using super-clusters so unload clusters anyway.
	 */
	RB_SCAN(hammer_clu_rb_tree, &volume->rb_clus_root, NULL,
			hammer_unload_cluster, NULL);
	RB_SCAN(hammer_scl_rb_tree, &volume->rb_scls_root, NULL,
			hammer_unload_supercl, NULL);
	hammer_io_waitdep(&volume->io);

	/*
	 * Release our buffer and flush anything left in the buffer cache.
	 */
	hammer_io_release(&volume->io, 2);

	/*
	 * There should be no references on the volume, no clusters, and
	 * no super-clusters.
	 */
	KKASSERT(volume->io.lock.refs == 0);
	KKASSERT(RB_EMPTY(&volume->rb_clus_root));
	KKASSERT(RB_EMPTY(&volume->rb_scls_root));

	volume->ondisk = NULL;
	if (volume->devvp) {
		if (ronly) {
			vinvalbuf(volume->devvp, 0, 0, 0);
			VOP_CLOSE(volume->devvp, FREAD);
		} else {
			vinvalbuf(volume->devvp, V_SAVE, 0, 0);
			VOP_CLOSE(volume->devvp, FREAD|FWRITE);
		}
	}

	/*
	 * Destroy the structure
	 */
	RB_REMOVE(hammer_vol_rb_tree, &hmp->rb_vols_root, volume);
	hammer_free_volume(volume);
	return(0);
}

static
void
hammer_free_volume(hammer_volume_t volume)
{
	if (volume->vol_name) {
		kfree(volume->vol_name, M_HAMMER);
		volume->vol_name = NULL;
	}
	if (volume->devvp) {
		vrele(volume->devvp);
		volume->devvp = NULL;
	}
	--hammer_count_volumes;
	kfree(volume, M_HAMMER);
}

/*
 * Get a HAMMER volume.  The volume must already exist.
 */
hammer_volume_t
hammer_get_volume(struct hammer_mount *hmp, int32_t vol_no, int *errorp)
{
	struct hammer_volume *volume;

	/*
	 * Locate the volume structure
	 */
	volume = RB_LOOKUP(hammer_vol_rb_tree, &hmp->rb_vols_root, vol_no);
	if (volume == NULL) {
		*errorp = ENOENT;
		return(NULL);
	}
	hammer_ref(&volume->io.lock);

	/*
	 * Deal with on-disk info
	 */
	if (volume->ondisk == NULL || volume->io.loading) {
		*errorp = hammer_load_volume(volume);
		if (*errorp) {
			hammer_rel_volume(volume, 1);
			volume = NULL;
		}
	} else {
		*errorp = 0;
	}
	return(volume);
}

int
hammer_ref_volume(hammer_volume_t volume)
{
	int error;

	hammer_ref(&volume->io.lock);

	/*
	 * Deal with on-disk info
	 */
	if (volume->ondisk == NULL || volume->io.loading) {
		error = hammer_load_volume(volume);
		if (error)
			hammer_rel_volume(volume, 1);
	} else {
		error = 0;
	}
	return (error);
}

hammer_volume_t
hammer_get_root_volume(struct hammer_mount *hmp, int *errorp)
{
	hammer_volume_t volume;

	volume = hmp->rootvol;
	KKASSERT(volume != NULL);
	hammer_ref(&volume->io.lock);

	/*
	 * Deal with on-disk info
	 */
	if (volume->ondisk == NULL || volume->io.loading) {
		*errorp = hammer_load_volume(volume);
		if (*errorp) {
			hammer_rel_volume(volume, 1);
			volume = NULL;
		}
	} else {
		*errorp = 0;
	}
	return (volume);
}

/*
 * Load a volume's on-disk information.  The volume must be referenced and
 * not locked.  We temporarily acquire an exclusive lock to interlock
 * against releases or multiple get's.
 */
static int
hammer_load_volume(hammer_volume_t volume)
{
	struct hammer_volume_ondisk *ondisk;
	int error;

	hammer_lock_ex(&volume->io.lock);
	KKASSERT(volume->io.loading == 0);
	volume->io.loading = 1;

	if (volume->ondisk == NULL) {
		error = hammer_io_read(volume->devvp, &volume->io);
		if (error) {
			volume->io.loading = 0;
			hammer_unlock(&volume->io.lock);
			return (error);
		}
		volume->ondisk = ondisk = (void *)volume->io.bp->b_data;

		/*
		 * Configure the volume's A-lists.  These are used to
		 * allocate clusters.
		 */
		if (volume->vol_flags & HAMMER_VOLF_USINGSUPERCL) {
			volume->alist.config = &Vol_super_alist_config;
			volume->alist.meta = ondisk->vol_almeta.super;
			volume->alist.info = volume;
		} else {
			volume->alist.config = &Vol_normal_alist_config;
			volume->alist.meta = ondisk->vol_almeta.normal;
			volume->alist.info = NULL;
		}
	} else {
		error = 0;
	}
	volume->io.loading = 0;
	hammer_unlock(&volume->io.lock);
	return(error);
}

/*
 * Release a volume.  Call hammer_io_release on the last reference.  We have
 * to acquire an exclusive lock to interlock against volume->ondisk tests
 * in hammer_load_volume(), and hammer_io_release() also expects an exclusive
 * lock.
 *
 * Volumes are not unloaded from memory during normal operation.
 */
void
hammer_rel_volume(hammer_volume_t volume, int flush)
{
	if (volume->io.lock.refs == 1) {
		hammer_lock_ex(&volume->io.lock);
		if (volume->io.lock.refs == 1) {
			volume->ondisk = NULL;
			hammer_io_release(&volume->io, flush);
		} else if (flush) {
			hammer_io_flush(&volume->io);
		}
		hammer_unlock(&volume->io.lock);
	}
	hammer_unref(&volume->io.lock);
}

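/*
 * Usage sketch (illustrative): the get/rel pairing used throughout this
 * file.  hammer_get_volume() returns a referenced volume with its ondisk
 * header loaded, and the caller hands the reference back when done:
 *
 *	volume = hammer_get_volume(hmp, vol_no, &error);
 *	if (volume) {
 *		... use volume->ondisk ...
 *		hammer_rel_volume(volume, 0);
 *	}
 */
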
/************************************************************************
 *				SUPER-CLUSTERS				*
 ************************************************************************
 *
 * Manage super-clusters.  Note that a supercl holds a reference to its
 * associated volume.
 */
int
hammer_find_supercl(hammer_volume_t volume, int32_t scl_no)
{
	if (RB_LOOKUP(hammer_scl_rb_tree, &volume->rb_scls_root, scl_no))
		return(1);
	return(0);
}

hammer_supercl_t
hammer_get_supercl(hammer_volume_t volume, int32_t scl_no,
		   int *errorp, hammer_alloc_state_t isnew)
{
	hammer_supercl_t supercl;

	/*
	 * Locate and lock the super-cluster structure, creating one
	 * if necessary.
	 */
again:
	supercl = RB_LOOKUP(hammer_scl_rb_tree, &volume->rb_scls_root, scl_no);
	if (supercl == NULL) {
		++hammer_count_supercls;
		supercl = kmalloc(sizeof(*supercl), M_HAMMER, M_WAITOK|M_ZERO);
		supercl->scl_no = scl_no;
		supercl->volume = volume;
		supercl->io.offset = calculate_supercl_offset(volume, scl_no);
		hammer_io_init(&supercl->io, HAMMER_STRUCTURE_SUPERCL);
		hammer_ref(&supercl->io.lock);

		/*
		 * Insert the super-cluster into the RB tree and handle late
		 * collisions.
		 */
		if (RB_INSERT(hammer_scl_rb_tree, &volume->rb_scls_root, supercl)) {
			hammer_unref(&supercl->io.lock);
			--hammer_count_supercls;
			kfree(supercl, M_HAMMER);
			goto again;
		}
		hammer_ref(&volume->io.lock);
	} else {
		hammer_ref(&supercl->io.lock);
	}

	/*
	 * Deal with on-disk info
	 */
	if (supercl->ondisk == NULL || isnew || supercl->io.loading) {
		*errorp = hammer_load_supercl(supercl, isnew);
		if (*errorp) {
			hammer_rel_supercl(supercl, 1);
			supercl = NULL;
		}
	} else {
		*errorp = 0;
	}
	return(supercl);
}

static int
hammer_load_supercl(hammer_supercl_t supercl, hammer_alloc_state_t isnew)
{
	struct hammer_supercl_ondisk *ondisk;
	hammer_volume_t volume = supercl->volume;
	int error;

	hammer_lock_ex(&supercl->io.lock);
	KKASSERT(supercl->io.loading == 0);
	supercl->io.loading = 1;

	if (supercl->ondisk == NULL) {
		if (isnew)
			error = hammer_io_new(volume->devvp, &supercl->io);
		else
			error = hammer_io_read(volume->devvp, &supercl->io);
		if (error) {
			supercl->io.loading = 0;
			hammer_unlock(&supercl->io.lock);
			return (error);
		}
		supercl->ondisk = ondisk = (void *)supercl->io.bp->b_data;

		supercl->alist.config = &Supercl_alist_config;
		supercl->alist.meta = ondisk->scl_meta;
		supercl->alist.info = NULL;
	} else if (isnew) {
		error = hammer_io_new(volume->devvp, &supercl->io);
	} else {
		error = 0;
	}
	if (error == 0 && isnew) {
		/*
		 * If this is a new super-cluster we have to initialize
		 * various ondisk structural elements.  The caller is
		 * responsible for the remainder.
		 */
		struct hammer_alist_live dummy;
		int64_t nclusters;

		hammer_modify_supercl(supercl);

		ondisk = supercl->ondisk;
		dummy.config = &Buf_alist_config;
		dummy.meta = ondisk->head.buf_almeta;
		dummy.info = NULL;
		hammer_initbuffer(&dummy, &ondisk->head, HAMMER_FSBUF_SUPERCL);

		nclusters = volume->ondisk->vol_nclusters -
			    ((int64_t)supercl->scl_no * HAMMER_SCL_MAXCLUSTERS);
		KKASSERT(nclusters > 0);
		if (nclusters > HAMMER_SCL_MAXCLUSTERS)
			nclusters = HAMMER_SCL_MAXCLUSTERS;
		hammer_alist_init(&supercl->alist, 0, (int32_t)nclusters,
				  isnew);
	}
	supercl->io.loading = 0;
	hammer_unlock(&supercl->io.lock);
	return (error);
}

/*
 * NOTE: Called from RB_SCAN, must return >= 0 for scan to continue.
 */
int
hammer_unload_supercl(hammer_supercl_t supercl, void *data __unused)
{
	KKASSERT(supercl->io.lock.refs == 0);
	hammer_ref(&supercl->io.lock);
	hammer_rel_supercl(supercl, 2);
	return(0);
}

/*
 * Release a super-cluster.  We have to deal with several places where
 * another thread can ref the super-cluster.
 *
 * Only destroy the structure itself if the related buffer cache buffer
 * was disassociated from it.  This ties the management of the structure
 * to the buffer cache subsystem.
 */
void
hammer_rel_supercl(hammer_supercl_t supercl, int flush)
{
	hammer_volume_t volume;

	if (supercl->io.lock.refs == 1) {
		hammer_lock_ex(&supercl->io.lock);
		if (supercl->io.lock.refs == 1) {
			hammer_io_release(&supercl->io, flush);
			if (supercl->io.bp == NULL &&
			    supercl->io.lock.refs == 1) {
				volume = supercl->volume;
				RB_REMOVE(hammer_scl_rb_tree,
					  &volume->rb_scls_root, supercl);
				supercl->volume = NULL;	/* sanity */
				--hammer_count_supercls;
				kfree(supercl, M_HAMMER);
				hammer_rel_volume(volume, 0);
				return;
			}
		} else if (flush) {
			hammer_io_flush(&supercl->io);
		}
		hammer_unlock(&supercl->io.lock);
	}
	hammer_unref(&supercl->io.lock);
}

/************************************************************************
 *				CLUSTERS				*
 ************************************************************************
 */
hammer_cluster_t
hammer_get_cluster(hammer_volume_t volume, int32_t clu_no,
		   int *errorp, int getflags)
{
	hammer_cluster_t cluster;

again:
	cluster = RB_LOOKUP(hammer_clu_rb_tree, &volume->rb_clus_root, clu_no);
	if (cluster == NULL) {
		++hammer_count_clusters;
		cluster = kmalloc(sizeof(*cluster), M_HAMMER, M_WAITOK|M_ZERO);
		cluster->clu_no = clu_no;
		cluster->volume = volume;
		RB_INIT(&cluster->rb_bufs_root);
		RB_INIT(&cluster->rb_nods_root);
		hammer_io_init(&cluster->io, HAMMER_STRUCTURE_CLUSTER);
		cluster->io.offset = calculate_cluster_offset(volume, clu_no);
		hammer_ref(&cluster->io.lock);

		/*
		 * Insert the cluster into the RB tree and handle late
		 * collisions.
		 */
		if (RB_INSERT(hammer_clu_rb_tree, &volume->rb_clus_root, cluster)) {
			hammer_unref(&cluster->io.lock);
			--hammer_count_clusters;
			kfree(cluster, M_HAMMER);
			goto again;
		}
		hammer_ref(&volume->io.lock);
	} else {
		hammer_ref(&cluster->io.lock);
	}

	/*
	 * Deal with on-disk info
	 */
	if (cluster->ondisk == NULL || getflags || cluster->io.validated == 0) {
		*errorp = hammer_load_cluster(cluster, getflags);
		if (*errorp) {
			hammer_rel_cluster(cluster, 1);
			cluster = NULL;
		}
	} else {
		*errorp = 0;
	}
	return (cluster);
}

hammer_cluster_t
hammer_get_root_cluster(struct hammer_mount *hmp, int *errorp)
{
	hammer_cluster_t cluster;

	cluster = hmp->rootcl;
	KKASSERT(cluster != NULL);
	hammer_ref(&cluster->io.lock);

	/*
	 * Deal with on-disk info
	 */
	if (cluster->ondisk == NULL || cluster->io.validated == 0) {
		*errorp = hammer_load_cluster(cluster, 0);
		if (*errorp) {
			hammer_rel_cluster(cluster, 1);
			cluster = NULL;
		}
	} else {
		*errorp = 0;
	}
	return (cluster);
}

static
int
hammer_load_cluster(hammer_cluster_t cluster, int getflags)
{
	hammer_volume_t volume = cluster->volume;
	struct hammer_cluster_ondisk *ondisk;
	int error;

	hammer_lock_ex(&cluster->io.lock);
	KKASSERT(cluster->io.loading == 0);
	cluster->io.loading = 1;

	if (cluster->ondisk == NULL) {
		KKASSERT(TAILQ_EMPTY(&cluster->io.deplist));

		/*
		 * Unmodified buffers may be present, indicating that we
		 * had already validated the cluster even though we no longer
		 * have its ondisk info.
		 */
		if (RB_EMPTY(&cluster->rb_bufs_root))
			cluster->io.validated = 0;
		if (getflags & GET_CLUSTER_NEW)
			error = hammer_io_new(volume->devvp, &cluster->io);
		else
			error = hammer_io_read(volume->devvp, &cluster->io);
		if (error) {
			cluster->io.loading = 0;
			hammer_unlock(&cluster->io.lock);
			return (error);
		}
		cluster->ondisk = ondisk = (void *)cluster->io.bp->b_data;

		cluster->alist_master.config = &Clu_master_alist_config;
		cluster->alist_master.meta = ondisk->clu_master_meta;
		cluster->alist_btree.config = &Clu_slave_alist_config;
		cluster->alist_btree.meta = ondisk->clu_btree_meta;
		cluster->alist_btree.info = cluster;
		cluster->alist_record.config = &Clu_slave_alist_config;
		cluster->alist_record.meta = ondisk->clu_record_meta;
		cluster->alist_record.info = cluster;
		cluster->alist_mdata.config = &Clu_slave_alist_config;
		cluster->alist_mdata.meta = ondisk->clu_mdata_meta;
		cluster->alist_mdata.info = cluster;

		if ((getflags & GET_CLUSTER_NEW) == 0) {
			/*
			 * Load cluster range info for easy access
			 */
			cluster->clu_btree_beg = ondisk->clu_btree_beg;
			cluster->clu_btree_end = ondisk->clu_btree_end;
		}
	} else if (getflags & GET_CLUSTER_NEW) {
		error = hammer_io_new(volume->devvp, &cluster->io);
	} else {
		error = 0;
	}
	if (error == 0 && (getflags & GET_CLUSTER_NEW)) {
		/*
		 * If this is a new cluster we have to initialize
		 * various ondisk structural elements.  The caller is
		 * responsible for the remainder.
		 */
		struct hammer_alist_live dummy;
		hammer_node_t croot;
		hammer_volume_ondisk_t voldisk;
		int32_t nbuffers;

		hammer_modify_cluster(cluster);
		ondisk = cluster->ondisk;
		voldisk = volume->ondisk;

		dummy.config = &Buf_alist_config;
		dummy.meta = ondisk->head.buf_almeta;
		dummy.info = NULL;
		hammer_initbuffer(&dummy, &ondisk->head, HAMMER_FSBUF_CLUSTER);

		ondisk->vol_fsid = voldisk->vol_fsid;
		ondisk->vol_fstype = voldisk->vol_fstype;
		ondisk->clu_id = 0;	/* XXX */
		ondisk->clu_no = cluster->clu_no;
		ondisk->clu_flags = 0;
		ondisk->clu_start = HAMMER_BUFSIZE;
		ondisk->synchronized_rec_id = 1;	/* XXX timestamp */
		KKASSERT(voldisk->vol_clo_end > cluster->io.offset);
		if (voldisk->vol_clo_end - cluster->io.offset >
		    voldisk->vol_clsize) {
			ondisk->clu_limit = voldisk->vol_clsize;
		} else {
			ondisk->clu_limit = (int32_t)(voldisk->vol_clo_end -
						      cluster->io.offset);
		}
		nbuffers = ondisk->clu_limit / HAMMER_BUFSIZE;
		hammer_alist_init(&cluster->alist_master, 1, nbuffers - 1,
				  HAMMER_ASTATE_FREE);
		hammer_alist_init(&cluster->alist_btree,
				  HAMMER_FSBUF_MAXBLKS,
				  (nbuffers - 1) * HAMMER_FSBUF_MAXBLKS,
				  HAMMER_ASTATE_ALLOC);
		hammer_alist_init(&cluster->alist_record,
				  HAMMER_FSBUF_MAXBLKS,
				  (nbuffers - 1) * HAMMER_FSBUF_MAXBLKS,
				  HAMMER_ASTATE_ALLOC);
		hammer_alist_init(&cluster->alist_mdata,
				  HAMMER_FSBUF_MAXBLKS,
				  (nbuffers - 1) * HAMMER_FSBUF_MAXBLKS,
				  HAMMER_ASTATE_ALLOC);

		ondisk->idx_data = 1 * HAMMER_FSBUF_MAXBLKS;
		ondisk->idx_index = 0 * HAMMER_FSBUF_MAXBLKS;
		ondisk->idx_record = nbuffers * HAMMER_FSBUF_MAXBLKS;

		/*
		 * Initialize the B-Tree.  We don't know what the caller
		 * intends to do with the cluster so make sure it causes
		 * an assertion if the caller makes no changes.
		 */
		ondisk->clu_btree_parent_vol_no = -2;
		ondisk->clu_btree_parent_clu_no = -2;
		ondisk->clu_btree_parent_offset = -2;
		ondisk->clu_btree_parent_clu_gen = -2;

		croot = hammer_alloc_btree(cluster, &error);
		if (error == 0) {
			hammer_modify_node(croot);
			bzero(croot->ondisk, sizeof(*croot->ondisk));
			croot->ondisk->count = 0;
			croot->ondisk->type = HAMMER_BTREE_TYPE_LEAF;
			hammer_modify_cluster(cluster);
			ondisk->clu_btree_root = croot->node_offset;
			hammer_rel_node(croot);
		}

		/*
		 * We just formatted this cluster, don't try to recover it!
		 */
		cluster->io.validated = 1;
	}

	/*
	 * If no error occurred handle automatic cluster recovery unless
	 * the NORECOVER flag is passed.
	 *
	 * Setting hammer_debug_recover to 1 will force recovery on load
	 * whether or not the cluster is marked open.
	 *
	 * Setting hammer_debug_recover to -1 will force NO recovery
	 * regardless of state.
	 *
	 * io.validated can only be cleared if the buffer RB list is empty,
	 * preventing us from trying to recover an actively referenced
	 * cluster (which would blow the filesystem to smithereens).
	 */
	if (error == 0 && cluster->io.validated == 0) {
		if ((getflags & GET_CLUSTER_NORECOVER) == 0) {
			if ((cluster->ondisk->clu_flags & HAMMER_CLUF_OPEN) ||
			    hammer_debug_recover > 0) {
				if (hammer_debug_recover >= 0)
					hammer_recover(cluster);
			}
			cluster->io.validated = 1;
		}
	}
	cluster->io.loading = 0;
	hammer_unlock(&cluster->io.lock);
	return (error);
}

/*
 * NOTE: Called from RB_SCAN, must return >= 0 for scan to continue.
 */
int
hammer_unload_cluster(hammer_cluster_t cluster, void *data __unused)
{
	hammer_ref(&cluster->io.lock);
	RB_SCAN(hammer_buf_rb_tree, &cluster->rb_bufs_root, NULL,
		hammer_unload_buffer, NULL);
	hammer_io_waitdep(&cluster->io);
	KKASSERT(cluster->io.lock.refs == 1);
	hammer_rel_cluster(cluster, 2);
	return(0);
}

/*
 * Update the cluster's synchronization TID, which is used during cluster
 * recovery.  NOTE: The cluster header is not written out until all related
 * records have been written out.
 */
hammer_tid_t
hammer_alloc_recid(hammer_cluster_t cluster)
{
	hammer_tid_t recid;

	hammer_modify_cluster(cluster);
	recid = cluster->ondisk->synchronized_rec_id++;
	return(recid);
}

void
hammer_update_syncid(hammer_cluster_t cluster, hammer_tid_t tid)
{
	hammer_modify_cluster(cluster);
	if (cluster->ondisk->synchronized_tid < tid)
		cluster->ondisk->synchronized_tid = tid;
}

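/*
 * Usage sketch (illustrative): a writer stamps a new record with an
 * allocated record id and, once the record has been written out, pushes
 * the cluster's synchronization TID forward.  The rec->base.rec_id field
 * name is an assumption; only the two helpers above are defined here.
 *
 *	rec->base.rec_id = hammer_alloc_recid(cluster);
 *	...
 *	hammer_update_syncid(cluster, tid);
 */
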
/*
 * Reference a cluster that is either already referenced or via a specially
 * handled pointer (aka rootcl).
 */
int
hammer_ref_cluster(hammer_cluster_t cluster)
{
	int error;

	KKASSERT(cluster != NULL);
	hammer_ref(&cluster->io.lock);

	/*
	 * Deal with on-disk info
	 */
	if (cluster->ondisk == NULL || cluster->io.validated == 0) {
		error = hammer_load_cluster(cluster, 0);
		if (error)
			hammer_rel_cluster(cluster, 1);
	} else {
		error = 0;
	}
	return(error);
}

/*
 * Release a cluster.  We have to deal with several places where
 * another thread can ref the cluster.
 *
 * Only destroy the structure itself if we no longer have an IO or any
 * hammer buffers associated with the structure.
 */
void
hammer_rel_cluster(hammer_cluster_t cluster, int flush)
{
	hammer_volume_t volume;

	if (cluster->io.lock.refs == 1) {
		hammer_lock_ex(&cluster->io.lock);
		if (cluster->io.lock.refs == 1) {
			/*
			 * Release the I/O.  If we or the kernel wants to
			 * flush, this will release the bp.  Otherwise the
			 * bp may be written and flushed passively by the
			 * kernel later on.
			 */
			hammer_io_release(&cluster->io, flush);

			/*
			 * Final cleanup
			 */
			if (cluster != cluster->volume->hmp->rootcl &&
			    cluster->io.bp == NULL &&
			    cluster->io.lock.refs == 1 &&
			    RB_EMPTY(&cluster->rb_bufs_root)) {
				KKASSERT(RB_EMPTY(&cluster->rb_nods_root));
				volume = cluster->volume;
				RB_REMOVE(hammer_clu_rb_tree,
					  &volume->rb_clus_root, cluster);
				cluster->volume = NULL;	/* sanity */
				--hammer_count_clusters;
				kfree(cluster, M_HAMMER);
				hammer_rel_volume(volume, 0);
				return;
			}
		} else if (flush) {
			hammer_io_flush(&cluster->io);
		}
		hammer_unlock(&cluster->io.lock);
	}
	hammer_unref(&cluster->io.lock);
}

/************************************************************************
 *				BUFFERS					*
 ************************************************************************
 *
 * Manage buffers.  Note that a buffer holds a reference to its associated
 * cluster, and its cluster will hold a reference to the cluster's volume.
 *
 * A non-zero buf_type indicates that a new buffer should be created and
 * zeroed.
 */
hammer_buffer_t
hammer_get_buffer(hammer_cluster_t cluster, int32_t buf_no,
		  u_int64_t buf_type, int *errorp)
{
	hammer_buffer_t buffer;

	/*
	 * Find the buffer.  Note that buffer 0 corresponds to the cluster
	 * header and should never be requested.
	 */
	KKASSERT(buf_no >= cluster->ondisk->clu_start / HAMMER_BUFSIZE &&
		 buf_no < cluster->ondisk->clu_limit / HAMMER_BUFSIZE);

	/*
	 * Locate and lock the buffer structure, creating one if necessary.
	 */
again:
	buffer = RB_LOOKUP(hammer_buf_rb_tree, &cluster->rb_bufs_root, buf_no);
	if (buffer == NULL) {
		++hammer_count_buffers;
		buffer = kmalloc(sizeof(*buffer), M_HAMMER, M_WAITOK|M_ZERO);
		buffer->buf_no = buf_no;
		buffer->cluster = cluster;
		buffer->volume = cluster->volume;
		hammer_io_init(&buffer->io, HAMMER_STRUCTURE_BUFFER);
		buffer->io.offset = cluster->io.offset +
				    (buf_no * HAMMER_BUFSIZE);
		TAILQ_INIT(&buffer->clist);
		hammer_ref(&buffer->io.lock);

		/*
		 * Insert the buffer into the RB tree and handle late
		 * collisions.
		 */
		if (RB_INSERT(hammer_buf_rb_tree, &cluster->rb_bufs_root, buffer)) {
			hammer_unref(&buffer->io.lock);
			--hammer_count_buffers;
			kfree(buffer, M_HAMMER);
			goto again;
		}
		hammer_ref(&cluster->io.lock);
	} else {
		hammer_ref(&buffer->io.lock);
	}

	/*
	 * Deal with on-disk info
	 */
	if (buffer->ondisk == NULL || buf_type || buffer->io.loading) {
		*errorp = hammer_load_buffer(buffer, buf_type);
		if (*errorp) {
			hammer_rel_buffer(buffer, 1);
			buffer = NULL;
		}
	} else {
		*errorp = 0;
	}
	return(buffer);
}

static int
hammer_load_buffer(hammer_buffer_t buffer, u_int64_t buf_type)
{
	hammer_volume_t volume;
	hammer_fsbuf_ondisk_t ondisk;
	int error;

	/*
	 * Load the buffer's on-disk info
	 */
	volume = buffer->volume;
	hammer_lock_ex(&buffer->io.lock);
	KKASSERT(buffer->io.loading == 0);
	buffer->io.loading = 1;

	if (buffer->ondisk == NULL) {
		if (buf_type) {
			error = hammer_io_new(volume->devvp, &buffer->io);
		} else {
			error = hammer_io_read(volume->devvp, &buffer->io);
		}
		if (error) {
			buffer->io.loading = 0;
			hammer_unlock(&buffer->io.lock);
			return (error);
		}
		buffer->ondisk = ondisk = (void *)buffer->io.bp->b_data;
		buffer->alist.config = &Buf_alist_config;
		buffer->alist.meta = ondisk->head.buf_almeta;
		buffer->buf_type = ondisk->head.buf_type;
	} else if (buf_type) {
		error = hammer_io_new(volume->devvp, &buffer->io);
	} else {
		error = 0;
	}
	if (error == 0 && buf_type) {
		hammer_modify_buffer(buffer);
		ondisk = buffer->ondisk;
		hammer_initbuffer(&buffer->alist, &ondisk->head, buf_type);
		buffer->buf_type = ondisk->head.buf_type;
	}
	buffer->io.loading = 0;
	hammer_unlock(&buffer->io.lock);
	return (error);
}

/*
 * NOTE: Called from RB_SCAN, must return >= 0 for scan to continue.
 */
int
hammer_unload_buffer(hammer_buffer_t buffer, void *data __unused)
{
	hammer_ref(&buffer->io.lock);
	hammer_flush_buffer_nodes(buffer);
	KKASSERT(buffer->io.lock.refs == 1);
	hammer_rel_buffer(buffer, 2);
	return(0);
}

/*
 * Reference a buffer that is either already referenced or via a specially
 * handled pointer (aka cursor->buffer).
 */
int
hammer_ref_buffer(hammer_buffer_t buffer)
{
	int error;

	hammer_ref(&buffer->io.lock);
	if (buffer->ondisk == NULL || buffer->io.loading) {
		error = hammer_load_buffer(buffer, 0);
		if (error)
			hammer_rel_buffer(buffer, 1);
		/*
		 * NOTE: buffer pointer can become stale after
		 * the above release.
		 */
	} else {
		KKASSERT(buffer->buf_type ==
			 buffer->ondisk->head.buf_type);
		error = 0;
	}
	return(error);
}

/*
 * Release a buffer.  We have to deal with several places where
 * another thread can ref the buffer.
 *
 * Only destroy the structure itself if the related buffer cache buffer
 * was disassociated from it.  This ties the management of the structure
 * to the buffer cache subsystem.  buffer->ondisk determines whether the
 * embedded io is referenced or not.
 */
void
hammer_rel_buffer(hammer_buffer_t buffer, int flush)
{
	hammer_cluster_t cluster;

	if (buffer->io.lock.refs == 1) {
		hammer_lock_ex(&buffer->io.lock);
		if (buffer->io.lock.refs == 1) {
			hammer_io_release(&buffer->io, flush);

			if (buffer->io.bp == NULL &&
			    buffer->io.lock.refs == 1) {
				hammer_flush_buffer_nodes(buffer);
				KKASSERT(TAILQ_EMPTY(&buffer->clist));
				cluster = buffer->cluster;
				RB_REMOVE(hammer_buf_rb_tree,
					  &cluster->rb_bufs_root, buffer);
				buffer->cluster = NULL; /* sanity */
				--hammer_count_buffers;
				kfree(buffer, M_HAMMER);
				hammer_rel_cluster(cluster, 0);
				return;
			}
		} else if (flush) {
			hammer_io_flush(&buffer->io);
		}
		hammer_unlock(&buffer->io.lock);
	}
	hammer_unref(&buffer->io.lock);
}

/************************************************************************
 *				NODES					*
 ************************************************************************
 *
 * Manage B-Tree nodes.  B-Tree nodes represent the primary indexing
 * method used by the HAMMER filesystem.
 *
 * Unlike other HAMMER structures, a hammer_node can be PASSIVELY
 * associated with its buffer, and will only reference the buffer while
 * the node itself is referenced.
 *
 * A hammer_node can also be passively associated with other HAMMER
 * structures, such as inodes, while retaining 0 references.  These
 * associations can be cleared backwards using a pointer-to-pointer in
 * the other structure.
 *
 * This allows the HAMMER implementation to cache hammer_nodes long-term
 * and short-cut a great deal of the infrastructure's complexity.  In
 * most cases a cached node can be reacquired without having to dip into
 * either the buffer or cluster management code.
 *
 * The caller must pass a referenced cluster on call and will retain
 * ownership of the reference on return.  The node will acquire its own
 * additional references, if necessary.
 */
hammer_node_t
hammer_get_node(hammer_cluster_t cluster, int32_t node_offset, int *errorp)
{
	hammer_node_t node;

	/*
	 * Locate the structure, allocating one if necessary.
	 */
again:
	node = RB_LOOKUP(hammer_nod_rb_tree, &cluster->rb_nods_root,
			 node_offset);
	if (node == NULL) {
		++hammer_count_nodes;
		node = kmalloc(sizeof(*node), M_HAMMER, M_WAITOK|M_ZERO);
		node->node_offset = node_offset;
		node->cluster = cluster;
		if (RB_INSERT(hammer_nod_rb_tree, &cluster->rb_nods_root,
			      node)) {
			--hammer_count_nodes;
			kfree(node, M_HAMMER);
			goto again;
		}
	}
	hammer_ref(&node->lock);
	*errorp = hammer_load_node(node);
	if (*errorp) {
		hammer_rel_node(node);
		node = NULL;
	}
	return(node);
}

/*
 * Reference an already-referenced node.
 */
int
hammer_ref_node(hammer_node_t node)
{
	int error;

	KKASSERT(node->lock.refs > 0);
	hammer_ref(&node->lock);
	if ((error = hammer_load_node(node)) != 0)
		hammer_rel_node(node);
	return(error);
}

/*
 * Load a node's on-disk data reference.
 */
static int
hammer_load_node(hammer_node_t node)
{
	hammer_buffer_t buffer;
	int32_t buf_no;
	int error;

	error = 0;
	hammer_lock_ex(&node->lock);
	if (node->ondisk == NULL) {
		/*
		 * This is a little confusing but the gist is that
		 * node->buffer determines whether the node is on
		 * the buffer's clist and node->ondisk determines
		 * whether the buffer is referenced.
		 */
		if ((buffer = node->buffer) != NULL) {
			error = hammer_ref_buffer(buffer);
		} else {
			buf_no = node->node_offset / HAMMER_BUFSIZE;
			buffer = hammer_get_buffer(node->cluster,
						   buf_no, 0, &error);
			if (buffer) {
				KKASSERT(error == 0);
				TAILQ_INSERT_TAIL(&buffer->clist,
						  node, entry);
				node->buffer = buffer;
			}
		}
		if (error == 0) {
			node->ondisk = (void *)((char *)buffer->ondisk +
			       (node->node_offset & HAMMER_BUFMASK));
		}
	}
	hammer_unlock(&node->lock);
	return (error);
}

/*
 * Safely reference a node, interlock against flushes via the IO subsystem.
 */
hammer_node_t
hammer_ref_node_safe(struct hammer_mount *hmp, struct hammer_node **cache,
		     int *errorp)
{
	hammer_node_t node;

	if ((node = *cache) != NULL)
		hammer_ref(&node->lock);
	if (node) {
		*errorp = hammer_load_node(node);
		if (*errorp) {
			hammer_rel_node(node);
			node = NULL;
		}
	} else {
		*errorp = ENOENT;
	}
	return(node);
}

/*
 * Release a hammer_node.  On the last release the node dereferences
 * its underlying buffer and may or may not be destroyed.
 */
void
hammer_rel_node(hammer_node_t node)
{
	hammer_cluster_t cluster;
	hammer_buffer_t buffer;
	int32_t node_offset;
	int flags;

	/*
	 * If this isn't the last ref just decrement the ref count and
	 * return.
	 */
	if (node->lock.refs > 1) {
		hammer_unref(&node->lock);
		return;
	}

	/*
	 * If there is no ondisk info or no buffer the node failed to load,
	 * remove the last reference and destroy the node.
	 */
	if (node->ondisk == NULL) {
		hammer_unref(&node->lock);
		hammer_flush_node(node);
		/* node is stale now */
		return;
	}

	/*
	 * Do final cleanups and then either destroy the node or leave it
	 * passively cached.  The buffer reference is removed regardless.
	 */
	buffer = node->buffer;
	node->ondisk = NULL;

	if ((node->flags & (HAMMER_NODE_DELETED|HAMMER_NODE_FLUSH)) == 0) {
		hammer_unref(&node->lock);
		hammer_rel_buffer(buffer, 0);
		return;
	}

	/*
	 * Destroy the node.  Record pertinent data because the node
	 * becomes stale the instant we flush it.
	 */
	flags = node->flags;
	node_offset = node->node_offset;
	hammer_unref(&node->lock);
	hammer_flush_node(node);
	/* node is stale */

	cluster = buffer->cluster;
	if (flags & HAMMER_NODE_DELETED) {
		hammer_free_btree(cluster, node_offset);
		if (node_offset == cluster->ondisk->clu_btree_root) {
			kprintf("FREE CLUSTER %d\n", cluster->clu_no);
			hammer_free_cluster(cluster);
			/*hammer_io_undirty(&cluster->io);*/
		}
	}
	hammer_rel_buffer(buffer, 0);
}

/*
 * Passively cache a referenced hammer_node in *cache.  The caller may
 * release the node on return.
 */
void
hammer_cache_node(hammer_node_t node, struct hammer_node **cache)
{
	hammer_node_t old;

	/*
	 * If the node is being deleted, don't cache it!
	 */
	if (node->flags & HAMMER_NODE_DELETED)
		return;

	/*
	 * Cache the node.  If we previously cached a different node we
	 * have to give HAMMER a chance to destroy it.
	 */
again:
	if (node->cache1 != cache) {
		if (node->cache2 != cache) {
			if ((old = *cache) != NULL) {
				KKASSERT(node->lock.refs != 0);
				hammer_uncache_node(cache);
				goto again;
			}
			if (node->cache2)
				*node->cache2 = NULL;
			node->cache2 = node->cache1;
			node->cache1 = cache;
			*cache = node;
		} else {
			struct hammer_node **tmp;

			tmp = node->cache1;
			node->cache1 = node->cache2;
			node->cache2 = tmp;
		}
	}
}

void
hammer_uncache_node(struct hammer_node **cache)
{
	hammer_node_t node;

	if ((node = *cache) != NULL) {
		*cache = NULL;
		if (node->cache1 == cache) {
			node->cache1 = node->cache2;
			node->cache2 = NULL;
		} else if (node->cache2 == cache) {
			node->cache2 = NULL;
		} else {
			panic("hammer_uncache_node: missing cache linkage");
		}
		if (node->cache1 == NULL && node->cache2 == NULL)
			hammer_flush_node(node);
	}
}

/*
 * Remove a node's cache references and destroy the node if it has no
 * other references or backing store.
 */
void
hammer_flush_node(hammer_node_t node)
{
	hammer_buffer_t buffer;

	if (node->cache1)
		*node->cache1 = NULL;
	if (node->cache2)
		*node->cache2 = NULL;
	if (node->lock.refs == 0 && node->ondisk == NULL) {
		RB_REMOVE(hammer_nod_rb_tree, &node->cluster->rb_nods_root,
			  node);
		if ((buffer = node->buffer) != NULL) {
			node->buffer = NULL;
			TAILQ_REMOVE(&buffer->clist, node, entry);
			/* buffer is unreferenced because ondisk is NULL */
		}
		--hammer_count_nodes;
		kfree(node, M_HAMMER);
	}
}

/*
 * Flush passively cached B-Tree nodes associated with this buffer.
 * This is only called when the buffer is about to be destroyed, so
 * none of the nodes should have any references.
 */
void
hammer_flush_buffer_nodes(hammer_buffer_t buffer)
{
	hammer_node_t node;

	while ((node = TAILQ_FIRST(&buffer->clist)) != NULL) {
		KKASSERT(node->lock.refs == 0 && node->ondisk == NULL);
		hammer_ref(&node->lock);
		node->flags |= HAMMER_NODE_FLUSH;
		hammer_rel_node(node);
	}
}

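/*
 * Usage sketch (illustrative) for the passive node cache implemented
 * above.  The cache pointer shown (ip->cache[0]) is an assumed field;
 * any pointer-to-pointer owned by another structure works the same way:
 *
 *	node = hammer_get_node(cluster, node_offset, &error);
 *	if (node) {
 *		hammer_cache_node(node, &ip->cache[0]);
 *		hammer_rel_node(node);
 *	}
 *	...later, reacquire without dipping into the buffer/cluster code:
 *	node = hammer_ref_node_safe(hmp, &ip->cache[0], &error);
 */
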
/************************************************************************
 *				A-LIST ALLOCATORS			*
 ************************************************************************/

/*
 * Allocate HAMMER clusters
 */
hammer_cluster_t
hammer_alloc_cluster(hammer_mount_t hmp, hammer_cluster_t cluster_hint,
		     int *errorp)
{
	hammer_volume_t volume;
	hammer_cluster_t cluster;
	int32_t clu_no;
	int32_t clu_hint;
	int32_t vol_beg;
	int32_t vol_no;

	/*
	 * Figure out our starting volume and hint.
	 */
	if (cluster_hint) {
		vol_beg = cluster_hint->volume->vol_no;
		clu_hint = cluster_hint->clu_no;
	} else {
		vol_beg = hmp->volume_iterator;
		clu_hint = -1;
	}

	/*
	 * Loop through volumes looking for a free cluster.  If allocating
	 * a new cluster relative to an existing cluster try to find a free
	 * cluster on either side (clu_hint >= 0), otherwise just do a
	 * forwards iteration.
	 */
	vol_no = vol_beg;
	do {
		volume = hammer_get_volume(hmp, vol_no, errorp);
		kprintf("VOLUME %p %d\n", volume, vol_no);
		if (*errorp) {
			clu_no = HAMMER_ALIST_BLOCK_NONE;
			break;
		}
		hammer_modify_volume(volume);
		if (clu_hint == -1) {
			clu_hint = volume->clu_iterator;
			clu_no = hammer_alist_alloc_fwd(&volume->alist, 1,
							clu_hint);
			if (clu_no == HAMMER_ALIST_BLOCK_NONE) {
				clu_no = hammer_alist_alloc_fwd(&volume->alist,
								1, 0);
			}
		} else {
			clu_no = hammer_alist_alloc_fwd(&volume->alist, 1,
							clu_hint);
			if (clu_no == HAMMER_ALIST_BLOCK_NONE) {
				clu_no = hammer_alist_alloc_rev(&volume->alist,
								1, clu_hint);
			}
		}
		if (clu_no != HAMMER_ALIST_BLOCK_NONE)
			break;
		hammer_rel_volume(volume, 0);
		volume = NULL;
		*errorp = ENOSPC;
		vol_no = (vol_no + 1) % hmp->nvolumes;
		clu_hint = -1;
	} while (vol_no != vol_beg);

	/*
	 * Acquire the cluster.  On success this will force *errorp to 0.
	 */
	if (clu_no != HAMMER_ALIST_BLOCK_NONE) {
		kprintf("ALLOC CLUSTER %d:%d\n", volume->vol_no, clu_no);
		cluster = hammer_get_cluster(volume, clu_no, errorp,
					     GET_CLUSTER_NEW);
		volume->clu_iterator = clu_no;
		hammer_rel_volume(volume, 0);
	} else {
		cluster = NULL;
	}
	if (cluster)
		hammer_lock_ex(&cluster->io.lock);
	return(cluster);
}

void
hammer_init_cluster(hammer_cluster_t cluster, hammer_base_elm_t left_bound,
		    hammer_base_elm_t right_bound)
{
	hammer_cluster_ondisk_t ondisk = cluster->ondisk;

	hammer_modify_cluster(cluster);
	ondisk->clu_btree_beg = *left_bound;
	ondisk->clu_btree_end = *right_bound;
	cluster->clu_btree_beg = ondisk->clu_btree_beg;
	cluster->clu_btree_end = ondisk->clu_btree_end;
}

/*
 * Deallocate a cluster
 */
void
hammer_free_cluster(hammer_cluster_t cluster)
{
	hammer_modify_volume(cluster->volume);
	hammer_alist_free(&cluster->volume->alist, cluster->clu_no, 1);
}

/*
 * Allocate HAMMER elements - B-Tree nodes, data storage, and record elements
 *
 * The passed *bufferp should be initialized to NULL.  On successive calls
 * *bufferp caches the most recent buffer used until put away by the caller.
 * Note that previously returned pointers using the cached buffer become
 * invalid on successive calls which reuse *bufferp.
 *
 * All allocations first attempt to use the block found at the specified
 * iterator.  If that fails the first available block is used.  If that
 * fails a new buffer is allocated and associated with the buffer type
 * A-list and the element is allocated out of the new buffer.
 *
 * This function also ensures that the required minimum number of buffers is
 * reserved to guarantee that recovery operations succeed.
 */
hammer_node_t
hammer_alloc_btree(hammer_cluster_t cluster, int *errorp)
{
	hammer_buffer_t buffer;
	hammer_alist_t live;
	hammer_node_t node;
	int32_t elm_no;
	int32_t buf_no;
	int32_t node_offset;
	int32_t n;

	hammer_modify_cluster(cluster);
	buffer = NULL;
	live = &cluster->alist_btree;

	/*
	 * If we aren't recovering then ensure the required minimum
	 * reservation is met. XXX if the recovery code packs the B-Tree
	 * we don't have to do this.
	 *
	 * Calculate the number of buffers needed to hold the B-Tree.
	 */
	if (cluster->io.validated) {
		n = (cluster->ondisk->stat_records * 3 /
		     HAMMER_BTREE_INT_ELMS / HAMMER_BTREE_NODES) + 1;
		if (cluster->ondisk->stat_idx_bufs < n) {
			kprintf("hammer_alloc_btree: %d/%d buffers\n",
				cluster->ondisk->stat_idx_bufs, n);
		}
		while (cluster->ondisk->stat_idx_bufs < n) {
			alloc_new_buffer(cluster, HAMMER_FSBUF_BTREE, live,
					 cluster->ondisk->idx_index, errorp,
					 &buffer);
			if (*errorp) {
				if (buffer)
					hammer_rel_buffer(buffer, 0);
				return(NULL);
			}
		}
	}

	/*
	 * Allocate a B-Tree element
	 */
	elm_no = hammer_alist_alloc_fwd(live, 1, cluster->ondisk->idx_index);
	if (elm_no == HAMMER_ALIST_BLOCK_NONE)
		elm_no = hammer_alist_alloc_fwd(live, 1, 0);
	if (elm_no == HAMMER_ALIST_BLOCK_NONE) {
		alloc_new_buffer(cluster, HAMMER_FSBUF_BTREE, live,
				 cluster->ondisk->idx_index, errorp, &buffer);
		elm_no = hammer_alist_alloc(live, 1);
		if (elm_no == HAMMER_ALIST_BLOCK_NONE) {
			*errorp = ENOSPC;
			if (buffer)
				hammer_rel_buffer(buffer, 0);
			return(NULL);
		}
	}
	cluster->ondisk->idx_index = elm_no;
	KKASSERT((elm_no & HAMMER_FSBUF_BLKMASK) < HAMMER_BTREE_NODES);

	/*
	 * Load and return the B-Tree element
	 */
	buf_no = elm_no / HAMMER_FSBUF_MAXBLKS;
	node_offset = buf_no * HAMMER_BUFSIZE +
		      offsetof(union hammer_fsbuf_ondisk,
			       btree.nodes[elm_no & HAMMER_FSBUF_BLKMASK]);
	node = hammer_get_node(cluster, node_offset, errorp);
	if (node) {
		hammer_modify_node(node);
		bzero(node->ondisk, sizeof(*node->ondisk));
	} else {
		hammer_alist_free(live, elm_no, 1);
		hammer_rel_node(node);
		node = NULL;
	}
	if (buffer)
		hammer_rel_buffer(buffer, 0);
	return(node);
}

void *
hammer_alloc_data(hammer_cluster_t cluster, int32_t bytes,
		  int *errorp, struct hammer_buffer **bufferp)
{
	hammer_buffer_t buffer;
	hammer_alist_t live;
	int32_t elm_no;
	int32_t buf_no;
	int32_t nblks;
	void *item;

	/*
	 * Deal with large data blocks.  The blocksize is HAMMER_BUFSIZE
	 * for these allocations.
	 */
	hammer_modify_cluster(cluster);
	if ((bytes & HAMMER_BUFMASK) == 0) {
		nblks = bytes / HAMMER_BUFSIZE;
		/* only one block allowed for now (so buffer can hold it) */
		KKASSERT(nblks == 1);

		buf_no = hammer_alloc_master(cluster, nblks,
					     cluster->ondisk->idx_ldata, 1);
		if (buf_no == HAMMER_ALIST_BLOCK_NONE) {
			*errorp = ENOSPC;
			return(NULL);
		}
		hammer_adjust_stats(cluster, HAMMER_FSBUF_DATA, nblks);
		cluster->ondisk->idx_ldata = buf_no;
		buffer = *bufferp;
		*bufferp = hammer_get_buffer(cluster, buf_no, -1, errorp);
		if (buffer)
			hammer_rel_buffer(buffer, 0);
		buffer = *bufferp;
		return(buffer->ondisk);
	}

	/*
	 * Allocate a data element.  The block size is HAMMER_DATA_BLKSIZE
	 * (64 bytes) for these allocations.
	 */
	nblks = (bytes + HAMMER_DATA_BLKMASK) & ~HAMMER_DATA_BLKMASK;
	nblks /= HAMMER_DATA_BLKSIZE;
	live = &cluster->alist_mdata;
	elm_no = hammer_alist_alloc_fwd(live, nblks, cluster->ondisk->idx_data);
	if (elm_no == HAMMER_ALIST_BLOCK_NONE)
		elm_no = hammer_alist_alloc_fwd(live, nblks, 0);
	if (elm_no == HAMMER_ALIST_BLOCK_NONE) {
		alloc_new_buffer(cluster, HAMMER_FSBUF_DATA, live,
				 cluster->ondisk->idx_data, errorp, bufferp);
		elm_no = hammer_alist_alloc(live, nblks);
		if (elm_no == HAMMER_ALIST_BLOCK_NONE) {
			*errorp = ENOSPC;
			return(NULL);
		}
	}
	cluster->ondisk->idx_data = elm_no;

	/*
	 * Load and return the data element
	 */
	buf_no = elm_no / HAMMER_FSBUF_MAXBLKS;
	buffer = *bufferp;
	if (buffer == NULL || buffer->cluster != cluster ||
	    buffer->buf_no != buf_no) {
		if (buffer)
			hammer_rel_buffer(buffer, 0);
		buffer = hammer_get_buffer(cluster, buf_no, 0, errorp);
		*bufferp = buffer;
	}
	KKASSERT(buffer->ondisk->head.buf_type == HAMMER_FSBUF_DATA);
	KKASSERT((elm_no & HAMMER_FSBUF_BLKMASK) < HAMMER_DATA_NODES);
	hammer_modify_buffer(buffer);
	item = &buffer->ondisk->data.data[elm_no & HAMMER_FSBUF_BLKMASK];
	bzero(item, nblks * HAMMER_DATA_BLKSIZE);
	*errorp = 0;
	return(item);
}

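/*
 * Usage sketch (illustrative) of the *bufferp caching protocol described
 * before hammer_alloc_btree():
 *
 *	struct hammer_buffer *buffer = NULL;
 *	void *item;
 *	int error;
 *
 *	item = hammer_alloc_data(cluster, bytes, &error, &buffer);
 *	... item is invalidated by the next call that reuses &buffer ...
 *	if (buffer)
 *		hammer_rel_buffer(buffer, 0);
 */
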
void *
hammer_alloc_record(hammer_cluster_t cluster, int *errorp,
		    u_int8_t rec_type, struct hammer_buffer **bufferp)
{
	hammer_buffer_t buffer;
	hammer_alist_t live;
	int32_t elm_no;
	int32_t buf_no;
	void *item;

	/*
	 * Allocate a record element
	 */
	hammer_modify_cluster(cluster);
	live = &cluster->alist_record;
	elm_no = hammer_alist_alloc_rev(live, 1, cluster->ondisk->idx_record);
	if (elm_no == HAMMER_ALIST_BLOCK_NONE)
		elm_no = hammer_alist_alloc_rev(live, 1, HAMMER_ALIST_BLOCK_MAX);
	if (elm_no == HAMMER_ALIST_BLOCK_NONE) {
		alloc_new_buffer(cluster, HAMMER_FSBUF_RECORDS, live,
				 cluster->ondisk->idx_record, errorp, bufferp);
		elm_no = hammer_alist_alloc_rev(live, 1, HAMMER_ALIST_BLOCK_MAX);
		kprintf("hammer_alloc_record elm again %08x\n", elm_no);
		if (elm_no == HAMMER_ALIST_BLOCK_NONE) {
			*errorp = ENOSPC;
			return(NULL);
		}
	}
	cluster->ondisk->idx_record = elm_no;

	/*
	 * Load and return the record element
	 */
	buf_no = elm_no / HAMMER_FSBUF_MAXBLKS;
	buffer = *bufferp;
	if (buffer == NULL || buffer->cluster != cluster ||
	    buffer->buf_no != buf_no) {
		if (buffer)
			hammer_rel_buffer(buffer, 0);
		buffer = hammer_get_buffer(cluster, buf_no, 0, errorp);
		*bufferp = buffer;
	}
	KKASSERT(buffer->ondisk->head.buf_type == HAMMER_FSBUF_RECORDS);
	KASSERT((elm_no & HAMMER_FSBUF_BLKMASK) < HAMMER_RECORD_NODES,
		("elm_no %d (%d) out of bounds", elm_no,
		 elm_no & HAMMER_FSBUF_BLKMASK));
	hammer_modify_buffer(buffer);
	item = &buffer->ondisk->record.recs[elm_no & HAMMER_FSBUF_BLKMASK];
	bzero(item, sizeof(union hammer_record_ondisk));

	++cluster->ondisk->stat_records;
	if (rec_type == HAMMER_RECTYPE_CLUSTER)
		++cluster->ondisk->stat_records;
	*errorp = 0;
	return(item);
}

void
hammer_free_data_ptr(hammer_buffer_t buffer, void *data, int bytes)
{
	int32_t elm_no;
	int32_t nblks;
	hammer_alist_t live;

	hammer_modify_cluster(buffer->cluster);
	if ((bytes & HAMMER_BUFMASK) == 0) {
		nblks = bytes / HAMMER_BUFSIZE;
		KKASSERT(nblks == 1 && data == (void *)buffer->ondisk);
		hammer_alist_free(&buffer->cluster->alist_master,
				  buffer->buf_no, nblks);
		hammer_adjust_stats(buffer->cluster, HAMMER_FSBUF_DATA, -nblks);
		return;
	}

	elm_no = ((char *)data - (char *)buffer->ondisk->data.data) /
		 HAMMER_DATA_BLKSIZE;
	KKASSERT(elm_no >= 0 && elm_no < HAMMER_DATA_NODES);
	elm_no += buffer->buf_no * HAMMER_FSBUF_MAXBLKS;
	nblks = (bytes + HAMMER_DATA_BLKMASK) & ~HAMMER_DATA_BLKMASK;
	nblks /= HAMMER_DATA_BLKSIZE;
	live = &buffer->cluster->alist_mdata;
	hammer_alist_free(live, elm_no, nblks);
}

void
hammer_free_record_ptr(hammer_buffer_t buffer, union hammer_record_ondisk *rec,
		       u_int8_t rec_type)
{
	int32_t elm_no;
	hammer_alist_t live;

	hammer_modify_cluster(buffer->cluster);
	elm_no = rec - &buffer->ondisk->record.recs[0];
	KKASSERT(elm_no >= 0 && elm_no < HAMMER_RECORD_NODES);
	elm_no += buffer->buf_no * HAMMER_FSBUF_MAXBLKS;
	live = &buffer->cluster->alist_record;
	hammer_alist_free(live, elm_no, 1);
	--buffer->cluster->ondisk->stat_records;
	if (rec_type == HAMMER_RECTYPE_CLUSTER)
		--buffer->cluster->ondisk->stat_records;
}

void
hammer_free_btree(hammer_cluster_t cluster, int32_t bclu_offset)
{
	const int32_t blksize = sizeof(struct hammer_node_ondisk);
	int32_t fsbuf_offset = bclu_offset & HAMMER_BUFMASK;
	hammer_alist_t live;
	int32_t elm_no;

	hammer_modify_cluster(cluster);
	elm_no = bclu_offset / HAMMER_BUFSIZE * HAMMER_FSBUF_MAXBLKS;
	fsbuf_offset -= offsetof(union hammer_fsbuf_ondisk, btree.nodes[0]);
	live = &cluster->alist_btree;
	KKASSERT(fsbuf_offset >= 0 && fsbuf_offset % blksize == 0);
	elm_no += fsbuf_offset / blksize;
	hammer_alist_free(live, elm_no, 1);
}

void
hammer_free_data(hammer_cluster_t cluster, int32_t bclu_offset, int32_t bytes)
{
	const int32_t blksize = HAMMER_DATA_BLKSIZE;
	int32_t fsbuf_offset = bclu_offset & HAMMER_BUFMASK;
	hammer_alist_t live;
	int32_t elm_no;
	int32_t buf_no;
	int32_t nblks;

	hammer_modify_cluster(cluster);
	if ((bytes & HAMMER_BUFMASK) == 0) {
		nblks = bytes / HAMMER_BUFSIZE;
		KKASSERT(nblks == 1 && (bclu_offset & HAMMER_BUFMASK) == 0);
		buf_no = bclu_offset / HAMMER_BUFSIZE;
		hammer_alist_free(&cluster->alist_master, buf_no, nblks);
		hammer_adjust_stats(cluster, HAMMER_FSBUF_DATA, -nblks);
		return;
	}

	elm_no = bclu_offset / HAMMER_BUFSIZE * HAMMER_FSBUF_MAXBLKS;
	fsbuf_offset -= offsetof(union hammer_fsbuf_ondisk, data.data[0][0]);
	live = &cluster->alist_mdata;
	nblks = (bytes + HAMMER_DATA_BLKMASK) & ~HAMMER_DATA_BLKMASK;
	nblks /= HAMMER_DATA_BLKSIZE;
	KKASSERT(fsbuf_offset >= 0 && fsbuf_offset % blksize == 0);
	elm_no += fsbuf_offset / blksize;
	hammer_alist_free(live, elm_no, nblks);
}

void
hammer_free_record(hammer_cluster_t cluster, int32_t bclu_offset,
		   u_int8_t rec_type)
{
	const int32_t blksize = sizeof(union hammer_record_ondisk);
	int32_t fsbuf_offset = bclu_offset & HAMMER_BUFMASK;
	hammer_alist_t live;
	int32_t elm_no;

	hammer_modify_cluster(cluster);
	elm_no = bclu_offset / HAMMER_BUFSIZE * HAMMER_FSBUF_MAXBLKS;
	fsbuf_offset -= offsetof(union hammer_fsbuf_ondisk, record.recs[0]);
	live = &cluster->alist_record;
	KKASSERT(fsbuf_offset >= 0 && fsbuf_offset % blksize == 0);
	elm_no += fsbuf_offset / blksize;
	hammer_alist_free(live, elm_no, 1);
	--cluster->ondisk->stat_records;
	if (rec_type == HAMMER_RECTYPE_CLUSTER)
		--cluster->ondisk->stat_records;
}

/*
 * Allocate a new filesystem buffer and assign it to the specified
 * filesystem buffer type.  The new buffer will be added to the
 * type-specific A-list and initialized.
 */
static void
alloc_new_buffer(hammer_cluster_t cluster, u_int64_t type, hammer_alist_t live,
		 int start, int *errorp, struct hammer_buffer **bufferp)
{
	hammer_buffer_t buffer;
	int32_t buf_no;
	int32_t base_blk;
	int isfwd;

	if (*bufferp)
		hammer_rel_buffer(*bufferp, 0);
	*bufferp = NULL;

	start = start / HAMMER_FSBUF_MAXBLKS;	/* convert to buf_no */
	isfwd = (type != HAMMER_FSBUF_RECORDS);
	buf_no = hammer_alloc_master(cluster, 1, start, isfwd);
	if (buf_no == HAMMER_ALIST_BLOCK_NONE) {
		*errorp = ENOSPC;
		return;
	}

	/*
	 * The new buffer must be initialized (type != 0) regardless of
	 * whether we already have it cached or not, so don't try to
	 * optimize the cached buffer check.  Just call hammer_get_buffer().
	 */
	buffer = hammer_get_buffer(cluster, buf_no, type, errorp);
	*bufferp = buffer;

	/*
	 * Do a meta-free of the buffer's elements into the type-specific
	 * A-list and update our statistics to reflect the allocation.
	 */
	if (buffer) {
		kprintf("alloc_new_buffer buf_no %d type %016llx nelms %d\n",
			buf_no, type, nelements);
		hammer_modify_buffer(buffer);  /*XXX*/
		hammer_adjust_stats(cluster, type, 1);

		/*
		 * Free the buffer to the appropriate slave list so the
		 * cluster-based allocator sees it.
		 */
		base_blk = buf_no * HAMMER_FSBUF_MAXBLKS;

		switch(type) {
		case HAMMER_FSBUF_BTREE:
			hammer_alist_free(live, base_blk, HAMMER_BTREE_NODES);
			break;
		case HAMMER_FSBUF_DATA:
			hammer_alist_free(live, base_blk, HAMMER_DATA_NODES);
			break;
		case HAMMER_FSBUF_RECORDS:
			hammer_alist_free(live, base_blk, HAMMER_RECORD_NODES);
			break;
		}
	}
}

/*
 * Sync dirty buffers to the media
 */

static int hammer_sync_scan1(struct mount *mp, struct vnode *vp, void *data);
static int hammer_sync_scan2(struct mount *mp, struct vnode *vp, void *data);

int
hammer_sync_hmp(hammer_mount_t hmp, int waitfor)
{
	struct hammer_sync_info info;

	info.error = 0;
	info.waitfor = waitfor;

	vmntvnodescan(hmp->mp, VMSC_GETVP|VMSC_NOWAIT,
		      hammer_sync_scan1, hammer_sync_scan2, &info);

	RB_SCAN(hammer_vol_rb_tree, &hmp->rb_vols_root, NULL,
		hammer_sync_volume, &info);
	return(info.error);
}

static int
hammer_sync_scan1(struct mount *mp, struct vnode *vp, void *data)
{
	struct hammer_inode *ip;

	ip = VTOI(vp);
	if (vp->v_type == VNON || ip == NULL ||
	    ((ip->flags & HAMMER_INODE_MODMASK) == 0 &&
	     RB_EMPTY(&vp->v_rbdirty_tree))) {
		return(-1);
	}
	return(0);
}

static int
hammer_sync_scan2(struct mount *mp, struct vnode *vp, void *data)
{
	struct hammer_sync_info *info = data;
	struct hammer_inode *ip;
	int error;

	ip = VTOI(vp);
	if (vp->v_type == VNON || vp->v_type == VBAD ||
	    ((ip->flags & HAMMER_INODE_MODMASK) == 0 &&
	     RB_EMPTY(&vp->v_rbdirty_tree))) {
		return(0);
	}
	if (vp->v_type != VCHR) {
		error = VOP_FSYNC(vp, info->waitfor);
		if (error)
			info->error = error;
	}
	return(0);
}

int
hammer_sync_volume(hammer_volume_t volume, void *data)
{
	struct hammer_sync_info *info = data;

	RB_SCAN(hammer_clu_rb_tree, &volume->rb_clus_root, NULL,
		hammer_sync_cluster, info);
	if (hammer_ref_volume(volume) == 0)
		hammer_rel_volume(volume, 1);
	return(0);
}

int
hammer_sync_cluster(hammer_cluster_t cluster, void *data)
{
	struct hammer_sync_info *info = data;

	RB_SCAN(hammer_buf_rb_tree, &cluster->rb_bufs_root, NULL,
		hammer_sync_buffer, info);
	/*hammer_io_waitdep(&cluster->io);*/
	if (hammer_ref_cluster(cluster) == 0)
		hammer_rel_cluster(cluster, 1);
	return(0);
}

int
hammer_sync_buffer(hammer_buffer_t buffer, void *data __unused)
{
	if (hammer_ref_buffer(buffer) == 0)
		hammer_rel_buffer(buffer, 1);
	return(0);
}

/*
 * Generic buffer initialization.  Initialize the A-list into an all-allocated
 * state with the free block limit properly set.
 *
 * Note that alloc_new_buffer() will free the appropriate block range via
 * the appropriate cluster alist, so the free count is properly propagated.
 */
void
hammer_initbuffer(hammer_alist_t live, hammer_fsbuf_head_t head, u_int64_t type)
{
	head->buf_type = type;

	switch(type) {
	case HAMMER_FSBUF_BTREE:
		hammer_alist_init(live, 0, HAMMER_BTREE_NODES,
				  HAMMER_ASTATE_ALLOC);
		break;
	case HAMMER_FSBUF_DATA:
		hammer_alist_init(live, 0, HAMMER_DATA_NODES,
				  HAMMER_ASTATE_ALLOC);
		break;
	case HAMMER_FSBUF_RECORDS:
		hammer_alist_init(live, 0, HAMMER_RECORD_NODES,
				  HAMMER_ASTATE_ALLOC);
		break;
	default:
		hammer_alist_init(live, 0, 0, HAMMER_ASTATE_ALLOC);
		break;
	}
}

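/*
 * Sketch of the resulting init sequence (restating alloc_new_buffer()
 * above): a new filesystem buffer starts all-allocated here and the
 * caller then meta-frees the element range into the cluster's slave
 * A-list, e.g. for a B-Tree buffer:
 *
 *	hammer_initbuffer(&buffer->alist, &ondisk->head, HAMMER_FSBUF_BTREE);
 *	hammer_alist_free(live, buf_no * HAMMER_FSBUF_MAXBLKS,
 *			  HAMMER_BTREE_NODES);
 */
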
/*
 * Calculate the cluster's offset in the volume.  This calculation is
 * slightly more complex when using superclusters because superclusters
 * are grouped in blocks of 16, followed by 16 x N clusters where N
 * is the number of clusters a supercluster can manage.
 */
static int64_t
calculate_cluster_offset(hammer_volume_t volume, int32_t clu_no)
{
	int32_t scl_group;
	int64_t scl_group_size;
	int64_t off;

	if (volume->vol_flags & HAMMER_VOLF_USINGSUPERCL) {
		scl_group = clu_no / HAMMER_VOL_SUPERCLUSTER_GROUP /
			    HAMMER_SCL_MAXCLUSTERS;
		scl_group_size =
			    ((int64_t)HAMMER_BUFSIZE *
			     HAMMER_VOL_SUPERCLUSTER_GROUP) +
			    ((int64_t)HAMMER_VOL_SUPERCLUSTER_GROUP *
			     volume->vol_clsize * HAMMER_SCL_MAXCLUSTERS);
		scl_group_size +=
			    HAMMER_VOL_SUPERCLUSTER_GROUP * HAMMER_BUFSIZE;

		off = volume->cluster_base +
		      scl_group * scl_group_size +
		      (HAMMER_BUFSIZE * HAMMER_VOL_SUPERCLUSTER_GROUP) +
		      ((int64_t)clu_no % ((int64_t)HAMMER_SCL_MAXCLUSTERS *
		       HAMMER_VOL_SUPERCLUSTER_GROUP))
		      * volume->vol_clsize;
	} else {
		off = volume->cluster_base +
		      (int64_t)clu_no * volume->vol_clsize;
	}
	return(off);
}

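/*
 * Worked example (illustrative restatement of the math above), using the
 * group size of 16 described in the comment.  Within a group the first
 * 16 buffers hold the supercluster headers, so for a given clu_no:
 *
 *	scl_group = clu_no / 16 / HAMMER_SCL_MAXCLUSTERS;
 *	off = cluster_base
 *	    + scl_group * scl_group_size
 *	    + 16 * HAMMER_BUFSIZE		(skip the group's headers)
 *	    + (clu_no % (16 * HAMMER_SCL_MAXCLUSTERS)) * vol_clsize;
 */
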
/*
 * Calculate a super-cluster's offset in the volume.
 */
static int64_t
calculate_supercl_offset(hammer_volume_t volume, int32_t scl_no)
{
        int32_t scl_group;
        int64_t scl_group_size;
        int64_t off;

        KKASSERT(volume->vol_flags & HAMMER_VOLF_USINGSUPERCL);
        scl_group = scl_no / HAMMER_VOL_SUPERCLUSTER_GROUP;
        if (scl_group) {
                scl_group_size =
                            ((int64_t)HAMMER_BUFSIZE *
                             HAMMER_VOL_SUPERCLUSTER_GROUP) +
                            ((int64_t)HAMMER_VOL_SUPERCLUSTER_GROUP *
                             volume->vol_clsize * HAMMER_SCL_MAXCLUSTERS);
                scl_group_size +=
                            HAMMER_VOL_SUPERCLUSTER_GROUP * HAMMER_BUFSIZE;
                off = volume->cluster_base + (scl_group * scl_group_size) +
                      (scl_no % HAMMER_VOL_SUPERCLUSTER_GROUP) * HAMMER_BUFSIZE;
        } else {
                off = volume->cluster_base + (scl_no * HAMMER_BUFSIZE);
        }
        return(off);
}
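/*
 * Sketch of the analogous reduction:  for a supercluster in the first
 * group (scl_group == 0) the offset is simply cluster_base + scl_no *
 * HAMMER_BUFSIZE; later superclusters first skip scl_group whole groups
 * of scl_group_size bytes each.
 */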
/*
 * Allocate nblks buffers from the cluster's master alist.
 */
static int32_t
hammer_alloc_master(hammer_cluster_t cluster, int nblks,
                    int32_t start, int isfwd)
{
        int32_t buf_no;

        hammer_modify_cluster(cluster);
        if (isfwd) {
                buf_no = hammer_alist_alloc_fwd(&cluster->alist_master,
                                                nblks, start);
                if (buf_no == HAMMER_ALIST_BLOCK_NONE) {
                        buf_no = hammer_alist_alloc_fwd(&cluster->alist_master,
                                                nblks, 0);
                }
        } else {
                buf_no = hammer_alist_alloc_rev(&cluster->alist_master,
                                                nblks, start);
                if (buf_no == HAMMER_ALIST_BLOCK_NONE) {
                        buf_no = hammer_alist_alloc_rev(&cluster->alist_master,
                                                nblks, HAMMER_ALIST_BLOCK_MAX);
                }
        }

        /*
         * Recover space from empty record, b-tree, and data a-lists.
         */
        return(buf_no);
}
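/*
 * Illustration of the two-pass search above (hypothetical numbers):  with
 * isfwd != 0 and start = 40 in a nearly-full tail, the first
 * hammer_alist_alloc_fwd() scans only from block 40 onward and can fail
 * even though free space exists below it; the second pass restarts the
 * scan at block 0, giving the allocation wrap-around semantics.  The
 * reverse case mirrors this by restarting at HAMMER_ALIST_BLOCK_MAX.
 */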
/*
 * Adjust allocation statistics
 */
static void
hammer_adjust_stats(hammer_cluster_t cluster, u_int64_t buf_type, int nblks)
{
        hammer_modify_cluster(cluster);
        hammer_modify_volume(cluster->volume);
        hammer_modify_volume(cluster->volume->hmp->rootvol);

        switch(buf_type) {
        case HAMMER_FSBUF_BTREE:
                cluster->ondisk->stat_idx_bufs += nblks;
                cluster->volume->ondisk->vol_stat_idx_bufs += nblks;
                cluster->volume->hmp->rootvol->ondisk->vol0_stat_idx_bufs += nblks;
                break;
        case HAMMER_FSBUF_DATA:
                cluster->ondisk->stat_data_bufs += nblks;
                cluster->volume->ondisk->vol_stat_data_bufs += nblks;
                cluster->volume->hmp->rootvol->ondisk->vol0_stat_data_bufs += nblks;
                break;
        case HAMMER_FSBUF_RECORDS:
                cluster->ondisk->stat_rec_bufs += nblks;
                cluster->volume->ondisk->vol_stat_rec_bufs += nblks;
                cluster->volume->hmp->rootvol->ondisk->vol0_stat_rec_bufs += nblks;
                break;
        }
}
/*
 * Set up the parameters for the various A-lists we use in hammer.  The
 * supercluster A-list must be chained to the cluster A-list and cluster
 * slave A-lists are chained to buffer A-lists.
 *
 * See hammer_init_alist_config() below.
 */

/*
 * A-LIST - cluster recursion into a filesystem buffer
 *
 * In the init case the buffer has already been initialized by
 * alloc_new_buffer() when it allocated the buffer out of the master
 * alist and marked it as free in the slave alist.
 *
 * Because we use a somewhat odd mechanism to assign buffers to slave
 * pools we can't actually free the buffer back to the master alist in
 * buffer_alist_destroy(), but instead must deal with that logic somewhere
 * else.
 */
static int
buffer_alist_init(void *info, int32_t blk, int32_t radix,
                  hammer_alloc_state_t state)
{
        return(0);
}
/*
 * Note: This routine is only called when freeing the last elements of
 * an initialized buffer.  Freeing all elements of the buffer when the
 * buffer was not previously initialized does not call this routine.
 */
static int
buffer_alist_destroy(void *info, int32_t blk, int32_t radix)
{
        hammer_cluster_t cluster = info;
        int32_t buf_no;

        buf_no = blk / HAMMER_FSBUF_MAXBLKS;
        kprintf("destroy buffer %d:%d:%d\n", cluster->volume->vol_no,
                cluster->clu_no, buf_no);
        return(0);
}
/*
 * Note: atblk can be negative and atblk - blk can go negative.
 */
static int32_t
buffer_alist_alloc_fwd(void *info, int32_t blk, int32_t radix,
                       int32_t count, int32_t atblk, int32_t *fullp)
{
        hammer_cluster_t cluster = info;
        hammer_buffer_t buffer;
        int32_t buf_no;
        int32_t r;
        int error = 0;

        buf_no = blk / HAMMER_FSBUF_MAXBLKS;
        buffer = hammer_get_buffer(cluster, buf_no, 0, &error);
        if (buffer) {
                KKASSERT(buffer->ondisk->head.buf_type != 0);

                hammer_modify_buffer(buffer);
                r = hammer_alist_alloc_fwd(&buffer->alist, count, atblk - blk);
                if (r != HAMMER_ALIST_BLOCK_NONE)
                        r += blk;
                *fullp = hammer_alist_isfull(&buffer->alist);
                hammer_rel_buffer(buffer, 0);
        } else {
                r = HAMMER_ALIST_BLOCK_NONE;
                *fullp = 0;
        }
        return(r);
}
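/*
 * A note on the blk/atblk translation used by the slave callbacks
 * (illustrative, numbers hypothetical):  the layered a-list hands these
 * callbacks 'blk' as the absolute base of the buffer's block range and
 * 'atblk' as an absolute search hint, so 'atblk - blk' rebases the hint
 * into buffer-relative space.  For example, with a per-buffer radix of
 * 256 blocks, blk = 512 selects buf_no 2 and atblk = 520 becomes relative
 * block 8.  Successful allocations are rebased back to absolute space via
 * 'r += blk' before returning.
 */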
static int32_t
buffer_alist_alloc_rev(void *info, int32_t blk, int32_t radix,
                       int32_t count, int32_t atblk, int32_t *fullp)
{
        hammer_cluster_t cluster = info;
        hammer_buffer_t buffer;
        int32_t buf_no;
        int32_t r;
        int error = 0;

        buf_no = blk / HAMMER_FSBUF_MAXBLKS;
        buffer = hammer_get_buffer(cluster, buf_no, 0, &error);
        if (buffer) {
                KKASSERT(buffer->ondisk->head.buf_type != 0);
                hammer_modify_buffer(buffer);
                r = hammer_alist_alloc_rev(&buffer->alist, count, atblk - blk);
                if (r != HAMMER_ALIST_BLOCK_NONE)
                        r += blk;
                *fullp = hammer_alist_isfull(&buffer->alist);
                hammer_rel_buffer(buffer, 0);
        } else {
                r = HAMMER_ALIST_BLOCK_NONE;
                *fullp = 0;
        }
        return(r);
}
static void
buffer_alist_free(void *info, int32_t blk, int32_t radix,
                  int32_t base_blk, int32_t count, int32_t *emptyp)
{
        hammer_cluster_t cluster = info;
        hammer_buffer_t buffer;
        int32_t buf_no;
        int error = 0;

        buf_no = blk / HAMMER_FSBUF_MAXBLKS;
        buffer = hammer_get_buffer(cluster, buf_no, 0, &error);
        if (buffer) {
                KKASSERT(buffer->ondisk->head.buf_type != 0);
                hammer_modify_buffer(buffer);
                hammer_alist_free(&buffer->alist, base_blk, count);
                *emptyp = hammer_alist_isempty(&buffer->alist);
                hammer_rel_buffer(buffer, 0);
        } else {
                *emptyp = 0;
        }
}
static int32_t
buffer_alist_find(void *info, int32_t blk, int32_t radix, int32_t atblk,
                  int flags)
{
        hammer_cluster_t cluster = info;
        hammer_buffer_t buffer;
        int32_t buf_no;
        int32_t maxblks;
        int error = 0;

        buf_no = blk / HAMMER_FSBUF_MAXBLKS;
        buffer = hammer_get_buffer(cluster, buf_no, 0, &error);
        if (buffer) {
                KKASSERT(buffer->ondisk->head.buf_type != 0);
                switch(buffer->ondisk->head.buf_type) {
                case HAMMER_FSBUF_RECORDS:
                        maxblks = HAMMER_RECORD_NODES;
                        break;
                case HAMMER_FSBUF_BTREE:
                        maxblks = HAMMER_BTREE_NODES;
                        break;
                case HAMMER_FSBUF_DATA:
                        maxblks = HAMMER_DATA_NODES;
                        break;
                default:
                        panic("buffer_alist_find: unknown buffer type");
                        maxblks = 0;
                        break;
                }
                blk = hammer_alist_find(&buffer->alist, atblk - blk, maxblks,
                                        flags);
                hammer_rel_buffer(buffer, 0);
        } else {
                blk = HAMMER_ALIST_BLOCK_NONE;
        }
        return(blk);
}

static void
buffer_alist_print(void *info, int32_t blk, int32_t radix, int tab)
{
}
/*
 * A-LIST - super-cluster recursion into a cluster and cluster recursion
 * into a filesystem buffer.  A-Lists are mostly self-contained entities,
 * but callbacks must be installed to recurse from one A-List to another.
 *
 * Implementing these callbacks allows us to operate a multi-layered A-List
 * as a single entity.
 */

/*
 * This occurs when allocating a cluster via the volume a-list and the
 * entry in the volume a-list indicated all-free.  The underlying supercl
 * has not yet been initialized.
 */
static int
super_alist_init(void *info, int32_t blk, int32_t radix,
                 hammer_alloc_state_t state)
{
        hammer_volume_t volume = info;
        hammer_supercl_t supercl;
        int32_t scl_no;
        int error = 0;

        /*
         * Calculate the super-cluster number containing the cluster (blk)
         * and obtain the super-cluster buffer.
         */
        scl_no = blk / HAMMER_SCL_MAXCLUSTERS;
        supercl = hammer_get_supercl(volume, scl_no, &error, state);
        if (supercl)
                hammer_rel_supercl(supercl, 0);
        return (error);
}
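/*
 * The same pattern recurs in each super-cluster callback below
 * (illustrative summary):  the volume-relative cluster number 'blk' is
 * first converted to a supercluster index, scl_no = blk /
 * HAMMER_SCL_MAXCLUSTERS, the supercl is obtained, and the operation is
 * applied to its embedded a-list.  A failed lookup is reported through the
 * callback's natural return channel (an error, HAMMER_ALIST_BLOCK_NONE,
 * or a zeroed fullp/emptyp indicator).
 */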
static int
super_alist_recover(void *info, int32_t blk, int32_t radix, int32_t count)
{
        hammer_volume_t volume = info;
        hammer_supercl_t supercl;
        int32_t scl_no;
        int error = 0;

        /*
         * Calculate the super-cluster number containing the cluster (blk)
         * and obtain the super-cluster buffer.
         */
        scl_no = blk / HAMMER_SCL_MAXCLUSTERS;
        supercl = hammer_get_supercl(volume, scl_no, &error,
                                     HAMMER_ASTATE_NONE);
        if (supercl) {
                hammer_modify_supercl(supercl);
                error = hammer_alist_recover(&supercl->alist, blk, 0, count);
                /* free block count is returned if >= 0 */
                hammer_rel_supercl(supercl, 0);
        } else {
                error = -error;
        }
        return (error);
}
/*
 * This occurs when freeing a cluster via the volume a-list and the
 * supercl is now 100% free.  We can destroy the supercl.
 *
 * What we actually do is just unset the modify bit so it doesn't get
 * written out.
 */
static int
super_alist_destroy(void *info, int32_t blk, int32_t radix)
{
        hammer_volume_t volume = info;
        hammer_supercl_t supercl;
        int32_t scl_no;
        int error = 0;

        /*
         * Calculate the super-cluster number containing the cluster (blk)
         * and obtain the super-cluster buffer.
         */
        scl_no = blk / HAMMER_SCL_MAXCLUSTERS;
        if (hammer_find_supercl(volume, scl_no)) {
                supercl = hammer_get_supercl(volume, scl_no, &error,
                                             HAMMER_ASTATE_FREE);
                if (supercl) {
                        hammer_io_clear_modify(&supercl->io);
                        hammer_rel_supercl(supercl, 0);
                }
        }
        return (error);
}
static int32_t
super_alist_alloc_fwd(void *info, int32_t blk, int32_t radix,
                      int32_t count, int32_t atblk, int32_t *fullp)
{
        hammer_volume_t volume = info;
        hammer_supercl_t supercl;
        int32_t scl_no;
        int32_t r;
        int error = 0;

        scl_no = blk / HAMMER_SCL_MAXCLUSTERS;
        supercl = hammer_get_supercl(volume, scl_no, &error, 0);
        if (supercl) {
                hammer_modify_supercl(supercl);
                r = hammer_alist_alloc_fwd(&supercl->alist, count, atblk - blk);
                if (r != HAMMER_ALIST_BLOCK_NONE)
                        r += blk;
                *fullp = hammer_alist_isfull(&supercl->alist);
                hammer_rel_supercl(supercl, 0);
        } else {
                r = HAMMER_ALIST_BLOCK_NONE;
                *fullp = 0;
        }
        return(r);
}
static int32_t
super_alist_alloc_rev(void *info, int32_t blk, int32_t radix,
                      int32_t count, int32_t atblk, int32_t *fullp)
{
        hammer_volume_t volume = info;
        hammer_supercl_t supercl;
        int32_t scl_no;
        int32_t r;
        int error = 0;

        scl_no = blk / HAMMER_SCL_MAXCLUSTERS;
        supercl = hammer_get_supercl(volume, scl_no, &error, 0);
        if (supercl) {
                hammer_modify_supercl(supercl);
                r = hammer_alist_alloc_rev(&supercl->alist, count, atblk - blk);
                if (r != HAMMER_ALIST_BLOCK_NONE)
                        r += blk;
                *fullp = hammer_alist_isfull(&supercl->alist);
                hammer_rel_supercl(supercl, 0);
        } else {
                r = HAMMER_ALIST_BLOCK_NONE;
                *fullp = 0;
        }
        return(r);
}
static void
super_alist_free(void *info, int32_t blk, int32_t radix,
                 int32_t base_blk, int32_t count, int32_t *emptyp)
{
        hammer_volume_t volume = info;
        hammer_supercl_t supercl;
        int32_t scl_no;
        int error = 0;

        scl_no = blk / HAMMER_SCL_MAXCLUSTERS;
        supercl = hammer_get_supercl(volume, scl_no, &error, 0);
        if (supercl) {
                hammer_modify_supercl(supercl);
                hammer_alist_free(&supercl->alist, base_blk, count);
                *emptyp = hammer_alist_isempty(&supercl->alist);
                hammer_rel_supercl(supercl, 0);
        } else {
                *emptyp = 0;
        }
}
static int32_t
super_alist_find(void *info, int32_t blk, int32_t radix, int32_t atblk,
                 int flags)
{
        hammer_volume_t volume = info;
        hammer_supercl_t supercl;
        int32_t scl_no;
        int32_t nclusters;
        int error = 0;

        scl_no = blk / HAMMER_SCL_MAXCLUSTERS;
        supercl = hammer_get_supercl(volume, scl_no, &error, 0);
        if (supercl) {
                /*
                 * The last supercluster may manage fewer than
                 * HAMMER_SCL_MAXCLUSTERS clusters, so clamp the search
                 * range to what actually remains in the volume.
                 */
                nclusters = supercl->volume->ondisk->vol_nclusters -
                            ((int64_t)supercl->scl_no *
                             HAMMER_SCL_MAXCLUSTERS);
                KKASSERT(nclusters > 0);
                if (nclusters > HAMMER_SCL_MAXCLUSTERS)
                        nclusters = HAMMER_SCL_MAXCLUSTERS;
                blk = hammer_alist_find(&supercl->alist, atblk - blk,
                                        nclusters, flags);
                hammer_rel_supercl(supercl, 0);
        } else {
                blk = HAMMER_ALIST_BLOCK_NONE;
        }
        return(blk);
}

static void
super_alist_print(void *info, int32_t blk, int32_t radix, int tab)
{
}
void
hammer_init_alist_config(void)
{
        hammer_alist_config_t config;

        hammer_alist_template(&Buf_alist_config, HAMMER_FSBUF_MAXBLKS,
                              1, HAMMER_FSBUF_METAELMS, 0);
        hammer_alist_template(&Vol_normal_alist_config, HAMMER_VOL_MAXCLUSTERS,
                              1, HAMMER_VOL_METAELMS_1LYR, 0);
        hammer_alist_template(&Vol_super_alist_config,
                          HAMMER_VOL_MAXSUPERCLUSTERS * HAMMER_SCL_MAXCLUSTERS,
                              HAMMER_SCL_MAXCLUSTERS, HAMMER_VOL_METAELMS_2LYR,
                              0);
        hammer_alist_template(&Supercl_alist_config, HAMMER_VOL_MAXCLUSTERS,
                              1, HAMMER_SUPERCL_METAELMS, 0);
        hammer_alist_template(&Clu_master_alist_config, HAMMER_CLU_MAXBUFFERS,
                              1, HAMMER_CLU_MASTER_METAELMS, 0);
        hammer_alist_template(&Clu_slave_alist_config,
                              HAMMER_CLU_MAXBUFFERS * HAMMER_FSBUF_MAXBLKS,
                              HAMMER_FSBUF_MAXBLKS, HAMMER_CLU_SLAVE_METAELMS,
                              1);

        config = &Vol_super_alist_config;
        config->bl_radix_init = super_alist_init;
        config->bl_radix_recover = super_alist_recover;
        config->bl_radix_destroy = super_alist_destroy;
        config->bl_radix_alloc_fwd = super_alist_alloc_fwd;
        config->bl_radix_alloc_rev = super_alist_alloc_rev;
        config->bl_radix_free = super_alist_free;
        config->bl_radix_find = super_alist_find;
        config->bl_radix_print = super_alist_print;

        config = &Clu_slave_alist_config;
        config->bl_radix_init = buffer_alist_init;
        config->bl_radix_recover = buffer_alist_recover;
        config->bl_radix_destroy = buffer_alist_destroy;
        config->bl_radix_alloc_fwd = buffer_alist_alloc_fwd;
        config->bl_radix_alloc_rev = buffer_alist_alloc_rev;
        config->bl_radix_free = buffer_alist_free;
        config->bl_radix_find = buffer_alist_find;
        config->bl_radix_print = buffer_alist_print;
}
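/*
 * Usage sketch (hypothetical caller, for illustration only; the field and
 * argument names here are assumptions, not part of this file):  once the
 * templates are wired up, a two-layer list is driven exactly like a flat
 * one.  Allocating a cluster from a supercluster-backed volume might look
 * like
 *
 *      clu_no = hammer_alist_alloc_fwd(&volume->alist, 1, atblk);
 *
 * with the a-list core invoking bl_radix_alloc_fwd (super_alist_alloc_fwd)
 * whenever the search descends into a supercluster, so the caller never
 * sees the second layer.
 */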