HAMMER 3/many - more core infrastructure.
/*
 * Copyright (c) 2007 The DragonFly Project.  All rights reserved.
 *
 * This code is derived from software contributed to The DragonFly Project
 * by Matthew Dillon <dillon@backplane.com>
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in
 *    the documentation and/or other materials provided with the
 *    distribution.
 * 3. Neither the name of The DragonFly Project nor the names of its
 *    contributors may be used to endorse or promote products derived
 *    from this software without specific, prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE
 * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING,
 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 * $DragonFly: src/sys/vfs/hammer/hammer_ondisk.c,v 1.4 2007/11/19 00:53:40 dillon Exp $
 */
/*
 * Manage HAMMER's on-disk structures.  These routines are primarily
 * responsible for interfacing with the kernel's I/O subsystem and for
 * managing in-memory structures.
 */

#include "hammer.h"
#include <sys/fcntl.h>
#include <sys/nlookup.h>
#include <sys/buf.h>
#include <sys/buf2.h>

static void hammer_free_volume(hammer_volume_t volume);
static int hammer_load_volume(hammer_volume_t volume);
static int hammer_load_supercl(hammer_supercl_t supercl, int isnew);
static int hammer_load_cluster(hammer_cluster_t cluster, int isnew);
static int hammer_load_buffer(hammer_buffer_t buffer, u_int64_t buf_type);
static void hammer_remove_node_clist(hammer_buffer_t buffer,
                        hammer_node_t node);
static void initbuffer(hammer_alist_t live, hammer_fsbuf_head_t head,
                        u_int64_t type);
static void alloc_new_buffer(hammer_cluster_t cluster,
                        hammer_alist_t live, u_int64_t type, int32_t nelements,
                        int32_t start,
                        int *errorp, struct hammer_buffer **bufferp);
#if 0
static void readhammerbuf(hammer_volume_t vol, void *data,
                        int64_t offset);
static void writehammerbuf(hammer_volume_t vol, const void *data,
                        int64_t offset);
#endif
static int64_t calculate_cluster_offset(hammer_volume_t vol, int32_t clu_no);
static int64_t calculate_supercl_offset(hammer_volume_t vol, int32_t scl_no);

struct hammer_alist_config Buf_alist_config;
struct hammer_alist_config Vol_normal_alist_config;
struct hammer_alist_config Vol_super_alist_config;
struct hammer_alist_config Supercl_alist_config;
struct hammer_alist_config Clu_master_alist_config;
struct hammer_alist_config Clu_slave_alist_config;
/*
 * Red-Black tree support for various structures
 */
static int
hammer_ino_rb_compare(hammer_inode_t ip1, hammer_inode_t ip2)
{
        if (ip1->obj_id < ip2->obj_id)
                return(-1);
        if (ip1->obj_id > ip2->obj_id)
                return(1);
        if (ip1->obj_asof < ip2->obj_asof)
                return(-1);
        if (ip1->obj_asof > ip2->obj_asof)
                return(1);
        return(0);
}

static int
hammer_inode_info_cmp(hammer_inode_info_t info, hammer_inode_t ip)
{
        if (info->obj_id < ip->obj_id)
                return(-1);
        if (info->obj_id > ip->obj_id)
                return(1);
        if (info->obj_asof < ip->obj_asof)
                return(-1);
        if (info->obj_asof > ip->obj_asof)
                return(1);
        return(0);
}

static int
hammer_vol_rb_compare(hammer_volume_t vol1, hammer_volume_t vol2)
{
        if (vol1->vol_no < vol2->vol_no)
                return(-1);
        if (vol1->vol_no > vol2->vol_no)
                return(1);
        return(0);
}

static int
hammer_scl_rb_compare(hammer_supercl_t cl1, hammer_supercl_t cl2)
{
        if (cl1->scl_no < cl2->scl_no)
                return(-1);
        if (cl1->scl_no > cl2->scl_no)
                return(1);
        return(0);
}

static int
hammer_clu_rb_compare(hammer_cluster_t cl1, hammer_cluster_t cl2)
{
        if (cl1->clu_no < cl2->clu_no)
                return(-1);
        if (cl1->clu_no > cl2->clu_no)
                return(1);
        return(0);
}

static int
hammer_buf_rb_compare(hammer_buffer_t buf1, hammer_buffer_t buf2)
{
        if (buf1->buf_no < buf2->buf_no)
                return(-1);
        if (buf1->buf_no > buf2->buf_no)
                return(1);
        return(0);
}
static int
hammer_nod_rb_compare(hammer_node_t node1, hammer_node_t node2)
{
        if (node1->node_offset < node2->node_offset)
                return(-1);
        if (node1->node_offset > node2->node_offset)
                return(1);
        return(0);
}

/*
 * Note: The lookup function for hammer_ino_rb_tree winds up being named
 * hammer_ino_rb_tree_RB_LOOKUP_INFO(root, info).  The other lookup
 * functions are normal, e.g. hammer_clu_rb_tree_RB_LOOKUP(root, clu_no).
 */
RB_GENERATE(hammer_ino_rb_tree, hammer_inode, rb_node, hammer_ino_rb_compare);
RB_GENERATE_XLOOKUP(hammer_ino_rb_tree, INFO, hammer_inode, rb_node,
                hammer_inode_info_cmp, hammer_inode_info_t);
RB_GENERATE2(hammer_vol_rb_tree, hammer_volume, rb_node,
             hammer_vol_rb_compare, int32_t, vol_no);
RB_GENERATE2(hammer_scl_rb_tree, hammer_supercl, rb_node,
             hammer_scl_rb_compare, int32_t, scl_no);
RB_GENERATE2(hammer_clu_rb_tree, hammer_cluster, rb_node,
             hammer_clu_rb_compare, int32_t, clu_no);
RB_GENERATE2(hammer_buf_rb_tree, hammer_buffer, rb_node,
             hammer_buf_rb_compare, int32_t, buf_no);
RB_GENERATE2(hammer_nod_rb_tree, hammer_node, rb_node,
             hammer_nod_rb_compare, int32_t, node_offset);
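
/*
 * Illustrative use of the generated lookup functions (a sketch, not code
 * compiled here; the inode tree root name rb_inos_root is assumed):
 *
 *      cluster = hammer_clu_rb_tree_RB_LOOKUP(&volume->rb_clus_root, clu_no);
 *      ip = hammer_ino_rb_tree_RB_LOOKUP_INFO(&hmp->rb_inos_root, &info);
 *
 * The RB_LOOKUP() macro used throughout this file presumably expands to
 * the former style of call.
 */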

/************************************************************************
 *                              VOLUMES                                 *
 ************************************************************************
 *
 * Load a HAMMER volume by name.  Returns 0 on success or a positive error
 * code on failure.  Volumes must be loaded at mount time, get_volume() will
 * not load a new volume.
 *
 * Calls made to hammer_load_volume() are single-threaded.
 */
int
hammer_install_volume(struct hammer_mount *hmp, const char *volname)
{
        struct mount *mp;
        hammer_volume_t volume;
        struct hammer_volume_ondisk *ondisk;
        struct nlookupdata nd;
        struct buf *bp = NULL;
        int error;
        int ronly;

        mp = hmp->mp;
        ronly = ((mp->mnt_flag & MNT_RDONLY) ? 1 : 0);

        /*
         * Allocate a volume structure
         */
        volume = kmalloc(sizeof(*volume), M_HAMMER, M_WAITOK|M_ZERO);
        volume->vol_name = kstrdup(volname, M_HAMMER);
        volume->hmp = hmp;
        volume->io.type = HAMMER_STRUCTURE_VOLUME;
        volume->io.offset = 0LL;

        /*
         * Get the device vnode
         */
        error = nlookup_init(&nd, volume->vol_name, UIO_SYSSPACE, NLC_FOLLOW);
        if (error == 0)
                error = nlookup(&nd);
        if (error == 0)
                error = cache_vref(&nd.nl_nch, nd.nl_cred, &volume->devvp);
        nlookup_done(&nd);
        if (error == 0) {
                vn_isdisk(volume->devvp, &error);
        }
        if (error == 0) {
                vn_lock(volume->devvp, LK_EXCLUSIVE | LK_RETRY);
                error = VOP_OPEN(volume->devvp, (ronly ? FREAD : FREAD|FWRITE),
                                 FSCRED, NULL);
                vn_unlock(volume->devvp);
        }
        if (error) {
                hammer_free_volume(volume);
                return(error);
        }

        /*
         * Extract the volume number from the volume header and do various
         * sanity checks.
         */
        error = bread(volume->devvp, 0LL, HAMMER_BUFSIZE, &bp);
        if (error)
                goto late_failure;
        ondisk = (void *)bp->b_data;
        if (ondisk->head.buf_type != HAMMER_FSBUF_VOLUME) {
                kprintf("hammer_mount: volume %s has an invalid header\n",
                        volume->vol_name);
                error = EFTYPE;
                goto late_failure;
        }
        volume->vol_no = ondisk->vol_no;
        volume->cluster_base = ondisk->vol_beg;
        volume->vol_clsize = ondisk->vol_clsize;
        volume->vol_flags = ondisk->vol_flags;
        RB_INIT(&volume->rb_clus_root);
        RB_INIT(&volume->rb_scls_root);

        if (RB_EMPTY(&hmp->rb_vols_root)) {
                hmp->fsid = ondisk->vol_fsid;
        } else if (bcmp(&hmp->fsid, &ondisk->vol_fsid, sizeof(uuid_t))) {
                kprintf("hammer_mount: volume %s's fsid does not match "
                        "other volumes\n", volume->vol_name);
                error = EFTYPE;
                goto late_failure;
        }

        /*
         * Insert the volume structure into the red-black tree.
         */
        if (RB_INSERT(hammer_vol_rb_tree, &hmp->rb_vols_root, volume)) {
                kprintf("hammer_mount: volume %s has a duplicate vol_no %d\n",
                        volume->vol_name, volume->vol_no);
                error = EEXIST;
        }

        /*
         * Set the root volume and load the root cluster.  HAMMER special
         * cases rootvol and rootcl and will not deallocate the structures.
         * We do not hold a ref because this would prevent related I/O
         * from being flushed.
         */
        if (error == 0 && ondisk->vol_rootvol == ondisk->vol_no) {
                hmp->rootvol = volume;
                hmp->rootcl = hammer_get_cluster(volume,
                                                 ondisk->vol0_root_clu_no,
                                                 &error, 0);
                if (hmp->rootcl)
                        hammer_rel_cluster(hmp->rootcl, 0);
                hmp->fsid_udev = dev2udev(vn_todev(volume->devvp));
        }
late_failure:
        if (bp)
                brelse(bp);
        if (error) {
                /*vinvalbuf(volume->devvp, V_SAVE, 0, 0);*/
                VOP_CLOSE(volume->devvp, ronly ? FREAD : FREAD|FWRITE);
                hammer_free_volume(volume);
        }
        return (error);
}

/*
 * Unload and free a HAMMER volume.  Must return >= 0 to continue the
 * RB_SCAN; -1 is returned on failure to abort the scan.
 */
int
hammer_unload_volume(hammer_volume_t volume, void *data __unused)
{
        struct hammer_mount *hmp = volume->hmp;
        hammer_cluster_t rootcl;
        int ronly = ((hmp->mp->mnt_flag & MNT_RDONLY) ? 1 : 0);

        /*
         * Sync clusters, sync volume
         */

        /*
         * Clean up the root cluster, which is held unlocked in the root
         * volume.
         */
        hammer_ref(&volume->io.lock);
        if (hmp->rootvol == volume) {
                if ((rootcl = hmp->rootcl) != NULL)
                        hmp->rootcl = NULL;
                hmp->rootvol = NULL;
        }

        /*
         * Flush the volume
         */
        KKASSERT(volume->io.lock.refs == 1);
        hammer_io_release(&volume->io, 1);
        volume->ondisk = NULL;
        if (volume->devvp) {
                if (ronly) {
                        vinvalbuf(volume->devvp, 0, 0, 0);
                        VOP_CLOSE(volume->devvp, FREAD);
                } else {
                        vinvalbuf(volume->devvp, V_SAVE, 0, 0);
                        VOP_CLOSE(volume->devvp, FREAD|FWRITE);
                }
        }

        /*
         * Destroy the structure
         */
        RB_REMOVE(hammer_vol_rb_tree, &hmp->rb_vols_root, volume);
        hammer_free_volume(volume);
        return(0);
}

static
void
hammer_free_volume(hammer_volume_t volume)
{
        if (volume->vol_name) {
                kfree(volume->vol_name, M_HAMMER);
                volume->vol_name = NULL;
        }
        if (volume->devvp) {
                vrele(volume->devvp);
                volume->devvp = NULL;
        }
        kfree(volume, M_HAMMER);
}

/*
 * Get a HAMMER volume.  The volume must already exist.
 */
hammer_volume_t
hammer_get_volume(struct hammer_mount *hmp, int32_t vol_no, int *errorp)
{
        struct hammer_volume *volume;

        /*
         * Locate the volume structure
         */
        volume = RB_LOOKUP(hammer_vol_rb_tree, &hmp->rb_vols_root, vol_no);
        if (volume == NULL) {
                *errorp = ENOENT;
                return(NULL);
        }
        hammer_ref(&volume->io.lock);

        /*
         * Deal with on-disk info
         */
        if (volume->ondisk == NULL) {
                *errorp = hammer_load_volume(volume);
                if (*errorp) {
                        hammer_rel_volume(volume, 1);
                        volume = NULL;
                }
        } else {
                *errorp = 0;
        }
        return(volume);
}

hammer_volume_t
hammer_get_root_volume(struct hammer_mount *hmp, int *errorp)
{
        hammer_volume_t volume;

        volume = hmp->rootvol;
        KKASSERT(volume != NULL);
        hammer_ref(&volume->io.lock);

        /*
         * Deal with on-disk info
         */
        if (volume->ondisk == NULL) {
                *errorp = hammer_load_volume(volume);
                if (*errorp) {
                        hammer_rel_volume(volume, 1);
                        volume = NULL;
                }
        } else {
                *errorp = 0;
        }
        return (volume);
}

/*
 * Load a volume's on-disk information.  The volume must be referenced and
 * not locked.  We temporarily acquire an exclusive lock to interlock
 * against releases or multiple get's.
 */
static int
hammer_load_volume(hammer_volume_t volume)
{
        struct hammer_volume_ondisk *ondisk;
        int error;

        hammer_lock_ex(&volume->io.lock);
        if (volume->ondisk == NULL) {
                error = hammer_io_read(volume->devvp, &volume->io);
                if (error) {
                        hammer_unlock(&volume->io.lock);
                        return (error);
                }
                volume->ondisk = ondisk = (void *)volume->io.bp->b_data;

                /*
                 * Configure the volume's A-lists.  These are used to
                 * allocate clusters.
                 */
                if (volume->vol_flags & HAMMER_VOLF_USINGSUPERCL) {
                        volume->alist.config = &Vol_super_alist_config;
                        volume->alist.meta = ondisk->vol_almeta.super;
                        volume->alist.info = volume;
                } else {
                        volume->alist.config = &Vol_normal_alist_config;
                        volume->alist.meta = ondisk->vol_almeta.normal;
                        volume->alist.info = NULL;
                }
                hammer_alist_init(&volume->alist);
        } else {
                error = 0;
        }
        hammer_unlock(&volume->io.lock);
        return(0);
}

/*
 * Release a volume.  Call hammer_io_release on the last reference.  We have
 * to acquire an exclusive lock to interlock against volume->ondisk tests
 * in hammer_load_volume().
 */
void
hammer_rel_volume(hammer_volume_t volume, int flush)
{
        if (hammer_islastref(&volume->io.lock)) {
                hammer_lock_ex(&volume->io.lock);
                if (hammer_islastref(&volume->io.lock)) {
                        volume->ondisk = NULL;
                        hammer_io_release(&volume->io, flush);
                }
                hammer_unlock(&volume->io.lock);
        }
        hammer_unref(&volume->io.lock);
}
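
/*
 * Typical caller pattern for the get/rel pair above (a sketch under the
 * assumption that vol_no names an already-installed volume):
 *
 *      hammer_volume_t volume;
 *      int error;
 *
 *      volume = hammer_get_volume(hmp, vol_no, &error);
 *      if (volume) {
 *              ... volume->ondisk stays valid while the ref is held ...
 *              hammer_rel_volume(volume, 0);
 *      }
 */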

/************************************************************************
 *                              SUPER-CLUSTERS                          *
 ************************************************************************
 *
 * Manage super-clusters.  Note that a supercl holds a reference to its
 * associated volume.
 */
hammer_supercl_t
hammer_get_supercl(hammer_volume_t volume, int32_t scl_no,
                   int *errorp, int isnew)
{
        hammer_supercl_t supercl;

        /*
         * Locate and lock the super-cluster structure, creating one
         * if necessary.
         */
again:
        supercl = RB_LOOKUP(hammer_scl_rb_tree, &volume->rb_scls_root, scl_no);
        if (supercl == NULL) {
                supercl = kmalloc(sizeof(*supercl), M_HAMMER, M_WAITOK|M_ZERO);
                supercl->scl_no = scl_no;
                supercl->volume = volume;
                supercl->io.offset = calculate_supercl_offset(volume, scl_no);
                supercl->io.type = HAMMER_STRUCTURE_SUPERCL;
                hammer_ref(&supercl->io.lock);

                /*
                 * Insert the super-cluster into the RB tree and handle late
                 * collisions.
                 */
                if (RB_INSERT(hammer_scl_rb_tree, &volume->rb_scls_root, supercl)) {
                        hammer_unref(&supercl->io.lock);
                        kfree(supercl, M_HAMMER);
                        goto again;
                }
                hammer_ref(&volume->io.lock);
        } else {
                hammer_ref(&supercl->io.lock);
        }

        /*
         * Deal with on-disk info
         */
        if (supercl->ondisk == NULL || isnew) {
                *errorp = hammer_load_supercl(supercl, isnew);
                if (*errorp) {
                        hammer_rel_supercl(supercl, 1);
                        supercl = NULL;
                }
        } else {
                *errorp = 0;
        }
        return(supercl);
}

static int
hammer_load_supercl(hammer_supercl_t supercl, int isnew)
{
        struct hammer_supercl_ondisk *ondisk;
        hammer_volume_t volume = supercl->volume;
        int error;

        hammer_lock_ex(&supercl->io.lock);
        if (supercl->ondisk == NULL) {
                if (isnew)
                        error = hammer_io_new(volume->devvp, &supercl->io);
                else
                        error = hammer_io_read(volume->devvp, &supercl->io);
                if (error) {
                        hammer_unlock(&supercl->io.lock);
                        return (error);
                }
                supercl->ondisk = ondisk = (void *)supercl->io.bp->b_data;

                supercl->alist.config = &Supercl_alist_config;
                supercl->alist.meta = ondisk->scl_meta;
                supercl->alist.info = NULL;

                /*
                 * If this is a new super-cluster we have to initialize
                 * various ondisk structural elements.  The caller is
                 * responsible for the remainder.
                 */
                if (isnew) {
                        struct hammer_alist_live dummy;

                        dummy.config = &Buf_alist_config;
                        dummy.meta = ondisk->head.buf_almeta;
                        dummy.info = NULL;
                        initbuffer(&dummy, &ondisk->head, HAMMER_FSBUF_SUPERCL);
                        hammer_alist_init(&supercl->alist);
                }
        } else if (isnew) {
                error = hammer_io_new(volume->devvp, &supercl->io);
        } else {
                error = 0;
        }
        hammer_unlock(&supercl->io.lock);
        return (error);
}

/*
 * Release a super-cluster.  We have to deal with several places where
 * another thread can ref the super-cluster.
 *
 * Only destroy the structure itself if the related buffer cache buffer
 * was disassociated from it.  This ties the management of the structure
 * to the buffer cache subsystem.
 */
void
hammer_rel_supercl(hammer_supercl_t supercl, int flush)
{
        hammer_volume_t volume;

        if (hammer_islastref(&supercl->io.lock)) {
                hammer_lock_ex(&supercl->io.lock);
                if (hammer_islastref(&supercl->io.lock)) {
                        supercl->ondisk = NULL;
                        hammer_io_release(&supercl->io, flush);
                        if (supercl->io.bp == NULL &&
                            hammer_islastref(&supercl->io.lock)) {
                                volume = supercl->volume;
                                RB_REMOVE(hammer_scl_rb_tree,
                                          &volume->rb_scls_root, supercl);
                                supercl->volume = NULL; /* sanity */
                                kfree(supercl, M_HAMMER);
                                hammer_rel_volume(volume, 0);
                                return;
                        }
                }
                hammer_unlock(&supercl->io.lock);
        }
        hammer_unref(&supercl->io.lock);
}

/************************************************************************
 *                              CLUSTERS                                *
 ************************************************************************
 *
 * Manage clusters.  Note that a cluster holds a reference to its
 * associated volume.
 */
hammer_cluster_t
hammer_get_cluster(hammer_volume_t volume, int32_t clu_no,
                   int *errorp, int isnew)
{
        hammer_cluster_t cluster;

again:
        cluster = RB_LOOKUP(hammer_clu_rb_tree, &volume->rb_clus_root, clu_no);
        if (cluster == NULL) {
                cluster = kmalloc(sizeof(*cluster), M_HAMMER, M_WAITOK|M_ZERO);
                cluster->clu_no = clu_no;
                cluster->volume = volume;
                cluster->io.offset = calculate_cluster_offset(volume, clu_no);
                RB_INIT(&cluster->rb_bufs_root);
                RB_INIT(&cluster->rb_nods_root);
                cluster->io.type = HAMMER_STRUCTURE_CLUSTER;
                hammer_ref(&cluster->io.lock);

                /*
                 * Insert the cluster into the RB tree and handle late
                 * collisions.
                 */
                if (RB_INSERT(hammer_clu_rb_tree, &volume->rb_clus_root, cluster)) {
                        hammer_unref(&cluster->io.lock);
                        kfree(cluster, M_HAMMER);
                        goto again;
                }
                hammer_ref(&volume->io.lock);
        } else {
                hammer_ref(&cluster->io.lock);
        }

        /*
         * Deal with on-disk info
         */
        if (cluster->ondisk == NULL || isnew) {
                *errorp = hammer_load_cluster(cluster, isnew);
                if (*errorp) {
                        hammer_rel_cluster(cluster, 1);
                        cluster = NULL;
                }
        } else {
                *errorp = 0;
        }
        return (cluster);
}

hammer_cluster_t
hammer_get_root_cluster(struct hammer_mount *hmp, int *errorp)
{
        hammer_cluster_t cluster;

        cluster = hmp->rootcl;
        KKASSERT(cluster != NULL);
        hammer_ref(&cluster->io.lock);

        /*
         * Deal with on-disk info
         */
        if (cluster->ondisk == NULL) {
                *errorp = hammer_load_cluster(cluster, 0);
                if (*errorp) {
                        hammer_rel_cluster(cluster, 1);
                        cluster = NULL;
                }
        } else {
                *errorp = 0;
        }
        return (cluster);
}

static
int
hammer_load_cluster(hammer_cluster_t cluster, int isnew)
{
        hammer_volume_t volume = cluster->volume;
        struct hammer_cluster_ondisk *ondisk;
        int error;

        /*
         * Load the cluster's on-disk info
         */
        hammer_lock_ex(&cluster->io.lock);
        if (cluster->ondisk == NULL) {
                if (isnew)
                        error = hammer_io_new(volume->devvp, &cluster->io);
                else
                        error = hammer_io_read(volume->devvp, &cluster->io);
                if (error) {
                        hammer_unlock(&cluster->io.lock);
                        return (error);
                }
                cluster->ondisk = ondisk = (void *)cluster->io.bp->b_data;

                cluster->alist_master.config = &Clu_master_alist_config;
                cluster->alist_master.meta = ondisk->clu_master_meta;
                cluster->alist_btree.config = &Clu_slave_alist_config;
                cluster->alist_btree.meta = ondisk->clu_btree_meta;
                cluster->alist_btree.info = cluster;
                cluster->alist_record.config = &Clu_slave_alist_config;
                cluster->alist_record.meta = ondisk->clu_record_meta;
                cluster->alist_record.info = cluster;
                cluster->alist_mdata.config = &Clu_slave_alist_config;
                cluster->alist_mdata.meta = ondisk->clu_mdata_meta;
                cluster->alist_mdata.info = cluster;

                cluster->clu_btree_beg = ondisk->clu_btree_beg;
                cluster->clu_btree_end = ondisk->clu_btree_end;

                /*
                 * If this is a new cluster we have to initialize
                 * various ondisk structural elements.  The caller is
                 * responsible for the remainder.
                 */
                if (isnew) {
                        struct hammer_alist_live dummy;

                        dummy.config = &Buf_alist_config;
                        dummy.meta = ondisk->head.buf_almeta;
                        dummy.info = NULL;
                        initbuffer(&dummy, &ondisk->head, HAMMER_FSBUF_CLUSTER);

                        hammer_alist_init(&cluster->alist_master);
                        hammer_alist_init(&cluster->alist_btree);
                        hammer_alist_init(&cluster->alist_record);
                        hammer_alist_init(&cluster->alist_mdata);
                }
        } else if (isnew) {
                error = hammer_io_new(volume->devvp, &cluster->io);
        } else {
                error = 0;
        }
        hammer_unlock(&cluster->io.lock);
        return (error);
}

/*
 * Reference a cluster that is either already referenced or via a specially
 * handled pointer (aka rootcl).
 */
int
hammer_ref_cluster(hammer_cluster_t cluster)
{
        int error;

        KKASSERT(cluster != NULL);
        hammer_ref(&cluster->io.lock);

        /*
         * Deal with on-disk info
         */
        if (cluster->ondisk == NULL) {
                error = hammer_load_cluster(cluster, 0);
                if (error)
                        hammer_rel_cluster(cluster, 1);
        } else {
                error = 0;
        }
        return(error);
}
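
/*
 * Illustrative use against the specially handled root cluster pointer
 * (a sketch; error handling abbreviated):
 *
 *      if (hammer_ref_cluster(hmp->rootcl) == 0) {
 *              ... hmp->rootcl->ondisk is now valid ...
 *              hammer_rel_cluster(hmp->rootcl, 0);
 *      }
 */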

/*
 * Release a cluster.  We have to deal with several places where
 * another thread can ref the cluster.
 *
 * Only destroy the structure itself if the related buffer cache buffer
 * was disassociated from it.  This ties the management of the structure
 * to the buffer cache subsystem.
 */
void
hammer_rel_cluster(hammer_cluster_t cluster, int flush)
{
        hammer_node_t node;
        hammer_volume_t volume;

        if (hammer_islastref(&cluster->io.lock)) {
                hammer_lock_ex(&cluster->io.lock);
                if (hammer_islastref(&cluster->io.lock)) {
                        cluster->ondisk = NULL;
                        hammer_io_release(&cluster->io, flush);

                        /*
                         * Clean out the B-Tree node cache, if any, then
                         * clean up the volume ref and free the cluster.
                         *
                         * If the cluster acquires a new reference while we
                         * are trying to clean it out, abort the cleaning.
                         *
                         * There really shouldn't be any nodes at this point
                         * but we allow a node with no buffer association
                         * so handle the case.
                         */
                        while (cluster->io.bp == NULL &&
                               hammer_islastref(&cluster->io.lock) &&
                               (node = RB_ROOT(&cluster->rb_nods_root)) != NULL
                        ) {
                                KKASSERT(node->lock.refs == 0);
                                hammer_flush_node(node);
                        }
                        if (cluster->io.bp == NULL &&
                            hammer_islastref(&cluster->io.lock)) {
                                volume = cluster->volume;
                                RB_REMOVE(hammer_clu_rb_tree,
                                          &volume->rb_clus_root, cluster);
                                cluster->volume = NULL; /* sanity */
                                kfree(cluster, M_HAMMER);
                                hammer_rel_volume(volume, 0);
                                return;
                        }
                }
                hammer_unlock(&cluster->io.lock);
        }
        hammer_unref(&cluster->io.lock);
}

/************************************************************************
 *                              BUFFERS                                 *
 ************************************************************************
 *
 * Manage buffers.  Note that a buffer holds a reference to its associated
 * cluster, and its cluster will hold a reference to the cluster's volume.
 *
 * A non-zero buf_type indicates that a new buffer should be created and
 * zero'd.
 */
hammer_buffer_t
hammer_get_buffer(hammer_cluster_t cluster, int32_t buf_no,
                  u_int64_t buf_type, int *errorp)
{
        hammer_buffer_t buffer;

        /*
         * Find the buffer.  Note that buffer 0 corresponds to the cluster
         * header and should never be requested.
         */
        KKASSERT(buf_no != 0);

        /*
         * Locate and lock the buffer structure, creating one if necessary.
         */
again:
        buffer = RB_LOOKUP(hammer_buf_rb_tree, &cluster->rb_bufs_root, buf_no);
        if (buffer == NULL) {
                buffer = kmalloc(sizeof(*buffer), M_HAMMER, M_WAITOK|M_ZERO);
                buffer->buf_no = buf_no;
                buffer->cluster = cluster;
                buffer->volume = cluster->volume;
                buffer->io.offset = cluster->io.offset +
                                    (buf_no * HAMMER_BUFSIZE);
                buffer->io.type = HAMMER_STRUCTURE_BUFFER;
                TAILQ_INIT(&buffer->clist);
                hammer_ref(&buffer->io.lock);

                /*
                 * Insert the buffer into the RB tree and handle late
                 * collisions.
                 */
                if (RB_INSERT(hammer_buf_rb_tree, &cluster->rb_bufs_root, buffer)) {
                        hammer_unref(&buffer->io.lock);
                        kfree(buffer, M_HAMMER);
                        goto again;
                }
                hammer_ref(&cluster->io.lock);
        } else {
                hammer_ref(&buffer->io.lock);
        }

        /*
         * Deal with on-disk info
         */
        if (buffer->ondisk == NULL || buf_type) {
                *errorp = hammer_load_buffer(buffer, buf_type);
                if (*errorp) {
                        hammer_rel_buffer(buffer, 1);
                        buffer = NULL;
                }
        } else {
                *errorp = 0;
        }
        return(buffer);
}

static int
hammer_load_buffer(hammer_buffer_t buffer, u_int64_t buf_type)
{
        hammer_volume_t volume;
        hammer_fsbuf_ondisk_t ondisk;
        int error;

        /*
         * Load the buffer's on-disk info
         */
        volume = buffer->volume;
        hammer_lock_ex(&buffer->io.lock);
        if (buffer->ondisk == NULL) {
                if (buf_type) {
                        error = hammer_io_new(volume->devvp, &buffer->io);
                } else {
                        error = hammer_io_read(volume->devvp, &buffer->io);
                }
                if (error) {
                        hammer_unlock(&buffer->io.lock);
                        return (error);
                }
                buffer->ondisk = ondisk = (void *)buffer->io.bp->b_data;
                buffer->alist.config = &Buf_alist_config;
                buffer->alist.meta = ondisk->head.buf_almeta;

                if (buf_type) {
                        initbuffer(&buffer->alist, &ondisk->head, buf_type);
                }
                buffer->buf_type = ondisk->head.buf_type;
        } else if (buf_type) {
                error = hammer_io_new(volume->devvp, &buffer->io);
        } else {
                error = 0;
        }
        hammer_unlock(&buffer->io.lock);
        return (error);
}

/*
 * Reference a buffer that is either already referenced or via a specially
 * handled pointer (aka cursor->buffer).
 */
int
hammer_ref_buffer(hammer_buffer_t buffer)
{
        int error;

        hammer_ref(&buffer->io.lock);
        if (buffer->ondisk == NULL) {
                error = hammer_load_buffer(buffer, 0);
                if (error) {
                        hammer_rel_buffer(buffer, 1);
                        /*
                         * NOTE: buffer pointer can become stale after
                         * the above release.
                         */
                } else {
                        KKASSERT(buffer->buf_type ==
                                 buffer->ondisk->head.buf_type);
                }
        } else {
                error = 0;
        }
        return(error);
}

/*
 * Release a buffer.  We have to deal with several places where
 * another thread can ref the buffer.
 *
 * Only destroy the structure itself if the related buffer cache buffer
 * was disassociated from it.  This ties the management of the structure
 * to the buffer cache subsystem.  buffer->ondisk determines whether the
 * embedded io is referenced or not.
 */
void
hammer_rel_buffer(hammer_buffer_t buffer, int flush)
{
        hammer_cluster_t cluster;
        hammer_node_t node;

        if (hammer_islastref(&buffer->io.lock)) {
                hammer_lock_ex(&buffer->io.lock);
                if (hammer_islastref(&buffer->io.lock)) {
                        buffer->ondisk = NULL;
                        hammer_io_release(&buffer->io, flush);

                        /*
                         * Clean out the B-Tree node cache, if any, then
                         * clean up the cluster ref and free the buffer.
                         *
                         * If the buffer acquires a new reference while we
                         * are trying to clean it out, abort the cleaning.
                         */
                        while (buffer->io.bp == NULL &&
                               hammer_islastref(&buffer->io.lock) &&
                               (node = TAILQ_FIRST(&buffer->clist)) != NULL
                        ) {
                                KKASSERT(node->lock.refs == 0);
                                hammer_flush_node(node);
                        }
                        if (buffer->io.bp == NULL &&
                            hammer_islastref(&buffer->io.lock)) {
                                cluster = buffer->cluster;
                                RB_REMOVE(hammer_buf_rb_tree,
                                          &cluster->rb_bufs_root, buffer);
                                buffer->cluster = NULL; /* sanity */
                                kfree(buffer, M_HAMMER);
                                hammer_rel_cluster(cluster, 0);
                                return;
                        }
                }
                hammer_unlock(&buffer->io.lock);
        }
        hammer_unref(&buffer->io.lock);
}
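
/*
 * Sketch of the buffer life cycle managed above (illustrative only; a
 * buf_type of 0 reads an existing buffer instead of creating a new one):
 *
 *      buffer = hammer_get_buffer(cluster, buf_no, 0, &error);
 *      if (buffer) {
 *              ... access buffer->ondisk ...
 *              hammer_rel_buffer(buffer, 0);
 *      }
 */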

/*
 * Flush passively cached B-Tree nodes associated with this buffer.
 *
 * NOTE: The buffer is referenced and locked.
 */
void
hammer_flush_buffer_nodes(hammer_buffer_t buffer)
{
        hammer_node_t node;

        node = TAILQ_FIRST(&buffer->clist);
        while (node) {
                buffer->save_scan = TAILQ_NEXT(node, entry);
                if (node->lock.refs == 0)
                        hammer_flush_node(node);
                node = buffer->save_scan;
        }
}

/************************************************************************
 *                              NODES                                   *
 ************************************************************************
 *
 * Manage B-Tree nodes.  B-Tree nodes represent the primary indexing
 * method used by the HAMMER filesystem.
 *
 * Unlike other HAMMER structures, a hammer_node can be PASSIVELY
 * associated with its buffer.  It can have an active buffer reference
 * even when the node itself has no references.  The node also passively
 * associates itself with its cluster without holding any cluster refs.
 * The cluster ref is indirectly maintained by the active buffer ref when
 * a node is acquired.
 *
 * A hammer_node can also be passively associated with other HAMMER
 * structures, such as inodes, while retaining 0 references.  These
 * associations can be cleared backwards using a pointer-to-pointer in
 * the hammer_node.
 *
 * This allows the HAMMER implementation to cache hammer_node's long-term
 * and short-cut a great deal of the infrastructure's complexity.  In
 * most cases a cached node can be reacquired without having to dip into
 * either the buffer or cluster management code.
 *
 * The caller must pass a referenced cluster on call and will retain
 * ownership of the reference on return.  The node will acquire its own
 * additional references, if necessary.
 */
hammer_node_t
hammer_get_node(hammer_cluster_t cluster, int32_t node_offset, int *errorp)
{
        hammer_node_t node;

        /*
         * Locate the structure, allocating one if necessary.
         */
again:
        node = RB_LOOKUP(hammer_nod_rb_tree, &cluster->rb_nods_root,
                         node_offset);
        if (node == NULL) {
                node = kmalloc(sizeof(*node), M_HAMMER, M_WAITOK|M_ZERO);
                node->node_offset = node_offset;
                node->cluster = cluster;
                if (RB_INSERT(hammer_nod_rb_tree, &cluster->rb_nods_root,
                              node)) {
                        kfree(node, M_HAMMER);
                        goto again;
                }
        }
        *errorp = hammer_ref_node(node);
        if (*errorp) {
                /*
                 * NOTE: The node pointer may be stale on error return.
                 * In fact, it has probably been destroyed.
                 */
                node = NULL;
        }
        return(node);
}

/*
 * Reference the node to prevent disassociations, then associate and
 * load the related buffer.  This routine can also be called to reference
 * a node from a cache pointer.
 *
 * NOTE: Because the caller does not have a ref on the node, the caller's
 * node pointer will be stale if an error is returned.  We may also wind
 * up clearing the related cache pointers.
 *
 * NOTE: The cluster is indirectly referenced by our buffer ref.
 */
int
hammer_ref_node(hammer_node_t node)
{
        hammer_buffer_t buffer;
        int32_t buf_no;
        int error = 0;

        hammer_ref(&node->lock);
        if (node->ondisk == NULL) {
                hammer_lock_ex(&node->lock);
                if (node->ondisk == NULL) {
                        /*
                         * This is a little confusing but the gist is that
                         * node->buffer determines whether the node is on
                         * the buffer's clist and node->ondisk determines
                         * whether the buffer is referenced.
                         */
                        if ((buffer = node->buffer) != NULL) {
                                error = hammer_ref_buffer(buffer);
                        } else {
                                buf_no = node->node_offset / HAMMER_BUFSIZE;
                                buffer = hammer_get_buffer(node->cluster,
                                                           buf_no, 0, &error);
                                if (buffer) {
                                        KKASSERT(error == 0);
                                        TAILQ_INSERT_TAIL(&buffer->clist,
                                                          node, entry);
                                        node->buffer = buffer;
                                }
                        }
                        if (error == 0) {
                                node->ondisk = (void *)((char *)buffer->ondisk +
                                       (node->node_offset & HAMMER_BUFMASK));
                        }
                }
                hammer_unlock(&node->lock);
        }
        if (error)
                hammer_rel_node(node);
        return (error);
}

/*
 * Release a hammer_node.  The node retains a passive association with
 * its cluster, buffer and caches.
 *
 * However, to avoid cluttering up kernel memory with tons of B-Tree
 * node cache structures we destroy the node if no passive cache or
 * (instantiated) buffer references exist.
 */
void
hammer_rel_node(hammer_node_t node)
{
        hammer_cluster_t cluster;
        hammer_buffer_t buffer;

        if (hammer_islastref(&node->lock)) {
                cluster = node->cluster;
                /*
                 * Clutter control, this case only occurs after a failed
                 * load since otherwise ondisk will be non-NULL.
                 */
                if (node->cache1 == NULL && node->cache2 == NULL &&
                    node->ondisk == NULL) {
                        RB_REMOVE(hammer_nod_rb_tree, &cluster->rb_nods_root,
                                  node);
                        if ((buffer = node->buffer) != NULL) {
                                node->buffer = NULL;
                                hammer_remove_node_clist(buffer, node);
                        }
                        kfree(node, M_HAMMER);
                        return;
                }

                /*
                 * node->ondisk determines whether we have a buffer reference
                 * to get rid of or not.  Only get rid of the reference if
                 * the kernel tried to flush the buffer.
                 *
                 * NOTE: Once unref'd the node can be physically destroyed,
                 * so our node is stale afterwards.
                 *
                 * This case occurs if the node still has cache references.
                 * We could remove the references and free the structure
                 * but for now we allow them (and the node structure) to
                 * remain intact.
                 */
                if (node->ondisk && hammer_io_checkflush(&node->buffer->io)) {
                        buffer = node->buffer;
                        node->buffer = NULL;
                        node->ondisk = NULL;
                        hammer_remove_node_clist(buffer, node);
                        hammer_unref(&node->lock);
                        hammer_rel_buffer(buffer, 0);
                } else {
                        hammer_unref(&node->lock);
                }
        } else {
                hammer_unref(&node->lock);
        }
}

/*
 * Cache-and-release a hammer_node.  Kinda like catching and releasing a
 * fish, but keeping an eye on him.  The node is passively cached in *cache.
 *
 * NOTE!  HAMMER may NULL *cache at any time, even after you have
 * referenced the node!
 */
void
hammer_cache_node(hammer_node_t node, struct hammer_node **cache)
{
        if (node->cache1 != cache) {
                if (node->cache2 == cache) {
                        struct hammer_node **tmp;
                        tmp = node->cache1;
                        node->cache1 = node->cache2;
                        node->cache2 = tmp;
                } else {
                        if (node->cache2)
                                *node->cache2 = NULL;
                        node->cache2 = node->cache1;
                        node->cache1 = cache;
                        *cache = node;
                }
        }
        hammer_rel_node(node);
}
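
/*
 * Illustrative cache-and-release sequence (ip->cache is a hypothetical
 * passive cache pointer owned by some other structure, e.g. an inode):
 *
 *      node = hammer_get_node(cluster, node_offset, &error);
 *      if (node)
 *              hammer_cache_node(node, &ip->cache);
 *      ...
 *      hammer_uncache_node(&ip->cache);
 *
 * hammer_cache_node() consumes the caller's reference, so no separate
 * hammer_rel_node() call is needed, and the cached pointer must be
 * re-tested before each use because HAMMER may NULL it at any time.
 */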

void
hammer_uncache_node(struct hammer_node **cache)
{
        hammer_node_t node;

        if ((node = *cache) != NULL) {
                *cache = NULL;
                if (node->cache1 == cache) {
                        node->cache1 = node->cache2;
                        node->cache2 = NULL;
                } else if (node->cache2 == cache) {
                        node->cache2 = NULL;
                } else {
                        panic("hammer_uncache_node: missing cache linkage");
                }
                if (node->cache1 == NULL && node->cache2 == NULL &&
                    node->lock.refs == 0) {
                        hammer_flush_node(node);
                }
        }
}

/*
 * Remove a node's cache references and destroy the node if it has no
 * references.  This is typically called from the buffer handling code.
 *
 * The node may have an active buffer reference (ondisk != NULL) even
 * if the node itself has no references.
 *
 * Note that a caller iterating through nodes via a buffer must have its
 * own reference on the buffer or our hammer_rel_buffer() call below may
 * rip it out from under the caller.
 */
void
hammer_flush_node(hammer_node_t node)
{
        hammer_buffer_t buffer;

        if (node->cache1)
                *node->cache1 = NULL;
        if (node->cache2)
                *node->cache2 = NULL;
        if (node->lock.refs == 0) {
                RB_REMOVE(hammer_nod_rb_tree, &node->cluster->rb_nods_root,
                          node);
                if ((buffer = node->buffer) != NULL) {
                        node->buffer = NULL;
                        hammer_remove_node_clist(buffer, node);
                        if (node->ondisk) {
                                node->ondisk = NULL;
                                hammer_rel_buffer(buffer, 0);
                        }
                }
                kfree(node, M_HAMMER);
        }
}

/*
 * Remove a node from the buffer's clist.  Adjust save_scan as appropriate.
 * This is in its own little routine to properly handle interactions with
 * save_scan, so it is possible to block while scanning a buffer's node list.
 */
static
void
hammer_remove_node_clist(hammer_buffer_t buffer, hammer_node_t node)
{
        if (buffer->save_scan == node)
                buffer->save_scan = TAILQ_NEXT(node, entry);
        TAILQ_REMOVE(&buffer->clist, node, entry);
}
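
/*
 * Illustrative interleaving (assumed, for clarity): if hammer_flush_node()
 * blocks while hammer_flush_buffer_nodes() has its iterator parked on a
 * node via buffer->save_scan, and that node is removed concurrently, the
 * removal above advances save_scan to TAILQ_NEXT() of the removed node so
 * the interrupted scan resumes safely instead of chasing freed memory.
 */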

/************************************************************************
 *                              A-LIST ALLOCATORS                       *
 ************************************************************************/

/*
 * Allocate HAMMER elements - btree nodes, data storage, and record elements
 *
 * The passed *bufferp should be initialized to NULL.  On successive calls
 * *bufferp caches the most recent buffer used until put away by the caller.
 * Note that previously returned pointers using the cached buffer become
 * invalid on successive calls which reuse *bufferp.
 *
 * All allocations first attempt to use the block found at the specified
 * iterator.  If that fails the first available block is used.  If that
 * fails a new buffer is allocated and associated with the buffer type
 * A-list and the element is allocated out of the new buffer.
 */
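
/*
 * Sketch of the *bufferp protocol described above (an assumed caller, for
 * illustration; record contents elided):
 *
 *      struct hammer_buffer *buffer = NULL;
 *      union hammer_record_ondisk *rec;
 *      int error;
 *
 *      rec = hammer_alloc_record(cluster, &error, &buffer);
 *      if (rec) {
 *              ... fill in the record; the pointer is only valid until
 *              the next call that reuses *bufferp ...
 *      }
 *      if (buffer)
 *              hammer_rel_buffer(buffer, 0);
 */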

hammer_node_t
hammer_alloc_btree(hammer_cluster_t cluster, int *errorp)
{
        hammer_buffer_t buffer;
        hammer_alist_t live;
        hammer_node_t node;
        int32_t elm_no;
        int32_t buf_no;
        int32_t node_offset;

        /*
         * Allocate a B-Tree element
         */
        buffer = NULL;
        live = &cluster->alist_btree;
        elm_no = hammer_alist_alloc_fwd(live, 1, cluster->ondisk->idx_index);
        if (elm_no == HAMMER_ALIST_BLOCK_NONE)
                elm_no = hammer_alist_alloc_fwd(live, 1, 0);
        if (elm_no == HAMMER_ALIST_BLOCK_NONE) {
                alloc_new_buffer(cluster, live,
                                 HAMMER_FSBUF_BTREE, HAMMER_BTREE_NODES,
                                 cluster->ondisk->idx_index, errorp, &buffer);
                elm_no = hammer_alist_alloc(live, 1);
                if (elm_no == HAMMER_ALIST_BLOCK_NONE) {
                        *errorp = ENOSPC;
                        if (buffer)
                                hammer_rel_buffer(buffer, 0);
                        return(NULL);
                }
        }
        cluster->ondisk->idx_index = elm_no;

        /*
         * Load and return the B-Tree element.  hammer_get_node() returns
         * NULL with the node already destroyed on error, so only the
         * A-list allocation has to be backed out here.
         */
        buf_no = elm_no / HAMMER_FSBUF_MAXBLKS;
        node_offset = buf_no * HAMMER_BUFSIZE +
                      offsetof(union hammer_fsbuf_ondisk,
                               btree.nodes[elm_no & HAMMER_FSBUF_BLKMASK]);
        node = hammer_get_node(cluster, node_offset, errorp);
        if (node) {
                bzero(node->ondisk, sizeof(*node->ondisk));
        } else {
                hammer_alist_free(live, elm_no, 1);
        }
        if (buffer)
                hammer_rel_buffer(buffer, 0);
        return(node);
}
1381
1382 void *
1383 hammer_alloc_data(hammer_cluster_t cluster, int32_t bytes,
1384                   int *errorp, struct hammer_buffer **bufferp)
1385 {
1386         hammer_buffer_t buffer;
1387         hammer_alist_t live;
1388         int32_t elm_no;
1389         int32_t buf_no;
1390         int32_t nblks;
1391         void *item;
1392
1393         /*
1394          * Allocate a data element
1395          */
1396         nblks = (bytes + HAMMER_DATA_BLKMASK) & ~HAMMER_DATA_BLKMASK;
1397         live = &cluster->alist_mdata;
1398         elm_no = hammer_alist_alloc_fwd(live, nblks, cluster->ondisk->idx_data);
1399         if (elm_no == HAMMER_ALIST_BLOCK_NONE)
1400                 elm_no = hammer_alist_alloc_fwd(live, 1, 0);
1401         if (elm_no == HAMMER_ALIST_BLOCK_NONE) {
1402                 alloc_new_buffer(cluster, live,
1403                                  HAMMER_FSBUF_DATA, HAMMER_DATA_NODES,
1404                                  cluster->ondisk->idx_data, errorp, bufferp);
1405                 elm_no = hammer_alist_alloc(live, nblks);
1406                 if (elm_no == HAMMER_ALIST_BLOCK_NONE) {
1407                         *errorp = ENOSPC;
1408                         return(NULL);
1409                 }
1410         }
1411         cluster->ondisk->idx_data = elm_no;
1412
1413         /*
1414          * Load and return the data element
1415          */
1416         buf_no = elm_no / HAMMER_FSBUF_MAXBLKS;
1417         buffer = *bufferp;
1418         if (buffer == NULL || buffer->cluster != cluster ||
1419             buffer->buf_no != buf_no) {
1420                 if (buffer)
1421                         hammer_rel_buffer(buffer, 0);
1422                 buffer = hammer_get_buffer(cluster, buf_no, 0, errorp);
1423                 *bufferp = buffer;
1424         }
1425         KKASSERT(buffer->ondisk->head.buf_type == HAMMER_FSBUF_DATA);
1426         item = &buffer->ondisk->data.data[elm_no & HAMMER_FSBUF_BLKMASK];
1427         bzero(item, nblks * HAMMER_DATA_BLKSIZE);
1428         *errorp = 0;
1429         return(item);
1430 }
1431
1432 void *
1433 hammer_alloc_record(hammer_cluster_t cluster,
1434                     int *errorp, struct hammer_buffer **bufferp)
1435 {
1436         hammer_buffer_t buffer;
1437         hammer_alist_t live;
1438         int32_t elm_no;
1439         int32_t buf_no;
1440         void *item;
1441
1442         /*
1443          * Allocate a record element
1444          */
1445         live = &cluster->alist_record;
1446         elm_no = hammer_alist_alloc_rev(live, 1, cluster->ondisk->idx_record);
1447         if (elm_no == HAMMER_ALIST_BLOCK_NONE)
1448                 elm_no = hammer_alist_alloc_rev(live, 1,HAMMER_ALIST_BLOCK_MAX);
1449         if (elm_no == HAMMER_ALIST_BLOCK_NONE) {
1450                 alloc_new_buffer(cluster, live,
1451                                  HAMMER_FSBUF_RECORDS, HAMMER_RECORD_NODES,
1452                                  cluster->ondisk->idx_record, errorp, bufferp);
1453                 elm_no = hammer_alist_alloc(live, 1);
1454                 if (elm_no == HAMMER_ALIST_BLOCK_NONE) {
1455                         *errorp = ENOSPC;
1456                         return(NULL);
1457                 }
1458         }
1459         cluster->ondisk->idx_record = elm_no;
1460
1461         /*
1462          * Load and return the record element
1463          */
1464         buf_no = elm_no / HAMMER_FSBUF_MAXBLKS;
1465         buffer = *bufferp;
1466         if (buffer == NULL || buffer->cluster != cluster ||
1467             buffer->buf_no != buf_no) {
1468                 if (buffer)
1469                         hammer_rel_buffer(buffer, 0);
1470                 buffer = hammer_get_buffer(cluster, buf_no, 0, errorp);
1471                 *bufferp = buffer;
1472         }
1473         KKASSERT(buffer->ondisk->head.buf_type == HAMMER_FSBUF_RECORDS);
1474         item = &buffer->ondisk->record.recs[elm_no & HAMMER_FSBUF_BLKMASK];
1475         bzero(item, sizeof(union hammer_record_ondisk));
1476         *errorp = 0;
1477         return(item);
1478 }
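
/*
 * Caller-side sketch (illustrative only, compiled out): the allocators
 * cache the underlying buffer reference in *bufferp so that consecutive
 * allocations hitting the same filesystem buffer avoid repeated
 * lookups.  The caller starts with a NULL buffer pointer and owns the
 * final reference.
 */
#if 0
static void
example_record_allocation(hammer_cluster_t cluster)
{
	struct hammer_buffer *buffer = NULL;
	union hammer_record_ondisk *rec;
	int error;
	int i;

	for (i = 0; i < 4; ++i) {
		rec = hammer_alloc_record(cluster, &error, &buffer);
		if (rec == NULL)
			break;	/* error holds ENOSPC, etc. */
		/* ... fill in the record ... */
	}
	if (buffer)
		hammer_rel_buffer(buffer, 0);
}
#endif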
1479
1480 /*
1481  * Free HAMMER elements based on either a hammer_buffer and element pointer
1482  * or a cluster-relative byte offset (usage sketch below).
1483  */
1484 void
1485 hammer_free_btree_ptr(hammer_buffer_t buffer, hammer_node_ondisk_t node)
1486 {
1487         int32_t elm_no;
1488         hammer_alist_t live;
1489
1490         elm_no = node - &buffer->ondisk->btree.nodes[0];
1491         KKASSERT(elm_no >= 0 && elm_no < HAMMER_BTREE_NODES);
1492         elm_no += buffer->buf_no * HAMMER_FSBUF_MAXBLKS;
1493         live = &buffer->cluster->alist_btree;
1494         hammer_alist_free(live, elm_no, 1);
1495 }
1496
1497 void
1498 hammer_free_data_ptr(hammer_buffer_t buffer, void *data, int bytes)
1499 {
1500         int32_t elm_no;
1501         int32_t nblks;
1502         hammer_alist_t live;
1503
1504         elm_no = ((char *)data - (char *)buffer->ondisk->data.data) /
1505                  HAMMER_DATA_BLKSIZE;
1506         KKASSERT(elm_no >= 0 && elm_no < HAMMER_DATA_NODES);
1507         elm_no += buffer->buf_no * HAMMER_FSBUF_MAXBLKS;
1508         nblks = (bytes + HAMMER_DATA_BLKMASK) & ~HAMMER_DATA_BLKMASK;
             nblks /= HAMMER_DATA_BLKSIZE;
1509         live = &buffer->cluster->alist_mdata;
1510         hammer_alist_free(live, elm_no, nblks);
1511 }
1512
1513 void
1514 hammer_free_record_ptr(hammer_buffer_t buffer, union hammer_record_ondisk *rec)
1515 {
1516         int32_t elm_no;
1517         hammer_alist_t live;
1518
1519         elm_no = rec - &buffer->ondisk->record.recs[0];
1520         KKASSERT(elm_no >= 0 && elm_no < HAMMER_RECORD_NODES);
1521         elm_no += buffer->buf_no * HAMMER_FSBUF_MAXBLKS;
1522         live = &buffer->cluster->alist_record;
1523         hammer_alist_free(live, elm_no, 1);
1524 }
1525
1526 void
1527 hammer_free_btree(hammer_cluster_t cluster, int32_t bclu_offset)
1528 {
1529         const int32_t blksize = sizeof(struct hammer_node_ondisk);
1530         int32_t fsbuf_offset = bclu_offset & HAMMER_BUFMASK;
1531         hammer_alist_t live;
1532         int32_t elm_no;
1533
1534         elm_no = bclu_offset / HAMMER_BUFSIZE * HAMMER_FSBUF_MAXBLKS;
1535         fsbuf_offset -= offsetof(union hammer_fsbuf_ondisk, btree.nodes[0]);
1536         live = &cluster->alist_btree;
1537         KKASSERT(fsbuf_offset >= 0 && fsbuf_offset % blksize == 0);
1538         elm_no += fsbuf_offset / blksize;
1539         hammer_alist_free(live, elm_no, 1);
1540 }
1541
1542 void
1543 hammer_free_data(hammer_cluster_t cluster, int32_t bclu_offset, int32_t bytes)
1544 {
1545         const int32_t blksize = HAMMER_DATA_BLKSIZE;
1546         int32_t fsbuf_offset = bclu_offset & HAMMER_BUFMASK;
1547         hammer_alist_t live;
1548         int32_t elm_no;
1549         int32_t nblks;
1550
1551         elm_no = bclu_offset / HAMMER_BUFSIZE * HAMMER_FSBUF_MAXBLKS;
1552         fsbuf_offset -= offsetof(union hammer_fsbuf_ondisk, data.data[0][0]);
1553         live = &cluster->alist_mdata;
1554         nblks = (bytes + HAMMER_DATA_BLKMASK) & ~HAMMER_DATA_BLKMASK;
             nblks /= HAMMER_DATA_BLKSIZE;
1555         KKASSERT(fsbuf_offset >= 0 && fsbuf_offset % blksize == 0);
1556         elm_no += fsbuf_offset / blksize;
1557         hammer_alist_free(live, elm_no, nblks);
1558 }
1559
1560 void
1561 hammer_free_record(hammer_cluster_t cluster, int32_t bclu_offset)
1562 {
1563         const int32_t blksize = sizeof(union hammer_record_ondisk);
1564         int32_t fsbuf_offset = bclu_offset & HAMMER_BUFMASK;
1565         hammer_alist_t live;
1566         int32_t elm_no;
1567
1568         elm_no = bclu_offset / HAMMER_BUFSIZE * HAMMER_FSBUF_MAXBLKS;
1569         fsbuf_offset -= offsetof(union hammer_fsbuf_ondisk, record.recs[0]);
1570         live = &cluster->alist_record;
1571         KKASSERT(fsbuf_offset >= 0 && fsbuf_offset % blksize == 0);
1572         elm_no += fsbuf_offset / blksize;
1573         hammer_alist_free(live, elm_no, 1);
1574 }
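
/*
 * Usage sketch (illustrative only, compiled out): the two equivalent
 * freeing styles.  A caller holding the buffer and element pointer uses
 * the _ptr variant; a caller holding only a cluster-relative byte
 * offset uses the offset variant.  Both resolve to the same A-list
 * element, so exactly one of them is used per allocation.
 */
#if 0
static void
example_free_styles(hammer_cluster_t cluster, hammer_buffer_t buffer,
		    union hammer_record_ondisk *rec, int32_t bclu_offset)
{
	if (buffer)
		hammer_free_record_ptr(buffer, rec);	  /* style 1 */
	else
		hammer_free_record(cluster, bclu_offset); /* style 2 */
}
#endif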
1575
1576
1577 /*
1578  * Allocate a new filesystem buffer and assign it to the specified
1579  * filesystem buffer type.  The new buffer will be added to the
1580  * type-specific A-list and initialized.
1581  */
1582 static void
1583 alloc_new_buffer(hammer_cluster_t cluster, hammer_alist_t live,
1584                  u_int64_t type, int32_t nelements,
1585                  int start, int *errorp, struct hammer_buffer **bufferp)
1586 {
1587         hammer_buffer_t buffer;
1588         int32_t buf_no;
1589
1590         start = start / HAMMER_FSBUF_MAXBLKS;   /* convert to buf_no */
1591
1592         if (type == HAMMER_FSBUF_RECORDS) {
1593                 buf_no = hammer_alist_alloc_rev(&cluster->alist_master,
1594                                                 1, start);
1595                 if (buf_no == HAMMER_ALIST_BLOCK_NONE) {
1596                         buf_no = hammer_alist_alloc_rev(&cluster->alist_master,
1597                                                 1, HAMMER_ALIST_BLOCK_MAX);
1598                 }
1599         } else {
1600                 buf_no = hammer_alist_alloc_fwd(&cluster->alist_master,
1601                                                 1, start);
1602                 if (buf_no == HAMMER_ALIST_BLOCK_NONE) {
1603                         buf_no = hammer_alist_alloc_fwd(&cluster->alist_master,
1604                                                 1, 0);
1605                 }
1606         }
1607         KKASSERT(buf_no != HAMMER_ALIST_BLOCK_NONE); /* XXX */
1608
1609         /*
1610          * The new buffer must be initialized (type != 0) regardless of
1611          * whether we already have it cached or not, so don't try to
1612          * optimize the cached buffer check.  Just call hammer_get_buffer().
1613          */
1614         buffer = hammer_get_buffer(cluster, buf_no, type, errorp);
1615         if (*bufferp)
1616                 hammer_rel_buffer(*bufferp, 0);
1617         *bufferp = buffer;
1618 }
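
/*
 * Direction note: records allocate new buffers top-down (reverse) while
 * B-Tree nodes and data allocate bottom-up (forward), so the two
 * allocation fronts tend to grow toward each other and the unused
 * buffers remain between them:
 *
 *	[cluster hdr][btree/data ->	...	<- records]
 */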
1619
1620 #if 0
1621
1626 /*
1627  * Flush various tracking structures to disk
1628  */
1629 void
1630 flush_all_volumes(void)
1631 {
1632         hammer_volume_t vol;
1633
1634         for (vol = VolBase; vol; vol = vol->next)
1635                 flush_volume(vol);
1636 }
1637
1638 void
1639 flush_volume(hammer_volume_t vol)
1640 {
1641         hammer_supercl_t supercl;
1642         hammer_cluster_t cl;
1643
1644         for (supercl = vol->supercl_base; supercl; supercl = supercl->next)
1645                 flush_supercl(supercl);
1646         for (cl = vol->cluster_base; cl; cl = cl->next)
1647                 flush_cluster(cl);
1648         writehammerbuf(vol, vol->ondisk, 0);
1649 }
1650
1651 void
1652 flush_supercl(hammer_supercl_t supercl)
1653 {
1654         int64_t supercl_offset;
1655
1656         supercl_offset = supercl->scl_offset;
1657         writehammerbuf(supercl->volume, supercl->ondisk, supercl_offset);
1658 }
1659
1660 void
1661 flush_cluster(hammer_cluster_t cl)
1662 {
1663         hammer_buffer_t buf;
1664         int64_t cluster_offset;
1665
1666         for (buf = cl->buffer_base; buf; buf = buf->next)
1667                 flush_buffer(buf);
1668         cluster_offset = cl->clu_offset;
1669         writehammerbuf(cl->volume, cl->ondisk, cluster_offset);
1670 }
1671
1672 void
1673 flush_buffer(hammer_buffer_t buf)
1674 {
1675         int64_t buffer_offset;
1676
1677         buffer_offset = buf->buf_offset + buf->cluster->clu_offset;
1678         writehammerbuf(buf->volume, buf->ondisk, buffer_offset);
1679 }
1680
1681 #endif
1682
1683 /*
1684  * Generic buffer initialization
1685  */
1686 static void
1687 initbuffer(hammer_alist_t live, hammer_fsbuf_head_t head, u_int64_t type)
1688 {
1689         head->buf_type = type;
1690         hammer_alist_init(live);
1691 }
1692
1693 /*
1694  * Calculate the cluster's offset in the volume.  This calculation is
1695  * slightly more complex when using superclusters because superclusters
1696  * are grouped in blocks of 16, followed by 16 x N clusters where N
1697  * is the number of clusters a supercluster can manage.
1698  */
1699 static int64_t
1700 calculate_cluster_offset(hammer_volume_t volume, int32_t clu_no)
1701 {
1702         int32_t scl_group;
1703         int64_t scl_group_size;
1704         int64_t off;
1705
1706         if (volume->vol_flags & HAMMER_VOLF_USINGSUPERCL) {
1707                 scl_group = clu_no / HAMMER_VOL_SUPERCLUSTER_GROUP /
1708                             HAMMER_SCL_MAXCLUSTERS;
1709                 scl_group_size = 
1710                             ((int64_t)HAMMER_BUFSIZE *
1711                              HAMMER_VOL_SUPERCLUSTER_GROUP) +
1712                             ((int64_t)HAMMER_VOL_SUPERCLUSTER_GROUP *
1713                              volume->vol_clsize * HAMMER_SCL_MAXCLUSTERS);
1714                 scl_group_size += 
1715                             HAMMER_VOL_SUPERCLUSTER_GROUP * HAMMER_BUFSIZE;
1716
1717                 off = volume->cluster_base +
1718                       scl_group * scl_group_size +
1719                       (HAMMER_BUFSIZE * HAMMER_VOL_SUPERCLUSTER_GROUP) +
1720                       ((int64_t)clu_no % ((int64_t)HAMMER_SCL_MAXCLUSTERS *
1721                        HAMMER_VOL_SUPERCLUSTER_GROUP)) *
1722                       volume->vol_clsize;
1723         } else {
1724                 off = volume->cluster_base +
1725                       (int64_t)clu_no * volume->vol_clsize;
1726         }
1727         return(off);
1728 }
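
/*
 * Worked form of the supercluster case above (illustrative): with
 * groups of 16 superclusters and N = HAMMER_SCL_MAXCLUSTERS clusters
 * per supercluster, a cluster number C resolves to:
 *
 *	scl_group = C / (16 * N)
 *	off = cluster_base
 *	    + scl_group * scl_group_size	(whole groups skipped)
 *	    + 16 * HAMMER_BUFSIZE		(the group's superclusters)
 *	    + (C % (16 * N)) * vol_clsize	(cluster within the group)
 */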
1729
1730 /*
1731  * Calculate a super-cluster's offset in the volume.
1732  */
1733 static int64_t
1734 calculate_supercl_offset(hammer_volume_t volume, int32_t scl_no)
1735 {
1736         int64_t off;
1737         int32_t scl_group;
1738         int64_t scl_group_size;
1739
1740         KKASSERT (volume->vol_flags & HAMMER_VOLF_USINGSUPERCL);
1741         scl_group = scl_no / HAMMER_VOL_SUPERCLUSTER_GROUP;
1742         if (scl_group) {
1743                 scl_group_size = 
1744                             ((int64_t)HAMMER_BUFSIZE *
1745                              HAMMER_VOL_SUPERCLUSTER_GROUP) +
1746                             ((int64_t)HAMMER_VOL_SUPERCLUSTER_GROUP *
1747                              volume->vol_clsize * HAMMER_SCL_MAXCLUSTERS);
1748                 scl_group_size += 
1749                             HAMMER_VOL_SUPERCLUSTER_GROUP * HAMMER_BUFSIZE;
1750                 off = volume->cluster_base + (scl_group * scl_group_size) +
1751                       (scl_no % HAMMER_VOL_SUPERCLUSTER_GROUP) * HAMMER_BUFSIZE;
1752         } else {
1753                 off = volume->cluster_base + (scl_no * HAMMER_BUFSIZE);
1754         }
1755         return(off);
1756 }
1757
1758 /*
1759  * A-LIST SUPPORT
1760  *
1761  * Setup the parameters for the various A-lists we use in hammer.  The
1762  * supercluster A-list must be chained to the cluster A-list and cluster
1763  * slave A-lists are chained to buffer A-lists.
1764  *
1765  * See hammer_init_alist_config() below.
1766  */
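
/*
 * Layer-splitting sketch (illustrative only, compiled out): each
 * recursion callback converts a block number in the parent A-list's
 * address space into an index for the child structure it covers.
 */
#if 0
static void
example_layer_split(int32_t clu_blk, int32_t buf_blk)
{
	/* volume A-list -> supercluster: which supercl covers a cluster */
	int32_t scl_no = clu_blk / HAMMER_SCL_MAXCLUSTERS;

	/* cluster slave A-list -> buffer: which buffer covers an element */
	int32_t buf_no = buf_blk / HAMMER_FSBUF_MAXBLKS;
}
#endif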
1767
1768 /*
1769  * A-LIST - cluster recursion into a filesystem buffer
1770  */
1771 static int
1772 buffer_alist_init(void *info, int32_t blk, int32_t radix)
1773 {
1774         hammer_cluster_t cluster = info;
1775         hammer_buffer_t buffer;
1776         int32_t buf_no;
1777         int error = 0;
1778
1779         /*
1780          * Calculate the buffer number.  The buffer has already been
1781          * allocated, so obtaining it here simply instantiates the
1782          * in-memory structures; its type was set when it was allocated.
1783          */
1784         buf_no = blk / HAMMER_FSBUF_MAXBLKS;
1785         buffer = hammer_get_buffer(cluster, buf_no, 0, &error);
1786         if (buffer)
1787                 hammer_rel_buffer(buffer, 0);
1788         return (error);
1789 }
1790
1791 static int
1792 buffer_alist_destroy(void *info, int32_t blk, int32_t radix)
1793 {
1794         return (0);
1795 }
1796
1797 static int
1798 buffer_alist_alloc_fwd(void *info, int32_t blk, int32_t radix,
1799                       int32_t count, int32_t atblk, int32_t *fullp)
1800 {
1801         hammer_cluster_t cluster = info;
1802         hammer_buffer_t buffer;
1803         int32_t buf_no;
1804         int32_t r;
1805         int error = 0;
1806
1807         buf_no = blk / HAMMER_FSBUF_MAXBLKS;
1808         buffer = hammer_get_buffer(cluster, buf_no, 0, &error);
1809         if (buffer) {
1810                 KKASSERT(buffer->ondisk->head.buf_type != 0);
1811
1812                 r = hammer_alist_alloc_fwd(&buffer->alist, count, atblk - blk);
1813                 if (r != HAMMER_ALIST_BLOCK_NONE)
1814                         r += blk;
1815                 *fullp = hammer_alist_isfull(&buffer->alist);
1816                 hammer_rel_buffer(buffer, 0);
1817         } else {
1818                 r = HAMMER_ALIST_BLOCK_NONE;
                     *fullp = 0;
1819         }
1820         return(r);
1821 }
1822
1823 static int
1824 buffer_alist_alloc_rev(void *info, int32_t blk, int32_t radix,
1825                       int32_t count, int32_t atblk, int32_t *fullp)
1826 {
1827         hammer_cluster_t cluster = info;
1828         hammer_buffer_t buffer;
1829         int32_t buf_no;
1830         int32_t r;
1831         int error = 0;
1832
1833         buf_no = blk / HAMMER_FSBUF_MAXBLKS;
1834         buffer = hammer_get_buffer(cluster, buf_no, 0, &error);
1835         if (buffer) {
1836                 KKASSERT(buffer->ondisk->head.buf_type != 0);
1837
1838                 r = hammer_alist_alloc_rev(&buffer->alist, count, atblk - blk);
1839                 if (r != HAMMER_ALIST_BLOCK_NONE)
1840                         r += blk;
1841                 *fullp = hammer_alist_isfull(&buffer->alist);
1842                 hammer_rel_buffer(buffer, 0);
1843         } else {
1844                 r = HAMMER_ALIST_BLOCK_NONE;
1845                 *fullp = 0;
1846         }
1847         return(r);
1848 }
1849
1850 static void
1851 buffer_alist_free(void *info, int32_t blk, int32_t radix,
1852                  int32_t base_blk, int32_t count, int32_t *emptyp)
1853 {
1854         hammer_cluster_t cluster = info;
1855         hammer_buffer_t buffer;
1856         int32_t buf_no;
1857         int error = 0;
1858
1859         buf_no = blk / HAMMER_FSBUF_MAXBLKS;
1860         buffer = hammer_get_buffer(cluster, buf_no, 0, &error);
1861         if (buffer) {
1862                 KKASSERT(buffer->ondisk->head.buf_type != 0);
1863                 hammer_alist_free(&buffer->alist, base_blk, count);
1864                 *emptyp = hammer_alist_isempty(&buffer->alist);
1865                 hammer_rel_buffer(buffer, 0);
1866         } else {
1867                 *emptyp = 0;
1868         }
1869 }
1870
1871 static void
1872 buffer_alist_print(void *info, int32_t blk, int32_t radix, int tab)
1873 {
1874 }
1875
1876 /*
1877  * A-LIST - super-cluster recursion into a cluster and cluster recursion
1878  * into a filesystem buffer.  A-List's are mostly self-contained entities,
1879  * but callbacks must be installed to recurse from one A-List to another.
1880  *
1881  * Implementing these callbacks allows us to operate a multi-layered A-List
1882  * as a single entity.
1883  */
1884 static int
1885 super_alist_init(void *info, int32_t blk, int32_t radix)
1886 {
1887         hammer_volume_t volume = info;
1888         hammer_supercl_t supercl;
1889         int32_t scl_no;
1890         int error = 0;
1891
1892         /*
1893          * Calculate the super-cluster number containing the cluster (blk)
1894          * and obtain the super-cluster buffer.
1895          */
1896         scl_no = blk / HAMMER_SCL_MAXCLUSTERS;
1897         supercl = hammer_get_supercl(volume, scl_no, &error, 1);
1898         if (supercl)
1899                 hammer_rel_supercl(supercl, 0);
1900         return (error);
1901 }
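
/*
 * Call-flow sketch (illustrative): an allocation against the volume's
 * two-layer A-list lands in the callbacks installed by
 * hammer_init_alist_config().  Note how "atblk - blk" rebases the
 * allocation hint from the parent's address space into the child's:
 *
 *	hammer_alist_alloc_fwd(volume-level A-list, count, atblk)
 *	  -> super_alist_alloc_fwd(volume, blk, radix, count, atblk, &full)
 *	       -> hammer_alist_alloc_fwd(&supercl->alist, count, atblk - blk)
 */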
1902
1903 static int
1904 super_alist_destroy(void *info, int32_t blk, int32_t radix)
1905 {
1906         return(0);
1907 }
1908
1909 static int
1910 super_alist_alloc_fwd(void *info, int32_t blk, int32_t radix,
1911                       int32_t count, int32_t atblk, int32_t *fullp)
1912 {
1913         hammer_volume_t volume = info;
1914         hammer_supercl_t supercl;
1915         int32_t scl_no;
1916         int32_t r;
1917         int error = 0;
1918
1919         scl_no = blk / HAMMER_SCL_MAXCLUSTERS;
1920         supercl = hammer_get_supercl(volume, scl_no, &error, 1);
1921         if (supercl) {
1922                 r = hammer_alist_alloc_fwd(&supercl->alist, count, atblk - blk);
1923                 if (r != HAMMER_ALIST_BLOCK_NONE)
1924                         r += blk;
1925                 *fullp = hammer_alist_isfull(&supercl->alist);
1926                 hammer_rel_supercl(supercl, 0);
1927         } else {
1928                 r = HAMMER_ALIST_BLOCK_NONE;
1929                 *fullp = 0;
1930         }
1931         return(r);
1932 }
1933
1934 static int
1935 super_alist_alloc_rev(void *info, int32_t blk, int32_t radix,
1936                       int32_t count, int32_t atblk, int32_t *fullp)
1937 {
1938         hammer_volume_t volume = info;
1939         hammer_supercl_t supercl;
1940         int32_t scl_no;
1941         int32_t r;
1942         int error = 0;
1943
1944         scl_no = blk / HAMMER_SCL_MAXCLUSTERS;
1945         supercl = hammer_get_supercl(volume, scl_no, &error, 1);
1946         if (supercl) {
1947                 r = hammer_alist_alloc_rev(&supercl->alist, count, atblk - blk);
1948                 if (r != HAMMER_ALIST_BLOCK_NONE)
1949                         r += blk;
1950                 *fullp = hammer_alist_isfull(&supercl->alist);
1951                 hammer_rel_supercl(supercl, 0);
1952         } else {
1953                 r = HAMMER_ALIST_BLOCK_NONE;
1954                 *fullp = 0;
1955         }
1956         return(r);
1957 }
1958
1959 static void
1960 super_alist_free(void *info, int32_t blk, int32_t radix,
1961                  int32_t base_blk, int32_t count, int32_t *emptyp)
1962 {
1963         hammer_volume_t volume = info;
1964         hammer_supercl_t supercl;
1965         int32_t scl_no;
1966         int error = 0;
1967
1968         scl_no = blk / HAMMER_SCL_MAXCLUSTERS;
1969         supercl = hammer_get_supercl(volume, scl_no, &error, 1);
1970         if (supercl) {
1971                 hammer_alist_free(&supercl->alist, base_blk, count);
1972                 *emptyp = hammer_alist_isempty(&supercl->alist);
1973                 hammer_rel_supercl(supercl, 0);
1974         } else {
1975                 *emptyp = 0;
1976         }
1977 }
1978
1979 static void
1980 super_alist_print(void *info, int32_t blk, int32_t radix, int tab)
1981 {
1982 }
1983
1984 void
1985 hammer_init_alist_config(void)
1986 {
1987         hammer_alist_config_t config;
1988
1989         hammer_alist_template(&Buf_alist_config, HAMMER_FSBUF_MAXBLKS,
1990                               1, HAMMER_FSBUF_METAELMS);
1991         hammer_alist_template(&Vol_normal_alist_config, HAMMER_VOL_MAXCLUSTERS,
1992                               1, HAMMER_VOL_METAELMS_1LYR);
1993         hammer_alist_template(&Vol_super_alist_config,
1994                               HAMMER_VOL_MAXSUPERCLUSTERS,
1995                               HAMMER_SCL_MAXCLUSTERS, HAMMER_VOL_METAELMS_2LYR);
1996         hammer_alist_template(&Supercl_alist_config, HAMMER_VOL_MAXCLUSTERS,
1997                               1, HAMMER_SUPERCL_METAELMS);
1998         hammer_alist_template(&Clu_master_alist_config, HAMMER_CLU_MAXBUFFERS,
1999                               1, HAMMER_CLU_MASTER_METAELMS);
2000         hammer_alist_template(&Clu_slave_alist_config, HAMMER_CLU_MAXBUFFERS,
2001                               HAMMER_FSBUF_MAXBLKS, HAMMER_CLU_SLAVE_METAELMS);
2002
2003         config = &Vol_super_alist_config;
2004         config->bl_radix_init = super_alist_init;
2005         config->bl_radix_destroy = super_alist_destroy;
2006         config->bl_radix_alloc_fwd = super_alist_alloc_fwd;
2007         config->bl_radix_alloc_rev = super_alist_alloc_rev;
2008         config->bl_radix_free = super_alist_free;
2009         config->bl_radix_print = super_alist_print;
2010
2011         config = &Clu_slave_alist_config;
2012         config->bl_radix_init = buffer_alist_init;
2013         config->bl_radix_destroy = buffer_alist_destroy;
2014         config->bl_radix_alloc_fwd = buffer_alist_alloc_fwd;
2015         config->bl_radix_alloc_rev = buffer_alist_alloc_rev;
2016         config->bl_radix_free = buffer_alist_free;
2017         config->bl_radix_print = buffer_alist_print;
2018 }
2019