1 /*
2  * Copyright (c) 2007 The DragonFly Project.  All rights reserved.
3  * 
4  * This code is derived from software contributed to The DragonFly Project
5  * by Matthew Dillon <dillon@backplane.com>
6  * 
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions
9  * are met:
10  * 
11  * 1. Redistributions of source code must retain the above copyright
12  *    notice, this list of conditions and the following disclaimer.
13  * 2. Redistributions in binary form must reproduce the above copyright
14  *    notice, this list of conditions and the following disclaimer in
15  *    the documentation and/or other materials provided with the
16  *    distribution.
17  * 3. Neither the name of The DragonFly Project nor the names of its
18  *    contributors may be used to endorse or promote products derived
19  *    from this software without specific, prior written permission.
20  * 
21  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
22  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
23  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
24  * FOR A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE
25  * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
26  * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING,
27  * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
28  * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
29  * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
30  * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
31  * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
32  * SUCH DAMAGE.
33  * 
34  * $DragonFly: src/sys/vfs/hammer/hammer_ondisk.c,v 1.20 2008/01/11 05:45:19 dillon Exp $
35  */
36 /*
37  * Manage HAMMER's on-disk structures.  These routines are primarily
38  * responsible for interfacing with the kernel's I/O subsystem and for
39  * managing in-memory structures.
40  */
41
42 #include "hammer.h"
43 #include <sys/fcntl.h>
44 #include <sys/nlookup.h>
45 #include <sys/buf.h>
46 #include <sys/buf2.h>
47
48 static void hammer_free_volume(hammer_volume_t volume);
49 static int hammer_load_volume(hammer_volume_t volume);
50 static int hammer_load_supercl(hammer_supercl_t supercl,
51                         hammer_alloc_state_t isnew);
52 static int hammer_load_cluster(hammer_cluster_t cluster,
53                         hammer_alloc_state_t isnew);
54 static int hammer_load_buffer(hammer_buffer_t buffer, u_int64_t buf_type);
55 static int hammer_load_node(hammer_node_t node);
56 static void alloc_new_buffer(hammer_cluster_t cluster, u_int64_t type,
57                         hammer_alist_t live,
58                         int32_t start, int *errorp,
59                         struct hammer_buffer **bufferp);
60 #if 0
61 static void readhammerbuf(hammer_volume_t vol, void *data,
62                         int64_t offset);
63 static void writehammerbuf(hammer_volume_t vol, const void *data,
64                         int64_t offset);
65 #endif
66 static int64_t calculate_cluster_offset(hammer_volume_t vol, int32_t clu_no);
67 static int64_t calculate_supercl_offset(hammer_volume_t vol, int32_t scl_no);
68 static int32_t hammer_alloc_master(hammer_cluster_t cluster, int nblks,
69                         int32_t start, int isfwd);
70 static void hammer_adjust_stats(hammer_cluster_t cluster,
71                         u_int64_t buf_type, int nblks);
72
73 struct hammer_alist_config Buf_alist_config;
74 struct hammer_alist_config Vol_normal_alist_config;
75 struct hammer_alist_config Vol_super_alist_config;
76 struct hammer_alist_config Supercl_alist_config;
77 struct hammer_alist_config Clu_master_alist_config;
78 struct hammer_alist_config Clu_slave_alist_config;
79
80 /*
81  * Red-Black tree support for various structures
82  */
83 static int
84 hammer_ino_rb_compare(hammer_inode_t ip1, hammer_inode_t ip2)
85 {
86         if (ip1->obj_id < ip2->obj_id)
87                 return(-1);
88         if (ip1->obj_id > ip2->obj_id)
89                 return(1);
90         if (ip1->obj_asof < ip2->obj_asof)
91                 return(-1);
92         if (ip1->obj_asof > ip2->obj_asof)
93                 return(1);
94         return(0);
95 }
96
97 static int
98 hammer_inode_info_cmp(hammer_inode_info_t info, hammer_inode_t ip)
99 {
100         if (info->obj_id < ip->obj_id)
101                 return(-1);
102         if (info->obj_id > ip->obj_id)
103                 return(1);
104         if (info->obj_asof < ip->obj_asof)
105                 return(-1);
106         if (info->obj_asof > ip->obj_asof)
107                 return(1);
108         return(0);
109 }
110
111 static int
112 hammer_vol_rb_compare(hammer_volume_t vol1, hammer_volume_t vol2)
113 {
114         if (vol1->vol_no < vol2->vol_no)
115                 return(-1);
116         if (vol1->vol_no > vol2->vol_no)
117                 return(1);
118         return(0);
119 }
120
121 static int
122 hammer_scl_rb_compare(hammer_supercl_t cl1, hammer_supercl_t cl2)
123 {
124         if (cl1->scl_no < cl2->scl_no)
125                 return(-1);
126         if (cl1->scl_no > cl2->scl_no)
127                 return(1);
128         return(0);
129 }
130
131 static int
132 hammer_clu_rb_compare(hammer_cluster_t cl1, hammer_cluster_t cl2)
133 {
134         if (cl1->clu_no < cl2->clu_no)
135                 return(-1);
136         if (cl1->clu_no > cl2->clu_no)
137                 return(1);
138         return(0);
139 }
140
141 static int
142 hammer_buf_rb_compare(hammer_buffer_t buf1, hammer_buffer_t buf2)
143 {
144         if (buf1->buf_no < buf2->buf_no)
145                 return(-1);
146         if (buf1->buf_no > buf2->buf_no)
147                 return(1);
148         return(0);
149 }
150
151 static int
152 hammer_nod_rb_compare(hammer_node_t node1, hammer_node_t node2)
153 {
154         if (node1->node_offset < node2->node_offset)
155                 return(-1);
156         if (node1->node_offset > node2->node_offset)
157                 return(1);
158         return(0);
159 }
160
161 /*
162  * Note: The lookup function for hammer_ino_rb_tree winds up being named
163  * hammer_ino_rb_tree_RB_LOOKUP_INFO(root, info).  The other lookup
164  * functions are normal, e.g. hammer_clu_rb_tree_RB_LOOKUP(root, clu_no).
165  */
166 RB_GENERATE(hammer_ino_rb_tree, hammer_inode, rb_node, hammer_ino_rb_compare);
167 RB_GENERATE_XLOOKUP(hammer_ino_rb_tree, INFO, hammer_inode, rb_node,
168                 hammer_inode_info_cmp, hammer_inode_info_t);
169 RB_GENERATE2(hammer_vol_rb_tree, hammer_volume, rb_node,
170              hammer_vol_rb_compare, int32_t, vol_no);
171 RB_GENERATE2(hammer_scl_rb_tree, hammer_supercl, rb_node,
172              hammer_scl_rb_compare, int32_t, scl_no);
173 RB_GENERATE2(hammer_clu_rb_tree, hammer_cluster, rb_node,
174              hammer_clu_rb_compare, int32_t, clu_no);
175 RB_GENERATE2(hammer_buf_rb_tree, hammer_buffer, rb_node,
176              hammer_buf_rb_compare, int32_t, buf_no);
177 RB_GENERATE2(hammer_nod_rb_tree, hammer_node, rb_node,
178              hammer_nod_rb_compare, int32_t, node_offset);
179
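/*
 * Illustrative sketch (added, not part of the original source): the
 * RB_GENERATE*() macros above emit the typed lookup/insert/scan functions
 * that the rest of this file uses through the RB_LOOKUP(), RB_INSERT()
 * and RB_SCAN() macros.  A hypothetical caller might locate structures
 * like this (the key values 0 and 1 are placeholders):
 */
#if 0
static void
example_rb_lookups(struct hammer_mount *hmp, hammer_volume_t volume)
{
        hammer_volume_t vol;
        hammer_cluster_t cluster;

        /* keyed lookup generated by RB_GENERATE2(..., int32_t, vol_no) */
        vol = RB_LOOKUP(hammer_vol_rb_tree, &hmp->rb_vols_root, 0);

        /* keyed lookup generated by RB_GENERATE2(..., int32_t, clu_no) */
        cluster = RB_LOOKUP(hammer_clu_rb_tree, &volume->rb_clus_root, 1);

        (void)vol;
        (void)cluster;
}
#endif
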
180 /************************************************************************
181  *                              VOLUMES                                 *
182  ************************************************************************
183  *
184  * Load a HAMMER volume by name.  Returns 0 on success or a positive error
185  * code on failure.  Volumes must be loaded at mount time, get_volume() will
186  * not load a new volume.
187  *
188  * Calls made to hammer_load_volume() are single-threaded
189  */
190 int
191 hammer_install_volume(struct hammer_mount *hmp, const char *volname)
192 {
193         struct mount *mp;
194         hammer_volume_t volume;
195         struct hammer_volume_ondisk *ondisk;
196         struct nlookupdata nd;
197         struct buf *bp = NULL;
198         int error;
199         int ronly;
200
201         mp = hmp->mp;
202         ronly = ((mp->mnt_flag & MNT_RDONLY) ? 1 : 0);
203
204         /*
205          * Allocate a volume structure
206          */
207         ++hammer_count_volumes;
208         volume = kmalloc(sizeof(*volume), M_HAMMER, M_WAITOK|M_ZERO);
209         volume->vol_name = kstrdup(volname, M_HAMMER);
210         volume->hmp = hmp;
211         hammer_io_init(&volume->io, HAMMER_STRUCTURE_VOLUME);
212         volume->io.offset = 0LL;
213
214         /*
215          * Get the device vnode
216          */
217         error = nlookup_init(&nd, volume->vol_name, UIO_SYSSPACE, NLC_FOLLOW);
218         if (error == 0)
219                 error = nlookup(&nd);
220         if (error == 0)
221                 error = cache_vref(&nd.nl_nch, nd.nl_cred, &volume->devvp);
222         nlookup_done(&nd);
223         if (error == 0) {
224                 vn_isdisk(volume->devvp, &error);
225         }
226         if (error == 0) {
227                 vn_lock(volume->devvp, LK_EXCLUSIVE | LK_RETRY);
228                 error = VOP_OPEN(volume->devvp, (ronly ? FREAD : FREAD|FWRITE),
229                                  FSCRED, NULL);
230                 vn_unlock(volume->devvp);
231         }
232         if (error) {
233                 hammer_free_volume(volume);
234                 return(error);
235         }
236
237         /*
238          * Extract the volume number from the volume header and do various
239          * sanity checks.
240          */
241         error = bread(volume->devvp, 0LL, HAMMER_BUFSIZE, &bp);
242         if (error)
243                 goto late_failure;
244         ondisk = (void *)bp->b_data;
245         if (ondisk->head.buf_type != HAMMER_FSBUF_VOLUME) {
246                 kprintf("hammer_mount: volume %s has an invalid header\n",
247                         volume->vol_name);
248                 error = EFTYPE;
249                 goto late_failure;
250         }
251         volume->vol_no = ondisk->vol_no;
252         volume->cluster_base = ondisk->vol_clo_beg;
253         volume->vol_clsize = ondisk->vol_clsize;
254         volume->vol_flags = ondisk->vol_flags;
255         volume->nblocks = ondisk->vol_nblocks; 
256         RB_INIT(&volume->rb_clus_root);
257         RB_INIT(&volume->rb_scls_root);
258
259         hmp->mp->mnt_stat.f_blocks += volume->nblocks;
260
261         if (RB_EMPTY(&hmp->rb_vols_root)) {
262                 hmp->fsid = ondisk->vol_fsid;
263         } else if (bcmp(&hmp->fsid, &ondisk->vol_fsid, sizeof(uuid_t))) {
264                 kprintf("hammer_mount: volume %s's fsid does not match "
265                         "other volumes\n", volume->vol_name);
266                 error = EFTYPE;
267                 goto late_failure;
268         }
269
270         /*
271          * Insert the volume structure into the red-black tree.
272          */
273         if (RB_INSERT(hammer_vol_rb_tree, &hmp->rb_vols_root, volume)) {
274                 kprintf("hammer_mount: volume %s has a duplicate vol_no %d\n",
275                         volume->vol_name, volume->vol_no);
276                 error = EEXIST;
277         }
278
279         /*
280          * Set the root volume and load the root cluster.  HAMMER special
281          * cases rootvol and rootcl and will not deallocate the structures.
282          * We do not hold a ref because this would prevent related I/O
283          * from being flushed.
284          */
285         if (error == 0 && ondisk->vol_rootvol == ondisk->vol_no) {
286                 hmp->rootvol = volume;
287                 hmp->rootcl = hammer_get_cluster(volume,
288                                                  ondisk->vol0_root_clu_no,
289                                                  &error, 0);
290                 hammer_rel_cluster(hmp->rootcl, 0);
291                 hmp->fsid_udev = dev2udev(vn_todev(volume->devvp));
292         }
293 late_failure:
294         if (bp)
295                 brelse(bp);
296         if (error) {
297                 /*vinvalbuf(volume->devvp, V_SAVE, 0, 0);*/
298                 VOP_CLOSE(volume->devvp, ronly ? FREAD : FREAD|FWRITE);
299                 hammer_free_volume(volume);
300         }
301         return (error);
302 }
303
304 /*
305  * Unload and free a HAMMER volume.  Must return >= 0 for the RB_SCAN to
306  * continue; a negative return would abort the scan on failure.
307  */
308 int
309 hammer_unload_volume(hammer_volume_t volume, void *data __unused)
310 {
311         struct hammer_mount *hmp = volume->hmp;
312         hammer_cluster_t rootcl;
313         int ronly = ((hmp->mp->mnt_flag & MNT_RDONLY) ? 1 : 0);
314
315         /*
316          * Sync clusters, sync volume
317          */
318
319         hmp->mp->mnt_stat.f_blocks -= volume->nblocks;
320
321         /*
322          * Clean up the root cluster, which is held unlocked in the root
323          * volume.
324          */
325         if (hmp->rootvol == volume) {
326                 if ((rootcl = hmp->rootcl) != NULL)
327                         hmp->rootcl = NULL;
328                 hmp->rootvol = NULL;
329         }
330
331         /*
332          * Unload clusters and super-clusters.  Unloading a super-cluster
333          * also unloads related clusters, but the filesystem may not be
334          * using super-clusters so unload clusters anyway.
335          */
336         RB_SCAN(hammer_clu_rb_tree, &volume->rb_clus_root, NULL,
337                         hammer_unload_cluster, NULL);
338         RB_SCAN(hammer_scl_rb_tree, &volume->rb_scls_root, NULL,
339                         hammer_unload_supercl, NULL);
340         hammer_io_waitdep(&volume->io);
341
342         /*
343          * Release our buffer and flush anything left in the buffer cache.
344          */
345         hammer_io_release(&volume->io, 2);
346
347         /*
348          * There should be no references on the volume, no clusters, and
349          * no super-clusters.
350          */
351         KKASSERT(volume->io.lock.refs == 0);
352         KKASSERT(RB_EMPTY(&volume->rb_clus_root));
353         KKASSERT(RB_EMPTY(&volume->rb_scls_root));
354
355         volume->ondisk = NULL;
356         if (volume->devvp) {
357                 if (ronly) {
358                         vinvalbuf(volume->devvp, 0, 0, 0);
359                         VOP_CLOSE(volume->devvp, FREAD);
360                 } else {
361                         vinvalbuf(volume->devvp, V_SAVE, 0, 0);
362                         VOP_CLOSE(volume->devvp, FREAD|FWRITE);
363                 }
364         }
365
366         /*
367          * Destroy the structure
368          */
369         RB_REMOVE(hammer_vol_rb_tree, &hmp->rb_vols_root, volume);
370         hammer_free_volume(volume);
371         return(0);
372 }
373
374 static
375 void
376 hammer_free_volume(hammer_volume_t volume)
377 {
378         if (volume->vol_name) {
379                 kfree(volume->vol_name, M_HAMMER);
380                 volume->vol_name = NULL;
381         }
382         if (volume->devvp) {
383                 vrele(volume->devvp);
384                 volume->devvp = NULL;
385         }
386         --hammer_count_volumes;
387         kfree(volume, M_HAMMER);
388 }
389
390 /*
391  * Get a HAMMER volume.  The volume must already exist.
392  */
393 hammer_volume_t
394 hammer_get_volume(struct hammer_mount *hmp, int32_t vol_no, int *errorp)
395 {
396         struct hammer_volume *volume;
397
398         /*
399          * Locate the volume structure
400          */
401         volume = RB_LOOKUP(hammer_vol_rb_tree, &hmp->rb_vols_root, vol_no);
402         if (volume == NULL) {
403                 *errorp = ENOENT;
404                 return(NULL);
405         }
406         hammer_ref(&volume->io.lock);
407
408         /*
409          * Deal with on-disk info
410          */
411         if (volume->ondisk == NULL) {
412                 *errorp = hammer_load_volume(volume);
413                 if (*errorp) {
414                         hammer_rel_volume(volume, 1);
415                         volume = NULL;
416                 }
417         } else {
418                 *errorp = 0;
419         }
420         return(volume);
421 }
422
423 int
424 hammer_ref_volume(hammer_volume_t volume)
425 {
426         int error;
427
428         hammer_ref(&volume->io.lock);
429
430         /*
431          * Deal with on-disk info
432          */
433         if (volume->ondisk == NULL) {
434                 error = hammer_load_volume(volume);
435                 if (error)
436                         hammer_rel_volume(volume, 1);
437         } else {
438                 error = 0;
439         }
440         return (error);
441 }
442
443 hammer_volume_t
444 hammer_get_root_volume(struct hammer_mount *hmp, int *errorp)
445 {
446         hammer_volume_t volume;
447
448         volume = hmp->rootvol;
449         KKASSERT(volume != NULL);
450         hammer_ref(&volume->io.lock);
451
452         /*
453          * Deal with on-disk info
454          */
455         if (volume->ondisk == NULL) {
456                 *errorp = hammer_load_volume(volume);
457                 if (*errorp) {
458                         hammer_rel_volume(volume, 1);
459                         volume = NULL;
460                 }
461         } else {
462                 *errorp = 0;
463         }
464         return (volume);
465 }
466
467 /*
468  * Load a volume's on-disk information.  The volume must be referenced and
469  * not locked.  We temporarily acquire an exclusive lock to interlock
470  * against releases or multiple gets.
471  */
472 static int
473 hammer_load_volume(hammer_volume_t volume)
474 {
475         struct hammer_volume_ondisk *ondisk;
476         int error;
477
478         hammer_lock_ex(&volume->io.lock);
479         if (volume->ondisk == NULL) {
480                 error = hammer_io_read(volume->devvp, &volume->io);
481                 if (error) {
482                         hammer_unlock(&volume->io.lock);
483                         return (error);
484                 }
485                 volume->ondisk = ondisk = (void *)volume->io.bp->b_data;
486
487                 /*
488                  * Configure the volume's A-lists.  These are used to
489                  * allocate clusters.
490                  */
491                 if (volume->vol_flags & HAMMER_VOLF_USINGSUPERCL) {
492                         volume->alist.config = &Vol_super_alist_config;
493                         volume->alist.meta = ondisk->vol_almeta.super;
494                         volume->alist.info = volume;
495                 } else {
496                         volume->alist.config = &Vol_normal_alist_config;
497                         volume->alist.meta = ondisk->vol_almeta.normal;
498                         volume->alist.info = NULL;
499                 }
500         } else {
501                 error = 0;
502         }
503         hammer_unlock(&volume->io.lock);
504         return(0);
505 }
506
507 /*
508  * Release a volume.  Call hammer_io_release on the last reference.  We have
509  * to acquire an exclusive lock to interlock against volume->ondisk tests
510  * in hammer_load_volume(), and hammer_io_release() also expects an exclusive
511  * lock to be held.
512  *
513  * Volumes are not unloaded from memory during normal operation.
514  */
515 void
516 hammer_rel_volume(hammer_volume_t volume, int flush)
517 {
518         if (volume->io.lock.refs == 1) {
519                 hammer_lock_ex(&volume->io.lock);
520                 if (volume->io.lock.refs == 1) {
521                         volume->ondisk = NULL;
522                         hammer_io_release(&volume->io, flush);
523                 } else if (flush) {
524                         hammer_io_flush(&volume->io);
525                 }
526                 hammer_unlock(&volume->io.lock);
527         }
528         hammer_unref(&volume->io.lock);
529 }
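
/*
 * Illustrative sketch (added, not part of the original source): the usual
 * volume reference pattern used throughout this file.  hammer_get_volume()
 * adds a reference and loads the on-disk header on demand;
 * hammer_rel_volume() drops the reference.  The vol_no value below is a
 * placeholder.
 */
#if 0
static void
example_volume_ref(struct hammer_mount *hmp)
{
        hammer_volume_t volume;
        int error;

        volume = hammer_get_volume(hmp, 0, &error);
        if (volume) {
                /* ... access volume->ondisk while the ref is held ... */
                hammer_rel_volume(volume, 0);
        }
}
#endif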
530
531 /************************************************************************
532  *                              SUPER-CLUSTERS                          *
533  ************************************************************************
534  *
535  * Manage super-clusters.  Note that a supercl holds a reference to its
536  * associated volume.
537  */
538 static int
539 hammer_find_supercl(hammer_volume_t volume, int32_t scl_no)
540 {
541         if (RB_LOOKUP(hammer_scl_rb_tree, &volume->rb_scls_root, scl_no))
542                 return(1);
543         return(0);
544 }
545
546 hammer_supercl_t
547 hammer_get_supercl(hammer_volume_t volume, int32_t scl_no,
548                    int *errorp, hammer_alloc_state_t isnew)
549 {
550         hammer_supercl_t supercl;
551
552         /*
553          * Locate and lock the super-cluster structure, creating one
554          * if necessary.
555          */
556 again:
557         supercl = RB_LOOKUP(hammer_scl_rb_tree, &volume->rb_scls_root, scl_no);
558         if (supercl == NULL) {
559                 ++hammer_count_supercls;
560                 supercl = kmalloc(sizeof(*supercl), M_HAMMER, M_WAITOK|M_ZERO);
561                 supercl->scl_no = scl_no;
562                 supercl->volume = volume;
563                 supercl->io.offset = calculate_supercl_offset(volume, scl_no);
564                 hammer_io_init(&supercl->io, HAMMER_STRUCTURE_SUPERCL);
565                 hammer_ref(&supercl->io.lock);
566
567                 /*
568                  * Insert the super-cluster into the RB tree and handle late
569                  * collisions.
570                  */
571                 if (RB_INSERT(hammer_scl_rb_tree, &volume->rb_scls_root, supercl)) {
572                         hammer_unref(&supercl->io.lock);
573                         --hammer_count_supercls;
574                         kfree(supercl, M_HAMMER);
575                         goto again;
576                 }
577                 hammer_ref(&volume->io.lock);
578         } else {
579                 hammer_ref(&supercl->io.lock);
580         }
581
582         /*
583          * Deal with on-disk info
584          */
585         if (supercl->ondisk == NULL || isnew) {
586                 *errorp = hammer_load_supercl(supercl, isnew);
587                 if (*errorp) {
588                         hammer_rel_supercl(supercl, 1);
589                         supercl = NULL;
590                 }
591         } else {
592                 *errorp = 0;
593         }
594         return(supercl);
595 }
596
597 static int
598 hammer_load_supercl(hammer_supercl_t supercl, hammer_alloc_state_t isnew)
599 {
600         struct hammer_supercl_ondisk *ondisk;
601         hammer_volume_t volume = supercl->volume;
602         int error;
603         int64_t nclusters;
604
605         hammer_lock_ex(&supercl->io.lock);
606         if (supercl->ondisk == NULL) {
607                 if (isnew)
608                         error = hammer_io_new(volume->devvp, &supercl->io);
609                 else
610                         error = hammer_io_read(volume->devvp, &supercl->io);
611                 if (error) {
612                         hammer_unlock(&supercl->io.lock);
613                         return (error);
614                 }
615                 supercl->ondisk = ondisk = (void *)supercl->io.bp->b_data;
616
617                 supercl->alist.config = &Supercl_alist_config;
618                 supercl->alist.meta = ondisk->scl_meta;
619                 supercl->alist.info = NULL;
620         } else if (isnew) {
621                 error = hammer_io_new(volume->devvp, &supercl->io);
622         } else {
623                 error = 0;
624         }
625         if (error == 0 && isnew) {
626                 /*
627                  * If this is a new super-cluster we have to initialize
628                  * various ondisk structural elements.  The caller is
629                  * responsible for the remainder.
630                  */
631                 struct hammer_alist_live dummy;
632
633                 hammer_modify_supercl(supercl);
634
635                 ondisk = supercl->ondisk;
636                 dummy.config = &Buf_alist_config;
637                 dummy.meta = ondisk->head.buf_almeta;
638                 dummy.info = NULL;
639                 hammer_initbuffer(&dummy, &ondisk->head, HAMMER_FSBUF_SUPERCL);
640
641                 nclusters = volume->ondisk->vol_nclusters -
642                             ((int64_t)supercl->scl_no * HAMMER_SCL_MAXCLUSTERS);
643                 KKASSERT(nclusters > 0);
644                 if (nclusters > HAMMER_SCL_MAXCLUSTERS)
645                         nclusters = HAMMER_SCL_MAXCLUSTERS;
646                 hammer_alist_init(&supercl->alist, 0, (int32_t)nclusters,
647                                   isnew);
648         }
649         hammer_unlock(&supercl->io.lock);
650         return (error);
651 }
652
653 /*
654  * NOTE: Called from RB_SCAN, must return >= 0 for scan to continue.
655  */
656 int
657 hammer_unload_supercl(hammer_supercl_t supercl, void *data __unused)
658 {
659         KKASSERT(supercl->io.lock.refs == 0);
660         hammer_ref(&supercl->io.lock);
661         hammer_rel_supercl(supercl, 2);
662         return(0);
663 }
664
665 /*
666  * Release a super-cluster.  We have to deal with several places where
667  * another thread can ref the super-cluster.
668  *
669  * Only destroy the structure itself if the related buffer cache buffer
670  * was disassociated from it.  This ties the management of the structure
671  * to the buffer cache subsystem.
672  */
673 void
674 hammer_rel_supercl(hammer_supercl_t supercl, int flush)
675 {
676         hammer_volume_t volume;
677
678         if (supercl->io.lock.refs == 1) {
679                 hammer_lock_ex(&supercl->io.lock);
680                 if (supercl->io.lock.refs == 1) {
681                         hammer_io_release(&supercl->io, flush);
682                         if (supercl->io.bp == NULL &&
683                             supercl->io.lock.refs == 1) {
684                                 volume = supercl->volume;
685                                 RB_REMOVE(hammer_scl_rb_tree,
686                                           &volume->rb_scls_root, supercl);
687                                 supercl->volume = NULL; /* sanity */
688                                 --hammer_count_supercls;
689                                 kfree(supercl, M_HAMMER);
690                                 hammer_rel_volume(volume, 0);
691                                 return;
692                         }
693                 } else if (flush) {
694                         hammer_io_flush(&supercl->io);
695                 }
696                 hammer_unlock(&supercl->io.lock);
697         }
698         hammer_unref(&supercl->io.lock);
699 }
700
701 /************************************************************************
702  *                              CLUSTERS                                *
703  ************************************************************************
704  *
705  */
706 hammer_cluster_t
707 hammer_get_cluster(hammer_volume_t volume, int32_t clu_no,
708                    int *errorp, hammer_alloc_state_t isnew)
709 {
710         hammer_cluster_t cluster;
711
712 again:
713         cluster = RB_LOOKUP(hammer_clu_rb_tree, &volume->rb_clus_root, clu_no);
714         if (cluster == NULL) {
715                 ++hammer_count_clusters;
716                 cluster = kmalloc(sizeof(*cluster), M_HAMMER, M_WAITOK|M_ZERO);
717                 cluster->clu_no = clu_no;
718                 cluster->volume = volume;
719                 RB_INIT(&cluster->rb_bufs_root);
720                 RB_INIT(&cluster->rb_nods_root);
721                 hammer_io_init(&cluster->io, HAMMER_STRUCTURE_CLUSTER);
722                 cluster->io.offset = calculate_cluster_offset(volume, clu_no);
723                 hammer_ref(&cluster->io.lock);
724
725                 /*
726                  * Insert the cluster into the RB tree and handle late
727                  * collisions.
728                  */
729                 if (RB_INSERT(hammer_clu_rb_tree, &volume->rb_clus_root, cluster)) {
730                         hammer_unref(&cluster->io.lock);
731                         --hammer_count_clusters;
732                         kfree(cluster, M_HAMMER);
733                         goto again;
734                 }
735                 hammer_ref(&volume->io.lock);
736         } else {
737                 hammer_ref(&cluster->io.lock);
738         }
739
740         /*
741          * Deal with on-disk info
742          */
743         if (cluster->ondisk == NULL || isnew) {
744                 *errorp = hammer_load_cluster(cluster, isnew);
745                 if (*errorp) {
746                         hammer_rel_cluster(cluster, 1);
747                         cluster = NULL;
748                 }
749         } else {
750                 *errorp = 0;
751         }
752         return (cluster);
753 }
754
755 hammer_cluster_t
756 hammer_get_root_cluster(struct hammer_mount *hmp, int *errorp)
757 {
758         hammer_cluster_t cluster;
759
760         cluster = hmp->rootcl;
761         KKASSERT(cluster != NULL);
762         hammer_ref(&cluster->io.lock);
763
764         /*
765          * Deal with on-disk info
766          */
767         if (cluster->ondisk == NULL) {
768                 *errorp = hammer_load_cluster(cluster, 0);
769                 if (*errorp) {
770                         hammer_rel_cluster(cluster, 1);
771                         cluster = NULL;
772                 }
773         } else {
774                 *errorp = 0;
775         }
776         return (cluster);
777 }
778
779 static
780 int
781 hammer_load_cluster(hammer_cluster_t cluster, hammer_alloc_state_t isnew)
782 {
783         hammer_volume_t volume = cluster->volume;
784         struct hammer_cluster_ondisk *ondisk;
785         int error;
786
787         /*
788          * Load the cluster's on-disk info
789          */
790         hammer_lock_ex(&cluster->io.lock);
791         if (cluster->ondisk == NULL) {
792                 if (isnew)
793                         error = hammer_io_new(volume->devvp, &cluster->io);
794                 else
795                         error = hammer_io_read(volume->devvp, &cluster->io);
796                 if (error) {
797                         hammer_unlock(&cluster->io.lock);
798                         return (error);
799                 }
800                 cluster->ondisk = ondisk = (void *)cluster->io.bp->b_data;
801
802                 cluster->alist_master.config = &Clu_master_alist_config;
803                 cluster->alist_master.meta = ondisk->clu_master_meta;
804                 cluster->alist_btree.config = &Clu_slave_alist_config;
805                 cluster->alist_btree.meta = ondisk->clu_btree_meta;
806                 cluster->alist_btree.info = cluster;
807                 cluster->alist_record.config = &Clu_slave_alist_config;
808                 cluster->alist_record.meta = ondisk->clu_record_meta;
809                 cluster->alist_record.info = cluster;
810                 cluster->alist_mdata.config = &Clu_slave_alist_config;
811                 cluster->alist_mdata.meta = ondisk->clu_mdata_meta;
812                 cluster->alist_mdata.info = cluster;
813
814                 if (isnew == 0) {
815                         /*
816                          * Recover a cluster that was marked open.  This
817                          * can be rather involved and block for a hefty
818                          * chunk of time.
819                          */
820                         if (ondisk->clu_flags & HAMMER_CLUF_OPEN)
821                                 hammer_recover(cluster);
822
823                         cluster->clu_btree_beg = ondisk->clu_btree_beg;
824                         cluster->clu_btree_end = ondisk->clu_btree_end;
825                 }
826         } else if (isnew) {
827                 error = hammer_io_new(volume->devvp, &cluster->io);
828         } else {
829                 error = 0;
830         }
831         if (error == 0 && isnew) {
832                 /*
833                  * If this is a new cluster we have to initialize
834                  * various ondisk structural elements.  The caller is
835                  * responsible for the remainder.
836                  */
837                 struct hammer_alist_live dummy;
838                 hammer_node_t croot;
839                 hammer_volume_ondisk_t voldisk;
840                 int32_t nbuffers;
841
842                 hammer_modify_cluster(cluster);
843                 ondisk = cluster->ondisk;
844                 voldisk = volume->ondisk;
845
846                 dummy.config = &Buf_alist_config;
847                 dummy.meta = ondisk->head.buf_almeta;
848                 dummy.info = NULL;
849                 hammer_initbuffer(&dummy, &ondisk->head, HAMMER_FSBUF_CLUSTER);
850
851                 ondisk->vol_fsid = voldisk->vol_fsid;
852                 ondisk->vol_fstype = voldisk->vol_fstype;
853                 ondisk->clu_gen = 1;
854                 ondisk->clu_id = 0;     /* XXX */
855                 ondisk->clu_no = cluster->clu_no;
856                 ondisk->clu_flags = 0;
857                 ondisk->clu_start = HAMMER_BUFSIZE;
858                 KKASSERT(voldisk->vol_clo_end > cluster->io.offset);
859                 if (voldisk->vol_clo_end - cluster->io.offset >
860                     voldisk->vol_clsize) {
861                         ondisk->clu_limit = voldisk->vol_clsize;
862                 } else {
863                         ondisk->clu_limit = (int32_t)(voldisk->vol_clo_end -
864                                                       cluster->io.offset);
865                 }
866                 nbuffers = ondisk->clu_limit / HAMMER_BUFSIZE;
867                 KKASSERT(isnew == HAMMER_ASTATE_FREE);
868                 hammer_alist_init(&cluster->alist_master, 1, nbuffers - 1,
869                                   HAMMER_ASTATE_FREE);
870                 hammer_alist_init(&cluster->alist_btree,
871                                   HAMMER_FSBUF_MAXBLKS,
872                                   (nbuffers - 1) * HAMMER_FSBUF_MAXBLKS,
873                                   HAMMER_ASTATE_ALLOC);
874                 hammer_alist_init(&cluster->alist_record,
875                                   HAMMER_FSBUF_MAXBLKS,
876                                   (nbuffers - 1) * HAMMER_FSBUF_MAXBLKS,
877                                   HAMMER_ASTATE_ALLOC);
878                 hammer_alist_init(&cluster->alist_mdata,
879                                   HAMMER_FSBUF_MAXBLKS,
880                                   (nbuffers - 1) * HAMMER_FSBUF_MAXBLKS,
881                                   HAMMER_ASTATE_ALLOC);
882
883                 ondisk->idx_data = 1 * HAMMER_FSBUF_MAXBLKS;
884                 ondisk->idx_index = 0 * HAMMER_FSBUF_MAXBLKS;
885                 ondisk->idx_record = nbuffers * HAMMER_FSBUF_MAXBLKS;
886
887                 /*
888                  * Initialize the B-Tree.  We don't know what the caller
889                  * intends to do with the cluster so make sure it causes
890                  * an assertion if the caller makes no changes.
891                  */
892                 ondisk->clu_btree_parent_vol_no = -2;
893                 ondisk->clu_btree_parent_clu_no = -2;
894                 ondisk->clu_btree_parent_offset = -2;
895                 ondisk->clu_btree_parent_clu_gen = -2;
896
897                 croot = hammer_alloc_btree(cluster, &error);
898                 if (error == 0) {
899                         hammer_modify_node(croot);
900                         bzero(croot->ondisk, sizeof(*croot->ondisk));
901                         croot->ondisk->count = 0;
902                         croot->ondisk->type = HAMMER_BTREE_TYPE_LEAF;
903                         hammer_modify_cluster(cluster);
904                         ondisk->clu_btree_root = croot->node_offset;
905                         hammer_rel_node(croot);
906                 }
907         }
908         hammer_unlock(&cluster->io.lock);
909         return (error);
910 }
911
912 /*
913  * NOTE: Called from RB_SCAN, must return >= 0 for scan to continue.
914  */
915 int
916 hammer_unload_cluster(hammer_cluster_t cluster, void *data __unused)
917 {
918         hammer_ref(&cluster->io.lock);
919         RB_SCAN(hammer_buf_rb_tree, &cluster->rb_bufs_root, NULL,
920                 hammer_unload_buffer, NULL);
921         hammer_io_waitdep(&cluster->io);
922         KKASSERT(cluster->io.lock.refs == 1);
923         hammer_rel_cluster(cluster, 2);
924         return(0);
925 }
926
927 /*
928  * Update the cluster's synchronization TID, which is used during cluster
929  * recovery.  NOTE: The cluster header is not written out until all related
930  * records have been written out.
931  */
932 void
933 hammer_update_syncid(hammer_cluster_t cluster, hammer_tid_t tid)
934 {
935         hammer_modify_cluster(cluster);
936         if (cluster->ondisk->synchronized_tid < tid)
937                 cluster->ondisk->synchronized_tid = tid;
938 }
939
940 /*
941  * Reference a cluster that is either already referenced or reached via a
942  * specially handled pointer (aka rootcl).
943  */
944 int
945 hammer_ref_cluster(hammer_cluster_t cluster)
946 {
947         int error;
948
949         KKASSERT(cluster != NULL);
950         hammer_ref(&cluster->io.lock);
951
952         /*
953          * Deal with on-disk info
954          */
955         if (cluster->ondisk == NULL) {
956                 error = hammer_load_cluster(cluster, 0);
957                 if (error)
958                         hammer_rel_cluster(cluster, 1);
959         } else {
960                 error = 0;
961         }
962         return(error);
963 }
964
965 /*
966  * Release a cluster.  We have to deal with several places where
967  * another thread can ref the cluster.
968  *
969  * Only destroy the structure itself if we no longer have an IO or any
970  * hammer buffers associated with the structure.
971  */
972 void
973 hammer_rel_cluster(hammer_cluster_t cluster, int flush)
974 {
975         hammer_volume_t volume;
976
977         if (cluster->io.lock.refs == 1) {
978                 hammer_lock_ex(&cluster->io.lock);
979                 if (cluster->io.lock.refs == 1) {
980                         /*
981                          * Release the I/O.  If we or the kernel wants to
982                          * flush, this will release the bp.  Otherwise the
983                          * bp may be written and flushed passively by the
984                          * kernel later on.
985                          */
986                         hammer_io_release(&cluster->io, flush);
987
988                         /*
989                          * Final cleanup
990                          */
991                         if (cluster != cluster->volume->hmp->rootcl &&
992                             cluster->io.bp == NULL &&
993                             cluster->io.lock.refs == 1 &&
994                             RB_EMPTY(&cluster->rb_bufs_root)) {
995                                 KKASSERT(RB_EMPTY(&cluster->rb_nods_root));
996                                 volume = cluster->volume;
997                                 RB_REMOVE(hammer_clu_rb_tree,
998                                           &volume->rb_clus_root, cluster);
999                                 cluster->volume = NULL; /* sanity */
1000                                 --hammer_count_clusters;
1001                                 kfree(cluster, M_HAMMER);
1002                                 hammer_rel_volume(volume, 0);
1003                                 return;
1004                         }
1005                 } else if (flush) {
1006                         hammer_io_flush(&cluster->io);
1007                 }
1008                 hammer_unlock(&cluster->io.lock);
1009         }
1010         hammer_unref(&cluster->io.lock);
1011 }
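
/*
 * Illustrative sketch (added, not part of the original source): clusters
 * follow the same get/release discipline.  An isnew argument of 0 reads an
 * existing cluster, as done for rootcl in hammer_install_volume(), while
 * HAMMER_ASTATE_FREE formats a brand new one (hammer_load_cluster()
 * asserts that state for new clusters).  The clu_no value is a placeholder.
 */
#if 0
static hammer_cluster_t
example_new_cluster(hammer_volume_t volume, int *errorp)
{
        /* read path: isnew == 0; format path: isnew == HAMMER_ASTATE_FREE */
        return (hammer_get_cluster(volume, 1, errorp, HAMMER_ASTATE_FREE));
}
#endif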
1012
1013 /************************************************************************
1014  *                              BUFFERS                                 *
1015  ************************************************************************
1016  *
1017  * Manage buffers.  Note that a buffer holds a reference to its associated
1018  * cluster, and its cluster will hold a reference to the cluster's volume.
1019  *
1020  * A non-zero buf_type indicates that a new buffer should be created and
1021  * zero'd.
1022  */
1023 hammer_buffer_t
1024 hammer_get_buffer(hammer_cluster_t cluster, int32_t buf_no,
1025                   u_int64_t buf_type, int *errorp)
1026 {
1027         hammer_buffer_t buffer;
1028
1029         /*
1030          * Find the buffer.  Note that buffer 0 corresponds to the cluster
1031          * header and should never be requested.
1032          */
1033         KKASSERT(buf_no >= cluster->ondisk->clu_start / HAMMER_BUFSIZE &&
1034                  buf_no < cluster->ondisk->clu_limit / HAMMER_BUFSIZE);
1035
1036         /*
1037          * Locate and lock the buffer structure, creating one if necessary.
1038          */
1039 again:
1040         buffer = RB_LOOKUP(hammer_buf_rb_tree, &cluster->rb_bufs_root, buf_no);
1041         if (buffer == NULL) {
1042                 ++hammer_count_buffers;
1043                 buffer = kmalloc(sizeof(*buffer), M_HAMMER, M_WAITOK|M_ZERO);
1044                 buffer->buf_no = buf_no;
1045                 buffer->cluster = cluster;
1046                 buffer->volume = cluster->volume;
1047                 hammer_io_init(&buffer->io, HAMMER_STRUCTURE_BUFFER);
1048                 buffer->io.offset = cluster->io.offset +
1049                                     (buf_no * HAMMER_BUFSIZE);
1050                 TAILQ_INIT(&buffer->clist);
1051                 hammer_ref(&buffer->io.lock);
1052
1053                 /*
1054                  * Insert the buffer into the RB tree and handle late
1055                  * collisions.
1056                  */
1057                 if (RB_INSERT(hammer_buf_rb_tree, &cluster->rb_bufs_root, buffer)) {
1058                         hammer_unref(&buffer->io.lock);
1059                         --hammer_count_buffers;
1060                         kfree(buffer, M_HAMMER);
1061                         goto again;
1062                 }
1063                 hammer_ref(&cluster->io.lock);
1064         } else {
1065                 hammer_ref(&buffer->io.lock);
1066         }
1067
1068         /*
1069          * Deal with on-disk info
1070          */
1071         if (buffer->ondisk == NULL || buf_type) {
1072                 *errorp = hammer_load_buffer(buffer, buf_type);
1073                 if (*errorp) {
1074                         hammer_rel_buffer(buffer, 1);
1075                         buffer = NULL;
1076                 }
1077         } else {
1078                 *errorp = 0;
1079         }
1080         return(buffer);
1081 }
1082
1083 static int
1084 hammer_load_buffer(hammer_buffer_t buffer, u_int64_t buf_type)
1085 {
1086         hammer_volume_t volume;
1087         hammer_fsbuf_ondisk_t ondisk;
1088         int error;
1089
1090         /*
1091          * Load the buffer's on-disk info
1092          */
1093         volume = buffer->volume;
1094         hammer_lock_ex(&buffer->io.lock);
1095         if (buffer->ondisk == NULL) {
1096                 if (buf_type) {
1097                         error = hammer_io_new(volume->devvp, &buffer->io);
1098                 } else {
1099                         error = hammer_io_read(volume->devvp, &buffer->io);
1100                 }
1101                 if (error) {
1102                         hammer_unlock(&buffer->io.lock);
1103                         return (error);
1104                 }
1105                 buffer->ondisk = ondisk = (void *)buffer->io.bp->b_data;
1106                 buffer->alist.config = &Buf_alist_config;
1107                 buffer->alist.meta = ondisk->head.buf_almeta;
1108                 buffer->buf_type = ondisk->head.buf_type;
1109         } else if (buf_type) {
1110                 error = hammer_io_new(volume->devvp, &buffer->io);
1111         } else {
1112                 error = 0;
1113         }
1114         if (error == 0 && buf_type) {
1115                 hammer_modify_buffer(buffer);
1116                 ondisk = buffer->ondisk;
1117                 hammer_initbuffer(&buffer->alist, &ondisk->head, buf_type);
1118                 buffer->buf_type = ondisk->head.buf_type;
1119         }
1120         hammer_unlock(&buffer->io.lock);
1121         return (error);
1122 }
1123
1124 /*
1125  * NOTE: Called from RB_SCAN, must return >= 0 for scan to continue.
1126  */
1127 int
1128 hammer_unload_buffer(hammer_buffer_t buffer, void *data __unused)
1129 {
1130         hammer_ref(&buffer->io.lock);
1131         hammer_flush_buffer_nodes(buffer);
1132         KKASSERT(buffer->io.lock.refs == 1);
1133         hammer_rel_buffer(buffer, 2);
1134         return(0);
1135 }
1136
1137 /*
1138  * Reference a buffer that is either already referenced or reached via a
1139  * specially handled pointer (aka cursor->buffer).
1140  */
1141 int
1142 hammer_ref_buffer(hammer_buffer_t buffer)
1143 {
1144         int error;
1145
1146         hammer_ref(&buffer->io.lock);
1147         if (buffer->ondisk == NULL) {
1148                 error = hammer_load_buffer(buffer, 0);
1149                 if (error) {
1150                         hammer_rel_buffer(buffer, 1);
1151                         /*
1152                          * NOTE: buffer pointer can become stale after
1153                          * the above release.
1154                          */
1155                 } else {
1156                         KKASSERT(buffer->buf_type ==
1157                                  buffer->ondisk->head.buf_type);
1158                 }
1159         } else {
1160                 error = 0;
1161         }
1162         return(error);
1163 }
1164
1165 /*
1166  * Release a buffer.  We have to deal with several places where
1167  * another thread can ref the buffer.
1168  *
1169  * Only destroy the structure itself if the related buffer cache buffer
1170  * was disassociated from it.  This ties the management of the structure
1171  * to the buffer cache subsystem.  buffer->ondisk determines whether the
1172  * embedded io is referenced or not.
1173  */
1174 void
1175 hammer_rel_buffer(hammer_buffer_t buffer, int flush)
1176 {
1177         hammer_cluster_t cluster;
1178
1179         if (buffer->io.lock.refs == 1) {
1180                 hammer_lock_ex(&buffer->io.lock);
1181                 if (buffer->io.lock.refs == 1) {
1182                         hammer_io_release(&buffer->io, flush);
1183
1184                         if (buffer->io.bp == NULL &&
1185                             buffer->io.lock.refs == 1) {
1186                                 hammer_flush_buffer_nodes(buffer);
1187                                 KKASSERT(TAILQ_EMPTY(&buffer->clist));
1188                                 cluster = buffer->cluster;
1189                                 RB_REMOVE(hammer_buf_rb_tree,
1190                                           &cluster->rb_bufs_root, buffer);
1191                                 buffer->cluster = NULL; /* sanity */
1192                                 --hammer_count_buffers;
1193                                 kfree(buffer, M_HAMMER);
1194                                 hammer_rel_cluster(cluster, 0);
1195                                 return;
1196                         }
1197                 } else if (flush) {
1198                         hammer_io_flush(&buffer->io);
1199                 }
1200                 hammer_unlock(&buffer->io.lock);
1201         }
1202         hammer_unref(&buffer->io.lock);
1203 }
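
/*
 * Illustrative sketch (added, not part of the original source): reading an
 * existing buffer.  A buf_type of 0 reads the buffer from disk; a non-zero
 * buf_type would create and zero a new one as described above.  The buf_no
 * argument is assumed to lie within the cluster's valid range.
 */
#if 0
static void
example_buffer_ref(hammer_cluster_t cluster, int32_t buf_no)
{
        hammer_buffer_t buffer;
        int error;

        buffer = hammer_get_buffer(cluster, buf_no, 0, &error);
        if (buffer) {
                /* ... access buffer->ondisk while the ref is held ... */
                hammer_rel_buffer(buffer, 0);
        }
}
#endif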
1204
1205 /************************************************************************
1206  *                              NODES                                   *
1207  ************************************************************************
1208  *
1209  * Manage B-Tree nodes.  B-Tree nodes represent the primary indexing
1210  * method used by the HAMMER filesystem.
1211  *
1212  * Unlike other HAMMER structures, a hammer_node can be PASSIVELY
1213  * associated with its buffer, and will only reference the buffer while
1214  * the node itself is referenced.
1215  *
1216  * A hammer_node can also be passively associated with other HAMMER
1217  * structures, such as inodes, while retaining 0 references.  These
1218  * associations can be cleared backwards using a pointer-to-pointer in
1219  * the hammer_node.
1220  *
1221  * This allows the HAMMER implementation to cache hammer_nodes long-term
1222  * and short-cut a great deal of the infrastructure's complexity.  In
1223  * most cases a cached node can be reacquired without having to dip into
1224  * either the buffer or cluster management code.
1225  *
1226  * The caller must pass a referenced cluster on call and will retain
1227  * ownership of the reference on return.  The node will acquire its own
1228  * additional references, if necessary.
1229  */
1230 hammer_node_t
1231 hammer_get_node(hammer_cluster_t cluster, int32_t node_offset, int *errorp)
1232 {
1233         hammer_node_t node;
1234
1235         /*
1236          * Locate the structure, allocating one if necessary.
1237          */
1238 again:
1239         node = RB_LOOKUP(hammer_nod_rb_tree, &cluster->rb_nods_root,
1240                          node_offset);
1241         if (node == NULL) {
1242                 ++hammer_count_nodes;
1243                 node = kmalloc(sizeof(*node), M_HAMMER, M_WAITOK|M_ZERO);
1244                 node->node_offset = node_offset;
1245                 node->cluster = cluster;
1246                 if (RB_INSERT(hammer_nod_rb_tree, &cluster->rb_nods_root,
1247                               node)) {
1248                         --hammer_count_nodes;
1249                         kfree(node, M_HAMMER);
1250                         goto again;
1251                 }
1252         }
1253         hammer_ref(&node->lock);
1254         *errorp = hammer_load_node(node);
1255         if (*errorp) {
1256                 hammer_rel_node(node);
1257                 node = NULL;
1258         }
1259         return(node);
1260 }
1261
1262 /*
1263  * Reference an already-referenced node.
1264  */
1265 int
1266 hammer_ref_node(hammer_node_t node)
1267 {
1268         int error;
1269
1270         KKASSERT(node->lock.refs > 0);
1271         hammer_ref(&node->lock);
1272         if ((error = hammer_load_node(node)) != 0)
1273                 hammer_rel_node(node);
1274         return(error);
1275 }
1276
1277 /*
1278  * Load a node's on-disk data reference.
1279  */
1280 static int
1281 hammer_load_node(hammer_node_t node)
1282 {
1283         hammer_buffer_t buffer;
1284         int32_t buf_no;
1285         int error;
1286
1287         if (node->ondisk)
1288                 return(0);
1289         error = 0;
1290         hammer_lock_ex(&node->lock);
1291         if (node->ondisk == NULL) {
1292                 /*
1293                  * This is a little confusing but the gist is that
1294                  * node->buffer determines whether the node is on
1295                  * the buffer's clist and node->ondisk determines
1296                  * whether the buffer is referenced.
1297                  */
1298                 if ((buffer = node->buffer) != NULL) {
1299                         error = hammer_ref_buffer(buffer);
1300                 } else {
1301                         buf_no = node->node_offset / HAMMER_BUFSIZE;
1302                         buffer = hammer_get_buffer(node->cluster,
1303                                                    buf_no, 0, &error);
1304                         if (buffer) {
1305                                 KKASSERT(error == 0);
1306                                 TAILQ_INSERT_TAIL(&buffer->clist,
1307                                                   node, entry);
1308                                 node->buffer = buffer;
1309                         }
1310                 }
1311                 if (error == 0) {
1312                         node->ondisk = (void *)((char *)buffer->ondisk +
1313                                (node->node_offset & HAMMER_BUFMASK));
1314                 }
1315         }
1316         hammer_unlock(&node->lock);
1317         return (error);
1318 }
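
/*
 * Worked example (added; assumes the usual 16KB HAMMER_BUFSIZE): a node at
 * node_offset 49216 lives in buffer 49216 / 16384 == 3 and starts at
 * 49216 & HAMMER_BUFMASK == 64 bytes into that buffer's ondisk image.
 */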
1319
1320 /*
1321  * Safely reference a node, interlock against flushes via the IO subsystem.
1322  */
1323 hammer_node_t
1324 hammer_ref_node_safe(struct hammer_mount *hmp, struct hammer_node **cache,
1325                      int *errorp)
1326 {
1327         hammer_node_t node;
1328
1329         if ((node = *cache) != NULL)
1330                 hammer_ref(&node->lock);
1331         if (node) {
1332                 *errorp = hammer_load_node(node);
1333                 if (*errorp) {
1334                         hammer_rel_node(node);
1335                         node = NULL;
1336                 }
1337         } else {
1338                 *errorp = ENOENT;
1339         }
1340         return(node);
1341 }
1342
1343 /*
1344  * Release a hammer_node.  On the last release the node dereferences
1345  * its underlying buffer and may or may not be destroyed.
1346  */
1347 void
1348 hammer_rel_node(hammer_node_t node)
1349 {
1350         hammer_cluster_t cluster;
1351         hammer_buffer_t buffer;
1352         int32_t node_offset;
1353         int flags;
1354
1355         /*
1356          * If this isn't the last ref just decrement the ref count and
1357          * return.
1358          */
1359         if (node->lock.refs > 1) {
1360                 hammer_unref(&node->lock);
1361                 return;
1362         }
1363
1364         /*
1365          * If there is no ondisk info or no buffer the node failed to load,
1366          * remove the last reference and destroy the node.
1367          */
1368         if (node->ondisk == NULL) {
1369                 hammer_unref(&node->lock);
1370                 hammer_flush_node(node);
1371                 /* node is stale now */
1372                 return;
1373         }
1374
1375         /*
1376  * Do final cleanups and then either destroy the node or leave it
1377          * passively cached.  The buffer reference is removed regardless.
1378          */
1379         buffer = node->buffer;
1380         node->ondisk = NULL;
1381
1382         if ((node->flags & (HAMMER_NODE_DELETED|HAMMER_NODE_FLUSH)) == 0) {
1383                 hammer_unref(&node->lock);
1384                 hammer_rel_buffer(buffer, 0);
1385                 return;
1386         }
1387
1388         /*
1389  * Destroy the node.  Record pertinent data because the node
1390          * becomes stale the instant we flush it.
1391          */
1392         flags = node->flags;
1393         node_offset = node->node_offset;
1394         hammer_unref(&node->lock);
1395         hammer_flush_node(node);
1396         /* node is stale */
1397
1398         cluster = buffer->cluster;
1399         if (flags & HAMMER_NODE_DELETED) {
1400                 hammer_free_btree(cluster, node_offset);
1401                 if (node_offset == cluster->ondisk->clu_btree_root) {
1402                         kprintf("FREE CLUSTER %d\n", cluster->clu_no);
1403                         hammer_free_cluster(cluster);
1404                         /*hammer_io_undirty(&cluster->io);*/
1405                 }
1406         }
1407         hammer_rel_buffer(buffer, 0);
1408 }
1409
1410 /*
1411  * Passively cache a referenced hammer_node in *cache.  The caller may
1412  * release the node on return.
1413  */
1414 void
1415 hammer_cache_node(hammer_node_t node, struct hammer_node **cache)
1416 {
1417         hammer_node_t old;
1418
1419         /*
1420          * If the node is being deleted, don't cache it!
1421          */
1422         if (node->flags & HAMMER_NODE_DELETED)
1423                 return;
1424
1425         /*
1426          * Cache the node.  If we previously cached a different node we
1427          * have to give HAMMER a chance to destroy it.
1428          */
1429 again:
1430         if (node->cache1 != cache) {
1431                 if (node->cache2 != cache) {
1432                         if ((old = *cache) != NULL) {
1433                                 KKASSERT(node->lock.refs != 0);
1434                                 hammer_uncache_node(cache);
1435                                 goto again;
1436                         }
1437                         if (node->cache2)
1438                                 *node->cache2 = NULL;
1439                         node->cache2 = node->cache1;
1440                         node->cache1 = cache;
1441                         *cache = node;
1442                 } else {
1443                         struct hammer_node **tmp;
1444                         tmp = node->cache1;
1445                         node->cache1 = node->cache2;
1446                         node->cache2 = tmp;
1447                 }
1448         }
1449 }
1450
1451 void
1452 hammer_uncache_node(struct hammer_node **cache)
1453 {
1454         hammer_node_t node;
1455
1456         if ((node = *cache) != NULL) {
1457                 *cache = NULL;
1458                 if (node->cache1 == cache) {
1459                         node->cache1 = node->cache2;
1460                         node->cache2 = NULL;
1461                 } else if (node->cache2 == cache) {
1462                         node->cache2 = NULL;
1463                 } else {
1464                         panic("hammer_uncache_node: missing cache linkage");
1465                 }
1466                 if (node->cache1 == NULL && node->cache2 == NULL)
1467                         hammer_flush_node(node);
1468         }
1469 }
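/*
 * Illustrative sketch (editor's addition, not part of the original source):
 * how a consumer is expected to use the two-slot passive node cache above.
 * The function name and the re-reference step are assumptions made for
 * illustration only; the real consumers live in other HAMMER source files.
 */
#if 0
static void
example_node_cache_usage(hammer_cluster_t cluster, int32_t node_offset,
                         struct hammer_node **cache)
{
        hammer_node_t node;
        int error;

        node = hammer_get_node(cluster, node_offset, &error);
        if (node == NULL)
                return;

        /*
         * Remember the node passively.  This adds no reference, so the
         * node can be released immediately afterwards.
         */
        hammer_cache_node(node, cache);
        hammer_rel_node(node);

        /*
         * Later: reuse the cached pointer if it is still valid, otherwise
         * fall back to a normal lookup.
         */
        if ((node = *cache) != NULL)
                hammer_ref(&node->lock);
        else
                node = hammer_get_node(cluster, node_offset, &error);
        if (node)
                hammer_rel_node(node);

        /* Drop the cache linkage when the consumer goes away. */
        hammer_uncache_node(cache);
}
#endif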
1470
1471 /*
1472  * Remove a node's cache references and destroy the node if it has no
1473  * other references or backing store.
1474  */
1475 void
1476 hammer_flush_node(hammer_node_t node)
1477 {
1478         hammer_buffer_t buffer;
1479
1480         if (node->cache1)
1481                 *node->cache1 = NULL;
1482         if (node->cache2)
1483                 *node->cache2 = NULL;
1484         if (node->lock.refs == 0 && node->ondisk == NULL) {
1485                 RB_REMOVE(hammer_nod_rb_tree, &node->cluster->rb_nods_root,
1486                           node);
1487                 if ((buffer = node->buffer) != NULL) {
1488                         node->buffer = NULL;
1489                         TAILQ_REMOVE(&buffer->clist, node, entry);
1490                         /* buffer is unreferenced because ondisk is NULL */
1491                 }
1492                 --hammer_count_nodes;
1493                 kfree(node, M_HAMMER);
1494         }
1495 }
1496
1497 /*
1498  * Flush passively cached B-Tree nodes associated with this buffer.
1499  * This is only called when the buffer is about to be destroyed, so
1500  * none of the nodes should have any references.
1501  */
1502 void
1503 hammer_flush_buffer_nodes(hammer_buffer_t buffer)
1504 {
1505         hammer_node_t node;
1506
1507         while ((node = TAILQ_FIRST(&buffer->clist)) != NULL) {
1508                 KKASSERT(node->lock.refs == 0 && node->ondisk == NULL);
1509                 hammer_ref(&node->lock);
1510                 node->flags |= HAMMER_NODE_FLUSH;
1511                 hammer_rel_node(node);
1512         }
1513 }
1514
1515 /************************************************************************
1516  *                              A-LIST ALLOCATORS                       *
1517  ************************************************************************/
1518
1519 /*
1520  * Allocate HAMMER clusters
1521  */
1522 hammer_cluster_t
1523 hammer_alloc_cluster(hammer_mount_t hmp, hammer_cluster_t cluster_hint,
1524                      int *errorp)
1525 {
1526         hammer_volume_t volume;
1527         hammer_cluster_t cluster;
1528         int32_t clu_no;
1529         int32_t clu_hint;
1530         int32_t vol_beg;
1531         int32_t vol_no;
1532
1533         /*
1534          * Figure out our starting volume and hint.
1535          */
1536         if (cluster_hint) {
1537                 vol_beg = cluster_hint->volume->vol_no;
1538                 clu_hint = cluster_hint->clu_no;
1539         } else {
1540                 vol_beg = hmp->volume_iterator;
1541                 clu_hint = -1;
1542         }
1543
1544         /*
1545          * Loop through volumes looking for a free cluster.  If allocating
1546          * a new cluster relative to an existing cluster try to find a free
1547          * cluster on either side (clu_hint >= 0), otherwise just do a
1548          * forwards iteration.
1549          */
1550         vol_no = vol_beg;
1551         do {
1552                 volume = hammer_get_volume(hmp, vol_no, errorp);
1553                 kprintf("VOLUME %p %d\n", volume, vol_no);
1554                 if (*errorp) {
1555                         clu_no = HAMMER_ALIST_BLOCK_NONE;
1556                         break;
1557                 }
1558                 hammer_modify_volume(volume);
1559                 if (clu_hint == -1) {
1560                         clu_hint = volume->clu_iterator;
1561                         clu_no = hammer_alist_alloc_fwd(&volume->alist, 1,
1562                                                         clu_hint);
1563                         if (clu_no == HAMMER_ALIST_BLOCK_NONE) {
1564                                 clu_no = hammer_alist_alloc_fwd(&volume->alist,
1565                                                                 1, 0);
1566                         }
1567                 } else {
1568                         clu_no = hammer_alist_alloc_fwd(&volume->alist, 1,
1569                                                         clu_hint);
1570                         if (clu_no == HAMMER_ALIST_BLOCK_NONE) {
1571                                 clu_no = hammer_alist_alloc_rev(&volume->alist,
1572                                                                 1, clu_hint);
1573                         }
1574                 }
1575                 if (clu_no != HAMMER_ALIST_BLOCK_NONE)
1576                         break;
1577                 hammer_rel_volume(volume, 0);
1578                 volume = NULL;
1579                 *errorp = ENOSPC;
1580                 vol_no = (vol_no + 1) % hmp->nvolumes;
1581                 clu_hint = -1;
1582         } while (vol_no != vol_beg);
1583
1584         /*
1585          * Acquire the cluster.  On success this will force *errorp to 0.
1586          */
1587         if (clu_no != HAMMER_ALIST_BLOCK_NONE) {
1588                 kprintf("ALLOC CLUSTER %d:%d\n", volume->vol_no, clu_no);
1589                 cluster = hammer_get_cluster(volume, clu_no, errorp,
1590                                              HAMMER_ASTATE_FREE);
1591                 volume->clu_iterator = clu_no;
1592                 hammer_rel_volume(volume, 0);
1593         } else {
1594                 cluster = NULL;
1595         }
1596         if (cluster)
1597                 hammer_lock_ex(&cluster->io.lock);
1598         return(cluster);
1599 }
1600
1601 void
1602 hammer_init_cluster(hammer_cluster_t cluster, hammer_base_elm_t left_bound, 
1603                     hammer_base_elm_t right_bound)
1604 {
1605         hammer_cluster_ondisk_t ondisk = cluster->ondisk;
1606
1607         hammer_modify_cluster(cluster);
1608         ondisk->clu_btree_beg = *left_bound;
1609         ondisk->clu_btree_end = *right_bound;
1610         cluster->clu_btree_beg = ondisk->clu_btree_beg;
1611         cluster->clu_btree_end = ondisk->clu_btree_end;
1612 }
1613
1614 /*
1615  * Deallocate a cluster
1616  */
1617 void
1618 hammer_free_cluster(hammer_cluster_t cluster)
1619 {
1620         hammer_modify_cluster(cluster);
1621         hammer_alist_free(&cluster->volume->alist, cluster->clu_no, 1);
1622 }
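/*
 * Illustrative sketch (editor's addition): the calling pattern expected by
 * the cluster allocator above.  hammer_unlock() is assumed to be the
 * counterpart of hammer_lock_ex(); the left/right bounds would normally
 * come from the B-Tree element that will point at the new cluster.
 */
#if 0
static hammer_cluster_t
example_alloc_cluster(hammer_mount_t hmp, hammer_cluster_t hint,
                      hammer_base_elm_t left, hammer_base_elm_t right,
                      int *errorp)
{
        hammer_cluster_t cluster;

        cluster = hammer_alloc_cluster(hmp, hint, errorp);
        if (cluster == NULL)
                return(NULL);           /* *errorp is typically ENOSPC */

        /* The cluster comes back referenced and exclusively locked. */
        hammer_init_cluster(cluster, left, right);
        hammer_unlock(&cluster->io.lock);       /* assumed unlock routine */
        return(cluster);        /* caller hammer_rel_cluster()s when done */
}
#endif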
1623
1624 /*
1625  * Allocate HAMMER elements - btree nodes, data storage, and record elements
1626  *
1627  * The passed *bufferp should be initialized to NULL.  On successive calls
1628  * *bufferp caches the most recent buffer used until put away by the caller.
1629  * Note that previously returned pointers using the cached buffer become
1630  * invalid on successive calls which reuse *bufferp.
1631  *
1632  * All allocations first attempt to use the block found at the specified
1633  * iterator.  If that fails, the first available block is used.  If that
1634  * also fails, a new buffer is allocated and associated with the buffer-type
1635  * A-list, and the element is allocated out of the new buffer.
1636  */
1637
1638 hammer_node_t
1639 hammer_alloc_btree(hammer_cluster_t cluster, int *errorp)
1640 {
1641         hammer_buffer_t buffer;
1642         hammer_alist_t live;
1643         hammer_node_t node;
1644         int32_t elm_no;
1645         int32_t buf_no;
1646         int32_t node_offset;
1647
1648         /*
1649          * Allocate a B-Tree element
1650          */
1651         hammer_modify_cluster(cluster);
1652         buffer = NULL;
1653         live = &cluster->alist_btree;
1654         elm_no = hammer_alist_alloc_fwd(live, 1, cluster->ondisk->idx_index);
1655         if (elm_no == HAMMER_ALIST_BLOCK_NONE)
1656                 elm_no = hammer_alist_alloc_fwd(live, 1, 0);
1657         if (elm_no == HAMMER_ALIST_BLOCK_NONE) {
1658                 alloc_new_buffer(cluster, HAMMER_FSBUF_BTREE, live,
1659                                  cluster->ondisk->idx_index, errorp, &buffer);
1660                 elm_no = hammer_alist_alloc(live, 1);
1661                 if (elm_no == HAMMER_ALIST_BLOCK_NONE) {
1662                         *errorp = ENOSPC;
1663                         if (buffer)
1664                                 hammer_rel_buffer(buffer, 0);
1665                         return(NULL);
1666                 }
1667         }
1668         cluster->ondisk->idx_index = elm_no;
1669         KKASSERT((elm_no & HAMMER_FSBUF_BLKMASK) < HAMMER_BTREE_NODES);
1670
1671         /*
1672          * Load and return the B-Tree element
1673          */
1674         buf_no = elm_no / HAMMER_FSBUF_MAXBLKS;
1675         node_offset = buf_no * HAMMER_BUFSIZE +
1676                       offsetof(union hammer_fsbuf_ondisk,
1677                                btree.nodes[elm_no & HAMMER_FSBUF_BLKMASK]);
1678         node = hammer_get_node(cluster, node_offset, errorp);
1679         if (node) {
1680                 hammer_modify_node(node);
1681                 bzero(node->ondisk, sizeof(*node->ondisk));
1682         } else {
1683                 hammer_alist_free(live, elm_no, 1);
1684                 /* node is NULL here; there is no reference to release */
1685                 node = NULL;
1686         }
1687         if (buffer)
1688                 hammer_rel_buffer(buffer, 0);
1689         return(node);
1690 }
1691
1692 void *
1693 hammer_alloc_data(hammer_cluster_t cluster, int32_t bytes,
1694                   int *errorp, struct hammer_buffer **bufferp)
1695 {
1696         hammer_buffer_t buffer;
1697         hammer_alist_t live;
1698         int32_t elm_no;
1699         int32_t buf_no;
1700         int32_t nblks;
1701         void *item;
1702
1703         /*
1704          * Deal with large data blocks.  The blocksize is HAMMER_BUFSIZE
1705          * for these allocations.
1706          */
1707         hammer_modify_cluster(cluster);
1708         if ((bytes & HAMMER_BUFMASK) == 0) {
1709                 nblks = bytes / HAMMER_BUFSIZE;
1710                 /* only one block allowed for now (so buffer can hold it) */
1711                 KKASSERT(nblks == 1);
1712
1713                 buf_no = hammer_alloc_master(cluster, nblks,
1714                                              cluster->ondisk->idx_ldata, 1);
1715                 if (buf_no == HAMMER_ALIST_BLOCK_NONE) {
1716                         *errorp = ENOSPC;
1717                         return(NULL);
1718                 }
1719                 hammer_adjust_stats(cluster, HAMMER_FSBUF_DATA, nblks);
1720                 cluster->ondisk->idx_ldata = buf_no;
1721                 buffer = *bufferp;
1722                 *bufferp = hammer_get_buffer(cluster, buf_no, -1, errorp);
1723                 if (buffer)
1724                         hammer_rel_buffer(buffer, 0);
1725                 buffer = *bufferp;
1726                 return(buffer->ondisk);
1727         }
1728
1729         /*
1730          * Allocate a data element.  The block size is HAMMER_DATA_BLKSIZE
1731          * (64 bytes) for these allocations.
1732          */
1733         nblks = (bytes + HAMMER_DATA_BLKMASK) & ~HAMMER_DATA_BLKMASK;
1734         nblks /= HAMMER_DATA_BLKSIZE;
1735         live = &cluster->alist_mdata;
1736         elm_no = hammer_alist_alloc_fwd(live, nblks, cluster->ondisk->idx_data);
1737         if (elm_no == HAMMER_ALIST_BLOCK_NONE)
1738                 elm_no = hammer_alist_alloc_fwd(live, nblks, 0);
1739         if (elm_no == HAMMER_ALIST_BLOCK_NONE) {
1740                 alloc_new_buffer(cluster, HAMMER_FSBUF_DATA, live,
1741                                  cluster->ondisk->idx_data, errorp, bufferp);
1742                 elm_no = hammer_alist_alloc(live, nblks);
1743                 if (elm_no == HAMMER_ALIST_BLOCK_NONE) {
1744                         *errorp = ENOSPC;
1745                         return(NULL);
1746                 }
1747         }
1748         cluster->ondisk->idx_data = elm_no;
1749
1750         /*
1751          * Load and return the data element
1752          */
1753         buf_no = elm_no / HAMMER_FSBUF_MAXBLKS;
1754         buffer = *bufferp;
1755         if (buffer == NULL || buffer->cluster != cluster ||
1756             buffer->buf_no != buf_no) {
1757                 if (buffer)
1758                         hammer_rel_buffer(buffer, 0);
1759                 buffer = hammer_get_buffer(cluster, buf_no, 0, errorp);
1760                 *bufferp = buffer;
1761         }
1762         KKASSERT(buffer->ondisk->head.buf_type == HAMMER_FSBUF_DATA);
1763         KKASSERT((elm_no & HAMMER_FSBUF_BLKMASK) < HAMMER_DATA_NODES);
1764         hammer_modify_buffer(buffer);
1765         item = &buffer->ondisk->data.data[elm_no & HAMMER_FSBUF_BLKMASK];
1766         bzero(item, nblks * HAMMER_DATA_BLKSIZE);
1767         *errorp = 0;
1768         return(item);
1769 }
1770
1771 void *
1772 hammer_alloc_record(hammer_cluster_t cluster,
1773                     int *errorp, struct hammer_buffer **bufferp)
1774 {
1775         hammer_buffer_t buffer;
1776         hammer_alist_t live;
1777         int32_t elm_no;
1778         int32_t buf_no;
1779         void *item;
1780
1781         /*
1782          * Allocate a record element
1783          */
1784         hammer_modify_cluster(cluster);
1785         live = &cluster->alist_record;
1786         elm_no = hammer_alist_alloc_rev(live, 1, cluster->ondisk->idx_record);
1787         if (elm_no == HAMMER_ALIST_BLOCK_NONE)
1788                 elm_no = hammer_alist_alloc_rev(live, 1,HAMMER_ALIST_BLOCK_MAX);
1789         if (elm_no == HAMMER_ALIST_BLOCK_NONE) {
1790                 alloc_new_buffer(cluster, HAMMER_FSBUF_RECORDS, live,
1791                                  cluster->ondisk->idx_record, errorp, bufferp);
1792                 elm_no = hammer_alist_alloc_rev(live, 1,HAMMER_ALIST_BLOCK_MAX);
1793                 kprintf("hammer_alloc_record elm again %08x\n", elm_no);
1794                 if (elm_no == HAMMER_ALIST_BLOCK_NONE) {
1795                         *errorp = ENOSPC;
1796                         return(NULL);
1797                 }
1798         }
1799         cluster->ondisk->idx_record = elm_no;
1800
1801         /*
1802          * Load and return the record element
1803          */
1804         buf_no = elm_no / HAMMER_FSBUF_MAXBLKS;
1805         buffer = *bufferp;
1806         if (buffer == NULL || buffer->cluster != cluster ||
1807             buffer->buf_no != buf_no) {
1808                 if (buffer)
1809                         hammer_rel_buffer(buffer, 0);
1810                 buffer = hammer_get_buffer(cluster, buf_no, 0, errorp);
1811                 *bufferp = buffer;
1812         }
1813         KKASSERT(buffer->ondisk->head.buf_type == HAMMER_FSBUF_RECORDS);
1814         KKASSERT((elm_no & HAMMER_FSBUF_BLKMASK) < HAMMER_RECORD_NODES);
1815         hammer_modify_buffer(buffer);
1816         item = &buffer->ondisk->record.recs[elm_no & HAMMER_FSBUF_BLKMASK];
1817         bzero(item, sizeof(union hammer_record_ondisk));
1818         *errorp = 0;
1819         return(item);
1820 }
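/*
 * Illustrative sketch (editor's addition): the *bufferp convention shared by
 * hammer_alloc_data() and hammer_alloc_record() above.  The loop body is a
 * placeholder; only the buffer handling follows the documented contract.
 */
#if 0
static void
example_alloc_records(hammer_cluster_t cluster, int count, int *errorp)
{
        hammer_buffer_t buffer = NULL;  /* must start out NULL */
        union hammer_record_ondisk *rec;
        int i;

        for (i = 0; i < count; ++i) {
                /*
                 * Each call may replace *bufferp, which invalidates any
                 * pointer returned by an earlier call into that buffer.
                 */
                rec = hammer_alloc_record(cluster, errorp, &buffer);
                if (rec == NULL)
                        break;
                /* ... fill in the record ... */
        }

        /* Put the cached buffer away when done. */
        if (buffer)
                hammer_rel_buffer(buffer, 0);
}
#endif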
1821
1822 void
1823 hammer_free_data_ptr(hammer_buffer_t buffer, void *data, int bytes)
1824 {
1825         int32_t elm_no;
1826         int32_t nblks;
1827         hammer_alist_t live;
1828
1829         hammer_modify_cluster(buffer->cluster);
1830         if ((bytes & HAMMER_BUFMASK) == 0) {
1831                 nblks = bytes / HAMMER_BUFSIZE;
1832                 KKASSERT(nblks == 1 && data == (void *)buffer->ondisk);
1833                 hammer_alist_free(&buffer->cluster->alist_master,
1834                                   buffer->buf_no, nblks);
1835                 hammer_adjust_stats(buffer->cluster, HAMMER_FSBUF_DATA, -nblks);
1836                 return;
1837         }
1838
1839         elm_no = ((char *)data - (char *)buffer->ondisk->data.data) /
1840                  HAMMER_DATA_BLKSIZE;
1841         KKASSERT(elm_no >= 0 && elm_no < HAMMER_DATA_NODES);
1842         elm_no += buffer->buf_no * HAMMER_FSBUF_MAXBLKS;
1843         nblks = (bytes + HAMMER_DATA_BLKMASK) & ~HAMMER_DATA_BLKMASK;
1844         nblks /= HAMMER_DATA_BLKSIZE;
1845         live = &buffer->cluster->alist_mdata;
1846         hammer_alist_free(live, elm_no, nblks);
1847 }
1848
1849 void
1850 hammer_free_record_ptr(hammer_buffer_t buffer, union hammer_record_ondisk *rec)
1851 {
1852         int32_t elm_no;
1853         hammer_alist_t live;
1854
1855         hammer_modify_cluster(buffer->cluster);
1856         elm_no = rec - &buffer->ondisk->record.recs[0];
1857         KKASSERT(elm_no >= 0 && elm_no < HAMMER_RECORD_NODES);
1858         elm_no += buffer->buf_no * HAMMER_FSBUF_MAXBLKS;
1859         live = &buffer->cluster->alist_record;
1860         hammer_alist_free(live, elm_no, 1);
1861 }
1862
1863 void
1864 hammer_free_btree(hammer_cluster_t cluster, int32_t bclu_offset)
1865 {
1866         const int32_t blksize = sizeof(struct hammer_node_ondisk);
1867         int32_t fsbuf_offset = bclu_offset & HAMMER_BUFMASK;
1868         hammer_alist_t live;
1869         int32_t elm_no;
1870
1871         hammer_modify_cluster(cluster);
1872         elm_no = bclu_offset / HAMMER_BUFSIZE * HAMMER_FSBUF_MAXBLKS;
1873         fsbuf_offset -= offsetof(union hammer_fsbuf_ondisk, btree.nodes[0]);
1874         live = &cluster->alist_btree;
1875         KKASSERT(fsbuf_offset >= 0 && fsbuf_offset % blksize == 0);
1876         elm_no += fsbuf_offset / blksize;
1877         hammer_alist_free(live, elm_no, 1);
1878 }
1879
1880 void
1881 hammer_free_data(hammer_cluster_t cluster, int32_t bclu_offset, int32_t bytes)
1882 {
1883         const int32_t blksize = HAMMER_DATA_BLKSIZE;
1884         int32_t fsbuf_offset = bclu_offset & HAMMER_BUFMASK;
1885         hammer_alist_t live;
1886         int32_t elm_no;
1887         int32_t buf_no;
1888         int32_t nblks;
1889
1890         hammer_modify_cluster(cluster);
1891         if ((bytes & HAMMER_BUFMASK) == 0) {
1892                 nblks = bytes / HAMMER_BUFSIZE;
1893                 KKASSERT(nblks == 1 && (bclu_offset & HAMMER_BUFMASK) == 0);
1894                 buf_no = bclu_offset / HAMMER_BUFSIZE;
1895                 hammer_alist_free(&cluster->alist_master, buf_no, nblks);
1896                 hammer_adjust_stats(cluster, HAMMER_FSBUF_DATA, -nblks);
1897                 return;
1898         }
1899
1900         elm_no = bclu_offset / HAMMER_BUFSIZE * HAMMER_FSBUF_MAXBLKS;
1901         fsbuf_offset -= offsetof(union hammer_fsbuf_ondisk, data.data[0][0]);
1902         live = &cluster->alist_mdata;
1903         nblks = (bytes + HAMMER_DATA_BLKMASK) & ~HAMMER_DATA_BLKMASK;
1904         nblks /= HAMMER_DATA_BLKSIZE;
1905         KKASSERT(fsbuf_offset >= 0 && fsbuf_offset % blksize == 0);
1906         elm_no += fsbuf_offset / blksize;
1907         hammer_alist_free(live, elm_no, nblks);
1908 }
1909
1910 void
1911 hammer_free_record(hammer_cluster_t cluster, int32_t bclu_offset)
1912 {
1913         const int32_t blksize = sizeof(union hammer_record_ondisk);
1914         int32_t fsbuf_offset = bclu_offset & HAMMER_BUFMASK;
1915         hammer_alist_t live;
1916         int32_t elm_no;
1917
1918         hammer_modify_cluster(cluster);
1919         elm_no = bclu_offset / HAMMER_BUFSIZE * HAMMER_FSBUF_MAXBLKS;
1920         fsbuf_offset -= offsetof(union hammer_fsbuf_ondisk, record.recs[0]);
1921         live = &cluster->alist_record;
1922         KKASSERT(fsbuf_offset >= 0 && fsbuf_offset % blksize == 0);
1923         elm_no += fsbuf_offset / blksize;
1924         hammer_alist_free(live, elm_no, 1);
1925 }
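/*
 * Editor's note (illustrative): the three bclu_offset-based free routines
 * above all perform the same translation from a cluster-relative byte offset
 * to a slave A-list element number:
 *
 *      elm_no = (bclu_offset / HAMMER_BUFSIZE) * HAMMER_FSBUF_MAXBLKS +
 *               ((bclu_offset & HAMMER_BUFMASK) - header_offset) / blksize;
 *
 * where header_offset is the offsetof() of the first element array in the
 * union hammer_fsbuf_ondisk layout for that buffer type and blksize is the
 * size of one element.
 */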
1926
1927
1928 /*
1929  * Allocate a new filesystem buffer and assign it to the specified
1930  * filesystem buffer type.  The new buffer will be added to the
1931  * type-specific A-list and initialized.
1932  *
1933  * buffers used for records will also be added to the clu_record_buf_bitmap.
1934  */
1935 static void
1936 alloc_new_buffer(hammer_cluster_t cluster, u_int64_t type, hammer_alist_t live,
1937                  int start, int *errorp, struct hammer_buffer **bufferp)
1938 {
1939         hammer_buffer_t buffer;
1940         int32_t buf_no;
1941         int32_t base_blk;
1942         int isfwd;
1943
1944         if (*bufferp)
1945                 hammer_rel_buffer(*bufferp, 0);
1946         *bufferp = NULL;
1947
1948         start = start / HAMMER_FSBUF_MAXBLKS;   /* convert to buf_no */
1949         isfwd = (type != HAMMER_FSBUF_RECORDS);
1950         buf_no = hammer_alloc_master(cluster, 1, start, isfwd);
1951         if (buf_no == HAMMER_ALIST_BLOCK_NONE) {
1952                 *errorp = ENOSPC;
1953                 return;
1954         }
1955
1956         /*
1957          * The new buffer must be initialized (type != 0) regardless of
1958          * whether we already have it cached or not, so don't try to
1959          * optimize the cached buffer check.  Just call hammer_get_buffer().
1960          */
1961         buffer = hammer_get_buffer(cluster, buf_no, type, errorp);
1962         *bufferp = buffer;
1963
1964         /*
1965          * Do a meta-free of the buffer's elements into the type-specific
1966          * A-list and update our statistics to reflect the allocation.
1967          */
1968         if (buffer) {
1969 #if 0
1970                 kprintf("alloc_new_buffer buf_no %d type %016llx nelms %d\n",
1971                         buf_no, type, nelements);
1972 #endif
1973                 hammer_modify_buffer(buffer);  /*XXX*/
1974                 hammer_adjust_stats(cluster, type, 1);
1975
1976                 /*
1977                  * Free the buffer to the appropriate slave list so the
1978                  * cluster-based allocator sees it.
1979                  */
1980                 /*hammer_alist_free(live, buf_no * HAMMER_FSBUF_MAXBLKS,
1981                                   HAMMER_FSBUF_MAXBLKS);*/
1982                 base_blk = buf_no * HAMMER_FSBUF_MAXBLKS;
1983
1984                 switch(type) {
1985                 case HAMMER_FSBUF_BTREE:
1986                         hammer_alist_free(live, base_blk, HAMMER_BTREE_NODES);
1987                         break;
1988                 case HAMMER_FSBUF_DATA:
1989                         hammer_alist_free(live, base_blk, HAMMER_DATA_NODES);
1990                         break;
1991                 case HAMMER_FSBUF_RECORDS:
1992                         hammer_alist_free(live, base_blk, HAMMER_RECORD_NODES);
1993                         break;
1994                 }
1995         }
1996
1997         /*
1998          * And, finally, update clu_record_buf_bitmap for record buffers.
1999          * Since buffers are synced to disk before their associated cluster
2000          * header, a recovery operation will only see synced record buffers
2001          * in the bitmap.  XXX We can't use alist_record for recovery due
2002          * to the way we currently manage it.
2003          */
2004         if (buffer && type == HAMMER_FSBUF_RECORDS) {
2005                 KKASSERT(buf_no >= 0 && buf_no < HAMMER_CLU_MAXBUFFERS);
2006                 hammer_modify_cluster(cluster);
2007                 cluster->ondisk->clu_record_buf_bitmap[buf_no >> 5] |=
2008                         (1 << (buf_no & 31));
2009         }
2010 }
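/*
 * Illustrative sketch (editor's addition): reading back a bit set in
 * clu_record_buf_bitmap above (one bit per buffer, 32 bits per word).  The
 * function is hypothetical; whether recovery scans the bitmap exactly this
 * way is not shown in this file, only the bit encoding is.
 */
#if 0
static int
example_record_buffer_is_marked(hammer_cluster_t cluster, int32_t buf_no)
{
        KKASSERT(buf_no >= 0 && buf_no < HAMMER_CLU_MAXBUFFERS);
        return ((cluster->ondisk->clu_record_buf_bitmap[buf_no >> 5] &
                 (1 << (buf_no & 31))) != 0);
}
#endif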
2011
2012 /*
2013  * Sync dirty buffers to the media
2014  */
2015
2016 static int hammer_sync_scan1(struct mount *mp, struct vnode *vp, void *data);
2017 static int hammer_sync_scan2(struct mount *mp, struct vnode *vp, void *data);
2018
2019 int
2020 hammer_sync_hmp(hammer_mount_t hmp, int waitfor)
2021 {
2022         struct hammer_sync_info info;
2023
2024         info.error = 0;
2025         info.waitfor = waitfor;
2026
2027         kprintf("hammer_sync\n");
2028         vmntvnodescan(hmp->mp, VMSC_GETVP|VMSC_NOWAIT,
2029                       hammer_sync_scan1, hammer_sync_scan2, &info);
2030
2031         RB_SCAN(hammer_vol_rb_tree, &hmp->rb_vols_root, NULL,
2032                 hammer_sync_volume, &info);
2033         return(info.error);
2034 }
2035
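/*
 * Editor's note (illustrative): hammer_sync_scan1() is the fast pre-check
 * vmntvnodescan() runs before acquiring each vnode; returning -1 here causes
 * vnodes with no dirty inode state and no dirty buffers to be skipped.
 * hammer_sync_scan2() then runs with the vnode held and issues the actual
 * VOP_FSYNC().
 */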
2036 static int
2037 hammer_sync_scan1(struct mount *mp, struct vnode *vp, void *data)
2038 {
2039         struct hammer_inode *ip;
2040
2041         ip = VTOI(vp);
2042         if (vp->v_type == VNON || ((ip->flags & HAMMER_INODE_MODMASK) == 0 &&
2043             RB_EMPTY(&vp->v_rbdirty_tree))) {
2044                 return(-1);
2045         }
2046         return(0);
2047 }
2048
2049 static int
2050 hammer_sync_scan2(struct mount *mp, struct vnode *vp, void *data)
2051 {
2052         struct hammer_sync_info *info = data;
2053         struct hammer_inode *ip;
2054         int error;
2055
2056         ip = VTOI(vp);
2057         if (vp->v_type == VNON || vp->v_type == VBAD ||
2058             ((ip->flags & HAMMER_INODE_MODMASK) == 0 &&
2059              RB_EMPTY(&vp->v_rbdirty_tree))) {
2060                 return(0);
2061         }
2062         if (vp->v_type != VCHR) {
2063                 error = VOP_FSYNC(vp, info->waitfor);
2064                 if (error)
2065                         info->error = error;
2066         }
2067         return(0);
2068 }
2069
2070 int
2071 hammer_sync_volume(hammer_volume_t volume, void *data)
2072 {
2073         struct hammer_sync_info *info = data;
2074
2075         RB_SCAN(hammer_clu_rb_tree, &volume->rb_clus_root, NULL,
2076                 hammer_sync_cluster, info);
2077         if (hammer_ref_volume(volume) == 0)
2078                 hammer_rel_volume(volume, 1);
2079         return(0);
2080 }
2081
2082 int
2083 hammer_sync_cluster(hammer_cluster_t cluster, void *data)
2084 {
2085         struct hammer_sync_info *info = data;
2086
2087         RB_SCAN(hammer_buf_rb_tree, &cluster->rb_bufs_root, NULL,
2088                 hammer_sync_buffer, info);
2089         /*hammer_io_waitdep(&cluster->io);*/
2090         if (hammer_ref_cluster(cluster) == 0)
2091                 hammer_rel_cluster(cluster, 1);
2092         return(0);
2093 }
2094
2095 int
2096 hammer_sync_buffer(hammer_buffer_t buffer, void *data __unused)
2097 {
2098         if (hammer_ref_buffer(buffer) == 0)
2099                 hammer_rel_buffer(buffer, 1);
2100         return(0);
2101 }
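/*
 * Editor's note (illustrative): the sync routines above use the second
 * argument of the hammer_rel_*() calls as a flush request.  Taking a
 * temporary reference first (the hammer_ref_*() calls return 0 on success)
 * keeps the structure from being torn down while the release-with-flush
 * runs.
 */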
2102
2103 /*
2104  * Generic buffer initialization.  Initialize the A-list into an all-allocated
2105  * state with the free block limit properly set.
2106  *
2107  * Note that alloc_new_buffer() will free the appropriate block range via
2108  * the appropriate cluster alist, so the free count is properly propagated.
2109  */
2110 void
2111 hammer_initbuffer(hammer_alist_t live, hammer_fsbuf_head_t head, u_int64_t type)
2112 {
2113         head->buf_type = type;
2114
2115         switch(type) {
2116         case HAMMER_FSBUF_BTREE:
2117                 hammer_alist_init(live, 0, HAMMER_BTREE_NODES,
2118                                   HAMMER_ASTATE_ALLOC);
2119                 break;
2120         case HAMMER_FSBUF_DATA:
2121                 hammer_alist_init(live, 0, HAMMER_DATA_NODES,
2122                                   HAMMER_ASTATE_ALLOC);
2123                 break;
2124         case HAMMER_FSBUF_RECORDS:
2125                 hammer_alist_init(live, 0, HAMMER_RECORD_NODES,
2126                                   HAMMER_ASTATE_ALLOC);
2127                 break;
2128         default:
2129                 hammer_alist_init(live, 0, 0, HAMMER_ASTATE_ALLOC);
2130                 break;
2131         }
2132 }
2133
2134 /*
2135  * Calculate the cluster's offset in the volume.  This calculation is
2136  * slightly more complex when using superclusters because superclusters
2137  * are grouped in blocks of 16, followed by 16 x N clusters where N
2138  * is the number of clusters a supercluster can manage.
2139  */
2140 static int64_t
2141 calculate_cluster_offset(hammer_volume_t volume, int32_t clu_no)
2142 {
2143         int32_t scl_group;
2144         int64_t scl_group_size;
2145         int64_t off;
2146
2147         if (volume->vol_flags & HAMMER_VOLF_USINGSUPERCL) {
2148                 scl_group = clu_no / HAMMER_VOL_SUPERCLUSTER_GROUP /
2149                             HAMMER_SCL_MAXCLUSTERS;
2150                 scl_group_size = 
2151                             ((int64_t)HAMMER_BUFSIZE *
2152                              HAMMER_VOL_SUPERCLUSTER_GROUP) +
2153                             ((int64_t)HAMMER_VOL_SUPERCLUSTER_GROUP *
2154                              volume->vol_clsize * HAMMER_SCL_MAXCLUSTERS);
2155                 scl_group_size += 
2156                             HAMMER_VOL_SUPERCLUSTER_GROUP * HAMMER_BUFSIZE;
2157
2158                 off = volume->cluster_base +
2159                       scl_group * scl_group_size +
2160                       (HAMMER_BUFSIZE * HAMMER_VOL_SUPERCLUSTER_GROUP) +
2161                       ((int64_t)clu_no % ((int64_t)HAMMER_SCL_MAXCLUSTERS *
2162                        HAMMER_VOL_SUPERCLUSTER_GROUP))
2163                       * volume->vol_clsize;
2164         } else {
2165                 off = volume->cluster_base +
2166                       (int64_t)clu_no * volume->vol_clsize;
2167         }
2168         return(off);
2169 }
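/*
 * Editor's note (illustrative): written out, the supercluster case above
 * computes, with G = HAMMER_VOL_SUPERCLUSTER_GROUP (the group size of 16
 * mentioned in the comment) and N = HAMMER_SCL_MAXCLUSTERS:
 *
 *      scl_group      = clu_no / (G * N)
 *      scl_group_size = 2 * G * HAMMER_BUFSIZE + G * N * vol_clsize
 *      off = cluster_base + scl_group * scl_group_size
 *            + G * HAMMER_BUFSIZE
 *            + (clu_no % (G * N)) * vol_clsize
 *
 * Note that scl_group_size reserves a second G-buffer area beyond the
 * supercluster headers skipped at the start of each group.
 */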
2170
2171 /*
2172  * Calculate a super-cluster's offset in the volume.
2173  */
2174 static int64_t
2175 calculate_supercl_offset(hammer_volume_t volume, int32_t scl_no)
2176 {
2177         int64_t off;
2178         int32_t scl_group;
2179         int64_t scl_group_size;
2180
2181         KKASSERT (volume->vol_flags & HAMMER_VOLF_USINGSUPERCL);
2182         scl_group = scl_no / HAMMER_VOL_SUPERCLUSTER_GROUP;
2183         if (scl_group) {
2184                 scl_group_size = 
2185                             ((int64_t)HAMMER_BUFSIZE *
2186                              HAMMER_VOL_SUPERCLUSTER_GROUP) +
2187                             ((int64_t)HAMMER_VOL_SUPERCLUSTER_GROUP *
2188                              volume->vol_clsize * HAMMER_SCL_MAXCLUSTERS);
2189                 scl_group_size += 
2190                             HAMMER_VOL_SUPERCLUSTER_GROUP * HAMMER_BUFSIZE;
2191                 off = volume->cluster_base + (scl_group * scl_group_size) +
2192                       (scl_no % HAMMER_VOL_SUPERCLUSTER_GROUP) * HAMMER_BUFSIZE;
2193         } else {
2194                 off = volume->cluster_base + (scl_no * HAMMER_BUFSIZE);
2195         }
2196         return(off);
2197 }
2198
2199 /*
2200  * Allocate nblks buffers from the cluster's master alist.
2201  */
2202 static int32_t
2203 hammer_alloc_master(hammer_cluster_t cluster, int nblks,
2204                     int32_t start, int isfwd)
2205 {
2206         int32_t buf_no;
2207
2208         hammer_modify_cluster(cluster);
2209         if (isfwd) {
2210                 buf_no = hammer_alist_alloc_fwd(&cluster->alist_master,
2211                                                 nblks, start);
2212                 if (buf_no == HAMMER_ALIST_BLOCK_NONE) {
2213                         buf_no = hammer_alist_alloc_fwd(&cluster->alist_master,
2214                                                 nblks, 0);
2215                 }
2216         } else {
2217                 buf_no = hammer_alist_alloc_rev(&cluster->alist_master,
2218                                                 nblks, start);
2219                 if (buf_no == HAMMER_ALIST_BLOCK_NONE) {
2220                         buf_no = hammer_alist_alloc_rev(&cluster->alist_master,
2221                                                 nblks, HAMMER_ALIST_BLOCK_MAX);
2222                 }
2223         }
2224
2225         /*
2226          * Recover space from empty record, b-tree, and data a-lists.
2227          */
2228
2229         return(buf_no);
2230 }
2231
2232 /*
2233  * Adjust allocation statistics
2234  */
2235 static void
2236 hammer_adjust_stats(hammer_cluster_t cluster, u_int64_t buf_type, int nblks)
2237 {
2238         hammer_modify_cluster(cluster);
2239         hammer_modify_volume(cluster->volume);
2240         hammer_modify_volume(cluster->volume->hmp->rootvol);
2241
2242         switch(buf_type) {
2243         case HAMMER_FSBUF_BTREE:
2244                 cluster->ondisk->stat_idx_bufs += nblks;
2245                 cluster->volume->ondisk->vol_stat_idx_bufs += nblks;
2246                 cluster->volume->hmp->rootvol->ondisk->vol0_stat_idx_bufs += nblks;
2247                 break;
2248         case HAMMER_FSBUF_DATA:
2249                 cluster->ondisk->stat_data_bufs += nblks;
2250                 cluster->volume->ondisk->vol_stat_data_bufs += nblks;
2251                 cluster->volume->hmp->rootvol->ondisk->vol0_stat_data_bufs += nblks;
2252                 break;
2253         case HAMMER_FSBUF_RECORDS:
2254                 cluster->ondisk->stat_rec_bufs += nblks;
2255                 cluster->volume->ondisk->vol_stat_rec_bufs += nblks;
2256                 cluster->volume->hmp->rootvol->ondisk->vol0_stat_rec_bufs += nblks;
2257                 break;
2258         }
2259 }
2260
2261 /*
2262  * A-LIST SUPPORT
2263  *
2264  * Set up the parameters for the various A-lists we use in hammer.  The
2265  * supercluster A-list must be chained to the cluster A-list and cluster
2266  * slave A-lists are chained to buffer A-lists.
2267  *
2268  * See hammer_init_alist_config() below.
2269  */
2270
2271 /*
2272  * A-LIST - cluster recursion into a filesystem buffer
2273  *
2274  * In the init case the buffer has already been initialized by
2275  * alloc_new_buffer() when it allocated the buffer out of the master
2276  * alist and marked it as free in the slave alist.
2277  *
2278  * Because we use a somewhat odd mechanism to assign buffers to slave
2279  * pools we can't actually free the buffer back to the master alist in
2280  * buffer_alist_destroy(), but instead must deal with that logic somewhere
2281  * else.
2282  */
2283 static int
2284 buffer_alist_init(void *info, int32_t blk, int32_t radix,
2285                   hammer_alloc_state_t state)
2286 {
2287         return(0);
2288 }
2289
2290 static int
2291 buffer_alist_recover(void *info, int32_t blk, int32_t radix, int32_t count)
2292 {
2293         hammer_cluster_t cluster = info;
2294         hammer_buffer_t buffer;
2295         int32_t buf_no;
2296         int error = 0;
2297
2298         buf_no = blk / HAMMER_FSBUF_MAXBLKS;
2299         buffer = hammer_get_buffer(cluster, buf_no, 0, &error);
2300         if (buffer) {
2301                 hammer_modify_buffer(buffer);
2302                 error = hammer_alist_recover(&buffer->alist, blk, 0, count);
2303                 /* free block count is returned if >= 0 */
2304                 hammer_rel_buffer(buffer, 0);
2305         } else {
2306                 error = -error;
2307         }
2308         return (error);
2309 }
2310
2311 /*
2312  * Note: This routine is only called when freeing the last elements of
2313  * an initialized buffer.  Freeing all elements of the buffer when the
2314  * buffer was not previously initialized does not call this routine.
2315  */
2316 static int
2317 buffer_alist_destroy(void *info, int32_t blk, int32_t radix)
2318 {
2319         hammer_cluster_t cluster = info;
2320         int32_t buf_no;
2321
2322         buf_no = blk / HAMMER_FSBUF_MAXBLKS;
2323         kprintf("destroy buffer %d:%d:%d\n", cluster->volume->vol_no, cluster->clu_no, buf_no);
2324         return (0);
2325 }
2326
2327 /*
2328  * Note: atblk can be negative and atblk - blk can go negative.
2329  */
2330 static int
2331 buffer_alist_alloc_fwd(void *info, int32_t blk, int32_t radix,
2332                       int32_t count, int32_t atblk, int32_t *fullp)
2333 {
2334         hammer_cluster_t cluster = info;
2335         hammer_buffer_t buffer;
2336         int32_t buf_no;
2337         int32_t r;
2338         int error = 0;
2339
2340         buf_no = blk / HAMMER_FSBUF_MAXBLKS;
2341         buffer = hammer_get_buffer(cluster, buf_no, 0, &error);
2342         if (buffer) {
2343                 KKASSERT(buffer->ondisk->head.buf_type != 0);
2344
2345                 hammer_modify_buffer(buffer);
2346                 r = hammer_alist_alloc_fwd(&buffer->alist, count, atblk - blk);
2347                 if (r != HAMMER_ALIST_BLOCK_NONE)
2348                         r += blk;
2349                 *fullp = hammer_alist_isfull(&buffer->alist);
2350                 hammer_rel_buffer(buffer, 0);
2351         } else {
2352                 r = HAMMER_ALIST_BLOCK_NONE;
2353         }
2354         return(r);
2355 }
2356
2357 static int
2358 buffer_alist_alloc_rev(void *info, int32_t blk, int32_t radix,
2359                       int32_t count, int32_t atblk, int32_t *fullp)
2360 {
2361         hammer_cluster_t cluster = info;
2362         hammer_buffer_t buffer;
2363         int32_t buf_no;
2364         int32_t r;
2365         int error = 0;
2366
2367         buf_no = blk / HAMMER_FSBUF_MAXBLKS;
2368         buffer = hammer_get_buffer(cluster, buf_no, 0, &error);
2369         if (buffer) {
2370                 KKASSERT(buffer->ondisk->head.buf_type != 0);
2371                 hammer_modify_buffer(buffer);
2372                 r = hammer_alist_alloc_rev(&buffer->alist, count, atblk - blk);
2373                 if (r != HAMMER_ALIST_BLOCK_NONE)
2374                         r += blk;
2375                 *fullp = hammer_alist_isfull(&buffer->alist);
2376                 hammer_rel_buffer(buffer, 0);
2377         } else {
2378                 r = HAMMER_ALIST_BLOCK_NONE;
2379                 *fullp = 0;
2380         }
2381         return(r);
2382 }
2383
2384 static void
2385 buffer_alist_free(void *info, int32_t blk, int32_t radix,
2386                  int32_t base_blk, int32_t count, int32_t *emptyp)
2387 {
2388         hammer_cluster_t cluster = info;
2389         hammer_buffer_t buffer;
2390         int32_t buf_no;
2391         int error = 0;
2392
2393         buf_no = blk / HAMMER_FSBUF_MAXBLKS;
2394         buffer = hammer_get_buffer(cluster, buf_no, 0, &error);
2395         if (buffer) {
2396                 KKASSERT(buffer->ondisk->head.buf_type != 0);
2397                 hammer_modify_buffer(buffer);
2398                 hammer_alist_free(&buffer->alist, base_blk, count);
2399                 *emptyp = hammer_alist_isempty(&buffer->alist);
2400                 /* XXX don't bother updating the buffer if it is completely empty? */
2401                 hammer_rel_buffer(buffer, 0);
2402         } else {
2403                 *emptyp = 0;
2404         }
2405 }
2406
2407 static void
2408 buffer_alist_print(void *info, int32_t blk, int32_t radix, int tab)
2409 {
2410 }
2411
2412 /*
2413  * A-LIST - super-cluster recursion into a cluster and cluster recursion
2414  * into a filesystem buffer.  A-Lists are mostly self-contained entities,
2415  * but callbacks must be installed to recurse from one A-List to another.
2416  *
2417  * Implementing these callbacks allows us to operate a multi-layered A-List
2418  * as a single entity.
2419  */
2420
2421 /*
2422  * This occurs when allocating a cluster via the volume a-list and the
2423  * entry in the volume a-list indicated all-free.  The underlying supercl
2424  * has not yet been initialized.
2425  */
2426 static int
2427 super_alist_init(void *info, int32_t blk, int32_t radix,
2428                  hammer_alloc_state_t state)
2429 {
2430         hammer_volume_t volume = info;
2431         hammer_supercl_t supercl;
2432         int32_t scl_no;
2433         int error = 0;
2434
2435         /*
2436          * Calculate the super-cluster number containing the cluster (blk)
2437          * and obtain the super-cluster buffer.
2438          */
2439         scl_no = blk / HAMMER_SCL_MAXCLUSTERS;
2440         supercl = hammer_get_supercl(volume, scl_no, &error, state);
2441         if (supercl)
2442                 hammer_rel_supercl(supercl, 0);
2443         return (error);
2444 }
2445
2446 static int
2447 super_alist_recover(void *info, int32_t blk, int32_t radix, int32_t count)
2448 {
2449         hammer_volume_t volume = info;
2450         hammer_supercl_t supercl;
2451         int32_t scl_no;
2452         int error = 0;
2453
2454         /*
2455          * Calculate the super-cluster number containing the cluster (blk)
2456          * and obtain the super-cluster buffer.
2457          */
2458         scl_no = blk / HAMMER_SCL_MAXCLUSTERS;
2459         supercl = hammer_get_supercl(volume, scl_no, &error,
2460                                      HAMMER_ASTATE_NONE);
2461         if (supercl) {
2462                 hammer_modify_supercl(supercl);
2463                 error = hammer_alist_recover(&supercl->alist, blk, 0, count);
2464                 /* free block count is returned if >= 0 */
2465                 hammer_rel_supercl(supercl, 0);
2466         } else {
2467                 error = -error;
2468         }
2469         return (error);
2470 }
2471
2472 /*
2473  * This occurs when freeing a cluster via the volume a-list and the
2474  * supercl is now 100% free.  We can destroy the supercl.
2475  *
2476  * What we actually do is just unset the modify bit so it doesn't get
2477  * written out.
2478  */
2479 static int
2480 super_alist_destroy(void *info, int32_t blk, int32_t radix)
2481 {
2482         hammer_volume_t volume = info;
2483         hammer_supercl_t supercl;
2484         int32_t scl_no;
2485         int error = 0;
2486
2487         /*
2488          * Calculate the super-cluster number containing the cluster (blk)
2489          * and obtain the super-cluster buffer.
2490          */
2491         scl_no = blk / HAMMER_SCL_MAXCLUSTERS;
2492         if (hammer_find_supercl(volume, scl_no)) {
2493                 supercl = hammer_get_supercl(volume, scl_no, &error,
2494                                              HAMMER_ASTATE_FREE);
2495                                              /* XXX */
2496                 hammer_io_clear_modify(&supercl->io);
2497                 if (supercl)
2498                         hammer_rel_supercl(supercl, 0);
2499         }
2500         return (error);
2501 }
2502
2503 static int
2504 super_alist_alloc_fwd(void *info, int32_t blk, int32_t radix,
2505                       int32_t count, int32_t atblk, int32_t *fullp)
2506 {
2507         hammer_volume_t volume = info;
2508         hammer_supercl_t supercl;
2509         int32_t scl_no;
2510         int32_t r;
2511         int error = 0;
2512
2513         scl_no = blk / HAMMER_SCL_MAXCLUSTERS;
2514         supercl = hammer_get_supercl(volume, scl_no, &error, 0);
2515         if (supercl) {
2516                 hammer_modify_supercl(supercl);
2517                 r = hammer_alist_alloc_fwd(&supercl->alist, count, atblk - blk);
2518                 if (r != HAMMER_ALIST_BLOCK_NONE)
2519                         r += blk;
2520                 *fullp = hammer_alist_isfull(&supercl->alist);
2521                 hammer_rel_supercl(supercl, 0);
2522         } else {
2523                 r = HAMMER_ALIST_BLOCK_NONE;
2524                 *fullp = 0;
2525         }
2526         return(r);
2527 }
2528
2529 static int
2530 super_alist_alloc_rev(void *info, int32_t blk, int32_t radix,
2531                       int32_t count, int32_t atblk, int32_t *fullp)
2532 {
2533         hammer_volume_t volume = info;
2534         hammer_supercl_t supercl;
2535         int32_t scl_no;
2536         int32_t r;
2537         int error = 0;
2538
2539         scl_no = blk / HAMMER_SCL_MAXCLUSTERS;
2540         supercl = hammer_get_supercl(volume, scl_no, &error, 0);
2541         if (supercl) {
2542                 hammer_modify_supercl(supercl);
2543                 r = hammer_alist_alloc_rev(&supercl->alist, count, atblk - blk);
2544                 if (r != HAMMER_ALIST_BLOCK_NONE)
2545                         r += blk;
2546                 *fullp = hammer_alist_isfull(&supercl->alist);
2547                 hammer_rel_supercl(supercl, 0);
2548         } else { 
2549                 r = HAMMER_ALIST_BLOCK_NONE;
2550                 *fullp = 0;
2551         }
2552         return(r);
2553 }
2554
2555 static void
2556 super_alist_free(void *info, int32_t blk, int32_t radix,
2557                  int32_t base_blk, int32_t count, int32_t *emptyp)
2558 {
2559         hammer_volume_t volume = info;
2560         hammer_supercl_t supercl;
2561         int32_t scl_no;
2562         int error = 0;
2563
2564         scl_no = blk / HAMMER_SCL_MAXCLUSTERS;
2565         supercl = hammer_get_supercl(volume, scl_no, &error, 0);
2566         if (supercl) {
2567                 hammer_modify_supercl(supercl);
2568                 hammer_alist_free(&supercl->alist, base_blk, count);
2569                 *emptyp = hammer_alist_isempty(&supercl->alist);
2570                 hammer_rel_supercl(supercl, 0);
2571         } else {
2572                 *emptyp = 0;
2573         }
2574 }
2575
2576 static void
2577 super_alist_print(void *info, int32_t blk, int32_t radix, int tab)
2578 {
2579 }
2580
2581 void
2582 hammer_init_alist_config(void)
2583 {
2584         hammer_alist_config_t config;
2585
2586         hammer_alist_template(&Buf_alist_config, HAMMER_FSBUF_MAXBLKS,
2587                               1, HAMMER_FSBUF_METAELMS);
2588         hammer_alist_template(&Vol_normal_alist_config, HAMMER_VOL_MAXCLUSTERS,
2589                               1, HAMMER_VOL_METAELMS_1LYR);
2590         hammer_alist_template(&Vol_super_alist_config,
2591                           HAMMER_VOL_MAXSUPERCLUSTERS * HAMMER_SCL_MAXCLUSTERS,
2592                               HAMMER_SCL_MAXCLUSTERS, HAMMER_VOL_METAELMS_2LYR);
2593         hammer_alist_template(&Supercl_alist_config, HAMMER_VOL_MAXCLUSTERS,
2594                               1, HAMMER_SUPERCL_METAELMS);
2595         hammer_alist_template(&Clu_master_alist_config, HAMMER_CLU_MAXBUFFERS,
2596                               1, HAMMER_CLU_MASTER_METAELMS);
2597         hammer_alist_template(&Clu_slave_alist_config,
2598                               HAMMER_CLU_MAXBUFFERS * HAMMER_FSBUF_MAXBLKS,
2599                               HAMMER_FSBUF_MAXBLKS, HAMMER_CLU_SLAVE_METAELMS);
2600
2601         config = &Vol_super_alist_config;
2602         config->bl_radix_init = super_alist_init;
2603         config->bl_radix_recover = super_alist_recover;
2604         config->bl_radix_destroy = super_alist_destroy;
2605         config->bl_radix_alloc_fwd = super_alist_alloc_fwd;
2606         config->bl_radix_alloc_rev = super_alist_alloc_rev;
2607         config->bl_radix_free = super_alist_free;
2608         config->bl_radix_print = super_alist_print;
2609
2610         config = &Clu_slave_alist_config;
2611         config->bl_radix_init = buffer_alist_init;
2612         config->bl_radix_recover = buffer_alist_recover;
2613         config->bl_radix_destroy = buffer_alist_destroy;
2614         config->bl_radix_alloc_fwd = buffer_alist_alloc_fwd;
2615         config->bl_radix_alloc_rev = buffer_alist_alloc_rev;
2616         config->bl_radix_free = buffer_alist_free;
2617         config->bl_radix_print = buffer_alist_print;
2618 }
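/*
 * Editor's note (illustrative): with the callbacks installed above, a call
 * such as
 *
 *      elm_no = hammer_alist_alloc_fwd(&cluster->alist_btree, 1, hint);
 *
 * is satisfied by the cluster-level slave A-list until it has to descend
 * into a particular filesystem buffer, at which point the configured
 * bl_radix_alloc_fwd callback (buffer_alist_alloc_fwd) obtains the buffer
 * with hammer_get_buffer() and allocates from the buffer-local A-list,
 * translating between cluster-wide and buffer-local block numbers.  The
 * Vol_super_alist_config chain recurses from volume to supercluster in the
 * same way.
 */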
2619