HAMMER 17/many: Refactor IO backend, clean up buffer cache deadlocks.
sys/vfs/hammer/hammer_ondisk.c (dragonfly.git)
1 /*
2  * Copyright (c) 2007 The DragonFly Project.  All rights reserved.
3  * 
4  * This code is derived from software contributed to The DragonFly Project
5  * by Matthew Dillon <dillon@backplane.com>
6  * 
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions
9  * are met:
10  * 
11  * 1. Redistributions of source code must retain the above copyright
12  *    notice, this list of conditions and the following disclaimer.
13  * 2. Redistributions in binary form must reproduce the above copyright
14  *    notice, this list of conditions and the following disclaimer in
15  *    the documentation and/or other materials provided with the
16  *    distribution.
17  * 3. Neither the name of The DragonFly Project nor the names of its
18  *    contributors may be used to endorse or promote products derived
19  *    from this software without specific, prior written permission.
20  * 
21  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
22  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
23  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
24  * FOR A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE
25  * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
26  * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING,
27  * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
28  * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
29  * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
30  * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
31  * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
32  * SUCH DAMAGE.
33  * 
34  * $DragonFly: src/sys/vfs/hammer/hammer_ondisk.c,v 1.18 2008/01/10 07:41:03 dillon Exp $
35  */
36 /*
37  * Manage HAMMER's on-disk structures.  These routines are primarily
38  * responsible for interfacing with the kernel's I/O subsystem and for
39  * managing in-memory structures.
40  */
41
42 #include "hammer.h"
43 #include <sys/fcntl.h>
44 #include <sys/nlookup.h>
45 #include <sys/buf.h>
46 #include <sys/buf2.h>
47
48 static void hammer_free_volume(hammer_volume_t volume);
49 static int hammer_load_volume(hammer_volume_t volume);
50 static int hammer_load_supercl(hammer_supercl_t supercl,
51                         hammer_alloc_state_t isnew);
52 static int hammer_load_cluster(hammer_cluster_t cluster,
53                         hammer_alloc_state_t isnew);
54 static int hammer_load_buffer(hammer_buffer_t buffer, u_int64_t buf_type);
55 static int hammer_load_node(hammer_node_t node);
56 static void alloc_new_buffer(hammer_cluster_t cluster, u_int64_t type,
57                         hammer_alist_t live,
58                         int32_t start, int *errorp,
59                         struct hammer_buffer **bufferp);
60 #if 0
61 static void readhammerbuf(hammer_volume_t vol, void *data,
62                         int64_t offset);
63 static void writehammerbuf(hammer_volume_t vol, const void *data,
64                         int64_t offset);
65 #endif
66 static int64_t calculate_cluster_offset(hammer_volume_t vol, int32_t clu_no);
67 static int64_t calculate_supercl_offset(hammer_volume_t vol, int32_t scl_no);
68 static int32_t hammer_alloc_master(hammer_cluster_t cluster, int nblks,
69                         int32_t start, int isfwd);
70 static void hammer_adjust_stats(hammer_cluster_t cluster,
71                         u_int64_t buf_type, int nblks);
72
73 struct hammer_alist_config Buf_alist_config;
74 struct hammer_alist_config Vol_normal_alist_config;
75 struct hammer_alist_config Vol_super_alist_config;
76 struct hammer_alist_config Supercl_alist_config;
77 struct hammer_alist_config Clu_master_alist_config;
78 struct hammer_alist_config Clu_slave_alist_config;
79
80 /*
81  * Red-Black tree support for various structures
82  */
83 static int
84 hammer_ino_rb_compare(hammer_inode_t ip1, hammer_inode_t ip2)
85 {
86         if (ip1->obj_id < ip2->obj_id)
87                 return(-1);
88         if (ip1->obj_id > ip2->obj_id)
89                 return(1);
90         if (ip1->obj_asof < ip2->obj_asof)
91                 return(-1);
92         if (ip1->obj_asof > ip2->obj_asof)
93                 return(1);
94         return(0);
95 }
96
97 static int
98 hammer_inode_info_cmp(hammer_inode_info_t info, hammer_inode_t ip)
99 {
100         if (info->obj_id < ip->obj_id)
101                 return(-1);
102         if (info->obj_id > ip->obj_id)
103                 return(1);
104         if (info->obj_asof < ip->obj_asof)
105                 return(-1);
106         if (info->obj_asof > ip->obj_asof)
107                 return(1);
108         return(0);
109 }
110
111 static int
112 hammer_vol_rb_compare(hammer_volume_t vol1, hammer_volume_t vol2)
113 {
114         if (vol1->vol_no < vol2->vol_no)
115                 return(-1);
116         if (vol1->vol_no > vol2->vol_no)
117                 return(1);
118         return(0);
119 }
120
121 static int
122 hammer_scl_rb_compare(hammer_supercl_t cl1, hammer_supercl_t cl2)
123 {
124         if (cl1->scl_no < cl2->scl_no)
125                 return(-1);
126         if (cl1->scl_no > cl2->scl_no)
127                 return(1);
128         return(0);
129 }
130
131 static int
132 hammer_clu_rb_compare(hammer_cluster_t cl1, hammer_cluster_t cl2)
133 {
134         if (cl1->clu_no < cl2->clu_no)
135                 return(-1);
136         if (cl1->clu_no > cl2->clu_no)
137                 return(1);
138         return(0);
139 }
140
141 static int
142 hammer_buf_rb_compare(hammer_buffer_t buf1, hammer_buffer_t buf2)
143 {
144         if (buf1->buf_no < buf2->buf_no)
145                 return(-1);
146         if (buf1->buf_no > buf2->buf_no)
147                 return(1);
148         return(0);
149 }
150
151 static int
152 hammer_nod_rb_compare(hammer_node_t node1, hammer_node_t node2)
153 {
154         if (node1->node_offset < node2->node_offset)
155                 return(-1);
156         if (node1->node_offset > node2->node_offset)
157                 return(1);
158         return(0);
159 }
160
161 /*
162  * Note: The lookup function for hammer_ino_rb_tree winds up being named
163  * hammer_ino_rb_tree_RB_LOOKUP_INFO(root, info).  The other lookup
164  * functions are normal, e.g. hammer_clu_rb_tree_RB_LOOKUP(root, clu_no).
165  */
166 RB_GENERATE(hammer_ino_rb_tree, hammer_inode, rb_node, hammer_ino_rb_compare);
167 RB_GENERATE_XLOOKUP(hammer_ino_rb_tree, INFO, hammer_inode, rb_node,
168                 hammer_inode_info_cmp, hammer_inode_info_t);
169 RB_GENERATE2(hammer_vol_rb_tree, hammer_volume, rb_node,
170              hammer_vol_rb_compare, int32_t, vol_no);
171 RB_GENERATE2(hammer_scl_rb_tree, hammer_supercl, rb_node,
172              hammer_scl_rb_compare, int32_t, scl_no);
173 RB_GENERATE2(hammer_clu_rb_tree, hammer_cluster, rb_node,
174              hammer_clu_rb_compare, int32_t, clu_no);
175 RB_GENERATE2(hammer_buf_rb_tree, hammer_buffer, rb_node,
176              hammer_buf_rb_compare, int32_t, buf_no);
177 RB_GENERATE2(hammer_nod_rb_tree, hammer_node, rb_node,
178              hammer_nod_rb_compare, int32_t, node_offset);
179
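/*
 * Illustrative usage (a minimal sketch based on lookups performed later in
 * this file): the generated per-key lookup functions take the tree root and
 * the key, e.g.:
 *
 *	volume  = RB_LOOKUP(hammer_vol_rb_tree, &hmp->rb_vols_root, vol_no);
 *	cluster = RB_LOOKUP(hammer_clu_rb_tree, &volume->rb_clus_root, clu_no);
 */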
180 /************************************************************************
181  *                              VOLUMES                                 *
182  ************************************************************************
183  *
184  * Load a HAMMER volume by name.  Returns 0 on success or a positive error
185  * code on failure.  Volumes must be loaded at mount time; get_volume() will
186  * not load a new volume.
187  *
188  * Calls made to hammer_load_volume() are single-threaded.
189  */
190 int
191 hammer_install_volume(struct hammer_mount *hmp, const char *volname)
192 {
193         struct mount *mp;
194         hammer_volume_t volume;
195         struct hammer_volume_ondisk *ondisk;
196         struct nlookupdata nd;
197         struct buf *bp = NULL;
198         int error;
199         int ronly;
200
201         mp = hmp->mp;
202         ronly = ((mp->mnt_flag & MNT_RDONLY) ? 1 : 0);
203
204         /*
205          * Allocate a volume structure
206          */
207         ++hammer_count_volumes;
208         volume = kmalloc(sizeof(*volume), M_HAMMER, M_WAITOK|M_ZERO);
209         volume->vol_name = kstrdup(volname, M_HAMMER);
210         volume->hmp = hmp;
211         hammer_io_init(&volume->io, HAMMER_STRUCTURE_VOLUME);
212         volume->io.offset = 0LL;
213
214         /*
215          * Get the device vnode
216          */
217         error = nlookup_init(&nd, volume->vol_name, UIO_SYSSPACE, NLC_FOLLOW);
218         if (error == 0)
219                 error = nlookup(&nd);
220         if (error == 0)
221                 error = cache_vref(&nd.nl_nch, nd.nl_cred, &volume->devvp);
222         nlookup_done(&nd);
223         if (error == 0) {
224                 vn_isdisk(volume->devvp, &error);
225         }
226         if (error == 0) {
227                 vn_lock(volume->devvp, LK_EXCLUSIVE | LK_RETRY);
228                 error = VOP_OPEN(volume->devvp, (ronly ? FREAD : FREAD|FWRITE),
229                                  FSCRED, NULL);
230                 vn_unlock(volume->devvp);
231         }
232         if (error) {
233                 hammer_free_volume(volume);
234                 return(error);
235         }
236
237         /*
238          * Extract the volume number from the volume header and do various
239          * sanity checks.
240          */
241         error = bread(volume->devvp, 0LL, HAMMER_BUFSIZE, &bp);
242         if (error)
243                 goto late_failure;
244         ondisk = (void *)bp->b_data;
245         if (ondisk->head.buf_type != HAMMER_FSBUF_VOLUME) {
246                 kprintf("hammer_mount: volume %s has an invalid header\n",
247                         volume->vol_name);
248                 error = EFTYPE;
249                 goto late_failure;
250         }
251         volume->vol_no = ondisk->vol_no;
252         volume->cluster_base = ondisk->vol_clo_beg;
253         volume->vol_clsize = ondisk->vol_clsize;
254         volume->vol_flags = ondisk->vol_flags;
255         volume->nblocks = ondisk->vol_nblocks; 
256         RB_INIT(&volume->rb_clus_root);
257         RB_INIT(&volume->rb_scls_root);
258
259         hmp->mp->mnt_stat.f_blocks += volume->nblocks;
260
261         if (RB_EMPTY(&hmp->rb_vols_root)) {
262                 hmp->fsid = ondisk->vol_fsid;
263         } else if (bcmp(&hmp->fsid, &ondisk->vol_fsid, sizeof(uuid_t))) {
264                 kprintf("hammer_mount: volume %s's fsid does not match "
265                         "other volumes\n", volume->vol_name);
266                 error = EFTYPE;
267                 goto late_failure;
268         }
269
270         /*
271          * Insert the volume structure into the red-black tree.
272          */
273         if (RB_INSERT(hammer_vol_rb_tree, &hmp->rb_vols_root, volume)) {
274                 kprintf("hammer_mount: volume %s has a duplicate vol_no %d\n",
275                         volume->vol_name, volume->vol_no);
276                 error = EEXIST;
277         }
278
279         /*
280          * Set the root volume and load the root cluster.  HAMMER special
281          * cases rootvol and rootcl and will not deallocate the structures.
282          * We do not hold a ref because this would prevent related I/O
283          * from being flushed.
284          */
285         if (error == 0 && ondisk->vol_rootvol == ondisk->vol_no) {
286                 hmp->rootvol = volume;
287                 hmp->rootcl = hammer_get_cluster(volume,
288                                                  ondisk->vol0_root_clu_no,
289                                                  &error, 0);
290                 hammer_rel_cluster(hmp->rootcl, 0);
291                 hmp->fsid_udev = dev2udev(vn_todev(volume->devvp));
292         }
293 late_failure:
294         if (bp)
295                 brelse(bp);
296         if (error) {
297                 /*vinvalbuf(volume->devvp, V_SAVE, 0, 0);*/
298                 VOP_CLOSE(volume->devvp, ronly ? FREAD : FREAD|FWRITE);
299                 hammer_free_volume(volume);
300         }
301         return (error);
302 }
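/*
 * Illustrative only: a mount would typically call hammer_install_volume()
 * once for each volume named in its mount arguments, stopping at the first
 * error.  The array and count below are hypothetical.
 *
 *	for (i = 0; error == 0 && i < nvolumes; ++i)
 *		error = hammer_install_volume(hmp, volnames[i]);
 */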
303
304 /*
305  * Unload and free a HAMMER volume.  Must return >= 0 for the scan to
306  * continue, so -1 is returned on failure.
307  */
308 int
309 hammer_unload_volume(hammer_volume_t volume, void *data __unused)
310 {
311         struct hammer_mount *hmp = volume->hmp;
312         hammer_cluster_t rootcl;
313         int ronly = ((hmp->mp->mnt_flag & MNT_RDONLY) ? 1 : 0);
314
315         /*
316          * Sync clusters, sync volume
317          */
318
319         hmp->mp->mnt_stat.f_blocks -= volume->nblocks;
320
321         /*
322          * Clean up the root cluster, which is held unlocked in the root
323          * volume.
324          */
325         if (hmp->rootvol == volume) {
326                 if ((rootcl = hmp->rootcl) != NULL)
327                         hmp->rootcl = NULL;
328                 hmp->rootvol = NULL;
329         }
330
331         /*
332          * Unload clusters and super-clusters.  Unloading a super-cluster
333          * also unloads related clusters, but the filesystem may not be
334          * using super-clusters so unload clusters anyway.
335          */
336         RB_SCAN(hammer_clu_rb_tree, &volume->rb_clus_root, NULL,
337                         hammer_unload_cluster, NULL);
338         RB_SCAN(hammer_scl_rb_tree, &volume->rb_scls_root, NULL,
339                         hammer_unload_supercl, NULL);
340         hammer_io_waitdep(&volume->io);
341
342         /*
343          * Release our buffer and flush anything left in the buffer cache.
344          */
345         hammer_io_release(&volume->io, 2);
346
347         /*
348          * There should be no references on the volume, no clusters, and
349          * no super-clusters.
350          */
351         KKASSERT(volume->io.lock.refs == 0);
352         KKASSERT(RB_EMPTY(&volume->rb_clus_root));
353         KKASSERT(RB_EMPTY(&volume->rb_scls_root));
354
355         volume->ondisk = NULL;
356         if (volume->devvp) {
357                 if (ronly) {
358                         vinvalbuf(volume->devvp, 0, 0, 0);
359                         VOP_CLOSE(volume->devvp, FREAD);
360                 } else {
361                         vinvalbuf(volume->devvp, V_SAVE, 0, 0);
362                         VOP_CLOSE(volume->devvp, FREAD|FWRITE);
363                 }
364         }
365
366         /*
367          * Destroy the structure
368          */
369         RB_REMOVE(hammer_vol_rb_tree, &hmp->rb_vols_root, volume);
370         hammer_free_volume(volume);
371         return(0);
372 }
373
374 static
375 void
376 hammer_free_volume(hammer_volume_t volume)
377 {
378         if (volume->vol_name) {
379                 kfree(volume->vol_name, M_HAMMER);
380                 volume->vol_name = NULL;
381         }
382         if (volume->devvp) {
383                 vrele(volume->devvp);
384                 volume->devvp = NULL;
385         }
386         --hammer_count_volumes;
387         kfree(volume, M_HAMMER);
388 }
389
390 /*
391  * Get a HAMMER volume.  The volume must already exist.
392  */
393 hammer_volume_t
394 hammer_get_volume(struct hammer_mount *hmp, int32_t vol_no, int *errorp)
395 {
396         struct hammer_volume *volume;
397
398         /*
399          * Locate the volume structure
400          */
401         volume = RB_LOOKUP(hammer_vol_rb_tree, &hmp->rb_vols_root, vol_no);
402         if (volume == NULL) {
403                 *errorp = ENOENT;
404                 return(NULL);
405         }
406         hammer_ref(&volume->io.lock);
407
408         /*
409          * Deal with on-disk info
410          */
411         if (volume->ondisk == NULL) {
412                 *errorp = hammer_load_volume(volume);
413                 if (*errorp) {
414                         hammer_rel_volume(volume, 1);
415                         volume = NULL;
416                 }
417         } else {
418                 *errorp = 0;
419         }
420         return(volume);
421 }
422
423 int
424 hammer_ref_volume(hammer_volume_t volume)
425 {
426         int error;
427
428         hammer_ref(&volume->io.lock);
429
430         /*
431          * Deal with on-disk info
432          */
433         if (volume->ondisk == NULL) {
434                 error = hammer_load_volume(volume);
435                 if (error)
436                         hammer_rel_volume(volume, 1);
437         } else {
438                 error = 0;
439         }
440         return (error);
441 }
442
443 hammer_volume_t
444 hammer_get_root_volume(struct hammer_mount *hmp, int *errorp)
445 {
446         hammer_volume_t volume;
447
448         volume = hmp->rootvol;
449         KKASSERT(volume != NULL);
450         hammer_ref(&volume->io.lock);
451
452         /*
453          * Deal with on-disk info
454          */
455         if (volume->ondisk == NULL) {
456                 *errorp = hammer_load_volume(volume);
457                 if (*errorp) {
458                         hammer_rel_volume(volume, 1);
459                         volume = NULL;
460                 }
461         } else {
462                 *errorp = 0;
463         }
464         return (volume);
465 }
466
467 /*
468  * Load a volume's on-disk information.  The volume must be referenced and
469  * not locked.  We temporarily acquire an exclusive lock to interlock
470  * against releases or multiple gets.
471  */
472 static int
473 hammer_load_volume(hammer_volume_t volume)
474 {
475         struct hammer_volume_ondisk *ondisk;
476         int error;
477
478         hammer_lock_ex(&volume->io.lock);
479         if (volume->ondisk == NULL) {
480                 error = hammer_io_read(volume->devvp, &volume->io);
481                 if (error) {
482                         hammer_unlock(&volume->io.lock);
483                         return (error);
484                 }
485                 volume->ondisk = ondisk = (void *)volume->io.bp->b_data;
486
487                 /*
488                  * Configure the volume's A-lists.  These are used to
489                  * allocate clusters.
490                  */
491                 if (volume->vol_flags & HAMMER_VOLF_USINGSUPERCL) {
492                         volume->alist.config = &Vol_super_alist_config;
493                         volume->alist.meta = ondisk->vol_almeta.super;
494                         volume->alist.info = volume;
495                 } else {
496                         volume->alist.config = &Vol_normal_alist_config;
497                         volume->alist.meta = ondisk->vol_almeta.normal;
498                         volume->alist.info = NULL;
499                 }
500         } else {
501                 error = 0;
502         }
503         hammer_unlock(&volume->io.lock);
504         return(0);
505 }
506
507 /*
508  * Release a volume.  Call hammer_io_release on the last reference.  We have
509  * to acquire an exclusive lock to interlock against volume->ondisk tests
510  * in hammer_load_volume(), and hammer_io_release() also expects an exclusive
511  * lock to be held.
512  *
513  * Volumes are not unloaded from memory during normal operation.
514  */
515 void
516 hammer_rel_volume(hammer_volume_t volume, int flush)
517 {
518         if (volume->io.lock.refs == 1) {
519                 hammer_lock_ex(&volume->io.lock);
520                 if (volume->io.lock.refs == 1) {
521                         volume->ondisk = NULL;
522                         hammer_io_release(&volume->io, flush);
523                 } else if (flush) {
524                         hammer_io_flush(&volume->io);
525                 }
526                 hammer_unlock(&volume->io.lock);
527         }
528         hammer_unref(&volume->io.lock);
529 }
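/*
 * Typical reference pattern for the volume routines above (sketch only):
 * look the volume up by number, use it while referenced, then release it.
 * A flush argument of 0 lets any dirty buffer be flushed passively.
 *
 *	volume = hammer_get_volume(hmp, vol_no, &error);
 *	if (volume) {
 *		... examine volume->ondisk ...
 *		hammer_rel_volume(volume, 0);
 *	}
 */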
530
531 /************************************************************************
532  *                              SUPER-CLUSTERS                          *
533  ************************************************************************
534  *
535  * Manage super-clusters.  Note that a supercl holds a reference to its
536  * associated volume.
537  */
538 static int
539 hammer_find_supercl(hammer_volume_t volume, int32_t scl_no)
540 {
541         if (RB_LOOKUP(hammer_scl_rb_tree, &volume->rb_scls_root, scl_no))
542                 return(1);
543         return(0);
544 }
545
546 hammer_supercl_t
547 hammer_get_supercl(hammer_volume_t volume, int32_t scl_no,
548                    int *errorp, hammer_alloc_state_t isnew)
549 {
550         hammer_supercl_t supercl;
551
552         /*
553          * Locate and lock the super-cluster structure, creating one
554          * if necessary.
555          */
556 again:
557         supercl = RB_LOOKUP(hammer_scl_rb_tree, &volume->rb_scls_root, scl_no);
558         if (supercl == NULL) {
559                 ++hammer_count_supercls;
560                 supercl = kmalloc(sizeof(*supercl), M_HAMMER, M_WAITOK|M_ZERO);
561                 supercl->scl_no = scl_no;
562                 supercl->volume = volume;
563                 supercl->io.offset = calculate_supercl_offset(volume, scl_no);
564                 hammer_io_init(&supercl->io, HAMMER_STRUCTURE_SUPERCL);
565                 hammer_ref(&supercl->io.lock);
566
567                 /*
568                  * Insert the super-cluster into the RB tree and handle late
569                  * collisions.
570                  */
571                 if (RB_INSERT(hammer_scl_rb_tree, &volume->rb_scls_root, supercl)) {
572                         hammer_unref(&supercl->io.lock);
573                         --hammer_count_supercls;
574                         kfree(supercl, M_HAMMER);
575                         goto again;
576                 }
577                 hammer_ref(&volume->io.lock);
578         } else {
579                 hammer_ref(&supercl->io.lock);
580         }
581
582         /*
583          * Deal with on-disk info
584          */
585         if (supercl->ondisk == NULL || isnew) {
586                 *errorp = hammer_load_supercl(supercl, isnew);
587                 if (*errorp) {
588                         hammer_rel_supercl(supercl, 1);
589                         supercl = NULL;
590                 }
591         } else {
592                 *errorp = 0;
593         }
594         return(supercl);
595 }
596
597 static int
598 hammer_load_supercl(hammer_supercl_t supercl, hammer_alloc_state_t isnew)
599 {
600         struct hammer_supercl_ondisk *ondisk;
601         hammer_volume_t volume = supercl->volume;
602         int error;
603         int64_t nclusters;
604
605         hammer_lock_ex(&supercl->io.lock);
606         if (supercl->ondisk == NULL) {
607                 if (isnew)
608                         error = hammer_io_new(volume->devvp, &supercl->io);
609                 else
610                         error = hammer_io_read(volume->devvp, &supercl->io);
611                 if (error) {
612                         hammer_unlock(&supercl->io.lock);
613                         return (error);
614                 }
615                 supercl->ondisk = ondisk = (void *)supercl->io.bp->b_data;
616
617                 supercl->alist.config = &Supercl_alist_config;
618                 supercl->alist.meta = ondisk->scl_meta;
619                 supercl->alist.info = NULL;
620         } else if (isnew) {
621                 error = hammer_io_new(volume->devvp, &supercl->io);
622         } else {
623                 error = 0;
624         }
625         if (error == 0 && isnew) {
626                 /*
627                  * If this is a new super-cluster we have to initialize
628                  * various ondisk structural elements.  The caller is
629                  * responsible for the remainder.
630                  */
631                 struct hammer_alist_live dummy;
632
633                 hammer_modify_supercl(supercl);
634
635                 ondisk = supercl->ondisk;
636                 dummy.config = &Buf_alist_config;
637                 dummy.meta = ondisk->head.buf_almeta;
638                 dummy.info = NULL;
639                 hammer_initbuffer(&dummy, &ondisk->head, HAMMER_FSBUF_SUPERCL);
640
641                 nclusters = volume->ondisk->vol_nclusters -
642                             ((int64_t)supercl->scl_no * HAMMER_SCL_MAXCLUSTERS);
643                 KKASSERT(nclusters > 0);
644                 if (nclusters > HAMMER_SCL_MAXCLUSTERS)
645                         nclusters = HAMMER_SCL_MAXCLUSTERS;
646                 hammer_alist_init(&supercl->alist, 0, (int32_t)nclusters,
647                                   isnew);
648         }
649         hammer_unlock(&supercl->io.lock);
650         return (error);
651 }
652
653 /*
654  * NOTE: Called from RB_SCAN, must return >= 0 for scan to continue.
655  */
656 int
657 hammer_unload_supercl(hammer_supercl_t supercl, void *data __unused)
658 {
659         KKASSERT(supercl->io.lock.refs == 0);
660         hammer_ref(&supercl->io.lock);
661         hammer_rel_supercl(supercl, 2);
662         return(0);
663 }
664
665 /*
666  * Release a super-cluster.  We have to deal with several places where
667  * another thread can ref the super-cluster.
668  *
669  * Only destroy the structure itself if the related buffer cache buffer
670  * was disassociated from it.  This ties the management of the structure
671  * to the buffer cache subsystem.
672  */
673 void
674 hammer_rel_supercl(hammer_supercl_t supercl, int flush)
675 {
676         hammer_volume_t volume;
677
678         if (supercl->io.lock.refs == 1) {
679                 hammer_lock_ex(&supercl->io.lock);
680                 if (supercl->io.lock.refs == 1) {
681                         hammer_io_release(&supercl->io, flush);
682                         if (supercl->io.bp == NULL &&
683                             supercl->io.lock.refs == 1) {
684                                 volume = supercl->volume;
685                                 RB_REMOVE(hammer_scl_rb_tree,
686                                           &volume->rb_scls_root, supercl);
687                                 supercl->volume = NULL; /* sanity */
688                                 --hammer_count_supercls;
689                                 kfree(supercl, M_HAMMER);
690                                 hammer_rel_volume(volume, 0);
691                                 return;
692                         }
693                 } else if (flush) {
694                         hammer_io_flush(&supercl->io);
695                 }
696                 hammer_unlock(&supercl->io.lock);
697         }
698         hammer_unref(&supercl->io.lock);
699 }
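/*
 * Illustrative sketch: passing isnew == 0 reads an existing super-cluster
 * from disk, while a non-zero hammer_alloc_state_t (e.g. HAMMER_ASTATE_FREE)
 * creates and initializes a fresh one via hammer_io_new().
 *
 *	supercl = hammer_get_supercl(volume, scl_no, &error, 0);
 *	if (supercl)
 *		hammer_rel_supercl(supercl, 0);
 */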
700
701 /************************************************************************
702  *                              CLUSTERS                                *
703  ************************************************************************
704  *
705  */
706 hammer_cluster_t
707 hammer_get_cluster(hammer_volume_t volume, int32_t clu_no,
708                    int *errorp, hammer_alloc_state_t isnew)
709 {
710         hammer_cluster_t cluster;
711
712 again:
713         cluster = RB_LOOKUP(hammer_clu_rb_tree, &volume->rb_clus_root, clu_no);
714         if (cluster == NULL) {
715                 ++hammer_count_clusters;
716                 cluster = kmalloc(sizeof(*cluster), M_HAMMER, M_WAITOK|M_ZERO);
717                 cluster->clu_no = clu_no;
718                 cluster->volume = volume;
719                 RB_INIT(&cluster->rb_bufs_root);
720                 RB_INIT(&cluster->rb_nods_root);
721                 hammer_io_init(&cluster->io, HAMMER_STRUCTURE_CLUSTER);
722                 cluster->io.offset = calculate_cluster_offset(volume, clu_no);
723                 hammer_ref(&cluster->io.lock);
724
725                 /*
726                  * Insert the cluster into the RB tree and handle late
727                  * collisions.
728                  */
729                 if (RB_INSERT(hammer_clu_rb_tree, &volume->rb_clus_root, cluster)) {
730                         hammer_unref(&cluster->io.lock);
731                         --hammer_count_clusters;
732                         kfree(cluster, M_HAMMER);
733                         goto again;
734                 }
735                 hammer_ref(&volume->io.lock);
736         } else {
737                 hammer_ref(&cluster->io.lock);
738         }
739
740         /*
741          * Deal with on-disk info
742          */
743         if (cluster->ondisk == NULL || isnew) {
744                 *errorp = hammer_load_cluster(cluster, isnew);
745                 if (*errorp) {
746                         hammer_rel_cluster(cluster, 1);
747                         cluster = NULL;
748                 }
749         } else {
750                 *errorp = 0;
751         }
752         return (cluster);
753 }
754
755 hammer_cluster_t
756 hammer_get_root_cluster(struct hammer_mount *hmp, int *errorp)
757 {
758         hammer_cluster_t cluster;
759
760         cluster = hmp->rootcl;
761         KKASSERT(cluster != NULL);
762         hammer_ref(&cluster->io.lock);
763
764         /*
765          * Deal with on-disk info
766          */
767         if (cluster->ondisk == NULL) {
768                 *errorp = hammer_load_cluster(cluster, 0);
769                 if (*errorp) {
770                         hammer_rel_cluster(cluster, 1);
771                         cluster = NULL;
772                 }
773         } else {
774                 *errorp = 0;
775         }
776         return (cluster);
777 }
778
779 static
780 int
781 hammer_load_cluster(hammer_cluster_t cluster, hammer_alloc_state_t isnew)
782 {
783         hammer_volume_t volume = cluster->volume;
784         struct hammer_cluster_ondisk *ondisk;
785         int error;
786
787         /*
788          * Load the cluster's on-disk info
789          */
790         hammer_lock_ex(&cluster->io.lock);
791         if (cluster->ondisk == NULL) {
792                 if (isnew)
793                         error = hammer_io_new(volume->devvp, &cluster->io);
794                 else
795                         error = hammer_io_read(volume->devvp, &cluster->io);
796                 if (error) {
797                         hammer_unlock(&cluster->io.lock);
798                         return (error);
799                 }
800                 cluster->ondisk = ondisk = (void *)cluster->io.bp->b_data;
801
802                 cluster->alist_master.config = &Clu_master_alist_config;
803                 cluster->alist_master.meta = ondisk->clu_master_meta;
804                 cluster->alist_btree.config = &Clu_slave_alist_config;
805                 cluster->alist_btree.meta = ondisk->clu_btree_meta;
806                 cluster->alist_btree.info = cluster;
807                 cluster->alist_record.config = &Clu_slave_alist_config;
808                 cluster->alist_record.meta = ondisk->clu_record_meta;
809                 cluster->alist_record.info = cluster;
810                 cluster->alist_mdata.config = &Clu_slave_alist_config;
811                 cluster->alist_mdata.meta = ondisk->clu_mdata_meta;
812                 cluster->alist_mdata.info = cluster;
813
814                 if (isnew == 0) {
815                         /*
816                          * Recover a cluster that was marked open.  This
817                          * can be rather involved and block for a hefty
818                          * chunk of time.
819                          */
820                         if (ondisk->clu_flags & HAMMER_CLUF_OPEN)
821                                 hammer_recover(cluster);
822
823                         cluster->clu_btree_beg = ondisk->clu_btree_beg;
824                         cluster->clu_btree_end = ondisk->clu_btree_end;
825                 }
826         } else if (isnew) {
827                 error = hammer_io_new(volume->devvp, &cluster->io);
828         } else {
829                 error = 0;
830         }
831         if (error == 0 && isnew) {
832                 /*
833                  * If this is a new cluster we have to initialize
834                  * various ondisk structural elements.  The caller is
835                  * responsible for the remainder.
836                  */
837                 struct hammer_alist_live dummy;
838                 hammer_node_t croot;
839                 hammer_volume_ondisk_t voldisk;
840                 int32_t nbuffers;
841
842                 hammer_modify_cluster(cluster);
843                 ondisk = cluster->ondisk;
844                 voldisk = volume->ondisk;
845
846                 dummy.config = &Buf_alist_config;
847                 dummy.meta = ondisk->head.buf_almeta;
848                 dummy.info = NULL;
849                 hammer_initbuffer(&dummy, &ondisk->head, HAMMER_FSBUF_CLUSTER);
850
851                 ondisk->vol_fsid = voldisk->vol_fsid;
852                 ondisk->vol_fstype = voldisk->vol_fstype;
853                 ondisk->clu_gen = 1;
854                 ondisk->clu_id = 0;     /* XXX */
855                 ondisk->clu_no = cluster->clu_no;
856                 ondisk->clu_flags = 0;
857                 ondisk->clu_start = HAMMER_BUFSIZE;
858                 KKASSERT(voldisk->vol_clo_end > cluster->io.offset);
859                 if (voldisk->vol_clo_end - cluster->io.offset >
860                     voldisk->vol_clsize) {
861                         ondisk->clu_limit = voldisk->vol_clsize;
862                 } else {
863                         ondisk->clu_limit = (int32_t)(voldisk->vol_clo_end -
864                                                       cluster->io.offset);
865                 }
866                 nbuffers = ondisk->clu_limit / HAMMER_BUFSIZE;
867                 KKASSERT(isnew == HAMMER_ASTATE_FREE);
868                 hammer_alist_init(&cluster->alist_master, 1, nbuffers - 1,
869                                   HAMMER_ASTATE_FREE);
870                 hammer_alist_init(&cluster->alist_btree,
871                                   HAMMER_FSBUF_MAXBLKS,
872                                   (nbuffers - 1) * HAMMER_FSBUF_MAXBLKS,
873                                   HAMMER_ASTATE_ALLOC);
874                 hammer_alist_init(&cluster->alist_record,
875                                   HAMMER_FSBUF_MAXBLKS,
876                                   (nbuffers - 1) * HAMMER_FSBUF_MAXBLKS,
877                                   HAMMER_ASTATE_ALLOC);
878                 hammer_alist_init(&cluster->alist_mdata,
879                                   HAMMER_FSBUF_MAXBLKS,
880                                   (nbuffers - 1) * HAMMER_FSBUF_MAXBLKS,
881                                   HAMMER_ASTATE_ALLOC);
882
883                 ondisk->idx_data = 1 * HAMMER_FSBUF_MAXBLKS;
884                 ondisk->idx_index = 0 * HAMMER_FSBUF_MAXBLKS;
885                 ondisk->idx_record = nbuffers * HAMMER_FSBUF_MAXBLKS;
886
887                 /*
888                  * Initialize the B-Tree.  We don't know what the caller
889                  * intends to do with the cluster so make sure it causes
890                  * an assertion if the caller makes no changes.
891                  */
892                 ondisk->clu_btree_parent_vol_no = -2;
893                 ondisk->clu_btree_parent_clu_no = -2;
894                 ondisk->clu_btree_parent_offset = -2;
895                 ondisk->clu_btree_parent_clu_gen = -2;
896
897                 croot = hammer_alloc_btree(cluster, &error);
898                 if (error == 0) {
899                         hammer_modify_node(croot);
900                         bzero(croot->ondisk, sizeof(*croot->ondisk));
901                         croot->ondisk->count = 0;
902                         croot->ondisk->type = HAMMER_BTREE_TYPE_LEAF;
903                         hammer_modify_cluster(cluster);
904                         ondisk->clu_btree_root = croot->node_offset;
905                         hammer_rel_node(croot);
906                 }
907         }
908         hammer_unlock(&cluster->io.lock);
909         return (error);
910 }
911
912 /*
913  * NOTE: Called from RB_SCAN, must return >= 0 for scan to continue.
914  */
915 int
916 hammer_unload_cluster(hammer_cluster_t cluster, void *data __unused)
917 {
918         hammer_ref(&cluster->io.lock);
919         RB_SCAN(hammer_buf_rb_tree, &cluster->rb_bufs_root, NULL,
920                 hammer_unload_buffer, NULL);
921         hammer_io_waitdep(&cluster->io);
922         KKASSERT(cluster->io.lock.refs == 1);
923         hammer_rel_cluster(cluster, 2);
924         return(0);
925 }
926
927 /*
928  * Update the cluster's synchronization TID, which is used during cluster
929  * recovery.  NOTE: The cluster header is not written out until all related
930  * records have been written out.
931  */
932 void
933 hammer_update_syncid(hammer_cluster_t cluster, hammer_tid_t tid)
934 {
935         hammer_modify_cluster(cluster);
936         if (cluster->ondisk->synchronized_tid < tid)
937                 cluster->ondisk->synchronized_tid = tid;
938 }
939
940 /*
941  * Reference a cluster that is either already referenced or reachable via
942  * a specially handled pointer (aka rootcl).
943  */
944 int
945 hammer_ref_cluster(hammer_cluster_t cluster)
946 {
947         int error;
948
949         KKASSERT(cluster != NULL);
950         hammer_ref(&cluster->io.lock);
951
952         /*
953          * Deal with on-disk info
954          */
955         if (cluster->ondisk == NULL) {
956                 error = hammer_load_cluster(cluster, 0);
957                 if (error)
958                         hammer_rel_cluster(cluster, 1);
959         } else {
960                 error = 0;
961         }
962         return(error);
963 }
964
965 /*
966  * Release a cluster.  We have to deal with several places where
967  * another thread can ref the cluster.
968  *
969  * Only destroy the structure itself if we no longer have an IO or any
970  * hammer buffers associated with the structure.
971  */
972 void
973 hammer_rel_cluster(hammer_cluster_t cluster, int flush)
974 {
975         hammer_volume_t volume;
976
977         if (cluster->io.lock.refs == 1) {
978                 hammer_lock_ex(&cluster->io.lock);
979                 if (cluster->io.lock.refs == 1) {
980                         /*
981                          * Release the I/O.  If we or the kernel wants to
982                          * flush, this will release the bp.  Otherwise the
983                          * bp may be written and flushed passively by the
984                          * kernel later on.
985                          */
986                         hammer_io_release(&cluster->io, flush);
987
988                         /*
989                          * Final cleanup
990                          */
991                         if (cluster != cluster->volume->hmp->rootcl &&
992                             cluster->io.bp == NULL &&
993                             cluster->io.lock.refs == 1 &&
994                             RB_EMPTY(&cluster->rb_bufs_root)) {
995                                 KKASSERT(RB_EMPTY(&cluster->rb_nods_root));
996                                 volume = cluster->volume;
997                                 RB_REMOVE(hammer_clu_rb_tree,
998                                           &volume->rb_clus_root, cluster);
999                                 cluster->volume = NULL; /* sanity */
1000                                 --hammer_count_clusters;
1001                                 kfree(cluster, M_HAMMER);
1002                                 hammer_rel_volume(volume, 0);
1003                                 return;
1004                         }
1005                 } else if (flush) {
1006                         hammer_io_flush(&cluster->io);
1007                 }
1008                 hammer_unlock(&cluster->io.lock);
1009         }
1010         hammer_unref(&cluster->io.lock);
1011 }
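/*
 * Typical usage (sketch): this mirrors how the root cluster is acquired in
 * hammer_install_volume() above -- get the cluster by number, use the
 * in-memory structure, then release it.
 *
 *	cluster = hammer_get_cluster(volume, clu_no, &error, 0);
 *	if (cluster) {
 *		... access cluster->ondisk ...
 *		hammer_rel_cluster(cluster, 0);
 *	}
 */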
1012
1013 /************************************************************************
1014  *                              BUFFERS                                 *
1015  ************************************************************************
1016  *
1017  * Manage buffers.  Note that a buffer holds a reference to its associated
1018  * cluster, and its cluster will hold a reference to the cluster's volume.
1019  *
1020  * A non-zero buf_type indicates that a new buffer should be created and
1021  * zero'd.
1022  */
1023 hammer_buffer_t
1024 hammer_get_buffer(hammer_cluster_t cluster, int32_t buf_no,
1025                   u_int64_t buf_type, int *errorp)
1026 {
1027         hammer_buffer_t buffer;
1028
1029         /*
1030          * Find the buffer.  Note that buffer 0 corresponds to the cluster
1031          * header and should never be requested.
1032          */
1033         KKASSERT(buf_no >= cluster->ondisk->clu_start / HAMMER_BUFSIZE &&
1034                  buf_no < cluster->ondisk->clu_limit / HAMMER_BUFSIZE);
1035
1036         /*
1037          * Locate and lock the buffer structure, creating one if necessary.
1038          */
1039 again:
1040         buffer = RB_LOOKUP(hammer_buf_rb_tree, &cluster->rb_bufs_root, buf_no);
1041         if (buffer == NULL) {
1042                 ++hammer_count_buffers;
1043                 buffer = kmalloc(sizeof(*buffer), M_HAMMER, M_WAITOK|M_ZERO);
1044                 buffer->buf_no = buf_no;
1045                 buffer->cluster = cluster;
1046                 buffer->volume = cluster->volume;
1047                 hammer_io_init(&buffer->io, HAMMER_STRUCTURE_BUFFER);
1048                 buffer->io.offset = cluster->io.offset +
1049                                     (buf_no * HAMMER_BUFSIZE);
1050                 TAILQ_INIT(&buffer->clist);
1051                 hammer_ref(&buffer->io.lock);
1052
1053                 /*
1054                  * Insert the buffer into the RB tree and handle late
1055                  * collisions.
1056                  */
1057                 if (RB_INSERT(hammer_buf_rb_tree, &cluster->rb_bufs_root, buffer)) {
1058                         hammer_unref(&buffer->io.lock);
1059                         --hammer_count_buffers;
1060                         kfree(buffer, M_HAMMER);
1061                         goto again;
1062                 }
1063                 hammer_ref(&cluster->io.lock);
1064         } else {
1065                 hammer_ref(&buffer->io.lock);
1066         }
1067
1068         /*
1069          * Deal with on-disk info
1070          */
1071         if (buffer->ondisk == NULL || buf_type) {
1072                 *errorp = hammer_load_buffer(buffer, buf_type);
1073                 if (*errorp) {
1074                         hammer_rel_buffer(buffer, 1);
1075                         buffer = NULL;
1076                 }
1077         } else {
1078                 *errorp = 0;
1079         }
1080         return(buffer);
1081 }
1082
1083 static int
1084 hammer_load_buffer(hammer_buffer_t buffer, u_int64_t buf_type)
1085 {
1086         hammer_volume_t volume;
1087         hammer_fsbuf_ondisk_t ondisk;
1088         int error;
1089
1090         /*
1091          * Load the buffer's on-disk info
1092          */
1093         volume = buffer->volume;
1094         hammer_lock_ex(&buffer->io.lock);
1095         if (buffer->ondisk == NULL) {
1096                 if (buf_type) {
1097                         error = hammer_io_new(volume->devvp, &buffer->io);
1098                 } else {
1099                         error = hammer_io_read(volume->devvp, &buffer->io);
1100                 }
1101                 if (error) {
1102                         hammer_unlock(&buffer->io.lock);
1103                         return (error);
1104                 }
1105                 buffer->ondisk = ondisk = (void *)buffer->io.bp->b_data;
1106                 buffer->alist.config = &Buf_alist_config;
1107                 buffer->alist.meta = ondisk->head.buf_almeta;
1108                 buffer->buf_type = ondisk->head.buf_type;
1109         } else if (buf_type) {
1110                 error = hammer_io_new(volume->devvp, &buffer->io);
1111         } else {
1112                 error = 0;
1113         }
1114         if (error == 0 && buf_type) {
1115                 hammer_modify_buffer(buffer);
1116                 ondisk = buffer->ondisk;
1117                 hammer_initbuffer(&buffer->alist, &ondisk->head, buf_type);
1118                 buffer->buf_type = ondisk->head.buf_type;
1119         }
1120         hammer_unlock(&buffer->io.lock);
1121         return (error);
1122 }
1123
1124 /*
1125  * NOTE: Called from RB_SCAN, must return >= 0 for scan to continue.
1126  */
1127 int
1128 hammer_unload_buffer(hammer_buffer_t buffer, void *data __unused)
1129 {
1130         hammer_ref(&buffer->io.lock);
1131         hammer_flush_buffer_nodes(buffer);
1132         KKASSERT(buffer->io.lock.refs == 1);
1133         hammer_rel_buffer(buffer, 2);
1134         return(0);
1135 }
1136
1137 /*
1138  * Reference a buffer that is either already referenced or reachable via
1139  * a specially handled pointer (aka cursor->buffer).
1140  */
1141 int
1142 hammer_ref_buffer(hammer_buffer_t buffer)
1143 {
1144         int error;
1145
1146         hammer_ref(&buffer->io.lock);
1147         if (buffer->ondisk == NULL) {
1148                 error = hammer_load_buffer(buffer, 0);
1149                 if (error) {
1150                         hammer_rel_buffer(buffer, 1);
1151                         /*
1152                          * NOTE: buffer pointer can become stale after
1153                          * the above release.
1154                          */
1155                 } else {
1156                         KKASSERT(buffer->buf_type ==
1157                                  buffer->ondisk->head.buf_type);
1158                 }
1159         } else {
1160                 error = 0;
1161         }
1162         return(error);
1163 }
1164
1165 /*
1166  * Release a buffer.  We have to deal with several places where
1167  * another thread can ref the buffer.
1168  *
1169  * Only destroy the structure itself if the related buffer cache buffer
1170  * was disassociated from it.  This ties the management of the structure
1171  * to the buffer cache subsystem.  buffer->ondisk determines whether the
1172  * embedded io is referenced or not.
1173  */
1174 void
1175 hammer_rel_buffer(hammer_buffer_t buffer, int flush)
1176 {
1177         hammer_cluster_t cluster;
1178
1179         if (buffer->io.lock.refs == 1) {
1180                 hammer_lock_ex(&buffer->io.lock);
1181                 if (buffer->io.lock.refs == 1) {
1182                         hammer_io_release(&buffer->io, flush);
1183
1184                         if (buffer->io.bp == NULL &&
1185                             buffer->io.lock.refs == 1) {
1186                                 hammer_flush_buffer_nodes(buffer);
1187                                 KKASSERT(TAILQ_EMPTY(&buffer->clist));
1188                                 cluster = buffer->cluster;
1189                                 RB_REMOVE(hammer_buf_rb_tree,
1190                                           &cluster->rb_bufs_root, buffer);
1191                                 buffer->cluster = NULL; /* sanity */
1192                                 --hammer_count_buffers;
1193                                 kfree(buffer, M_HAMMER);
1194                                 hammer_rel_cluster(cluster, 0);
1195                                 return;
1196                         }
1197                 } else if (flush) {
1198                         hammer_io_flush(&buffer->io);
1199                 }
1200                 hammer_unlock(&buffer->io.lock);
1201         }
1202         hammer_unref(&buffer->io.lock);
1203 }
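/*
 * Typical usage (sketch, mirroring hammer_load_node() below): a buf_type of
 * 0 reads an existing buffer, while a non-zero HAMMER_FSBUF_* type creates
 * a new zero'd buffer.
 *
 *	buffer = hammer_get_buffer(cluster, buf_no, 0, &error);
 *	if (buffer) {
 *		... access buffer->ondisk ...
 *		hammer_rel_buffer(buffer, 0);
 *	}
 */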
1204
1205 /************************************************************************
1206  *                              NODES                                   *
1207  ************************************************************************
1208  *
1209  * Manage B-Tree nodes.  B-Tree nodes represent the primary indexing
1210  * method used by the HAMMER filesystem.
1211  *
1212  * Unlike other HAMMER structures, a hammer_node can be PASSIVELY
1213  * associated with its buffer, and will only reference the buffer while
1214  * the node itself is referenced.
1215  *
1216  * A hammer_node can also be passively associated with other HAMMER
1217  * structures, such as inodes, while retaining 0 references.  These
1218  * associations can be cleared backwards using a pointer-to-pointer in
1219  * the hammer_node.
1220  *
1221  * This allows the HAMMER implementation to cache hammer_nodes long-term
1222  * and short-cut a great deal of the infrastructure's complexity.  In
1223  * most cases a cached node can be reacquired without having to dip into
1224  * either the buffer or cluster management code.
1225  *
1226  * The caller must pass a referenced cluster on call and will retain
1227  * ownership of the reference on return.  The node will acquire its own
1228  * additional references, if necessary.
1229  */
1230 hammer_node_t
1231 hammer_get_node(hammer_cluster_t cluster, int32_t node_offset, int *errorp)
1232 {
1233         hammer_node_t node;
1234
1235         /*
1236          * Locate the structure, allocating one if necessary.
1237          */
1238 again:
1239         node = RB_LOOKUP(hammer_nod_rb_tree, &cluster->rb_nods_root,
1240                          node_offset);
1241         if (node == NULL) {
1242                 ++hammer_count_nodes;
1243                 node = kmalloc(sizeof(*node), M_HAMMER, M_WAITOK|M_ZERO);
1244                 node->node_offset = node_offset;
1245                 node->cluster = cluster;
1246                 if (RB_INSERT(hammer_nod_rb_tree, &cluster->rb_nods_root,
1247                               node)) {
1248                         --hammer_count_nodes;
1249                         kfree(node, M_HAMMER);
1250                         goto again;
1251                 }
1252         }
1253         hammer_ref(&node->lock);
1254         *errorp = hammer_load_node(node);
1255         if (*errorp) {
1256                 hammer_rel_node(node);
1257                 node = NULL;
1258         }
1259         return(node);
1260 }
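/*
 * Illustrative sketch: looking up the cluster's B-Tree root node.  The
 * cluster must already be referenced by the caller; the node is released
 * when no longer needed and may remain passively cached afterwards.
 *
 *	node = hammer_get_node(cluster, cluster->ondisk->clu_btree_root,
 *			       &error);
 *	if (node) {
 *		... examine node->ondisk ...
 *		hammer_rel_node(node);
 *	}
 */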
1261
1262 /*
1263  * Reference an already-referenced node.
1264  */
1265 int
1266 hammer_ref_node(hammer_node_t node)
1267 {
1268         int error;
1269
1270         KKASSERT(node->lock.refs > 0);
1271         hammer_ref(&node->lock);
1272         if ((error = hammer_load_node(node)) != 0)
1273                 hammer_rel_node(node);
1274         return(error);
1275 }
1276
1277 /*
1278  * Load a node's on-disk data reference.
1279  */
1280 static int
1281 hammer_load_node(hammer_node_t node)
1282 {
1283         hammer_buffer_t buffer;
1284         int32_t buf_no;
1285         int error;
1286
1287         if (node->ondisk)
1288                 return(0);
1289         error = 0;
1290         hammer_lock_ex(&node->lock);
1291         if (node->ondisk == NULL) {
1292                 /*
1293                  * This is a little confusing but the gist is that
1294                  * node->buffer determines whether the node is on
1295                  * the buffer's clist and node->ondisk determines
1296                  * whether the buffer is referenced.
1297                  */
1298                 if ((buffer = node->buffer) != NULL) {
1299                         error = hammer_ref_buffer(buffer);
1300                 } else {
1301                         buf_no = node->node_offset / HAMMER_BUFSIZE;
1302                         buffer = hammer_get_buffer(node->cluster,
1303                                                    buf_no, 0, &error);
1304                         if (buffer) {
1305                                 KKASSERT(error == 0);
1306                                 TAILQ_INSERT_TAIL(&buffer->clist,
1307                                                   node, entry);
1308                                 node->buffer = buffer;
1309                         }
1310                 }
1311                 if (error == 0) {
1312                         node->ondisk = (void *)((char *)buffer->ondisk +
1313                                (node->node_offset & HAMMER_BUFMASK));
1314                 }
1315         }
1316         hammer_unlock(&node->lock);
1317         return (error);
1318 }
1319
1320 /*
1321  * Safely reference a node, interlock against flushes via the IO subsystem.
1322  */
1323 hammer_node_t
1324 hammer_ref_node_safe(struct hammer_mount *hmp, struct hammer_node **cache,
1325                      int *errorp)
1326 {
1327         hammer_node_t node;
1328
1329         if ((node = *cache) != NULL)
1330                 hammer_ref(&node->lock);
1331         if (node) {
1332                 *errorp = hammer_load_node(node);
1333                 if (*errorp) {
1334                         hammer_rel_node(node);
1335                         node = NULL;
1336                 }
1337         } else {
1338                 *errorp = ENOENT;
1339         }
1340         return(node);
1341 }
1342
1343 /*
1344  * Release a hammer_node.  On the last release the node dereferences
1345  * its underlying buffer and may or may not be destroyed.
1346  */
1347 void
1348 hammer_rel_node(hammer_node_t node)
1349 {
1350         hammer_cluster_t cluster;
1351         hammer_buffer_t buffer;
1352         int32_t node_offset;
1353         int flags;
1354
1355         /*
1356          * If this isn't the last ref just decrement the ref count and
1357          * return.
1358          */
1359         if (node->lock.refs > 1) {
1360                 hammer_unref(&node->lock);
1361                 return;
1362         }
1363
1364         /*
1365          * If there is no ondisk info or no buffer, the node failed to load;
1366          * remove the last reference and destroy the node.
1367          */
1368         if (node->ondisk == NULL) {
1369                 hammer_unref(&node->lock);
1370                 hammer_flush_node(node);
1371                 /* node is stale now */
1372                 return;
1373         }
1374
1375         /*
1376          * Do final cleanups and then either destroy the node or leave it
1377          * passively cached.  The buffer reference is removed regardless.
1378          */
1379         buffer = node->buffer;
1380         node->ondisk = NULL;
1381
1382         if ((node->flags & (HAMMER_NODE_DELETED|HAMMER_NODE_FLUSH)) == 0) {
1383                 hammer_unref(&node->lock);
1384                 hammer_rel_buffer(buffer, 0);
1385                 return;
1386         }
1387
1388         /*
1389          * Destroy the node.  Record pertinent data because the node
1390          * becomes stale the instant we flush it.
1391          */
1392         flags = node->flags;
1393         node_offset = node->node_offset;
1394         hammer_unref(&node->lock);
1395         hammer_flush_node(node);
1396         /* node is stale */
1397
1398         cluster = buffer->cluster;
1399         if (flags & HAMMER_NODE_DELETED) {
1400                 hammer_free_btree(cluster, node_offset);
1401                 if (node_offset == cluster->ondisk->clu_btree_root) {
1402                         kprintf("FREE CLUSTER %d\n", cluster->clu_no);
1403                         hammer_free_cluster(cluster);
1404                         /*hammer_io_undirty(&cluster->io);*/
1405                 }
1406         }
1407         hammer_rel_buffer(buffer, 0);
1408 }
1409
1410 /*
1411  * Passively cache a referenced hammer_node in *cache.  The caller may
1412  * release the node on return.
1413  */
1414 void
1415 hammer_cache_node(hammer_node_t node, struct hammer_node **cache)
1416 {
1417         hammer_node_t old;
1418
1419         /*
1420          * If the node is being deleted, don't cache it!
1421          */
1422         if (node->flags & HAMMER_NODE_DELETED)
1423                 return;
1424
1425         /*
1426          * Cache the node.  If we previously cached a different node we
1427          * have to give HAMMER a chance to destroy it.
1428          */
1429 again:
1430         if (node->cache1 != cache) {
1431                 if (node->cache2 != cache) {
1432                         if ((old = *cache) != NULL) {
1433                                 KKASSERT(node->lock.refs != 0);
1434                                 hammer_uncache_node(cache);
1435                                 goto again;
1436                         }
1437                         if (node->cache2)
1438                                 *node->cache2 = NULL;
1439                         node->cache2 = node->cache1;
1440                         node->cache1 = cache;
1441                         *cache = node;
1442                 } else {
1443                         struct hammer_node **tmp;
1444                         tmp = node->cache1;
1445                         node->cache1 = node->cache2;
1446                         node->cache2 = tmp;
1447                 }
1448         }
1449 }
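
/*
 * Editor's note: an illustrative use of the two-slot passive cache managed
 * above; 'mycache' is a hypothetical pointer owned by the caller.
 */
#if 0
	struct hammer_node *mycache = NULL;

	hammer_cache_node(node, &mycache);	/* node must be referenced */
	hammer_rel_node(node);			/* the passive cache remains */
	/* ... later ... */
	hammer_uncache_node(&mycache);		/* drop the cached pointer */
#endif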
1450
1451 void
1452 hammer_uncache_node(struct hammer_node **cache)
1453 {
1454         hammer_node_t node;
1455
1456         if ((node = *cache) != NULL) {
1457                 *cache = NULL;
1458                 if (node->cache1 == cache) {
1459                         node->cache1 = node->cache2;
1460                         node->cache2 = NULL;
1461                 } else if (node->cache2 == cache) {
1462                         node->cache2 = NULL;
1463                 } else {
1464                         panic("hammer_uncache_node: missing cache linkage");
1465                 }
1466                 if (node->cache1 == NULL && node->cache2 == NULL)
1467                         hammer_flush_node(node);
1468         }
1469 }
1470
1471 /*
1472  * Remove a node's cache references and destroy the node if it has no
1473  * other references or backing store.
1474  */
1475 void
1476 hammer_flush_node(hammer_node_t node)
1477 {
1478         hammer_buffer_t buffer;
1479
1480         if (node->cache1)
1481                 *node->cache1 = NULL;
1482         if (node->cache2)
1483                 *node->cache2 = NULL;
1484         if (node->lock.refs == 0 && node->ondisk == NULL) {
1485                 RB_REMOVE(hammer_nod_rb_tree, &node->cluster->rb_nods_root,
1486                           node);
1487                 if ((buffer = node->buffer) != NULL) {
1488                         node->buffer = NULL;
1489                         TAILQ_REMOVE(&buffer->clist, node, entry);
1490                         /* buffer is unreferenced because ondisk is NULL */
1491                 }
1492                 --hammer_count_nodes;
1493                 kfree(node, M_HAMMER);
1494         }
1495 }
1496
1497 /*
1498  * Flush passively cached B-Tree nodes associated with this buffer.
1499  * This is only called when the buffer is about to be destroyed, so
1500  * none of the nodes should have any references.
1501  */
1502 void
1503 hammer_flush_buffer_nodes(hammer_buffer_t buffer)
1504 {
1505         hammer_node_t node;
1506
1507         while ((node = TAILQ_FIRST(&buffer->clist)) != NULL) {
1508                 KKASSERT(node->lock.refs == 0 && node->ondisk == NULL);
1509                 hammer_ref(&node->lock);
1510                 node->flags |= HAMMER_NODE_FLUSH;
1511                 hammer_rel_node(node);
1512         }
1513 }
1514
1515 /************************************************************************
1516  *                              A-LIST ALLOCATORS                       *
1517  ************************************************************************/
1518
1519 /*
1520  * Allocate HAMMER clusters
1521  */
1522 hammer_cluster_t
1523 hammer_alloc_cluster(hammer_mount_t hmp, hammer_cluster_t cluster_hint,
1524                      int *errorp)
1525 {
1526         hammer_volume_t volume;
1527         hammer_cluster_t cluster;
1528         int32_t clu_no;
1529         int32_t clu_hint;
1530         int32_t vol_beg;
1531         int32_t vol_no;
1532
1533         /*
1534          * Figure out our starting volume and hint.
1535          */
1536         if (cluster_hint) {
1537                 vol_beg = cluster_hint->volume->vol_no;
1538                 clu_hint = cluster_hint->clu_no;
1539         } else {
1540                 vol_beg = hmp->volume_iterator;
1541                 clu_hint = -1;
1542         }
1543
1544         /*
1545          * Loop through volumes looking for a free cluster.  If allocating
1546          * a new cluster relative to an existing cluster try to find a free
1547          * cluster on either side (clu_hint >= 0), otherwise just do a
1548          * forwards iteration.
1549          */
1550         vol_no = vol_beg;
1551         do {
1552                 volume = hammer_get_volume(hmp, vol_no, errorp);
1553                 kprintf("VOLUME %p %d\n", volume, vol_no);
1554                 if (*errorp) {
1555                         clu_no = HAMMER_ALIST_BLOCK_NONE;
1556                         break;
1557                 }
1558                 hammer_modify_volume(volume);
1559                 if (clu_hint == -1) {
1560                         clu_hint = volume->clu_iterator;
1561                         clu_no = hammer_alist_alloc_fwd(&volume->alist, 1,
1562                                                         clu_hint);
1563                         if (clu_no == HAMMER_ALIST_BLOCK_NONE) {
1564                                 clu_no = hammer_alist_alloc_fwd(&volume->alist,
1565                                                                 1, 0);
1566                         }
1567                 } else {
1568                         clu_no = hammer_alist_alloc_fwd(&volume->alist, 1,
1569                                                         clu_hint);
1570                         if (clu_no == HAMMER_ALIST_BLOCK_NONE) {
1571                                 clu_no = hammer_alist_alloc_rev(&volume->alist,
1572                                                                 1, clu_hint);
1573                         }
1574                 }
1575                 if (clu_no != HAMMER_ALIST_BLOCK_NONE)
1576                         break;
1577                 hammer_rel_volume(volume, 0);
1578                 volume = NULL;
1579                 *errorp = ENOSPC;
1580                 vol_no = (vol_no + 1) % hmp->nvolumes;
1581                 clu_hint = -1;
1582         } while (vol_no != vol_beg);
1583
1584         /*
1585          * Acquire the cluster.  On success this will force *errorp to 0.
1586          */
1587         if (clu_no != HAMMER_ALIST_BLOCK_NONE) {
1588                 kprintf("ALLOC CLUSTER %d\n", clu_no);
1589                 cluster = hammer_get_cluster(volume, clu_no, errorp,
1590                                              HAMMER_ASTATE_FREE);
1591                 volume->clu_iterator = clu_no;
1592                 hammer_rel_volume(volume, 0);
1593         } else {
1594                 cluster = NULL;
1595         }
1596         if (cluster)
1597                 hammer_lock_ex(&cluster->io.lock);
1598         return(cluster);
1599 }
1600
1601 void
1602 hammer_init_cluster(hammer_cluster_t cluster, hammer_base_elm_t left_bound, 
1603                     hammer_base_elm_t right_bound)
1604 {
1605         hammer_cluster_ondisk_t ondisk = cluster->ondisk;
1606
1607         hammer_modify_cluster(cluster);
1608         ondisk->clu_btree_beg = *left_bound;
1609         ondisk->clu_btree_end = *right_bound;
1610         cluster->clu_btree_beg = ondisk->clu_btree_beg;
1611         cluster->clu_btree_end = ondisk->clu_btree_end;
1612 }
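
/*
 * Editor's note: a hedged sketch of how the two routines above are
 * typically paired.  The hint, the bounds and the error handling are
 * placeholders; the exclusive lock taken by hammer_alloc_cluster() is
 * assumed to be dropped with hammer_unlock() before the release.
 */
#if 0
	hammer_cluster_t ncluster;
	int error;

	ncluster = hammer_alloc_cluster(hmp, cluster_hint, &error);
	if (ncluster) {
		hammer_init_cluster(ncluster, left_bound, right_bound);
		/* ... populate the new cluster ... */
		hammer_unlock(&ncluster->io.lock);
		hammer_rel_cluster(ncluster, 0);
	}
#endif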
1613
1614 /*
1615  * Deallocate a cluster
1616  */
1617 void
1618 hammer_free_cluster(hammer_cluster_t cluster)
1619 {
1620         hammer_modify_cluster(cluster);
1621         hammer_alist_free(&cluster->volume->alist, cluster->clu_no, 1);
1622 }
1623
1624 /*
1625  * Allocate HAMMER elements - btree nodes, data storage, and record elements
1626  *
1627  * The passed *bufferp should be initialized to NULL.  On successive calls
1628  * *bufferp caches the most recent buffer used until put away by the caller.
1629  * Note that previously returned pointers using the cached buffer become
1630  * invalid on successive calls which reuse *bufferp.
1631  *
1632  * All allocations first attempt to use the block found at the specified
1633  * iterator.  If that fails the first available block is used.  If that
1634  * fails a new buffer is allocated and associated with the buffer type
1635  * A-list and the element is allocated out of the new buffer.
1636  */
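
/*
 * Editor's note: an illustrative (not authoritative) caller showing the
 * *bufferp caching convention documented above, using the record allocator
 * as an example.  Identifiers other than the hammer_* calls are assumed.
 */
#if 0
	struct hammer_buffer *buffer = NULL;	/* must start out NULL */
	union hammer_record_ondisk *rec;
	int error;

	rec = hammer_alloc_record(cluster, &error, &buffer);
	/* 'rec' becomes invalid once a later call replaces 'buffer' */
	if (buffer)
		hammer_rel_buffer(buffer, 0);	/* put the cache away */
#endif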
1637
1638 hammer_node_t
1639 hammer_alloc_btree(hammer_cluster_t cluster, int *errorp)
1640 {
1641         hammer_buffer_t buffer;
1642         hammer_alist_t live;
1643         hammer_node_t node;
1644         int32_t elm_no;
1645         int32_t buf_no;
1646         int32_t node_offset;
1647
1648         /*
1649          * Allocate a B-Tree element
1650          */
1651         hammer_modify_cluster(cluster);
1652         buffer = NULL;
1653         live = &cluster->alist_btree;
1654         elm_no = hammer_alist_alloc_fwd(live, 1, cluster->ondisk->idx_index);
1655         if (elm_no == HAMMER_ALIST_BLOCK_NONE)
1656                 elm_no = hammer_alist_alloc_fwd(live, 1, 0);
1657         if (elm_no == HAMMER_ALIST_BLOCK_NONE) {
1658                 alloc_new_buffer(cluster, HAMMER_FSBUF_BTREE, live,
1659                                  cluster->ondisk->idx_index, errorp, &buffer);
1660                 elm_no = hammer_alist_alloc(live, 1);
1661                 if (elm_no == HAMMER_ALIST_BLOCK_NONE) {
1662                         *errorp = ENOSPC;
1663                         if (buffer)
1664                                 hammer_rel_buffer(buffer, 0);
1665                         return(NULL);
1666                 }
1667         }
1668         cluster->ondisk->idx_index = elm_no;
1669         KKASSERT((elm_no & HAMMER_FSBUF_BLKMASK) < HAMMER_BTREE_NODES);
1670
1671         /*
1672          * Load and return the B-Tree element
1673          */
1674         buf_no = elm_no / HAMMER_FSBUF_MAXBLKS;
1675         node_offset = buf_no * HAMMER_BUFSIZE +
1676                       offsetof(union hammer_fsbuf_ondisk,
1677                                btree.nodes[elm_no & HAMMER_FSBUF_BLKMASK]);
1678         node = hammer_get_node(cluster, node_offset, errorp);
1679         if (node) {
1680                 hammer_modify_node(node);
1681                 bzero(node->ondisk, sizeof(*node->ondisk));
1682         } else {
1683                 hammer_alist_free(live, elm_no, 1);
1684                 /* node is NULL here, so there is no reference to drop */
1685                 node = NULL;
1686         }
1687         if (buffer)
1688                 hammer_rel_buffer(buffer, 0);
1689         return(node);
1690 }
1691
1692 void *
1693 hammer_alloc_data(hammer_cluster_t cluster, int32_t bytes,
1694                   int *errorp, struct hammer_buffer **bufferp)
1695 {
1696         hammer_buffer_t buffer;
1697         hammer_alist_t live;
1698         int32_t elm_no;
1699         int32_t buf_no;
1700         int32_t nblks;
1701         void *item;
1702
1703         /*
1704          * Deal with large data blocks.  The blocksize is HAMMER_BUFSIZE
1705          * for these allocations.
1706          */
1707         hammer_modify_cluster(cluster);
1708         if ((bytes & HAMMER_BUFMASK) == 0) {
1709                 nblks = bytes / HAMMER_BUFSIZE;
1710                 /* only one block allowed for now (so buffer can hold it) */
1711                 KKASSERT(nblks == 1);
1712
1713                 buf_no = hammer_alloc_master(cluster, nblks,
1714                                              cluster->ondisk->idx_ldata, 1);
1715                 if (buf_no == HAMMER_ALIST_BLOCK_NONE) {
1716                         *errorp = ENOSPC;
1717                         return(NULL);
1718                 }
1719                 hammer_adjust_stats(cluster, HAMMER_FSBUF_DATA, nblks);
1720                 cluster->ondisk->idx_ldata = buf_no;
1721                 buffer = *bufferp;
1722                 *bufferp = hammer_get_buffer(cluster, buf_no, -1, errorp);
1723                 if (buffer)
1724                         hammer_rel_buffer(buffer, 0);
1725                 buffer = *bufferp;
1726                 return(buffer ? buffer->ondisk : NULL);
1727         }
1728
1729         /*
1730          * Allocate a data element.  The block size is HAMMER_DATA_BLKSIZE
1731          * (64 bytes) for these allocations.
1732          */
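	/*
	 * Editor's note (illustrative): with a 64-byte HAMMER_DATA_BLKSIZE,
	 * e.g. bytes = 100 rounds up to 128 below and yields nblks = 2.
	 */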
1733         nblks = (bytes + HAMMER_DATA_BLKMASK) & ~HAMMER_DATA_BLKMASK;
1734         nblks /= HAMMER_DATA_BLKSIZE;
1735         live = &cluster->alist_mdata;
1736         elm_no = hammer_alist_alloc_fwd(live, nblks, cluster->ondisk->idx_data);
1737         if (elm_no == HAMMER_ALIST_BLOCK_NONE)
1738                 elm_no = hammer_alist_alloc_fwd(live, nblks, 0);
1739         if (elm_no == HAMMER_ALIST_BLOCK_NONE) {
1740                 alloc_new_buffer(cluster, HAMMER_FSBUF_DATA, live,
1741                                  cluster->ondisk->idx_data, errorp, bufferp);
1742                 elm_no = hammer_alist_alloc(live, nblks);
1743                 if (elm_no == HAMMER_ALIST_BLOCK_NONE) {
1744                         *errorp = ENOSPC;
1745                         return(NULL);
1746                 }
1747         }
1748         cluster->ondisk->idx_data = elm_no;
1749
1750         /*
1751          * Load and return the data element
1752          */
1753         buf_no = elm_no / HAMMER_FSBUF_MAXBLKS;
1754         buffer = *bufferp;
1755         if (buffer == NULL || buffer->cluster != cluster ||
1756             buffer->buf_no != buf_no) {
1757                 if (buffer)
1758                         hammer_rel_buffer(buffer, 0);
1759                 buffer = hammer_get_buffer(cluster, buf_no, 0, errorp);
1760                 *bufferp = buffer;
1761         }
1762         KKASSERT(buffer->ondisk->head.buf_type == HAMMER_FSBUF_DATA);
1763         KKASSERT((elm_no & HAMMER_FSBUF_BLKMASK) < HAMMER_DATA_NODES);
1764         hammer_modify_buffer(buffer);
1765         item = &buffer->ondisk->data.data[elm_no & HAMMER_FSBUF_BLKMASK];
1766         bzero(item, nblks * HAMMER_DATA_BLKSIZE);
1767         *errorp = 0;
1768         return(item);
1769 }
1770
1771 void *
1772 hammer_alloc_record(hammer_cluster_t cluster,
1773                     int *errorp, struct hammer_buffer **bufferp)
1774 {
1775         hammer_buffer_t buffer;
1776         hammer_alist_t live;
1777         int32_t elm_no;
1778         int32_t buf_no;
1779         void *item;
1780
1781         /*
1782          * Allocate a record element
1783          */
1784         hammer_modify_cluster(cluster);
1785         live = &cluster->alist_record;
1786         elm_no = hammer_alist_alloc_rev(live, 1, cluster->ondisk->idx_record);
1787         if (elm_no == HAMMER_ALIST_BLOCK_NONE)
1788                 elm_no = hammer_alist_alloc_rev(live, 1,HAMMER_ALIST_BLOCK_MAX);
1789         if (elm_no == HAMMER_ALIST_BLOCK_NONE) {
1790                 alloc_new_buffer(cluster, HAMMER_FSBUF_RECORDS, live,
1791                                  cluster->ondisk->idx_record, errorp, bufferp);
1792                 elm_no = hammer_alist_alloc_rev(live, 1,HAMMER_ALIST_BLOCK_MAX);
1793                 kprintf("hammer_alloc_record elm again %08x\n", elm_no);
1794                 if (elm_no == HAMMER_ALIST_BLOCK_NONE) {
1795                         *errorp = ENOSPC;
1796                         return(NULL);
1797                 }
1798         }
1799         cluster->ondisk->idx_record = elm_no;
1800
1801         /*
1802          * Load and return the record element
1803          */
1804         buf_no = elm_no / HAMMER_FSBUF_MAXBLKS;
1805         buffer = *bufferp;
1806         if (buffer == NULL || buffer->cluster != cluster ||
1807             buffer->buf_no != buf_no) {
1808                 if (buffer)
1809                         hammer_rel_buffer(buffer, 0);
1810                 buffer = hammer_get_buffer(cluster, buf_no, 0, errorp);
1811                 *bufferp = buffer;
1812         }
1813         KKASSERT(buffer->ondisk->head.buf_type == HAMMER_FSBUF_RECORDS);
1814         KKASSERT((elm_no & HAMMER_FSBUF_BLKMASK) < HAMMER_RECORD_NODES);
1815         hammer_modify_buffer(buffer);
1816         item = &buffer->ondisk->record.recs[elm_no & HAMMER_FSBUF_BLKMASK];
1817         bzero(item, sizeof(union hammer_record_ondisk));
1818         *errorp = 0;
1819         return(item);
1820 }
1821
1822 void
1823 hammer_free_data_ptr(hammer_buffer_t buffer, void *data, int bytes)
1824 {
1825         int32_t elm_no;
1826         int32_t nblks;
1827         hammer_alist_t live;
1828
1829         hammer_modify_cluster(buffer->cluster);
1830         if ((bytes & HAMMER_BUFMASK) == 0) {
1831                 nblks = bytes / HAMMER_BUFSIZE;
1832                 KKASSERT(nblks == 1 && data == (void *)buffer->ondisk);
1833                 hammer_alist_free(&buffer->cluster->alist_master,
1834                                   buffer->buf_no, nblks);
1835                 hammer_adjust_stats(buffer->cluster, HAMMER_FSBUF_DATA, -nblks);
1836                 return;
1837         }
1838
1839         elm_no = ((char *)data - (char *)buffer->ondisk->data.data) /
1840                  HAMMER_DATA_BLKSIZE;
1841         KKASSERT(elm_no >= 0 && elm_no < HAMMER_DATA_NODES);
1842         elm_no += buffer->buf_no * HAMMER_FSBUF_MAXBLKS;
1843         nblks = (bytes + HAMMER_DATA_BLKMASK) & ~HAMMER_DATA_BLKMASK;
1844         nblks /= HAMMER_DATA_BLKSIZE;
1845         live = &buffer->cluster->alist_mdata;
1846         hammer_alist_free(live, elm_no, nblks);
1847 }
1848
1849 void
1850 hammer_free_record_ptr(hammer_buffer_t buffer, union hammer_record_ondisk *rec)
1851 {
1852         int32_t elm_no;
1853         hammer_alist_t live;
1854
1855         hammer_modify_cluster(buffer->cluster);
1856         elm_no = rec - &buffer->ondisk->record.recs[0];
1857         KKASSERT(elm_no >= 0 && elm_no < HAMMER_RECORD_NODES);
1858         elm_no += buffer->buf_no * HAMMER_FSBUF_MAXBLKS;
1859         live = &buffer->cluster->alist_record;
1860         hammer_alist_free(live, elm_no, 1);
1861 }
1862
1863 void
1864 hammer_free_btree(hammer_cluster_t cluster, int32_t bclu_offset)
1865 {
1866         const int32_t blksize = sizeof(struct hammer_node_ondisk);
1867         int32_t fsbuf_offset = bclu_offset & HAMMER_BUFMASK;
1868         hammer_alist_t live;
1869         int32_t elm_no;
1870
1871         hammer_modify_cluster(cluster);
1872         elm_no = bclu_offset / HAMMER_BUFSIZE * HAMMER_FSBUF_MAXBLKS;
1873         fsbuf_offset -= offsetof(union hammer_fsbuf_ondisk, btree.nodes[0]);
1874         live = &cluster->alist_btree;
1875         KKASSERT(fsbuf_offset >= 0 && fsbuf_offset % blksize == 0);
1876         elm_no += fsbuf_offset / blksize;
1877         hammer_alist_free(live, elm_no, 1);
1878 }
1879
1880 void
1881 hammer_free_data(hammer_cluster_t cluster, int32_t bclu_offset, int32_t bytes)
1882 {
1883         const int32_t blksize = HAMMER_DATA_BLKSIZE;
1884         int32_t fsbuf_offset = bclu_offset & HAMMER_BUFMASK;
1885         hammer_alist_t live;
1886         int32_t elm_no;
1887         int32_t buf_no;
1888         int32_t nblks;
1889
1890         hammer_modify_cluster(cluster);
1891         if ((bytes & HAMMER_BUFMASK) == 0) {
1892                 nblks = bytes / HAMMER_BUFSIZE;
1893                 KKASSERT(nblks == 1 && (bclu_offset & HAMMER_BUFMASK) == 0);
1894                 buf_no = bclu_offset / HAMMER_BUFSIZE;
1895                 hammer_alist_free(&cluster->alist_master, buf_no, nblks);
1896                 hammer_adjust_stats(cluster, HAMMER_FSBUF_DATA, -nblks);
1897                 return;
1898         }
1899
1900         elm_no = bclu_offset / HAMMER_BUFSIZE * HAMMER_FSBUF_MAXBLKS;
1901         fsbuf_offset -= offsetof(union hammer_fsbuf_ondisk, data.data[0][0]);
1902         live = &cluster->alist_mdata;
1903         nblks = (bytes + HAMMER_DATA_BLKMASK) & ~HAMMER_DATA_BLKMASK;
1904         nblks /= HAMMER_DATA_BLKSIZE;
1905         KKASSERT(fsbuf_offset >= 0 && fsbuf_offset % blksize == 0);
1906         elm_no += fsbuf_offset / blksize;
1907         hammer_alist_free(live, elm_no, nblks);
1908 }
1909
1910 void
1911 hammer_free_record(hammer_cluster_t cluster, int32_t bclu_offset)
1912 {
1913         const int32_t blksize = sizeof(union hammer_record_ondisk);
1914         int32_t fsbuf_offset = bclu_offset & HAMMER_BUFMASK;
1915         hammer_alist_t live;
1916         int32_t elm_no;
1917
1918         hammer_modify_cluster(cluster);
1919         elm_no = bclu_offset / HAMMER_BUFSIZE * HAMMER_FSBUF_MAXBLKS;
1920         fsbuf_offset -= offsetof(union hammer_fsbuf_ondisk, record.recs[0]);
1921         live = &cluster->alist_record;
1922         KKASSERT(fsbuf_offset >= 0 && fsbuf_offset % blksize == 0);
1923         elm_no += fsbuf_offset / blksize;
1924         hammer_alist_free(live, elm_no, 1);
1925 }
1926
1927
1928 /*
1929  * Allocate a new filesystem buffer and assign it to the specified
1930  * filesystem buffer type.  The new buffer will be added to the
1931  * type-specific A-list and initialized.
1932  *
1933  * Buffers used for records are also added to the clu_record_buf_bitmap.
1934  */
1935 static void
1936 alloc_new_buffer(hammer_cluster_t cluster, u_int64_t type, hammer_alist_t live,
1937                  int start, int *errorp, struct hammer_buffer **bufferp)
1938 {
1939         hammer_buffer_t buffer;
1940         int32_t buf_no;
1941         int isfwd;
1942
1943         if (*bufferp)
1944                 hammer_rel_buffer(*bufferp, 0);
1945         *bufferp = NULL;
1946
1947         start = start / HAMMER_FSBUF_MAXBLKS;   /* convert to buf_no */
1948         isfwd = (type != HAMMER_FSBUF_RECORDS);
1949         buf_no = hammer_alloc_master(cluster, 1, start, isfwd);
1950         if (buf_no == HAMMER_ALIST_BLOCK_NONE) {
1951                 *errorp = ENOSPC;
1952                 return;
1953         }
1954
1955         /*
1956          * The new buffer must be initialized (type != 0) regardless of
1957          * whether we already have it cached or not, so don't try to
1958          * optimize the cached buffer check.  Just call hammer_get_buffer().
1959          */
1960         buffer = hammer_get_buffer(cluster, buf_no, type, errorp);
1961         *bufferp = buffer;
1962
1963         /*
1964          * Do a meta-free of the buffer's elements into the type-specific
1965          * A-list and update our statistics to reflect the allocation.
1966          */
1967         if (buffer) {
1968 #if 0
1969                 kprintf("alloc_new_buffer buf_no %d type %016llx nelms %d\n",
1970                         buf_no, type, nelements);
1971 #endif
1972                 hammer_modify_buffer(buffer);  /*XXX*/
1973                 hammer_adjust_stats(cluster, type, 1);
1974
1975                 /*
1976                  * Free the buffer to the appropriate slave list so the
1977                  * cluster-based allocator sees it.
1978                  */
1979                 hammer_alist_free(live, buf_no * HAMMER_FSBUF_MAXBLKS,
1980                                   HAMMER_FSBUF_MAXBLKS);
1981
1982         }
1983
1984         /*
1985          * And, finally, update clu_record_buf_bitmap for record buffers.
1986          * Since buffers are synced to disk before their associated cluster
1987          * header, a recovery operation will only see synced record buffers
1988          * in the bitmap.  XXX We can't use alist_record for recovery due
1989          * to the way we currently manage it.
1990          */
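	/*
	 * Editor's note (illustrative): the bitmap is addressed in 32-bit
	 * words, so e.g. buf_no 37 sets bit 5 (37 & 31) of word 1 (37 >> 5).
	 */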
1991         if (buffer && type == HAMMER_FSBUF_RECORDS) {
1992                 KKASSERT(buf_no >= 0 && buf_no < HAMMER_CLU_MAXBUFFERS);
1993                 hammer_modify_cluster(cluster);
1994                 cluster->ondisk->clu_record_buf_bitmap[buf_no >> 5] |=
1995                         (1 << (buf_no & 31));
1996         }
1997 }
1998
1999 /*
2000  * Sync dirty buffers to the media
2001  */
2002
2003 static int hammer_sync_scan1(struct mount *mp, struct vnode *vp, void *data);
2004 static int hammer_sync_scan2(struct mount *mp, struct vnode *vp, void *data);
2005
2006 int
2007 hammer_sync_hmp(hammer_mount_t hmp, int waitfor)
2008 {
2009         struct hammer_sync_info info;
2010
2011         info.error = 0;
2012         info.waitfor = waitfor;
2013
2014         kprintf("hammer_sync\n");
2015         vmntvnodescan(hmp->mp, VMSC_GETVP|VMSC_NOWAIT,
2016                       hammer_sync_scan1, hammer_sync_scan2, &info);
2017
2018         RB_SCAN(hammer_vol_rb_tree, &hmp->rb_vols_root, NULL,
2019                 hammer_sync_volume, &info);
2020         return(info.error);
2021 }
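
/*
 * Editor's note: a hedged sketch of invoking the sync entry point above.
 * The calling context and the MNT_WAIT policy are assumptions, not taken
 * from this file.
 */
#if 0
	int error;

	error = hammer_sync_hmp(hmp, MNT_WAIT);		/* synchronous flush */
	if (error)
		kprintf("hammer: sync failed %d\n", error);
#endif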
2022
2023 static int
2024 hammer_sync_scan1(struct mount *mp, struct vnode *vp, void *data)
2025 {
2026         struct hammer_inode *ip;
2027
2028         ip = VTOI(vp);
2029         if (vp->v_type == VNON || ((ip->flags & HAMMER_INODE_MODMASK) == 0 &&
2030             RB_EMPTY(&vp->v_rbdirty_tree))) {
2031                 return(-1);
2032         }
2033         return(0);
2034 }
2035
2036 static int
2037 hammer_sync_scan2(struct mount *mp, struct vnode *vp, void *data)
2038 {
2039         struct hammer_sync_info *info = data;
2040         struct hammer_inode *ip;
2041         int error;
2042
2043         ip = VTOI(vp);
2044         if (vp->v_type == VNON || vp->v_type == VBAD ||
2045             ((ip->flags & HAMMER_INODE_MODMASK) == 0 &&
2046              RB_EMPTY(&vp->v_rbdirty_tree))) {
2047                 return(0);
2048         }
2049         if (vp->v_type != VCHR) {
2050                 error = VOP_FSYNC(vp, info->waitfor);
2051                 if (error)
2052                         info->error = error;
2053         }
2054         return(0);
2055 }
2056
2057 int
2058 hammer_sync_volume(hammer_volume_t volume, void *data)
2059 {
2060         struct hammer_sync_info *info = data;
2061
2062         RB_SCAN(hammer_clu_rb_tree, &volume->rb_clus_root, NULL,
2063                 hammer_sync_cluster, info);
2064         if (hammer_ref_volume(volume) == 0)
2065                 hammer_rel_volume(volume, 1);
2066         return(0);
2067 }
2068
2069 int
2070 hammer_sync_cluster(hammer_cluster_t cluster, void *data)
2071 {
2072         struct hammer_sync_info *info = data;
2073
2074         RB_SCAN(hammer_buf_rb_tree, &cluster->rb_bufs_root, NULL,
2075                 hammer_sync_buffer, info);
2076         /*hammer_io_waitdep(&cluster->io);*/
2077         if (hammer_ref_cluster(cluster) == 0)
2078                 hammer_rel_cluster(cluster, 1);
2079         return(0);
2080 }
2081
2082 int
2083 hammer_sync_buffer(hammer_buffer_t buffer, void *data __unused)
2084 {
2085         if (hammer_ref_buffer(buffer) == 0) {
2086                 hammer_lock_ex(&buffer->io.lock);
2087                 hammer_flush_buffer_nodes(buffer);
2088                 hammer_unlock(&buffer->io.lock);
2089                 hammer_rel_buffer(buffer, 1);
2090         }
2091         return(0);
2092 }
2093
2094 /*
2095  * Generic buffer initialization
2096  */
2097 void
2098 hammer_initbuffer(hammer_alist_t live, hammer_fsbuf_head_t head, u_int64_t type)
2099 {
2100         head->buf_type = type;
2101
2102         switch(type) {
2103         case HAMMER_FSBUF_BTREE:
2104                 hammer_alist_init(live, 0, HAMMER_BTREE_NODES, HAMMER_ASTATE_FREE);
2105                 break;
2106         case HAMMER_FSBUF_DATA:
2107                 hammer_alist_init(live, 0, HAMMER_DATA_NODES, HAMMER_ASTATE_FREE);
2108                 break;
2109         case HAMMER_FSBUF_RECORDS:
2110                 hammer_alist_init(live, 0, HAMMER_RECORD_NODES, HAMMER_ASTATE_FREE);
2111                 break;
2112         default:
2113                 hammer_alist_init(live, 0, 0, HAMMER_ASTATE_ALLOC);
2114                 break;
2115         }
2116 }
2117
2118 /*
2119  * Calculate the cluster's offset in the volume.  This calculation is
2120  * slightly more complex when using superclusters because superclusters
2121  * are grouped in blocks of 16, followed by 16 x N clusters where N
2122  * is the number of clusters a supercluster can manage.
2123  */
2124 static int64_t
2125 calculate_cluster_offset(hammer_volume_t volume, int32_t clu_no)
2126 {
2127         int32_t scl_group;
2128         int64_t scl_group_size;
2129         int64_t off;
2130
2131         if (volume->vol_flags & HAMMER_VOLF_USINGSUPERCL) {
2132                 scl_group = clu_no / HAMMER_VOL_SUPERCLUSTER_GROUP /
2133                             HAMMER_SCL_MAXCLUSTERS;
2134                 scl_group_size = 
2135                             ((int64_t)HAMMER_BUFSIZE *
2136                              HAMMER_VOL_SUPERCLUSTER_GROUP) +
2137                             ((int64_t)HAMMER_VOL_SUPERCLUSTER_GROUP *
2138                              volume->vol_clsize * HAMMER_SCL_MAXCLUSTERS);
2139                 scl_group_size += 
2140                             HAMMER_VOL_SUPERCLUSTER_GROUP * HAMMER_BUFSIZE;
2141
2142                 off = volume->cluster_base +
2143                       scl_group * scl_group_size +
2144                       (HAMMER_BUFSIZE * HAMMER_VOL_SUPERCLUSTER_GROUP) +
2145                       ((int64_t)clu_no % ((int64_t)HAMMER_SCL_MAXCLUSTERS *
2146                        HAMMER_VOL_SUPERCLUSTER_GROUP))
2147                       * volume->vol_clsize;
2148         } else {
2149                 off = volume->cluster_base +
2150                       (int64_t)clu_no * volume->vol_clsize;
2151         }
2152         return(off);
2153 }
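
/*
 * Editor's note: restating the computation above symbolically (G for
 * HAMMER_VOL_SUPERCLUSTER_GROUP, N for HAMMER_SCL_MAXCLUSTERS) for a
 * volume that uses superclusters:
 *
 *	scl_group = clu_no / (G * N)
 *	off = cluster_base
 *	    + scl_group * scl_group_size
 *	    + G * HAMMER_BUFSIZE		(the group's supercl headers)
 *	    + (clu_no % (G * N)) * vol_clsize
 */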
2154
2155 /*
2156  * Calculate a super-cluster's offset in the volume.
2157  */
2158 static int64_t
2159 calculate_supercl_offset(hammer_volume_t volume, int32_t scl_no)
2160 {
2161         int64_t off;
2162         int32_t scl_group;
2163         int64_t scl_group_size;
2164
2165         KKASSERT (volume->vol_flags & HAMMER_VOLF_USINGSUPERCL);
2166         scl_group = scl_no / HAMMER_VOL_SUPERCLUSTER_GROUP;
2167         if (scl_group) {
2168                 scl_group_size = 
2169                             ((int64_t)HAMMER_BUFSIZE *
2170                              HAMMER_VOL_SUPERCLUSTER_GROUP) +
2171                             ((int64_t)HAMMER_VOL_SUPERCLUSTER_GROUP *
2172                              volume->vol_clsize * HAMMER_SCL_MAXCLUSTERS);
2173                 scl_group_size += 
2174                             HAMMER_VOL_SUPERCLUSTER_GROUP * HAMMER_BUFSIZE;
2175                 off = volume->cluster_base + (scl_group * scl_group_size) +
2176                       (scl_no % HAMMER_VOL_SUPERCLUSTER_GROUP) * HAMMER_BUFSIZE;
2177         } else {
2178                 off = volume->cluster_base + (scl_no * HAMMER_BUFSIZE);
2179         }
2180         return(off);
2181 }
2182
2183 /*
2184  * Allocate nblks buffers from the cluster's master alist.
2185  */
2186 static int32_t
2187 hammer_alloc_master(hammer_cluster_t cluster, int nblks,
2188                     int32_t start, int isfwd)
2189 {
2190         int32_t buf_no;
2191
2192         hammer_modify_cluster(cluster);
2193         if (isfwd) {
2194                 buf_no = hammer_alist_alloc_fwd(&cluster->alist_master,
2195                                                 nblks, start);
2196                 if (buf_no == HAMMER_ALIST_BLOCK_NONE) {
2197                         buf_no = hammer_alist_alloc_fwd(&cluster->alist_master,
2198                                                 nblks, 0);
2199                 }
2200         } else {
2201                 buf_no = hammer_alist_alloc_rev(&cluster->alist_master,
2202                                                 nblks, start);
2203                 if (buf_no == HAMMER_ALIST_BLOCK_NONE) {
2204                         buf_no = hammer_alist_alloc_rev(&cluster->alist_master,
2205                                                 nblks, HAMMER_ALIST_BLOCK_MAX);
2206                 }
2207         }
2208
2209         /*
2210          * XXX TODO: recover space from empty record, b-tree, and data a-lists.
2211          */
2212
2213         return(buf_no);
2214 }
2215
2216 /*
2217  * Adjust allocation statistics
2218  */
2219 static void
2220 hammer_adjust_stats(hammer_cluster_t cluster, u_int64_t buf_type, int nblks)
2221 {
2222         hammer_modify_cluster(cluster);
2223         hammer_modify_volume(cluster->volume);
2224         hammer_modify_volume(cluster->volume->hmp->rootvol);
2225
2226         switch(buf_type) {
2227         case HAMMER_FSBUF_BTREE:
2228                 cluster->ondisk->stat_idx_bufs += nblks;
2229                 cluster->volume->ondisk->vol_stat_idx_bufs += nblks;
2230                 cluster->volume->hmp->rootvol->ondisk->vol0_stat_idx_bufs += nblks;
2231                 break;
2232         case HAMMER_FSBUF_DATA:
2233                 cluster->ondisk->stat_data_bufs += nblks;
2234                 cluster->volume->ondisk->vol_stat_data_bufs += nblks;
2235                 cluster->volume->hmp->rootvol->ondisk->vol0_stat_data_bufs += nblks;
2236                 break;
2237         case HAMMER_FSBUF_RECORDS:
2238                 cluster->ondisk->stat_rec_bufs += nblks;
2239                 cluster->volume->ondisk->vol_stat_rec_bufs += nblks;
2240                 cluster->volume->hmp->rootvol->ondisk->vol0_stat_rec_bufs += nblks;
2241                 break;
2242         }
2243 }
2244
2245 /*
2246  * A-LIST SUPPORT
2247  *
2248  * Setup the parameters for the various A-lists we use in hammer.  The
2249  * supercluster A-list must be chained to the cluster A-list and cluster
2250  * slave A-lists are chained to buffer A-lists.
2251  *
2252  * See hammer_init_alist_config() below.
2253  */
2254
2255 /*
2256  * A-LIST - cluster recursion into a filesystem buffer
2257  *
2258  * In the init case the buffer has already been initialized by
2259  * alloc_new_buffer() when it allocated the buffer out of the master
2260  * alist and marked it as free in the slave alist.
2261  *
2262  * Because we use a somewhat odd mechanism to assign buffers to slave
2263  * pools we can't actually free the buffer back to the master alist in
2264  * buffer_alist_destroy(), but instead must deal with that logic somewhere
2265  * else.
2266  */
2267 static int
2268 buffer_alist_init(void *info, int32_t blk, int32_t radix,
2269                   hammer_alloc_state_t state)
2270 {
2271         return(0);
2272 }
2273
2274 static int
2275 buffer_alist_recover(void *info, int32_t blk, int32_t radix, int32_t count)
2276 {
2277         hammer_cluster_t cluster = info;
2278         hammer_buffer_t buffer;
2279         int32_t buf_no;
2280         int error = 0;
2281
2282         buf_no = blk / HAMMER_FSBUF_MAXBLKS;
2283         buffer = hammer_get_buffer(cluster, buf_no, 0, &error);
2284         if (buffer) {
2285                 hammer_modify_buffer(buffer);
2286                 error = hammer_alist_recover(&buffer->alist, blk, 0, count);
2287                 /* free block count is returned if >= 0 */
2288                 hammer_rel_buffer(buffer, 0);
2289         } else {
2290                 error = -error;
2291         }
2292         return (error);
2293 }
2294
2295 /*
2296  * Note: This routine is only called when freeing the last elements of
2297  * an initialized buffer.  Freeing all elements of the buffer when the
2298  * buffer was not previously initialized does not call this routine.
2299  */
2300 static int
2301 buffer_alist_destroy(void *info, int32_t blk, int32_t radix)
2302 {
2303         hammer_cluster_t cluster = info;
2304         int32_t buf_no;
2305
2306         buf_no = blk / HAMMER_FSBUF_MAXBLKS;
2307         kprintf("destroy buffer %d:%d:%d\n", cluster->volume->vol_no, cluster->clu_no, buf_no);
2308         return (0);
2309 }
2310
2311 /*
2312  * Note: atblk can be negative and atblk - blk can go negative.
2313  */
2314 static int
2315 buffer_alist_alloc_fwd(void *info, int32_t blk, int32_t radix,
2316                       int32_t count, int32_t atblk, int32_t *fullp)
2317 {
2318         hammer_cluster_t cluster = info;
2319         hammer_buffer_t buffer;
2320         int32_t buf_no;
2321         int32_t r;
2322         int error = 0;
2323
2324         buf_no = blk / HAMMER_FSBUF_MAXBLKS;
2325         buffer = hammer_get_buffer(cluster, buf_no, 0, &error);
2326         if (buffer) {
2327                 KKASSERT(buffer->ondisk->head.buf_type != 0);
2328
2329                 hammer_modify_buffer(buffer);
2330                 r = hammer_alist_alloc_fwd(&buffer->alist, count, atblk - blk);
2331                 if (r != HAMMER_ALIST_BLOCK_NONE)
2332                         r += blk;
2333                 *fullp = hammer_alist_isfull(&buffer->alist);
2334                 hammer_rel_buffer(buffer, 0);
2335         } else {
2336                 r = HAMMER_ALIST_BLOCK_NONE; *fullp = 0;
2337         }
2338         return(r);
2339 }
2340
2341 static int
2342 buffer_alist_alloc_rev(void *info, int32_t blk, int32_t radix,
2343                       int32_t count, int32_t atblk, int32_t *fullp)
2344 {
2345         hammer_cluster_t cluster = info;
2346         hammer_buffer_t buffer;
2347         int32_t buf_no;
2348         int32_t r;
2349         int error = 0;
2350
2351         buf_no = blk / HAMMER_FSBUF_MAXBLKS;
2352         buffer = hammer_get_buffer(cluster, buf_no, 0, &error);
2353         if (buffer) {
2354                 KKASSERT(buffer->ondisk->head.buf_type != 0);
2355                 hammer_modify_buffer(buffer);
2356                 r = hammer_alist_alloc_rev(&buffer->alist, count, atblk - blk);
2357                 if (r != HAMMER_ALIST_BLOCK_NONE)
2358                         r += blk;
2359                 *fullp = hammer_alist_isfull(&buffer->alist);
2360                 hammer_rel_buffer(buffer, 0);
2361         } else {
2362                 r = HAMMER_ALIST_BLOCK_NONE;
2363                 *fullp = 0;
2364         }
2365         return(r);
2366 }
2367
2368 static void
2369 buffer_alist_free(void *info, int32_t blk, int32_t radix,
2370                  int32_t base_blk, int32_t count, int32_t *emptyp)
2371 {
2372         hammer_cluster_t cluster = info;
2373         hammer_buffer_t buffer;
2374         int32_t buf_no;
2375         int error = 0;
2376
2377         buf_no = blk / HAMMER_FSBUF_MAXBLKS;
2378         buffer = hammer_get_buffer(cluster, buf_no, 0, &error);
2379         if (buffer) {
2380                 KKASSERT(buffer->ondisk->head.buf_type != 0);
2381                 hammer_modify_buffer(buffer);
2382                 hammer_alist_free(&buffer->alist, base_blk, count);
2383                 *emptyp = hammer_alist_isempty(&buffer->alist);
2384                 /* XXX don't bother updating if the buffer is completely empty? */
2385                 hammer_rel_buffer(buffer, 0);
2386         } else {
2387                 *emptyp = 0;
2388         }
2389 }
2390
2391 static void
2392 buffer_alist_print(void *info, int32_t blk, int32_t radix, int tab)
2393 {
2394 }
2395
2396 /*
2397  * A-LIST - super-cluster recursion into a cluster and cluster recursion
2398  * into a filesystem buffer.  A-List's are mostly self-contained entities,
2399  * but callbacks must be installed to recurse from one A-List to another.
2400  *
2401  * Implementing these callbacks allows us to operate a multi-layered A-List
2402  * as a single entity.
2403  */
2404
2405 /*
2406  * This occurs when allocating a cluster via the volume a-list and the
2407  * entry in the volume a-list indicated all-free.  The underlying supercl
2408  * has not yet been initialized.
2409  */
2410 static int
2411 super_alist_init(void *info, int32_t blk, int32_t radix,
2412                  hammer_alloc_state_t state)
2413 {
2414         hammer_volume_t volume = info;
2415         hammer_supercl_t supercl;
2416         int32_t scl_no;
2417         int error = 0;
2418
2419         /*
2420          * Calculate the super-cluster number containing the cluster (blk)
2421          * and obtain the super-cluster buffer.
2422          */
2423         scl_no = blk / HAMMER_SCL_MAXCLUSTERS;
2424         supercl = hammer_get_supercl(volume, scl_no, &error, state);
2425         if (supercl)
2426                 hammer_rel_supercl(supercl, 0);
2427         return (error);
2428 }
2429
2430 static int
2431 super_alist_recover(void *info, int32_t blk, int32_t radix, int32_t count)
2432 {
2433         hammer_volume_t volume = info;
2434         hammer_supercl_t supercl;
2435         int32_t scl_no;
2436         int error = 0;
2437
2438         /*
2439          * Calculate the super-cluster number containing the cluster (blk)
2440          * and obtain the super-cluster buffer.
2441          */
2442         scl_no = blk / HAMMER_SCL_MAXCLUSTERS;
2443         supercl = hammer_get_supercl(volume, scl_no, &error,
2444                                      HAMMER_ASTATE_NONE);
2445         if (supercl) {
2446                 hammer_modify_supercl(supercl);
2447                 error = hammer_alist_recover(&supercl->alist, blk, 0, count);
2448                 /* free block count is returned if >= 0 */
2449                 hammer_rel_supercl(supercl, 0);
2450         } else {
2451                 error = -error;
2452         }
2453         return (error);
2454 }
2455
2456 /*
2457  * This occurs when freeing a cluster via the volume a-list and the
2458  * supercl is now 100% free.  We can destroy the supercl.
2459  *
2460  * What we actually do is just unset the modify bit so it doesn't get
2461  * written out.
2462  */
2463 static int
2464 super_alist_destroy(void *info, int32_t blk, int32_t radix)
2465 {
2466         hammer_volume_t volume = info;
2467         hammer_supercl_t supercl;
2468         int32_t scl_no;
2469         int error = 0;
2470
2471         /*
2472          * Calculate the super-cluster number containing the cluster (blk)
2473          * and obtain the super-cluster buffer.
2474          */
2475         scl_no = blk / HAMMER_SCL_MAXCLUSTERS;
2476         if (hammer_find_supercl(volume, scl_no)) {
2477                 supercl = hammer_get_supercl(volume, scl_no, &error,
2478                                              HAMMER_ASTATE_FREE); /* XXX */
2479                 if (supercl) {
2480                         hammer_io_clear_modify(&supercl->io);
2481                         hammer_rel_supercl(supercl, 0);
2482                 }
2483         }
2484         return (error);
2485 }
2486
2487 static int
2488 super_alist_alloc_fwd(void *info, int32_t blk, int32_t radix,
2489                       int32_t count, int32_t atblk, int32_t *fullp)
2490 {
2491         hammer_volume_t volume = info;
2492         hammer_supercl_t supercl;
2493         int32_t scl_no;
2494         int32_t r;
2495         int error = 0;
2496
2497         scl_no = blk / HAMMER_SCL_MAXCLUSTERS;
2498         supercl = hammer_get_supercl(volume, scl_no, &error, 0);
2499         if (supercl) {
2500                 hammer_modify_supercl(supercl);
2501                 r = hammer_alist_alloc_fwd(&supercl->alist, count, atblk - blk);
2502                 if (r != HAMMER_ALIST_BLOCK_NONE)
2503                         r += blk;
2504                 *fullp = hammer_alist_isfull(&supercl->alist);
2505                 hammer_rel_supercl(supercl, 0);
2506         } else {
2507                 r = HAMMER_ALIST_BLOCK_NONE;
2508                 *fullp = 0;
2509         }
2510         return(r);
2511 }
2512
2513 static int
2514 super_alist_alloc_rev(void *info, int32_t blk, int32_t radix,
2515                       int32_t count, int32_t atblk, int32_t *fullp)
2516 {
2517         hammer_volume_t volume = info;
2518         hammer_supercl_t supercl;
2519         int32_t scl_no;
2520         int32_t r;
2521         int error = 0;
2522
2523         scl_no = blk / HAMMER_SCL_MAXCLUSTERS;
2524         supercl = hammer_get_supercl(volume, scl_no, &error, 0);
2525         if (supercl) {
2526                 hammer_modify_supercl(supercl);
2527                 r = hammer_alist_alloc_rev(&supercl->alist, count, atblk - blk);
2528                 if (r != HAMMER_ALIST_BLOCK_NONE)
2529                         r += blk;
2530                 *fullp = hammer_alist_isfull(&supercl->alist);
2531                 hammer_rel_supercl(supercl, 0);
2532         } else { 
2533                 r = HAMMER_ALIST_BLOCK_NONE;
2534                 *fullp = 0;
2535         }
2536         return(r);
2537 }
2538
2539 static void
2540 super_alist_free(void *info, int32_t blk, int32_t radix,
2541                  int32_t base_blk, int32_t count, int32_t *emptyp)
2542 {
2543         hammer_volume_t volume = info;
2544         hammer_supercl_t supercl;
2545         int32_t scl_no;
2546         int error = 0;
2547
2548         scl_no = blk / HAMMER_SCL_MAXCLUSTERS;
2549         supercl = hammer_get_supercl(volume, scl_no, &error, 0);
2550         if (supercl) {
2551                 hammer_modify_supercl(supercl);
2552                 hammer_alist_free(&supercl->alist, base_blk, count);
2553                 *emptyp = hammer_alist_isempty(&supercl->alist);
2554                 hammer_rel_supercl(supercl, 0);
2555         } else {
2556                 *emptyp = 0;
2557         }
2558 }
2559
2560 static void
2561 super_alist_print(void *info, int32_t blk, int32_t radix, int tab)
2562 {
2563 }
2564
2565 void
2566 hammer_init_alist_config(void)
2567 {
2568         hammer_alist_config_t config;
2569
2570         hammer_alist_template(&Buf_alist_config, HAMMER_FSBUF_MAXBLKS,
2571                               1, HAMMER_FSBUF_METAELMS);
2572         hammer_alist_template(&Vol_normal_alist_config, HAMMER_VOL_MAXCLUSTERS,
2573                               1, HAMMER_VOL_METAELMS_1LYR);
2574         hammer_alist_template(&Vol_super_alist_config,
2575                           HAMMER_VOL_MAXSUPERCLUSTERS * HAMMER_SCL_MAXCLUSTERS,
2576                               HAMMER_SCL_MAXCLUSTERS, HAMMER_VOL_METAELMS_2LYR);
2577         hammer_alist_template(&Supercl_alist_config, HAMMER_VOL_MAXCLUSTERS,
2578                               1, HAMMER_SUPERCL_METAELMS);
2579         hammer_alist_template(&Clu_master_alist_config, HAMMER_CLU_MAXBUFFERS,
2580                               1, HAMMER_CLU_MASTER_METAELMS);
2581         hammer_alist_template(&Clu_slave_alist_config,
2582                               HAMMER_CLU_MAXBUFFERS * HAMMER_FSBUF_MAXBLKS,
2583                               HAMMER_FSBUF_MAXBLKS, HAMMER_CLU_SLAVE_METAELMS);
2584
2585         config = &Vol_super_alist_config;
2586         config->bl_radix_init = super_alist_init;
2587         config->bl_radix_recover = super_alist_recover;
2588         config->bl_radix_destroy = super_alist_destroy;
2589         config->bl_radix_alloc_fwd = super_alist_alloc_fwd;
2590         config->bl_radix_alloc_rev = super_alist_alloc_rev;
2591         config->bl_radix_free = super_alist_free;
2592         config->bl_radix_print = super_alist_print;
2593
2594         config = &Clu_slave_alist_config;
2595         config->bl_radix_init = buffer_alist_init;
2596         config->bl_radix_recover = buffer_alist_recover;
2597         config->bl_radix_destroy = buffer_alist_destroy;
2598         config->bl_radix_alloc_fwd = buffer_alist_alloc_fwd;
2599         config->bl_radix_alloc_rev = buffer_alist_alloc_rev;
2600         config->bl_radix_free = buffer_alist_free;
2601         config->bl_radix_print = buffer_alist_print;
2602 }
2603