HAMMER 14/many - historical access cleanup, itimes, bug fixes.
[dragonfly.git] / sys / vfs / hammer / hammer_ondisk.c
1 /*
2  * Copyright (c) 2007 The DragonFly Project.  All rights reserved.
3  * 
4  * This code is derived from software contributed to The DragonFly Project
5  * by Matthew Dillon <dillon@backplane.com>
6  * 
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions
9  * are met:
10  * 
11  * 1. Redistributions of source code must retain the above copyright
12  *    notice, this list of conditions and the following disclaimer.
13  * 2. Redistributions in binary form must reproduce the above copyright
14  *    notice, this list of conditions and the following disclaimer in
15  *    the documentation and/or other materials provided with the
16  *    distribution.
17  * 3. Neither the name of The DragonFly Project nor the names of its
18  *    contributors may be used to endorse or promote products derived
19  *    from this software without specific, prior written permission.
20  * 
21  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
22  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
23  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
24  * FOR A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE
25  * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
26  * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING,
27  * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
28  * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
29  * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
30  * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
31  * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
32  * SUCH DAMAGE.
33  * 
34  * $DragonFly: src/sys/vfs/hammer/hammer_ondisk.c,v 1.15 2008/01/01 01:00:03 dillon Exp $
35  */
36 /*
37  * Manage HAMMER's on-disk structures.  These routines are primarily
38  * responsible for interfacing with the kernel's I/O subsystem and for
39  * managing in-memory structures.
40  */
41
42 #include "hammer.h"
43 #include <sys/fcntl.h>
44 #include <sys/nlookup.h>
45 #include <sys/buf.h>
46 #include <sys/buf2.h>
47
48 static void hammer_free_volume(hammer_volume_t volume);
49 static int hammer_load_volume(hammer_volume_t volume);
50 static int hammer_load_supercl(hammer_supercl_t supercl, int isnew);
51 static int hammer_load_cluster(hammer_cluster_t cluster, int isnew);
52 static int hammer_load_buffer(hammer_buffer_t buffer, u_int64_t buf_type);
53 static void hammer_remove_node_clist(hammer_buffer_t buffer,
54                         hammer_node_t node);
55 static void initbuffer(hammer_alist_t live, hammer_fsbuf_head_t head,
56                         u_int64_t type);
57 static void alloc_new_buffer(hammer_cluster_t cluster,
58                         hammer_alist_t live, u_int64_t type, int32_t nelements,
59                         int32_t start,
60                         int *errorp, struct hammer_buffer **bufferp);
61 #if 0
62 static void readhammerbuf(hammer_volume_t vol, void *data,
63                         int64_t offset);
64 static void writehammerbuf(hammer_volume_t vol, const void *data,
65                         int64_t offset);
66 #endif
67 static int64_t calculate_cluster_offset(hammer_volume_t vol, int32_t clu_no);
68 static int64_t calculate_supercl_offset(hammer_volume_t vol, int32_t scl_no);
69 static int32_t hammer_alloc_master(hammer_cluster_t cluster, int nblks,
70                         int32_t start, int isfwd);
71 static void hammer_adjust_stats(hammer_cluster_t cluster,
72                         u_int64_t buf_type, int nblks);
73
74 struct hammer_alist_config Buf_alist_config;
75 struct hammer_alist_config Vol_normal_alist_config;
76 struct hammer_alist_config Vol_super_alist_config;
77 struct hammer_alist_config Supercl_alist_config;
78 struct hammer_alist_config Clu_master_alist_config;
79 struct hammer_alist_config Clu_slave_alist_config;
80
81 /*
82  * Red-Black tree support for various structures
83  */
84 static int
85 hammer_ino_rb_compare(hammer_inode_t ip1, hammer_inode_t ip2)
86 {
87         if (ip1->obj_id < ip2->obj_id)
88                 return(-1);
89         if (ip1->obj_id > ip2->obj_id)
90                 return(1);
91         if (ip1->obj_asof < ip2->obj_asof)
92                 return(-1);
93         if (ip1->obj_asof > ip2->obj_asof)
94                 return(1);
95         return(0);
96 }
97
98 static int
99 hammer_inode_info_cmp(hammer_inode_info_t info, hammer_inode_t ip)
100 {
101         if (info->obj_id < ip->obj_id)
102                 return(-1);
103         if (info->obj_id > ip->obj_id)
104                 return(1);
105         if (info->obj_asof < ip->obj_asof)
106                 return(-1);
107         if (info->obj_asof > ip->obj_asof)
108                 return(1);
109         return(0);
110 }
111
112 static int
113 hammer_vol_rb_compare(hammer_volume_t vol1, hammer_volume_t vol2)
114 {
115         if (vol1->vol_no < vol2->vol_no)
116                 return(-1);
117         if (vol1->vol_no > vol2->vol_no)
118                 return(1);
119         return(0);
120 }
121
122 static int
123 hammer_scl_rb_compare(hammer_supercl_t cl1, hammer_supercl_t cl2)
124 {
125         if (cl1->scl_no < cl2->scl_no)
126                 return(-1);
127         if (cl1->scl_no > cl2->scl_no)
128                 return(1);
129         return(0);
130 }
131
132 static int
133 hammer_clu_rb_compare(hammer_cluster_t cl1, hammer_cluster_t cl2)
134 {
135         if (cl1->clu_no < cl2->clu_no)
136                 return(-1);
137         if (cl1->clu_no > cl2->clu_no)
138                 return(1);
139         return(0);
140 }
141
142 static int
143 hammer_buf_rb_compare(hammer_buffer_t buf1, hammer_buffer_t buf2)
144 {
145         if (buf1->buf_no < buf2->buf_no)
146                 return(-1);
147         if (buf1->buf_no > buf2->buf_no)
148                 return(1);
149         return(0);
150 }
151
152 static int
153 hammer_nod_rb_compare(hammer_node_t node1, hammer_node_t node2)
154 {
155         if (node1->node_offset < node2->node_offset)
156                 return(-1);
157         if (node1->node_offset > node2->node_offset)
158                 return(1);
159         return(0);
160 }
161
162 /*
163  * Note: The lookup function for hammer_ino_rb_tree winds up being named
164  * hammer_ino_rb_tree_RB_LOOKUP_INFO(root, info).  The other lookup
165  * functions are normal, e.g. hammer_clu_rb_tree_RB_LOOKUP(root, clu_no).
166  */
167 RB_GENERATE(hammer_ino_rb_tree, hammer_inode, rb_node, hammer_ino_rb_compare);
168 RB_GENERATE_XLOOKUP(hammer_ino_rb_tree, INFO, hammer_inode, rb_node,
169                 hammer_inode_info_cmp, hammer_inode_info_t);
170 RB_GENERATE2(hammer_vol_rb_tree, hammer_volume, rb_node,
171              hammer_vol_rb_compare, int32_t, vol_no);
172 RB_GENERATE2(hammer_scl_rb_tree, hammer_supercl, rb_node,
173              hammer_scl_rb_compare, int32_t, scl_no);
174 RB_GENERATE2(hammer_clu_rb_tree, hammer_cluster, rb_node,
175              hammer_clu_rb_compare, int32_t, clu_no);
176 RB_GENERATE2(hammer_buf_rb_tree, hammer_buffer, rb_node,
177              hammer_buf_rb_compare, int32_t, buf_no);
178 RB_GENERATE2(hammer_nod_rb_tree, hammer_node, rb_node,
179              hammer_nod_rb_compare, int32_t, node_offset);
180
181 /************************************************************************
182  *                              VOLUMES                                 *
183  ************************************************************************
184  *
185  * Load a HAMMER volume by name.  Returns 0 on success or a positive error
186  * code on failure.  Volumes must be loaded at mount time, get_volume() will
187  * not load a new volume.
188  *
189  * Calls made to hammer_load_volume() or single-threaded
190  */
191 int
192 hammer_install_volume(struct hammer_mount *hmp, const char *volname)
193 {
194         struct mount *mp;
195         hammer_volume_t volume;
196         struct hammer_volume_ondisk *ondisk;
197         struct nlookupdata nd;
198         struct buf *bp = NULL;
199         int error;
200         int ronly;
201
202         mp = hmp->mp;
203         ronly = ((mp->mnt_flag & MNT_RDONLY) ? 1 : 0);
204
205         /*
206          * Allocate a volume structure
207          */
208         ++hammer_count_volumes;
209         volume = kmalloc(sizeof(*volume), M_HAMMER, M_WAITOK|M_ZERO);
210         volume->vol_name = kstrdup(volname, M_HAMMER);
211         volume->hmp = hmp;
212         volume->io.type = HAMMER_STRUCTURE_VOLUME;
213         volume->io.offset = 0LL;
214
215         /*
216          * Get the device vnode
217          */
218         error = nlookup_init(&nd, volume->vol_name, UIO_SYSSPACE, NLC_FOLLOW);
219         if (error == 0)
220                 error = nlookup(&nd);
221         if (error == 0)
222                 error = cache_vref(&nd.nl_nch, nd.nl_cred, &volume->devvp);
223         nlookup_done(&nd);
224         if (error == 0) {
225                 vn_isdisk(volume->devvp, &error);
226         }
227         if (error == 0) {
228                 vn_lock(volume->devvp, LK_EXCLUSIVE | LK_RETRY);
229                 error = VOP_OPEN(volume->devvp, (ronly ? FREAD : FREAD|FWRITE),
230                                  FSCRED, NULL);
231                 vn_unlock(volume->devvp);
232         }
233         if (error) {
234                 hammer_free_volume(volume);
235                 return(error);
236         }
237
238         /*
239          * Extract the volume number from the volume header and do various
240          * sanity checks.
241          */
242         error = bread(volume->devvp, 0LL, HAMMER_BUFSIZE, &bp);
243         if (error)
244                 goto late_failure;
245         ondisk = (void *)bp->b_data;
246         if (ondisk->head.buf_type != HAMMER_FSBUF_VOLUME) {
247                 kprintf("hammer_mount: volume %s has an invalid header\n",
248                         volume->vol_name);
249                 error = EFTYPE;
250                 goto late_failure;
251         }
252         volume->vol_no = ondisk->vol_no;
253         volume->cluster_base = ondisk->vol_clo_beg;
254         volume->vol_clsize = ondisk->vol_clsize;
255         volume->vol_flags = ondisk->vol_flags;
256         volume->nblocks = ondisk->vol_nblocks; 
257         RB_INIT(&volume->rb_clus_root);
258         RB_INIT(&volume->rb_scls_root);
259
260         hmp->mp->mnt_stat.f_blocks += volume->nblocks;
261
262         if (RB_EMPTY(&hmp->rb_vols_root)) {
263                 hmp->fsid = ondisk->vol_fsid;
264         } else if (bcmp(&hmp->fsid, &ondisk->vol_fsid, sizeof(uuid_t))) {
265                 kprintf("hammer_mount: volume %s's fsid does not match "
266                         "other volumes\n", volume->vol_name);
267                 error = EFTYPE;
268                 goto late_failure;
269         }
270
271         /*
272          * Insert the volume structure into the red-black tree.
273          */
274         if (RB_INSERT(hammer_vol_rb_tree, &hmp->rb_vols_root, volume)) {
275                 kprintf("hammer_mount: volume %s has a duplicate vol_no %d\n",
276                         volume->vol_name, volume->vol_no);
277                 error = EEXIST;
278         }
279
280         /*
281          * Set the root volume and load the root cluster.  HAMMER special
282          * cases rootvol and rootcl and will not deallocate the structures.
283          * We do not hold a ref because this would prevent related I/O
284          * from being flushed.
285          */
286         if (error == 0 && ondisk->vol_rootvol == ondisk->vol_no) {
287                 hmp->rootvol = volume;
288                 hmp->rootcl = hammer_get_cluster(volume,
289                                                  ondisk->vol0_root_clu_no,
290                                                  &error, 0);
291                 hammer_rel_cluster(hmp->rootcl, 0);
292                 hmp->fsid_udev = dev2udev(vn_todev(volume->devvp));
293         }
294 late_failure:
295         if (bp)
296                 brelse(bp);
297         if (error) {
298                 /*vinvalbuf(volume->devvp, V_SAVE, 0, 0);*/
299                 VOP_CLOSE(volume->devvp, ronly ? FREAD : FREAD|FWRITE);
300                 hammer_free_volume(volume);
301         }
302         return (error);
303 }
304
305 /*
306  * Unload and free a HAMMER volume.  Must return >= 0 to continue scan
307  * so returns -1 on failure.
308  */
309 int
310 hammer_unload_volume(hammer_volume_t volume, void *data __unused)
311 {
312         struct hammer_mount *hmp = volume->hmp;
313         hammer_cluster_t rootcl;
314         int ronly = ((hmp->mp->mnt_flag & MNT_RDONLY) ? 1 : 0);
315
316         /*
317          * Sync clusters, sync volume
318          */
319
320         hmp->mp->mnt_stat.f_blocks -= volume->nblocks;
321
322         /*
323          * Clean up the root cluster, which is held unlocked in the root
324          * volume.
325          */
326         if (hmp->rootvol == volume) {
327                 if ((rootcl = hmp->rootcl) != NULL)
328                         hmp->rootcl = NULL;
329                 hmp->rootvol = NULL;
330         }
331
332         /*
333          * Unload clusters and super-clusters.  Unloading a super-cluster
334          * also unloads related clusters, but the filesystem may not be
335          * using super-clusters so unload clusters anyway.
336          */
337         RB_SCAN(hammer_clu_rb_tree, &volume->rb_clus_root, NULL,
338                         hammer_unload_cluster, NULL);
339         RB_SCAN(hammer_scl_rb_tree, &volume->rb_scls_root, NULL,
340                         hammer_unload_supercl, NULL);
341
342         /*
343          * Release our buffer and flush anything left in the buffer cache.
344          */
345         hammer_io_release(&volume->io, 1);
346
347         /*
348          * There should be no references on the volume, no clusters, and
349          * no super-clusters.
350          */
351         KKASSERT(volume->io.lock.refs == 0);
352         KKASSERT(RB_EMPTY(&volume->rb_clus_root));
353         KKASSERT(RB_EMPTY(&volume->rb_scls_root));
354
355         volume->ondisk = NULL;
356         if (volume->devvp) {
357                 if (ronly) {
358                         vinvalbuf(volume->devvp, 0, 0, 0);
359                         VOP_CLOSE(volume->devvp, FREAD);
360                 } else {
361                         vinvalbuf(volume->devvp, V_SAVE, 0, 0);
362                         VOP_CLOSE(volume->devvp, FREAD|FWRITE);
363                 }
364         }
365
366         /*
367          * Destroy the structure
368          */
369         RB_REMOVE(hammer_vol_rb_tree, &hmp->rb_vols_root, volume);
370         hammer_free_volume(volume);
371         return(0);
372 }
373
374 static
375 void
376 hammer_free_volume(hammer_volume_t volume)
377 {
378         if (volume->vol_name) {
379                 kfree(volume->vol_name, M_HAMMER);
380                 volume->vol_name = NULL;
381         }
382         if (volume->devvp) {
383                 vrele(volume->devvp);
384                 volume->devvp = NULL;
385         }
386         --hammer_count_volumes;
387         kfree(volume, M_HAMMER);
388 }
389
390 /*
391  * Get a HAMMER volume.  The volume must already exist.
392  */
393 hammer_volume_t
394 hammer_get_volume(struct hammer_mount *hmp, int32_t vol_no, int *errorp)
395 {
396         struct hammer_volume *volume;
397
398         /*
399          * Locate the volume structure
400          */
401         volume = RB_LOOKUP(hammer_vol_rb_tree, &hmp->rb_vols_root, vol_no);
402         if (volume == NULL) {
403                 *errorp = ENOENT;
404                 return(NULL);
405         }
406         hammer_ref(&volume->io.lock);
407
408         /*
409          * Deal with on-disk info
410          */
411         if (volume->ondisk == NULL) {
412                 *errorp = hammer_load_volume(volume);
413                 if (*errorp) {
414                         hammer_rel_volume(volume, 1);
415                         volume = NULL;
416                 }
417         } else {
418                 *errorp = 0;
419         }
420         return(volume);
421 }
422
423 int
424 hammer_ref_volume(hammer_volume_t volume)
425 {
426         int error;
427
428         hammer_ref(&volume->io.lock);
429
430         /*
431          * Deal with on-disk info
432          */
433         if (volume->ondisk == NULL) {
434                 error = hammer_load_volume(volume);
435                 if (error)
436                         hammer_rel_volume(volume, 1);
437         } else {
438                 error = 0;
439         }
440         return (error);
441 }
442
443 hammer_volume_t
444 hammer_get_root_volume(struct hammer_mount *hmp, int *errorp)
445 {
446         hammer_volume_t volume;
447
448         volume = hmp->rootvol;
449         KKASSERT(volume != NULL);
450         hammer_ref(&volume->io.lock);
451
452         /*
453          * Deal with on-disk info
454          */
455         if (volume->ondisk == NULL) {
456                 *errorp = hammer_load_volume(volume);
457                 if (*errorp) {
458                         hammer_rel_volume(volume, 1);
459                         volume = NULL;
460                 }
461         } else {
462                 *errorp = 0;
463         }
464         return (volume);
465 }
466
467 /*
468  * Load a volume's on-disk information.  The volume must be referenced and
469  * not locked.  We temporarily acquire an exclusive lock to interlock
470  * against releases or multiple get's.
471  */
472 static int
473 hammer_load_volume(hammer_volume_t volume)
474 {
475         struct hammer_volume_ondisk *ondisk;
476         int error;
477
478         hammer_lock_ex(&volume->io.lock);
479         if (volume->ondisk == NULL) {
480                 error = hammer_io_read(volume->devvp, &volume->io);
481                 if (error) {
482                         hammer_unlock(&volume->io.lock);
483                         return (error);
484                 }
485                 volume->ondisk = ondisk = (void *)volume->io.bp->b_data;
486
487                 /*
488                  * Configure the volume's A-lists.  These are used to
489                  * allocate clusters.
490                  */
491                 if (volume->vol_flags & HAMMER_VOLF_USINGSUPERCL) {
492                         volume->alist.config = &Vol_super_alist_config;
493                         volume->alist.meta = ondisk->vol_almeta.super;
494                         volume->alist.info = volume;
495                 } else {
496                         volume->alist.config = &Vol_normal_alist_config;
497                         volume->alist.meta = ondisk->vol_almeta.normal;
498                         volume->alist.info = NULL;
499                 }
500         } else {
501                 error = 0;
502         }
503         hammer_unlock(&volume->io.lock);
504         return(0);
505 }
506
507 /*
508  * Release a volume.  Call hammer_io_release on the last reference.  We have
509  * to acquire an exclusive lock to interlock against volume->ondisk tests
510  * in hammer_load_volume(), and hammer_io_release() also expects an exclusive
511  * lock to be held.
512  *
513  * Volumes are not unloaded from memory during normal operation.
514  */
515 void
516 hammer_rel_volume(hammer_volume_t volume, int flush)
517 {
518         if (volume->io.lock.refs == 1) {
519                 hammer_lock_ex(&volume->io.lock);
520                 if (volume->io.lock.refs == 1) {
521                         volume->ondisk = NULL;
522                         hammer_io_release(&volume->io, flush);
523                 }
524                 hammer_unlock(&volume->io.lock);
525         }
526         hammer_unref(&volume->io.lock);
527 }
528
529 /************************************************************************
530  *                              SUPER-CLUSTERS                          *
531  ************************************************************************
532  *
533  * Manage super-clusters.  Note that a supercl holds a reference to its
534  * associated volume.
535  */
536 hammer_supercl_t
537 hammer_get_supercl(hammer_volume_t volume, int32_t scl_no,
538                    int *errorp, int isnew)
539 {
540         hammer_supercl_t supercl;
541
542         /*
543          * Locate and lock the super-cluster structure, creating one
544          * if necessary.
545          */
546 again:
547         supercl = RB_LOOKUP(hammer_scl_rb_tree, &volume->rb_scls_root, scl_no);
548         if (supercl == NULL) {
549                 ++hammer_count_supercls;
550                 supercl = kmalloc(sizeof(*supercl), M_HAMMER, M_WAITOK|M_ZERO);
551                 supercl->scl_no = scl_no;
552                 supercl->volume = volume;
553                 supercl->io.offset = calculate_supercl_offset(volume, scl_no);
554                 supercl->io.type = HAMMER_STRUCTURE_SUPERCL;
555                 hammer_ref(&supercl->io.lock);
556
557                 /*
558                  * Insert the cluster into the RB tree and handle late
559                  * collisions.
560                  */
561                 if (RB_INSERT(hammer_scl_rb_tree, &volume->rb_scls_root, supercl)) {
562                         hammer_unref(&supercl->io.lock);
563                         --hammer_count_supercls;
564                         kfree(supercl, M_HAMMER);
565                         goto again;
566                 }
567                 hammer_ref(&volume->io.lock);
568         } else {
569                 hammer_ref(&supercl->io.lock);
570         }
571
572         /*
573          * Deal with on-disk info
574          */
575         if (supercl->ondisk == NULL || isnew) {
576                 *errorp = hammer_load_supercl(supercl, isnew);
577                 if (*errorp) {
578                         hammer_rel_supercl(supercl, 1);
579                         supercl = NULL;
580                 }
581         } else {
582                 *errorp = 0;
583         }
584         return(supercl);
585 }
586
587 static int
588 hammer_load_supercl(hammer_supercl_t supercl, int isnew)
589 {
590         struct hammer_supercl_ondisk *ondisk;
591         hammer_volume_t volume = supercl->volume;
592         int error;
593
594         hammer_lock_ex(&supercl->io.lock);
595         if (supercl->ondisk == NULL) {
596                 if (isnew)
597                         error = hammer_io_new(volume->devvp, &supercl->io);
598                 else
599                         error = hammer_io_read(volume->devvp, &supercl->io);
600                 if (error) {
601                         hammer_unlock(&supercl->io.lock);
602                         return (error);
603                 }
604                 supercl->ondisk = ondisk = (void *)supercl->io.bp->b_data;
605
606                 supercl->alist.config = &Supercl_alist_config;
607                 supercl->alist.meta = ondisk->scl_meta;
608                 supercl->alist.info = NULL;
609         } else if (isnew) {
610                 error = hammer_io_new(volume->devvp, &supercl->io);
611         } else {
612                 error = 0;
613         }
614         if (error == 0 && isnew) {
615                 /*
616                  * If this is a new super-cluster we have to initialize
617                  * various ondisk structural elements.  The caller is
618                  * responsible for the remainder.
619                  */
620                 struct hammer_alist_live dummy;
621
622                 ondisk = supercl->ondisk;
623                 dummy.config = &Buf_alist_config;
624                 dummy.meta = ondisk->head.buf_almeta;
625                 dummy.info = NULL;
626                 initbuffer(&dummy, &ondisk->head, HAMMER_FSBUF_SUPERCL);
627                 hammer_alist_init(&supercl->alist);
628         }
629         hammer_unlock(&supercl->io.lock);
630         return (error);
631 }
632
633 /*
634  * NOTE: Called from RB_SCAN, must return >= 0 for scan to continue.
635  */
636 int
637 hammer_unload_supercl(hammer_supercl_t supercl, void *data __unused)
638 {
639         KKASSERT(supercl->io.lock.refs == 0);
640         hammer_ref(&supercl->io.lock);
641         hammer_rel_supercl(supercl, 1);
642         return(0);
643 }
644
645 /*
646  * Release a super-cluster.  We have to deal with several places where
647  * another thread can ref the super-cluster.
648  *
649  * Only destroy the structure itself if the related buffer cache buffer
650  * was disassociated from it.  This ties the management of the structure
651  * to the buffer cache subsystem.
652  */
653 void
654 hammer_rel_supercl(hammer_supercl_t supercl, int flush)
655 {
656         hammer_volume_t volume;
657
658         if (supercl->io.lock.refs == 1) {
659                 hammer_lock_ex(&supercl->io.lock);
660                 if (supercl->io.lock.refs == 1) {
661                         hammer_io_release(&supercl->io, flush);
662                         if (supercl->io.bp == NULL &&
663                             supercl->io.lock.refs == 1) {
664                                 volume = supercl->volume;
665                                 RB_REMOVE(hammer_scl_rb_tree,
666                                           &volume->rb_scls_root, supercl);
667                                 supercl->volume = NULL; /* sanity */
668                                 --hammer_count_supercls;
669                                 kfree(supercl, M_HAMMER);
670                                 hammer_rel_volume(volume, 0);
671                                 return;
672                         }
673                 }
674                 hammer_unlock(&supercl->io.lock);
675         }
676         hammer_unref(&supercl->io.lock);
677 }
678
679 /************************************************************************
680  *                              CLUSTERS                                *
681  ************************************************************************
682  *
683  */
684 hammer_cluster_t
685 hammer_get_cluster(hammer_volume_t volume, int32_t clu_no,
686                    int *errorp, int isnew)
687 {
688         hammer_cluster_t cluster;
689
690 again:
691         cluster = RB_LOOKUP(hammer_clu_rb_tree, &volume->rb_clus_root, clu_no);
692         if (cluster == NULL) {
693                 ++hammer_count_clusters;
694                 cluster = kmalloc(sizeof(*cluster), M_HAMMER, M_WAITOK|M_ZERO);
695                 cluster->clu_no = clu_no;
696                 cluster->volume = volume;
697                 cluster->io.offset = calculate_cluster_offset(volume, clu_no);
698                 cluster->state = HAMMER_CLUSTER_IDLE;
699                 RB_INIT(&cluster->rb_bufs_root);
700                 RB_INIT(&cluster->rb_nods_root);
701                 cluster->io.type = HAMMER_STRUCTURE_CLUSTER;
702                 hammer_ref(&cluster->io.lock);
703
704                 /*
705                  * Insert the cluster into the RB tree and handle late
706                  * collisions.
707                  */
708                 if (RB_INSERT(hammer_clu_rb_tree, &volume->rb_clus_root, cluster)) {
709                         hammer_unref(&cluster->io.lock);
710                         --hammer_count_clusters;
711                         kfree(cluster, M_HAMMER);
712                         goto again;
713                 }
714                 hammer_ref(&volume->io.lock);
715         } else {
716                 hammer_ref(&cluster->io.lock);
717         }
718
719         /*
720          * Deal with on-disk info
721          */
722         if (cluster->ondisk == NULL || isnew) {
723                 *errorp = hammer_load_cluster(cluster, isnew);
724                 if (*errorp) {
725                         hammer_rel_cluster(cluster, 1);
726                         cluster = NULL;
727                 }
728         } else {
729                 *errorp = 0;
730         }
731         return (cluster);
732 }
733
734 hammer_cluster_t
735 hammer_get_root_cluster(struct hammer_mount *hmp, int *errorp)
736 {
737         hammer_cluster_t cluster;
738
739         cluster = hmp->rootcl;
740         KKASSERT(cluster != NULL);
741         hammer_ref(&cluster->io.lock);
742
743         /*
744          * Deal with on-disk info
745          */
746         if (cluster->ondisk == NULL) {
747                 *errorp = hammer_load_cluster(cluster, 0);
748                 if (*errorp) {
749                         hammer_rel_cluster(cluster, 1);
750                         cluster = NULL;
751                 }
752         } else {
753                 *errorp = 0;
754         }
755         return (cluster);
756 }
757
758 static
759 int
760 hammer_load_cluster(hammer_cluster_t cluster, int isnew)
761 {
762         hammer_volume_t volume = cluster->volume;
763         struct hammer_cluster_ondisk *ondisk;
764         int error;
765
766         /*
767          * Load the cluster's on-disk info
768          */
769         hammer_lock_ex(&cluster->io.lock);
770         if (cluster->ondisk == NULL) {
771                 if (isnew)
772                         error = hammer_io_new(volume->devvp, &cluster->io);
773                 else
774                         error = hammer_io_read(volume->devvp, &cluster->io);
775                 if (error) {
776                         hammer_unlock(&cluster->io.lock);
777                         return (error);
778                 }
779                 cluster->ondisk = ondisk = (void *)cluster->io.bp->b_data;
780
781                 cluster->alist_master.config = &Clu_master_alist_config;
782                 cluster->alist_master.meta = ondisk->clu_master_meta;
783                 cluster->alist_btree.config = &Clu_slave_alist_config;
784                 cluster->alist_btree.meta = ondisk->clu_btree_meta;
785                 cluster->alist_btree.info = cluster;
786                 cluster->alist_record.config = &Clu_slave_alist_config;
787                 cluster->alist_record.meta = ondisk->clu_record_meta;
788                 cluster->alist_record.info = cluster;
789                 cluster->alist_mdata.config = &Clu_slave_alist_config;
790                 cluster->alist_mdata.meta = ondisk->clu_mdata_meta;
791                 cluster->alist_mdata.info = cluster;
792
793                 if (isnew == 0) {
794                         cluster->clu_btree_beg = ondisk->clu_btree_beg;
795                         cluster->clu_btree_end = ondisk->clu_btree_end;
796                 }
797         } else if (isnew) {
798                 error = hammer_io_new(volume->devvp, &cluster->io);
799         } else {
800                 error = 0;
801         }
802         if (error == 0 && isnew) {
803                 /*
804                  * If this is a new cluster we have to initialize
805                  * various ondisk structural elements.  The caller is
806                  * responsible for the remainder.
807                  */
808                 struct hammer_alist_live dummy;
809                 hammer_node_t croot;
810                 hammer_volume_ondisk_t voldisk;
811                 int32_t nbuffers;
812
813                 hammer_modify_cluster(cluster);
814                 ondisk = cluster->ondisk;
815                 voldisk = volume->ondisk;
816
817                 dummy.config = &Buf_alist_config;
818                 dummy.meta = ondisk->head.buf_almeta;
819                 dummy.info = NULL;
820                 initbuffer(&dummy, &ondisk->head, HAMMER_FSBUF_CLUSTER);
821
822                 hammer_alist_init(&cluster->alist_master);
823                 hammer_alist_init(&cluster->alist_btree);
824                 hammer_alist_init(&cluster->alist_record);
825                 hammer_alist_init(&cluster->alist_mdata);
826
827                 ondisk->vol_fsid = voldisk->vol_fsid;
828                 ondisk->vol_fstype = voldisk->vol_fstype;
829                 ondisk->clu_gen = 1;
830                 ondisk->clu_id = 0;     /* XXX */
831                 ondisk->clu_no = cluster->clu_no;
832                 ondisk->clu_flags = 0;
833                 ondisk->clu_start = HAMMER_BUFSIZE;
834                 KKASSERT(voldisk->vol_clo_end > cluster->io.offset);
835                 if (voldisk->vol_clo_end - cluster->io.offset >
836                     voldisk->vol_clsize) {
837                         ondisk->clu_limit = voldisk->vol_clsize;
838                 } else {
839                         ondisk->clu_limit = (int32_t)(voldisk->vol_clo_end -
840                                                       cluster->io.offset);
841                 }
842                 nbuffers = ondisk->clu_limit / HAMMER_BUFSIZE;
843                 hammer_alist_free(&cluster->alist_master, 1, nbuffers - 1);
844                 ondisk->idx_data = 1 * HAMMER_FSBUF_MAXBLKS;
845                 ondisk->idx_index = 0 * HAMMER_FSBUF_MAXBLKS;
846                 ondisk->idx_record = nbuffers * HAMMER_FSBUF_MAXBLKS;
847
848                 /*
849                  * Initialize the B-Tree.  We don't know what the caller
850                  * intends to do with the cluster so make sure it causes
851                  * an assertion if the caller makes no changes.
852                  */
853                 ondisk->clu_btree_parent_vol_no = -2;
854                 ondisk->clu_btree_parent_clu_no = -2;
855                 ondisk->clu_btree_parent_offset = -2;
856                 ondisk->clu_btree_parent_clu_gen = -2;
857                 hammer_modify_cluster_done(cluster);
858
859                 croot = hammer_alloc_btree(cluster, &error);
860                 if (error == 0) {
861                         hammer_modify_node(croot);
862                         bzero(croot->ondisk, sizeof(*croot->ondisk));
863                         croot->ondisk->count = 0;
864                         croot->ondisk->type = HAMMER_BTREE_TYPE_LEAF;
865                         hammer_modify_node_done(croot);
866                         hammer_modify_cluster(cluster);
867                         ondisk->clu_btree_root = croot->node_offset;
868                         hammer_modify_cluster_done(cluster);
869                         hammer_rel_node(croot);
870                 }
871         }
872         hammer_unlock(&cluster->io.lock);
873         return (error);
874 }
875
876 /*
877  * NOTE: Called from RB_SCAN, must return >= 0 for scan to continue.
878  */
879 int
880 hammer_unload_cluster(hammer_cluster_t cluster, void *data __unused)
881 {
882         hammer_ref(&cluster->io.lock);
883         RB_SCAN(hammer_buf_rb_tree, &cluster->rb_bufs_root, NULL,
884                 hammer_unload_buffer, NULL);
885         KKASSERT(cluster->io.lock.refs == 1);
886         hammer_rel_cluster(cluster, 1);
887         return(0);
888 }
889
890 /*
891  * Reference a cluster that is either already referenced or via a specially
892  * handled pointer (aka rootcl).
893  */
894 int
895 hammer_ref_cluster(hammer_cluster_t cluster)
896 {
897         int error;
898
899         KKASSERT(cluster != NULL);
900         hammer_ref(&cluster->io.lock);
901
902         /*
903          * Deal with on-disk info
904          */
905         if (cluster->ondisk == NULL) {
906                 error = hammer_load_cluster(cluster, 0);
907                 if (error)
908                         hammer_rel_cluster(cluster, 1);
909         } else {
910                 error = 0;
911         }
912         return(error);
913 }
914
915 /*
916  * Release a cluster.  We have to deal with several places where
917  * another thread can ref the cluster.
918  *
919  * Only destroy the structure itself if the related buffer cache buffer
920  * was disassociated from it.  This ties the management of the structure
921  * to the buffer cache subsystem.
922  */
923 void
924 hammer_rel_cluster(hammer_cluster_t cluster, int flush)
925 {
926         hammer_node_t node;
927         hammer_volume_t volume;
928
929         if (cluster->io.lock.refs == 1) {
930                 hammer_lock_ex(&cluster->io.lock);
931                 if (cluster->io.lock.refs == 1) {
932                         /*
933                          * Release the I/O.  If we or the kernel wants to
934                          * flush, this will release the bp.  Otherwise the
935                          * bp may be written and flushed passively by the
936                          * kernel later on.
937                          */
938                         hammer_io_release(&cluster->io, flush);
939
940                         /*
941                          * The B-Tree node cache is not counted in the
942                          * cluster's reference count.  Clean out the
943                          * cache.
944                          *
945                          * If the cluster acquires a new reference while we
946                          * are trying to clean it out, abort the cleaning.
947                          * 
948                          * Any actively referenced nodes will reference the
949                          * related buffer and cluster, so a ref count check
950                          * should be sufficient.
951                          */
952                         while (cluster->io.bp == NULL &&
953                                cluster->io.lock.refs == 1 &&
954                                (node = RB_ROOT(&cluster->rb_nods_root)) != NULL
955                         ) {
956                                 KKASSERT(node->lock.refs == 0);
957                                 hammer_flush_node(node);
958                         }
959
960                         /*
961                          * Final cleanup
962                          */
963                         if (cluster != cluster->volume->hmp->rootcl &&
964                             cluster->io.bp == NULL &&
965                             cluster->io.lock.refs == 1 &&
966                             RB_EMPTY(&cluster->rb_nods_root)) {
967                                 KKASSERT(RB_EMPTY(&cluster->rb_bufs_root));
968                                 volume = cluster->volume;
969                                 RB_REMOVE(hammer_clu_rb_tree,
970                                           &volume->rb_clus_root, cluster);
971                                 cluster->volume = NULL; /* sanity */
972                                 --hammer_count_clusters;
973                                 kfree(cluster, M_HAMMER);
974                                 hammer_rel_volume(volume, 0);
975                                 return;
976                         }
977                 }
978                 hammer_unlock(&cluster->io.lock);
979         }
980         hammer_unref(&cluster->io.lock);
981 }
982
983 /************************************************************************
984  *                              BUFFERS                                 *
985  ************************************************************************
986  *
987  * Manage buffers.  Note that a buffer holds a reference to its associated
988  * cluster, and its cluster will hold a reference to the cluster's volume.
989  *
990  * A non-zero buf_type indicates that a new buffer should be created and
991  * zero'd.
992  */
993 hammer_buffer_t
994 hammer_get_buffer(hammer_cluster_t cluster, int32_t buf_no,
995                   u_int64_t buf_type, int *errorp)
996 {
997         hammer_buffer_t buffer;
998
999         /*
1000          * Find the buffer.  Note that buffer 0 corresponds to the cluster
1001          * header and should never be requested.
1002          */
1003         KKASSERT(buf_no >= cluster->ondisk->clu_start / HAMMER_BUFSIZE &&
1004                  buf_no < cluster->ondisk->clu_limit / HAMMER_BUFSIZE);
1005
1006         /*
1007          * Locate and lock the buffer structure, creating one if necessary.
1008          */
1009 again:
1010         buffer = RB_LOOKUP(hammer_buf_rb_tree, &cluster->rb_bufs_root, buf_no);
1011         if (buffer == NULL) {
1012                 ++hammer_count_buffers;
1013                 buffer = kmalloc(sizeof(*buffer), M_HAMMER, M_WAITOK|M_ZERO);
1014                 buffer->buf_no = buf_no;
1015                 buffer->cluster = cluster;
1016                 buffer->volume = cluster->volume;
1017                 buffer->io.offset = cluster->io.offset +
1018                                     (buf_no * HAMMER_BUFSIZE);
1019                 buffer->io.type = HAMMER_STRUCTURE_BUFFER;
1020                 TAILQ_INIT(&buffer->clist);
1021                 hammer_ref(&buffer->io.lock);
1022
1023                 /*
1024                  * Insert the cluster into the RB tree and handle late
1025                  * collisions.
1026                  */
1027                 if (RB_INSERT(hammer_buf_rb_tree, &cluster->rb_bufs_root, buffer)) {
1028                         hammer_unref(&buffer->io.lock);
1029                         --hammer_count_buffers;
1030                         kfree(buffer, M_HAMMER);
1031                         goto again;
1032                 }
1033                 hammer_ref(&cluster->io.lock);
1034         } else {
1035                 hammer_ref(&buffer->io.lock);
1036         }
1037
1038         /*
1039          * Deal with on-disk info
1040          */
1041         if (buffer->ondisk == NULL || buf_type) {
1042                 *errorp = hammer_load_buffer(buffer, buf_type);
1043                 if (*errorp) {
1044                         hammer_rel_buffer(buffer, 1);
1045                         buffer = NULL;
1046                 }
1047         } else {
1048                 *errorp = 0;
1049         }
1050         return(buffer);
1051 }
1052
1053 static int
1054 hammer_load_buffer(hammer_buffer_t buffer, u_int64_t buf_type)
1055 {
1056         hammer_volume_t volume;
1057         hammer_fsbuf_ondisk_t ondisk;
1058         int error;
1059
1060         /*
1061          * Load the buffer's on-disk info
1062          */
1063         volume = buffer->volume;
1064         hammer_lock_ex(&buffer->io.lock);
1065         if (buffer->ondisk == NULL) {
1066                 if (buf_type) {
1067                         error = hammer_io_new(volume->devvp, &buffer->io);
1068                 } else {
1069                         error = hammer_io_read(volume->devvp, &buffer->io);
1070                 }
1071                 if (error) {
1072                         hammer_unlock(&buffer->io.lock);
1073                         return (error);
1074                 }
1075                 buffer->ondisk = ondisk = (void *)buffer->io.bp->b_data;
1076                 buffer->alist.config = &Buf_alist_config;
1077                 buffer->alist.meta = ondisk->head.buf_almeta;
1078                 buffer->buf_type = ondisk->head.buf_type;
1079         } else if (buf_type) {
1080                 error = hammer_io_new(volume->devvp, &buffer->io);
1081         } else {
1082                 error = 0;
1083         }
1084         if (error == 0 && buf_type) {
1085                 ondisk = buffer->ondisk;
1086                 initbuffer(&buffer->alist, &ondisk->head, buf_type);
1087                 buffer->buf_type = ondisk->head.buf_type;
1088         }
1089         hammer_unlock(&buffer->io.lock);
1090         return (error);
1091 }
1092
1093 /*
1094  * NOTE: Called from RB_SCAN, must return >= 0 for scan to continue.
1095  */
1096 int
1097 hammer_unload_buffer(hammer_buffer_t buffer, void *data __unused)
1098 {
1099         hammer_ref(&buffer->io.lock);
1100         hammer_flush_buffer_nodes(buffer);
1101         KKASSERT(buffer->io.lock.refs == 1);
1102         hammer_rel_buffer(buffer, 1);
1103         return(0);
1104 }
1105
1106 /*
1107  * Reference a buffer that is either already referenced or via a specially
1108  * handled pointer (aka cursor->buffer).
1109  */
1110 int
1111 hammer_ref_buffer(hammer_buffer_t buffer)
1112 {
1113         int error;
1114
1115         hammer_ref(&buffer->io.lock);
1116         if (buffer->ondisk == NULL) {
1117                 error = hammer_load_buffer(buffer, 0);
1118                 if (error) {
1119                         hammer_rel_buffer(buffer, 1);
1120                         /*
1121                          * NOTE: buffer pointer can become stale after
1122                          * the above release.
1123                          */
1124                 } else {
1125                         KKASSERT(buffer->buf_type ==
1126                                  buffer->ondisk->head.buf_type);
1127                 }
1128         } else {
1129                 error = 0;
1130         }
1131         return(error);
1132 }
1133
1134 /*
1135  * Release a buffer.  We have to deal with several places where
1136  * another thread can ref the buffer.
1137  *
1138  * Only destroy the structure itself if the related buffer cache buffer
1139  * was disassociated from it.  This ties the management of the structure
1140  * to the buffer cache subsystem.  buffer->ondisk determines whether the
1141  * embedded io is referenced or not.
1142  */
1143 void
1144 hammer_rel_buffer(hammer_buffer_t buffer, int flush)
1145 {
1146         hammer_cluster_t cluster;
1147         hammer_node_t node;
1148
1149         if (buffer->io.lock.refs == 1) {
1150                 hammer_lock_ex(&buffer->io.lock);
1151                 if (buffer->io.lock.refs == 1) {
1152                         hammer_io_release(&buffer->io, flush);
1153
1154                         /*
1155                          * Clean out the B-Tree node cache, if any, then
1156                          * clean up the cluster ref and free the buffer.
1157                          *
1158                          * If the buffer acquires a new reference while we
1159                          * are trying to clean it out, abort the cleaning.
1160                          */
1161                         while (buffer->io.bp == NULL &&
1162                                buffer->io.lock.refs == 1 &&
1163                                (node = TAILQ_FIRST(&buffer->clist)) != NULL
1164                         ) {
1165                                 KKASSERT(node->lock.refs == 0);
1166                                 hammer_flush_node(node);
1167                         }
1168                         if (buffer->io.bp == NULL &&
1169                             hammer_islastref(&buffer->io.lock)) {
1170                                 cluster = buffer->cluster;
1171                                 RB_REMOVE(hammer_buf_rb_tree,
1172                                           &cluster->rb_bufs_root, buffer);
1173                                 buffer->cluster = NULL; /* sanity */
1174                                 --hammer_count_buffers;
1175                                 kfree(buffer, M_HAMMER);
1176                                 hammer_rel_cluster(cluster, 0);
1177                                 return;
1178                         }
1179                 }
1180                 hammer_unlock(&buffer->io.lock);
1181         }
1182         hammer_unref(&buffer->io.lock);
1183 }
1184
1185 /*
1186  * Flush passively cached B-Tree nodes associated with this buffer.
1187  *
1188  * NOTE: The buffer is referenced and locked.
1189  */
1190 void
1191 hammer_flush_buffer_nodes(hammer_buffer_t buffer)
1192 {
1193         hammer_node_t node;
1194
1195         node = TAILQ_FIRST(&buffer->clist);
1196         while (node) {
1197                 buffer->save_scan = TAILQ_NEXT(node, entry);
1198                 if (node->lock.refs == 0) {
1199                         hammer_ref(&node->lock);
1200                         node->flags |= HAMMER_NODE_FLUSH;
1201                         hammer_rel_node(node);
1202                 }
1203                 node = buffer->save_scan;
1204         }
1205 }
1206
1207 /************************************************************************
1208  *                              NODES                                   *
1209  ************************************************************************
1210  *
1211  * Manage B-Tree nodes.  B-Tree nodes represent the primary indexing
1212  * method used by the HAMMER filesystem.
1213  *
1214  * Unlike other HAMMER structures, a hammer_node can be PASSIVELY
1215  * associated with its buffer.  It can have an active buffer reference
1216  * even when the node itself has no references.  The node also passively
1217  * associates itself with its cluster without holding any cluster refs.
1218  * The cluster ref is indirectly maintained by the active buffer ref when
1219  * a node is acquired.
1220  *
1221  * A hammer_node can also be passively associated with other HAMMER
1222  * structures, such as inodes, while retaining 0 references.  These
1223  * associations can be cleared backwards using a pointer-to-pointer in
1224  * the hammer_node.
1225  *
1226  * This allows the HAMMER implementation to cache hammer_node's long-term
1227  * and short-cut a great deal of the infrastructure's complexity.  In
1228  * most cases a cached node can be reacquired without having to dip into
1229  * either the buffer or cluster management code.
1230  *
1231  * The caller must pass a referenced cluster on call and will retain
1232  * ownership of the reference on return.  The node will acquire its own
1233  * additional references, if necessary.
1234  */
1235 hammer_node_t
1236 hammer_get_node(hammer_cluster_t cluster, int32_t node_offset, int *errorp)
1237 {
1238         hammer_node_t node;
1239
1240         /*
1241          * Locate the structure, allocating one if necessary.
1242          */
1243 again:
1244         node = RB_LOOKUP(hammer_nod_rb_tree, &cluster->rb_nods_root,
1245                          node_offset);
1246         if (node == NULL) {
1247                 ++hammer_count_nodes;
1248                 node = kmalloc(sizeof(*node), M_HAMMER, M_WAITOK|M_ZERO);
1249                 node->node_offset = node_offset;
1250                 node->cluster = cluster;
1251                 if (RB_INSERT(hammer_nod_rb_tree, &cluster->rb_nods_root,
1252                               node)) {
1253                         --hammer_count_nodes;
1254                         kfree(node, M_HAMMER);
1255                         goto again;
1256                 }
1257         }
1258         *errorp = hammer_ref_node(node);
1259         if (*errorp) {
1260                 /*
1261                  * NOTE: The node pointer may be stale on error return.
1262                  * In fact, its probably been destroyed.
1263                  */
1264                 node = NULL;
1265         }
1266         return(node);
1267 }
1268
1269 /*
1270  * Reference the node to prevent disassociations, then associate and
1271  * load the related buffer.  This routine can also be called to reference
1272  * a node from a cache pointer.
1273  *
1274  * NOTE: Because the caller does not have a ref on the node, the caller's
1275  * node pointer will be stale if an error is returned.  We may also wind
1276  * up clearing the related cache pointers.
1277  *
1278  * NOTE: The cluster is indirectly referenced by our buffer ref.
1279  */
1280 int
1281 hammer_ref_node(hammer_node_t node)
1282 {
1283         hammer_buffer_t buffer;
1284         int32_t buf_no;
1285         int error;
1286
1287         hammer_ref(&node->lock);
1288         error = 0;
1289         if (node->ondisk == NULL) {
1290                 hammer_lock_ex(&node->lock);
1291                 if (node->ondisk == NULL) {
1292                         /*
1293                          * This is a little confusing but the jist is that
1294                          * node->buffer determines whether the node is on
1295                          * the buffer's clist and node->ondisk determines
1296                          * whether the buffer is referenced.
1297                          */
1298                         if ((buffer = node->buffer) != NULL) {
1299                                 error = hammer_ref_buffer(buffer);
1300                         } else {
1301                                 buf_no = node->node_offset / HAMMER_BUFSIZE;
1302                                 buffer = hammer_get_buffer(node->cluster,
1303                                                            buf_no, 0, &error);
1304                                 if (buffer) {
1305                                         KKASSERT(error == 0);
1306                                         TAILQ_INSERT_TAIL(&buffer->clist,
1307                                                           node, entry);
1308                                         node->buffer = buffer;
1309                                 }
1310                         }
1311                         if (error == 0) {
1312                                 node->ondisk = (void *)((char *)buffer->ondisk +
1313                                        (node->node_offset & HAMMER_BUFMASK));
1314                         }
1315                 }
1316                 hammer_unlock(&node->lock);
1317         }
1318         if (error)
1319                 hammer_rel_node(node);
1320         return (error);
1321 }
1322
1323 /*
1324  * Release a hammer_node.  The node retains a passive association with
1325  * its cluster, buffer and caches.
1326  *
1327  * However, to avoid cluttering up kernel memory with tons of B-Tree
1328  * node cache structures we destroy the node if no passive cache or
1329  * (instantiated) buffer references exist.
1330  */
1331 void
1332 hammer_rel_node(hammer_node_t node)
1333 {
1334         hammer_cluster_t cluster;
1335         hammer_buffer_t buffer;
1336
1337         if (hammer_islastref(&node->lock)) {
1338                 cluster = node->cluster;
1339
1340                 /*
1341                  * Destroy the node if it is being deleted.  Free the node
1342                  * in the bitmap after we have unhooked it.
1343                  */
1344                 hammer_ref_cluster(cluster);
1345                 if (node->flags & (HAMMER_NODE_DELETED|HAMMER_NODE_FLUSH)) {
1346                         hammer_flush_node(node);
1347                         RB_REMOVE(hammer_nod_rb_tree, &cluster->rb_nods_root,
1348                                   node);
1349                         if ((buffer = node->buffer) != NULL) {
1350                                 node->buffer = NULL;
1351                                 hammer_remove_node_clist(buffer, node);
1352                                 if (node->ondisk) {
1353                                         node->ondisk = NULL;
1354                                         hammer_rel_buffer(buffer, 0);
1355                                 }
1356                         }
1357                         if (node->flags & HAMMER_NODE_DELETED) {
1358                                 hammer_free_btree(node->cluster,
1359                                                   node->node_offset);
1360                                 if (node->node_offset ==
1361                                     cluster->ondisk->clu_btree_root) {
1362                                         kprintf("FREE CLUSTER %d\n", cluster->clu_no);
1363                                         hammer_free_cluster(cluster);
1364                                         /*hammer_io_undirty(&cluster->io);*/
1365                                 }
1366                         }
1367                         hammer_rel_cluster(cluster, 0);
1368                         --hammer_count_nodes;
1369                         kfree(node, M_HAMMER);
1370                         return;
1371                 }
1372
1373                 /*
1374                  * node->ondisk determines whether we have a buffer reference
1375                  * to get rid of or not.  Only get rid of the reference if
1376                  * the kernel tried to flush the buffer.
1377                  *
1378                  * NOTE: Once unref'd the node can be physically destroyed,
1379                  * so our node is stale afterwords.
1380                  *
1381                  * This case occurs if the node still has cache references.
1382                  * We could remove the references and free the structure
1383                  * but for now we allow them (and the node structure) to
1384                  * remain intact.
1385                  */
1386                 if (node->ondisk && hammer_io_checkflush(&node->buffer->io)) {
1387                         hammer_flush_node(node);
1388                         buffer = node->buffer;
1389                         node->buffer = NULL;
1390                         node->ondisk = NULL;
1391                         hammer_remove_node_clist(buffer, node);
1392                         hammer_rel_buffer(buffer, 0);
1393                 }
1394
1395                 /*
1396                  * Clutter control, this case only occurs after a failed
1397                  * load since otherwise ondisk will be non-NULL.
1398                  */
1399                 if (node->cache1 == NULL && node->cache2 == NULL && 
1400                     node->ondisk == NULL) {
1401                         RB_REMOVE(hammer_nod_rb_tree, &cluster->rb_nods_root,
1402                                   node);
1403                         if ((buffer = node->buffer) != NULL) {
1404                                 node->buffer = NULL; /* sanity */
1405                                 node->ondisk = NULL; /* sanity */
1406                                 hammer_remove_node_clist(buffer, node);
1407                         }
1408                         --hammer_count_nodes;
1409                         node->lock.refs = -1;   /* sanity */
1410                         kfree(node, M_HAMMER);
1411                 } else {
1412                         hammer_unref(&node->lock);
1413                 }
1414
1415                 /*
1416                  * We have to do this last, after the node has been removed
1417                  * from the cluster's RB tree or we risk a deadlock due to
1418                  * hammer_rel_buffer->hammer_rel_cluster->(node deadlock)
1419                  */
1420                 hammer_rel_cluster(cluster, 0);
1421         } else {
1422                 hammer_unref(&node->lock);
1423         }
1424 }
1425
1426 /*
1427  * Cache-and-release a hammer_node.  Kinda like catching and releasing a
1428  * fish, but keeping an eye on him.  The node is passively cached in *cache.
1429  *
1430  * NOTE!  HAMMER may NULL *cache at any time, even after you have
1431  * referenced the node!
1432  */
1433 void
1434 hammer_cache_node(hammer_node_t node, struct hammer_node **cache)
1435 {
1436         hammer_node_t old;
1437
1438         /*
1439          * If the node is being deleted, don't cache it!
1440          */
1441         if (node->flags & HAMMER_NODE_DELETED)
1442                 return;
1443
1444         /*
1445          * Cache the node.  If we previously cached a different node we
1446          * have to give HAMMER a chance to destroy it.
1447          */
1448 again:
1449         if (node->cache1 != cache) {
1450                 if (node->cache2 != cache) {
1451                         if ((old = *cache) != NULL) {
1452                                 KKASSERT(node->lock.refs != 0);
1453                                 hammer_uncache_node(cache);
1454                                 goto again;
1455                         }
1456                         if (node->cache2)
1457                                 *node->cache2 = NULL;
1458                         node->cache2 = node->cache1;
1459                         node->cache1 = cache;
1460                         *cache = node;
1461                 } else {
1462                         struct hammer_node **tmp;
1463                         tmp = node->cache1;
1464                         node->cache1 = node->cache2;
1465                         node->cache2 = tmp;
1466                 }
1467         }
1468 }
1469
1470 void
1471 hammer_uncache_node(struct hammer_node **cache)
1472 {
1473         hammer_node_t node;
1474
1475         if ((node = *cache) != NULL) {
1476                 *cache = NULL;
1477                 if (node->cache1 == cache) {
1478                         node->cache1 = node->cache2;
1479                         node->cache2 = NULL;
1480                 } else if (node->cache2 == cache) {
1481                         node->cache2 = NULL;
1482                 } else {
1483                         panic("hammer_uncache_node: missing cache linkage");
1484                 }
1485                 if (node->cache1 == NULL && node->cache2 == NULL)
1486                         hammer_flush_node(node);
1487         }
1488 }
1489
1490 /*
1491  * Remove a node's cache references and destroy the node if it has no
1492  * other references or backing store.
1493  */
1494 void
1495 hammer_flush_node(hammer_node_t node)
1496 {
1497         hammer_buffer_t buffer;
1498
1499         if (node->cache1)
1500                 *node->cache1 = NULL;
1501         if (node->cache2)
1502                 *node->cache2 = NULL;
1503         if (node->lock.refs == 0 && node->ondisk == NULL) {
1504                 RB_REMOVE(hammer_nod_rb_tree, &node->cluster->rb_nods_root,
1505                           node);
1506                 if ((buffer = node->buffer) != NULL) {
1507                         node->buffer = NULL;
1508                         hammer_remove_node_clist(buffer, node);
1509                         /* buffer is unreferenced because ondisk is NULL */
1510                 }
1511                 --hammer_count_nodes;
1512                 kfree(node, M_HAMMER);
1513         }
1514 }
1515
1516 /*
1517  * Remove a node from the buffer's clist.  Adjust save_scan as appropriate.
1518  * This is in its own little routine to properly handle interactions with
1519  * save_scan, so it is possible to block while scanning a buffer's node list.
1520  */
1521 static
1522 void
1523 hammer_remove_node_clist(hammer_buffer_t buffer, hammer_node_t node)
1524 {
1525         if (buffer->save_scan == node)
1526                 buffer->save_scan = TAILQ_NEXT(node, entry);
1527         TAILQ_REMOVE(&buffer->clist, node, entry);
1528 }
1529
1530 /************************************************************************
1531  *                              A-LIST ALLOCATORS                       *
1532  ************************************************************************/
1533
1534 /*
1535  * Allocate HAMMER clusters
1536  */
1537 hammer_cluster_t
1538 hammer_alloc_cluster(hammer_mount_t hmp, hammer_cluster_t cluster_hint,
1539                      int *errorp)
1540 {
1541         hammer_volume_t volume;
1542         hammer_cluster_t cluster;
1543         int32_t clu_no;
1544         int32_t clu_hint;
1545         int32_t vol_beg;
1546         int32_t vol_no;
1547
1548         /*
1549          * Figure out our starting volume and hint.
1550          */
1551         if (cluster_hint) {
1552                 vol_beg = cluster_hint->volume->vol_no;
1553                 clu_hint = cluster_hint->clu_no;
1554         } else {
1555                 vol_beg = hmp->volume_iterator;
1556                 clu_hint = -1;
1557         }
1558
1559         /*
1560          * Loop through volumes looking for a free cluster.  If allocating
1561          * a new cluster relative to an existing cluster try to find a free
1562          * cluster on either side (clu_hint >= 0), otherwise just do a
1563          * forwards iteration.
1564          */
1565         vol_no = vol_beg;
1566         do {
1567                 volume = hammer_get_volume(hmp, vol_no, errorp);
1568                 kprintf("VOLUME %p %d\n", volume, vol_no);
1569                 if (*errorp) {
1570                         clu_no = HAMMER_ALIST_BLOCK_NONE;
1571                         break;
1572                 }
1573                 hammer_modify_volume(volume);
1574                 if (clu_hint == -1) {
1575                         clu_hint = volume->clu_iterator;
1576                         clu_no = hammer_alist_alloc_fwd(&volume->alist, 1,
1577                                                         clu_hint);
1578                         if (clu_no == HAMMER_ALIST_BLOCK_NONE) {
1579                                 clu_no = hammer_alist_alloc_fwd(&volume->alist,
1580                                                                 1, 0);
1581                         }
1582                 } else {
1583                         clu_no = hammer_alist_alloc_fwd(&volume->alist, 1,
1584                                                         clu_hint);
1585                         if (clu_no == HAMMER_ALIST_BLOCK_NONE) {
1586                                 clu_no = hammer_alist_alloc_rev(&volume->alist,
1587                                                                 1, clu_hint);
1588                         }
1589                 }
1590                 hammer_modify_volume_done(volume);
1591                 if (clu_no != HAMMER_ALIST_BLOCK_NONE)
1592                         break;
1593                 hammer_rel_volume(volume, 0);
1594                 volume = NULL;
1595                 *errorp = ENOSPC;
1596                 vol_no = (vol_no + 1) % hmp->nvolumes;
1597                 clu_hint = -1;
1598         } while (vol_no != vol_beg);
1599
1600         /*
1601          * Acquire the cluster.  On success this will force *errorp to 0.
1602          */
1603         if (clu_no != HAMMER_ALIST_BLOCK_NONE) {
1604                 kprintf("ALLOC CLUSTER %d\n", clu_no);
1605                 cluster = hammer_get_cluster(volume, clu_no, errorp, 1);
1606                 volume->clu_iterator = clu_no;
1607                 hammer_rel_volume(volume, 0);
1608         } else {
1609                 cluster = NULL;
1610         }
1611         if (cluster)
1612                 hammer_lock_ex(&cluster->io.lock);
1613         return(cluster);
1614 }
1615
1616 void
1617 hammer_init_cluster(hammer_cluster_t cluster, hammer_base_elm_t left_bound, 
1618                     hammer_base_elm_t right_bound)
1619 {
1620         hammer_cluster_ondisk_t ondisk = cluster->ondisk;
1621
1622         hammer_modify_cluster(cluster);
1623         ondisk->clu_btree_beg = *left_bound;
1624         ondisk->clu_btree_end = *right_bound;
1625         cluster->clu_btree_beg = ondisk->clu_btree_beg;
1626         cluster->clu_btree_end = ondisk->clu_btree_end;
1627         hammer_modify_cluster_done(cluster);
1628 }
1629
1630 /*
1631  * Deallocate a cluster
1632  */
1633 void
1634 hammer_free_cluster(hammer_cluster_t cluster)
1635 {
1636         hammer_modify_cluster(cluster);
1637         hammer_alist_free(&cluster->volume->alist, cluster->clu_no, 1);
1638         hammer_modify_cluster_done(cluster);
1639 }
1640
1641 /*
1642  * Allocate HAMMER elements - btree nodes, data storage, and record elements
1643  *
1644  * The passed *bufferp should be initialized to NULL.  On successive calls
1645  * *bufferp caches the most recent buffer used until put away by the caller.
1646  * Note that previously returned pointers using the cached buffer become
1647  * invalid on successive calls which reuse *bufferp.
1648  *
1649  * All allocations first attempt to use the block found at the specified
1650  * iterator.  If that fails the first available block is used.  If that
1651  * fails a new buffer is allocated and associated with the buffer type
1652  * A-list and the element is allocated out of the new buffer.
1653  */
1654
1655 hammer_node_t
1656 hammer_alloc_btree(hammer_cluster_t cluster, int *errorp)
1657 {
1658         hammer_buffer_t buffer;
1659         hammer_alist_t live;
1660         hammer_node_t node;
1661         int32_t elm_no;
1662         int32_t buf_no;
1663         int32_t node_offset;
1664
1665         /*
1666          * Allocate a B-Tree element
1667          */
1668         hammer_modify_cluster(cluster);
1669         buffer = NULL;
1670         live = &cluster->alist_btree;
1671         elm_no = hammer_alist_alloc_fwd(live, 1, cluster->ondisk->idx_index);
1672         if (elm_no == HAMMER_ALIST_BLOCK_NONE)
1673                 elm_no = hammer_alist_alloc_fwd(live, 1, 0);
1674         if (elm_no == HAMMER_ALIST_BLOCK_NONE) {
1675                 alloc_new_buffer(cluster, live,
1676                                  HAMMER_FSBUF_BTREE, HAMMER_BTREE_NODES,
1677                                  cluster->ondisk->idx_index, errorp, &buffer);
1678                 elm_no = hammer_alist_alloc(live, 1);
1679                 if (elm_no == HAMMER_ALIST_BLOCK_NONE) {
1680                         *errorp = ENOSPC;
1681                         if (buffer)
1682                                 hammer_rel_buffer(buffer, 0);
1683                         hammer_modify_cluster_done(cluster);
1684                         return(NULL);
1685                 }
1686         }
1687         cluster->ondisk->idx_index = elm_no;
1688         KKASSERT((elm_no & HAMMER_FSBUF_BLKMASK) < HAMMER_BTREE_NODES);
1689
1690         /*
1691          * Load and return the B-Tree element
1692          */
1693         buf_no = elm_no / HAMMER_FSBUF_MAXBLKS;
1694         node_offset = buf_no * HAMMER_BUFSIZE +
1695                       offsetof(union hammer_fsbuf_ondisk,
1696                                btree.nodes[elm_no & HAMMER_FSBUF_BLKMASK]);
1697         node = hammer_get_node(cluster, node_offset, errorp);
1698         if (node) {
1699                 hammer_modify_node(node);
1700                 bzero(node->ondisk, sizeof(*node->ondisk));
1701                 hammer_modify_node_done(node);
1702         } else {
1703                 hammer_alist_free(live, elm_no, 1);
1704                 hammer_rel_node(node);
1705                 node = NULL;
1706         }
1707         hammer_modify_cluster_done(cluster);
1708         if (buffer)
1709                 hammer_rel_buffer(buffer, 0);
1710         return(node);
1711 }
1712
1713 void *
1714 hammer_alloc_data(hammer_cluster_t cluster, int32_t bytes,
1715                   int *errorp, struct hammer_buffer **bufferp)
1716 {
1717         hammer_buffer_t buffer;
1718         hammer_alist_t live;
1719         int32_t elm_no;
1720         int32_t buf_no;
1721         int32_t nblks;
1722         void *item;
1723
1724         /*
1725          * Deal with large data blocks.  The blocksize is HAMMER_BUFSIZE
1726          * for these allocations.
1727          */
1728         hammer_modify_cluster(cluster);
1729         if ((bytes & HAMMER_BUFMASK) == 0) {
1730                 nblks = bytes / HAMMER_BUFSIZE;
1731                 /* only one block allowed for now (so buffer can hold it) */
1732                 KKASSERT(nblks == 1);
1733
1734                 buf_no = hammer_alloc_master(cluster, nblks,
1735                                              cluster->ondisk->idx_ldata, 1);
1736                 if (buf_no == HAMMER_ALIST_BLOCK_NONE) {
1737                         *errorp = ENOSPC;
1738                         hammer_modify_cluster_done(cluster);
1739                         return(NULL);
1740                 }
1741                 hammer_adjust_stats(cluster, HAMMER_FSBUF_DATA, nblks);
1742                 cluster->ondisk->idx_ldata = buf_no;
1743                 hammer_modify_cluster_done(cluster);
1744                 buffer = *bufferp;
1745                 *bufferp = hammer_get_buffer(cluster, buf_no, -1, errorp);
1746                 if (buffer)
1747                         hammer_rel_buffer(buffer, 0);
1748                 buffer = *bufferp;
1749                 return(buffer->ondisk);
1750         }
1751
1752         /*
1753          * Allocate a data element.  The block size is HAMMER_DATA_BLKSIZE
1754          * (64 bytes) for these allocations.
1755          */
1756         nblks = (bytes + HAMMER_DATA_BLKMASK) & ~HAMMER_DATA_BLKMASK;
1757         nblks /= HAMMER_DATA_BLKSIZE;
1758         live = &cluster->alist_mdata;
1759         elm_no = hammer_alist_alloc_fwd(live, nblks, cluster->ondisk->idx_data);
1760         if (elm_no == HAMMER_ALIST_BLOCK_NONE)
1761                 elm_no = hammer_alist_alloc_fwd(live, 1, 0);
1762         if (elm_no == HAMMER_ALIST_BLOCK_NONE) {
1763                 alloc_new_buffer(cluster, live,
1764                                  HAMMER_FSBUF_DATA, HAMMER_DATA_NODES,
1765                                  cluster->ondisk->idx_data, errorp, bufferp);
1766                 elm_no = hammer_alist_alloc(live, nblks);
1767                 if (elm_no == HAMMER_ALIST_BLOCK_NONE) {
1768                         *errorp = ENOSPC;
1769                         hammer_modify_cluster_done(cluster);
1770                         return(NULL);
1771                 }
1772         }
1773         cluster->ondisk->idx_index = elm_no;
1774         hammer_modify_cluster_done(cluster);
1775
1776         /*
1777          * Load and return the B-Tree element
1778          */
1779         buf_no = elm_no / HAMMER_FSBUF_MAXBLKS;
1780         buffer = *bufferp;
1781         if (buffer == NULL || buffer->cluster != cluster ||
1782             buffer->buf_no != buf_no) {
1783                 if (buffer)
1784                         hammer_rel_buffer(buffer, 0);
1785                 buffer = hammer_get_buffer(cluster, buf_no, 0, errorp);
1786                 *bufferp = buffer;
1787         }
1788         KKASSERT(buffer->ondisk->head.buf_type == HAMMER_FSBUF_DATA);
1789         KKASSERT((elm_no & HAMMER_FSBUF_BLKMASK) < HAMMER_DATA_NODES);
1790         hammer_modify_buffer(buffer);
1791         item = &buffer->ondisk->data.data[elm_no & HAMMER_FSBUF_BLKMASK];
1792         bzero(item, nblks * HAMMER_DATA_BLKSIZE);
1793         hammer_modify_buffer_done(buffer);
1794         *errorp = 0;
1795         return(item);
1796 }
1797
1798 void *
1799 hammer_alloc_record(hammer_cluster_t cluster,
1800                     int *errorp, struct hammer_buffer **bufferp)
1801 {
1802         hammer_buffer_t buffer;
1803         hammer_alist_t live;
1804         int32_t elm_no;
1805         int32_t buf_no;
1806         void *item;
1807
1808         /*
1809          * Allocate a record element
1810          */
1811         hammer_modify_cluster(cluster);
1812         live = &cluster->alist_record;
1813         elm_no = hammer_alist_alloc_rev(live, 1, cluster->ondisk->idx_record);
1814         if (elm_no == HAMMER_ALIST_BLOCK_NONE)
1815                 elm_no = hammer_alist_alloc_rev(live, 1,HAMMER_ALIST_BLOCK_MAX);
1816         if (elm_no == HAMMER_ALIST_BLOCK_NONE) {
1817                 alloc_new_buffer(cluster, live,
1818                                  HAMMER_FSBUF_RECORDS, HAMMER_RECORD_NODES,
1819                                  cluster->ondisk->idx_record, errorp, bufferp);
1820                 elm_no = hammer_alist_alloc_rev(live, 1,HAMMER_ALIST_BLOCK_MAX);
1821                 kprintf("hammer_alloc_record elm again %08x\n", elm_no);
1822                 if (elm_no == HAMMER_ALIST_BLOCK_NONE) {
1823                         *errorp = ENOSPC;
1824                         hammer_modify_cluster_done(cluster);
1825                         return(NULL);
1826                 }
1827         }
1828         cluster->ondisk->idx_record = elm_no;
1829         hammer_modify_cluster_done(cluster);
1830
1831         /*
1832          * Load and return the record element
1833          */
1834         buf_no = elm_no / HAMMER_FSBUF_MAXBLKS;
1835         buffer = *bufferp;
1836         if (buffer == NULL || buffer->cluster != cluster ||
1837             buffer->buf_no != buf_no) {
1838                 if (buffer)
1839                         hammer_rel_buffer(buffer, 0);
1840                 buffer = hammer_get_buffer(cluster, buf_no, 0, errorp);
1841                 *bufferp = buffer;
1842         }
1843         KKASSERT(buffer->ondisk->head.buf_type == HAMMER_FSBUF_RECORDS);
1844         KKASSERT((elm_no & HAMMER_FSBUF_BLKMASK) < HAMMER_RECORD_NODES);
1845         hammer_modify_buffer(buffer);
1846         item = &buffer->ondisk->record.recs[elm_no & HAMMER_FSBUF_BLKMASK];
1847         bzero(item, sizeof(union hammer_record_ondisk));
1848         hammer_modify_buffer_done(buffer);
1849         *errorp = 0;
1850         return(item);
1851 }
1852
1853 void
1854 hammer_free_data_ptr(hammer_buffer_t buffer, void *data, int bytes)
1855 {
1856         int32_t elm_no;
1857         int32_t nblks;
1858         hammer_alist_t live;
1859
1860         hammer_modify_cluster(buffer->cluster);
1861         if ((bytes & HAMMER_BUFMASK) == 0) {
1862                 nblks = bytes / HAMMER_BUFSIZE;
1863                 KKASSERT(nblks == 1 && data == (void *)buffer->ondisk);
1864                 hammer_alist_free(&buffer->cluster->alist_master,
1865                                   buffer->buf_no, nblks);
1866                 hammer_adjust_stats(buffer->cluster, HAMMER_FSBUF_DATA, -nblks);
1867                 hammer_modify_cluster_done(buffer->cluster);
1868                 return;
1869         }
1870
1871         elm_no = ((char *)data - (char *)buffer->ondisk->data.data) /
1872                  HAMMER_DATA_BLKSIZE;
1873         KKASSERT(elm_no >= 0 && elm_no < HAMMER_DATA_NODES);
1874         elm_no += buffer->buf_no * HAMMER_FSBUF_MAXBLKS;
1875         nblks = (bytes + HAMMER_DATA_BLKMASK) & ~HAMMER_DATA_BLKMASK;
1876         nblks /= HAMMER_DATA_BLKSIZE;
1877         live = &buffer->cluster->alist_mdata;
1878         hammer_alist_free(live, elm_no, nblks);
1879         hammer_modify_cluster_done(buffer->cluster);
1880 }
1881
1882 void
1883 hammer_free_record_ptr(hammer_buffer_t buffer, union hammer_record_ondisk *rec)
1884 {
1885         int32_t elm_no;
1886         hammer_alist_t live;
1887
1888         hammer_modify_cluster(buffer->cluster);
1889         elm_no = rec - &buffer->ondisk->record.recs[0];
1890         KKASSERT(elm_no >= 0 && elm_no < HAMMER_BTREE_NODES);
1891         elm_no += buffer->buf_no * HAMMER_FSBUF_MAXBLKS;
1892         live = &buffer->cluster->alist_record;
1893         hammer_alist_free(live, elm_no, 1);
1894         hammer_modify_cluster_done(buffer->cluster);
1895 }
1896
1897 void
1898 hammer_free_btree(hammer_cluster_t cluster, int32_t bclu_offset)
1899 {
1900         const int32_t blksize = sizeof(struct hammer_node_ondisk);
1901         int32_t fsbuf_offset = bclu_offset & HAMMER_BUFMASK;
1902         hammer_alist_t live;
1903         int32_t elm_no;
1904
1905         hammer_modify_cluster(cluster);
1906         elm_no = bclu_offset / HAMMER_BUFSIZE * HAMMER_FSBUF_MAXBLKS;
1907         fsbuf_offset -= offsetof(union hammer_fsbuf_ondisk, btree.nodes[0]);
1908         live = &cluster->alist_btree;
1909         KKASSERT(fsbuf_offset >= 0 && fsbuf_offset % blksize == 0);
1910         elm_no += fsbuf_offset / blksize;
1911         hammer_alist_free(live, elm_no, 1);
1912         hammer_modify_cluster_done(cluster);
1913 }
1914
1915 void
1916 hammer_free_data(hammer_cluster_t cluster, int32_t bclu_offset, int32_t bytes)
1917 {
1918         const int32_t blksize = HAMMER_DATA_BLKSIZE;
1919         int32_t fsbuf_offset = bclu_offset & HAMMER_BUFMASK;
1920         hammer_alist_t live;
1921         int32_t elm_no;
1922         int32_t buf_no;
1923         int32_t nblks;
1924
1925         hammer_modify_cluster(cluster);
1926         if ((bytes & HAMMER_BUFMASK) == 0) {
1927                 nblks = bytes / HAMMER_BUFSIZE;
1928                 KKASSERT(nblks == 1 && (bclu_offset & HAMMER_BUFMASK) == 0);
1929                 buf_no = bclu_offset / HAMMER_BUFSIZE;
1930                 hammer_alist_free(&cluster->alist_master, buf_no, nblks);
1931                 hammer_adjust_stats(cluster, HAMMER_FSBUF_DATA, -nblks);
1932                 hammer_modify_cluster_done(cluster);
1933                 return;
1934         }
1935
1936         elm_no = bclu_offset / HAMMER_BUFSIZE * HAMMER_FSBUF_MAXBLKS;
1937         fsbuf_offset -= offsetof(union hammer_fsbuf_ondisk, data.data[0][0]);
1938         live = &cluster->alist_mdata;
1939         nblks = (bytes + HAMMER_DATA_BLKMASK) & ~HAMMER_DATA_BLKMASK;
1940         nblks /= HAMMER_DATA_BLKSIZE;
1941         KKASSERT(fsbuf_offset >= 0 && fsbuf_offset % blksize == 0);
1942         elm_no += fsbuf_offset / blksize;
1943         hammer_alist_free(live, elm_no, nblks);
1944         hammer_modify_cluster_done(cluster);
1945 }
1946
1947 void
1948 hammer_free_record(hammer_cluster_t cluster, int32_t bclu_offset)
1949 {
1950         const int32_t blksize = sizeof(union hammer_record_ondisk);
1951         int32_t fsbuf_offset = bclu_offset & HAMMER_BUFMASK;
1952         hammer_alist_t live;
1953         int32_t elm_no;
1954
1955         hammer_modify_cluster(cluster);
1956         elm_no = bclu_offset / HAMMER_BUFSIZE * HAMMER_FSBUF_MAXBLKS;
1957         fsbuf_offset -= offsetof(union hammer_fsbuf_ondisk, record.recs[0]);
1958         live = &cluster->alist_record;
1959         KKASSERT(fsbuf_offset >= 0 && fsbuf_offset % blksize == 0);
1960         elm_no += fsbuf_offset / blksize;
1961         hammer_alist_free(live, elm_no, 1);
1962         hammer_modify_cluster_done(cluster);
1963 }
1964
1965
1966 /*
1967  * Allocate a new filesystem buffer and assign it to the specified
1968  * filesystem buffer type.  The new buffer will be added to the
1969  * type-specific A-list and initialized.
1970  */
1971 static void
1972 alloc_new_buffer(hammer_cluster_t cluster, hammer_alist_t live,
1973                  u_int64_t type, int32_t nelements,
1974                  int start, int *errorp, struct hammer_buffer **bufferp)
1975 {
1976         hammer_buffer_t buffer;
1977         int32_t buf_no;
1978         int isfwd;
1979
1980         if (*bufferp)
1981                 hammer_rel_buffer(*bufferp, 0);
1982         *bufferp = NULL;
1983
1984         start = start / HAMMER_FSBUF_MAXBLKS;   /* convert to buf_no */
1985         isfwd = (type != HAMMER_FSBUF_RECORDS);
1986         buf_no = hammer_alloc_master(cluster, 1, start, isfwd);
1987         if (buf_no == HAMMER_ALIST_BLOCK_NONE) {
1988                 *errorp = ENOSPC;
1989                 return;
1990         }
1991
1992         /*
1993          * The new buffer must be initialized (type != 0) regardless of
1994          * whether we already have it cached or not, so don't try to
1995          * optimize the cached buffer check.  Just call hammer_get_buffer().
1996          */
1997         buffer = hammer_get_buffer(cluster, buf_no, type, errorp);
1998         *bufferp = buffer;
1999
2000         /*
2001          * Finally, do a meta-free of the buffer's elements into the
2002          * type-specific A-list and update our statistics to reflect
2003          * the allocation.
2004          */
2005         if (buffer) {
2006 #if 0
2007                 kprintf("alloc_new_buffer buf_no %d type %016llx nelms %d\n",
2008                         buf_no, type, nelements);
2009 #endif
2010                 hammer_modify_buffer(buffer);  /*XXX*/
2011                 hammer_alist_free(live, buf_no * HAMMER_FSBUF_MAXBLKS,
2012                                   nelements);
2013                 hammer_modify_buffer_done(buffer);  /*XXX*/
2014                 hammer_adjust_stats(cluster, type, 1);
2015         }
2016 }
2017
2018 /*
2019  * Sync dirty buffers to the media
2020  */
2021
2022 static int hammer_sync_scan1(struct mount *mp, struct vnode *vp, void *data);
2023 static int hammer_sync_scan2(struct mount *mp, struct vnode *vp, void *data);
2024
2025 int
2026 hammer_sync_hmp(hammer_mount_t hmp, int waitfor)
2027 {
2028         struct hammer_sync_info info;
2029
2030         info.error = 0;
2031         info.waitfor = waitfor;
2032
2033         kprintf("hammer_sync\n");
2034         vmntvnodescan(hmp->mp, VMSC_GETVP|VMSC_NOWAIT,
2035                       hammer_sync_scan1, hammer_sync_scan2, &info);
2036
2037         RB_SCAN(hammer_vol_rb_tree, &hmp->rb_vols_root, NULL,
2038                 hammer_sync_volume, &info);
2039         return(info.error);
2040 }
2041
2042 static int
2043 hammer_sync_scan1(struct mount *mp, struct vnode *vp, void *data)
2044 {
2045         struct hammer_inode *ip;
2046
2047         ip = VTOI(vp);
2048         if (vp->v_type == VNON || ((ip->flags & HAMMER_INODE_MODMASK) == 0 &&
2049             RB_EMPTY(&vp->v_rbdirty_tree))) {
2050                 return(-1);
2051         }
2052         return(0);
2053 }
2054
2055 static int
2056 hammer_sync_scan2(struct mount *mp, struct vnode *vp, void *data)
2057 {
2058         struct hammer_sync_info *info = data;
2059         struct hammer_inode *ip;
2060         int error;
2061
2062         ip = VTOI(vp);
2063         if (vp->v_type == VNON || vp->v_type == VBAD ||
2064             ((ip->flags & HAMMER_INODE_MODMASK) == 0 &&
2065              RB_EMPTY(&vp->v_rbdirty_tree))) {
2066                 return(0);
2067         }
2068         if (vp->v_type != VCHR) {
2069                 error = VOP_FSYNC(vp, info->waitfor);
2070                 if (error)
2071                         info->error = error;
2072         }
2073         return(0);
2074 }
2075
2076 int
2077 hammer_sync_volume(hammer_volume_t volume, void *data)
2078 {
2079         struct hammer_sync_info *info = data;
2080
2081         RB_SCAN(hammer_clu_rb_tree, &volume->rb_clus_root, NULL,
2082                 hammer_sync_cluster, info);
2083         if (hammer_ref_volume(volume) == 0) {
2084                 hammer_io_flush(&volume->io, info);
2085                 hammer_rel_volume(volume, 0);
2086         }
2087         return(0);
2088 }
2089
2090 int
2091 hammer_sync_cluster(hammer_cluster_t cluster, void *data)
2092 {
2093         struct hammer_sync_info *info = data;
2094
2095         RB_SCAN(hammer_buf_rb_tree, &cluster->rb_bufs_root, NULL,
2096                 hammer_sync_buffer, info);
2097         switch(cluster->state) {
2098         case HAMMER_CLUSTER_OPEN:
2099         case HAMMER_CLUSTER_IDLE:
2100                 if (hammer_ref_cluster(cluster) == 0) {
2101                         hammer_io_flush(&cluster->io, info);
2102                         hammer_rel_cluster(cluster, 0);
2103                 }
2104                 break;
2105         default:
2106                 break;
2107         }
2108         return(0);
2109 }
2110
2111 int
2112 hammer_sync_buffer(hammer_buffer_t buffer, void *data)
2113 {
2114         struct hammer_sync_info *info = data;
2115
2116         if (hammer_ref_buffer(buffer) == 0) {
2117                 hammer_lock_ex(&buffer->io.lock);
2118                 hammer_flush_buffer_nodes(buffer);
2119                 hammer_unlock(&buffer->io.lock);
2120                 hammer_io_flush(&buffer->io, info);
2121                 hammer_rel_buffer(buffer, 0);
2122         }
2123         return(0);
2124 }
2125
2126 /*
2127  * Generic buffer initialization
2128  */
2129 static void
2130 initbuffer(hammer_alist_t live, hammer_fsbuf_head_t head, u_int64_t type)
2131 {
2132         head->buf_type = type;
2133         hammer_alist_init(live);
2134 }
2135
2136 /*
2137  * Calculate the cluster's offset in the volume.  This calculation is
2138  * slightly more complex when using superclusters because superclusters
2139  * are grouped in blocks of 16, followed by 16 x N clusters where N
2140  * is the number of clusters a supercluster can manage.
2141  */
2142 static int64_t
2143 calculate_cluster_offset(hammer_volume_t volume, int32_t clu_no)
2144 {
2145         int32_t scl_group;
2146         int64_t scl_group_size;
2147         int64_t off;
2148
2149         if (volume->vol_flags & HAMMER_VOLF_USINGSUPERCL) {
2150                 scl_group = clu_no / HAMMER_VOL_SUPERCLUSTER_GROUP /
2151                             HAMMER_SCL_MAXCLUSTERS;
2152                 scl_group_size = 
2153                             ((int64_t)HAMMER_BUFSIZE *
2154                              HAMMER_VOL_SUPERCLUSTER_GROUP) +
2155                             ((int64_t)HAMMER_VOL_SUPERCLUSTER_GROUP *
2156                              volume->vol_clsize * HAMMER_SCL_MAXCLUSTERS);
2157                 scl_group_size += 
2158                             HAMMER_VOL_SUPERCLUSTER_GROUP * HAMMER_BUFSIZE;
2159
2160                 off = volume->cluster_base +
2161                       scl_group * scl_group_size +
2162                       (HAMMER_BUFSIZE * HAMMER_VOL_SUPERCLUSTER_GROUP) +
2163                       ((int64_t)clu_no % ((int64_t)HAMMER_SCL_MAXCLUSTERS *
2164                        HAMMER_VOL_SUPERCLUSTER_GROUP))
2165                       * volume->vol_clsize;
2166         } else {
2167                 off = volume->cluster_base +
2168                       (int64_t)clu_no * volume->vol_clsize;
2169         }
2170         return(off);
2171 }
2172
2173 /*
2174  * Calculate a super-cluster's offset in the volume.
2175  */
2176 static int64_t
2177 calculate_supercl_offset(hammer_volume_t volume, int32_t scl_no)
2178 {
2179         int64_t off;
2180         int32_t scl_group;
2181         int64_t scl_group_size;
2182
2183         KKASSERT (volume->vol_flags & HAMMER_VOLF_USINGSUPERCL);
2184         scl_group = scl_no / HAMMER_VOL_SUPERCLUSTER_GROUP;
2185         if (scl_group) {
2186                 scl_group_size = 
2187                             ((int64_t)HAMMER_BUFSIZE *
2188                              HAMMER_VOL_SUPERCLUSTER_GROUP) +
2189                             ((int64_t)HAMMER_VOL_SUPERCLUSTER_GROUP *
2190                              volume->vol_clsize * HAMMER_SCL_MAXCLUSTERS);
2191                 scl_group_size += 
2192                             HAMMER_VOL_SUPERCLUSTER_GROUP * HAMMER_BUFSIZE;
2193                 off = volume->cluster_base + (scl_group * scl_group_size) +
2194                       (scl_no % HAMMER_VOL_SUPERCLUSTER_GROUP) * HAMMER_BUFSIZE;
2195         } else {
2196                 off = volume->cluster_base + (scl_no * HAMMER_BUFSIZE);
2197         }
2198         return(off);
2199 }
2200
2201 /*
2202  *
2203  *
2204  */
2205 static int32_t
2206 hammer_alloc_master(hammer_cluster_t cluster, int nblks,
2207                     int32_t start, int isfwd)
2208 {
2209         int32_t buf_no;
2210
2211         hammer_modify_cluster(cluster);
2212         if (isfwd) {
2213                 buf_no = hammer_alist_alloc_fwd(&cluster->alist_master,
2214                                                 nblks, start);
2215                 if (buf_no == HAMMER_ALIST_BLOCK_NONE) {
2216                         buf_no = hammer_alist_alloc_fwd(&cluster->alist_master,
2217                                                 nblks, 0);
2218                 }
2219         } else {
2220                 buf_no = hammer_alist_alloc_rev(&cluster->alist_master,
2221                                                 nblks, start);
2222                 if (buf_no == HAMMER_ALIST_BLOCK_NONE) {
2223                         buf_no = hammer_alist_alloc_rev(&cluster->alist_master,
2224                                                 nblks, HAMMER_ALIST_BLOCK_MAX);
2225                 }
2226         }
2227         hammer_modify_cluster_done(cluster);
2228
2229         /*
2230          * Recover space from empty record, b-tree, and data a-lists.
2231          */
2232
2233         return(buf_no);
2234 }
2235
2236 /*
2237  * Adjust allocation statistics
2238  */
2239 static void
2240 hammer_adjust_stats(hammer_cluster_t cluster, u_int64_t buf_type, int nblks)
2241 {
2242         hammer_modify_cluster(cluster);
2243         hammer_modify_volume(cluster->volume);
2244         hammer_modify_volume(cluster->volume->hmp->rootvol);
2245
2246         switch(buf_type) {
2247         case HAMMER_FSBUF_BTREE:
2248                 cluster->ondisk->stat_idx_bufs += nblks;
2249                 cluster->volume->ondisk->vol_stat_idx_bufs += nblks;
2250                 cluster->volume->hmp->rootvol->ondisk->vol0_stat_idx_bufs += nblks;
2251                 break;
2252         case HAMMER_FSBUF_DATA:
2253                 cluster->ondisk->stat_data_bufs += nblks;
2254                 cluster->volume->ondisk->vol_stat_data_bufs += nblks;
2255                 cluster->volume->hmp->rootvol->ondisk->vol0_stat_data_bufs += nblks;
2256                 break;
2257         case HAMMER_FSBUF_RECORDS:
2258                 cluster->ondisk->stat_rec_bufs += nblks;
2259                 cluster->volume->ondisk->vol_stat_rec_bufs += nblks;
2260                 cluster->volume->hmp->rootvol->ondisk->vol0_stat_rec_bufs += nblks;
2261                 break;
2262         }
2263         hammer_modify_cluster_done(cluster);
2264         hammer_modify_volume_done(cluster->volume);
2265         hammer_modify_volume_done(cluster->volume->hmp->rootvol);
2266 }
2267
2268 /*
2269  * A-LIST SUPPORT
2270  *
2271  * Setup the parameters for the various A-lists we use in hammer.  The
2272  * supercluster A-list must be chained to the cluster A-list and cluster
2273  * slave A-lists are chained to buffer A-lists.
2274  *
2275  * See hammer_init_alist_config() below.
2276  */
2277
2278 /*
2279  * A-LIST - cluster recursion into a filesystem buffer
2280  */
2281 static int
2282 buffer_alist_init(void *info, int32_t blk, int32_t radix)
2283 {
2284         return(0);
2285 #if 0
2286         hammer_cluster_t cluster = info;
2287         hammer_buffer_t buffer;
2288         int32_t buf_no;
2289         int error = 0;
2290
2291         /*
2292          * Calculate the buffer number, initialize based on the buffer type.
2293          * The buffer has already been allocated so assert that it has been
2294          * initialized.
2295          */
2296         buf_no = blk / HAMMER_FSBUF_MAXBLKS;
2297         buffer = hammer_get_buffer(cluster, buf_no, 0, &error);
2298         if (buffer) {
2299                 hammer_adjust_stats(cluster, buffer->ondisk->head.buf_type, 1);
2300                 hammer_rel_buffer(buffer, 0);
2301         }
2302         return (error);
2303 #endif
2304 }
2305
2306 static int
2307 buffer_alist_destroy(void *info, int32_t blk, int32_t radix)
2308 {
2309         return(0);
2310 #if 0
2311         hammer_cluster_t cluster = info;
2312         hammer_buffer_t buffer;
2313         int32_t buf_no;
2314         int error = 0;
2315
2316         /*
2317          * Calculate the buffer number, initialize based on the buffer type.
2318          * The buffer has already been allocated so assert that it has been
2319          * initialized.
2320          */
2321         buf_no = blk / HAMMER_FSBUF_MAXBLKS;
2322         buffer = hammer_get_buffer(cluster, buf_no, 0, &error);
2323         if (buffer) {
2324                 hammer_adjust_stats(cluster, buffer->ondisk->head.buf_type, -1);
2325                 hammer_rel_buffer(buffer, 0);
2326         }
2327         return (error);
2328 #endif
2329 }
2330
2331 /*
2332  * Note: atblk can be negative and atblk - blk can go negative.
2333  */
2334 static int
2335 buffer_alist_alloc_fwd(void *info, int32_t blk, int32_t radix,
2336                       int32_t count, int32_t atblk, int32_t *fullp)
2337 {
2338         hammer_cluster_t cluster = info;
2339         hammer_buffer_t buffer;
2340         int32_t buf_no;
2341         int32_t r;
2342         int error = 0;
2343
2344         buf_no = blk / HAMMER_FSBUF_MAXBLKS;
2345         buffer = hammer_get_buffer(cluster, buf_no, 0, &error);
2346         if (buffer) {
2347                 KKASSERT(buffer->ondisk->head.buf_type != 0);
2348
2349                 hammer_modify_buffer(buffer);
2350                 r = hammer_alist_alloc_fwd(&buffer->alist, count, atblk - blk);
2351                 if (r != HAMMER_ALIST_BLOCK_NONE)
2352                         r += blk;
2353                 hammer_modify_buffer_done(buffer);
2354                 *fullp = hammer_alist_isfull(&buffer->alist);
2355                 hammer_rel_buffer(buffer, 0);
2356         } else {
2357                 r = HAMMER_ALIST_BLOCK_NONE;
2358         }
2359         return(r);
2360 }
2361
2362 static int
2363 buffer_alist_alloc_rev(void *info, int32_t blk, int32_t radix,
2364                       int32_t count, int32_t atblk, int32_t *fullp)
2365 {
2366         hammer_cluster_t cluster = info;
2367         hammer_buffer_t buffer;
2368         int32_t buf_no;
2369         int32_t r;
2370         int error = 0;
2371
2372         buf_no = blk / HAMMER_FSBUF_MAXBLKS;
2373         buffer = hammer_get_buffer(cluster, buf_no, 0, &error);
2374         if (buffer) {
2375                 KKASSERT(buffer->ondisk->head.buf_type != 0);
2376                 hammer_modify_buffer(buffer);
2377                 r = hammer_alist_alloc_rev(&buffer->alist, count, atblk - blk);
2378                 if (r != HAMMER_ALIST_BLOCK_NONE)
2379                         r += blk;
2380                 hammer_modify_buffer_done(buffer);
2381                 *fullp = hammer_alist_isfull(&buffer->alist);
2382                 hammer_rel_buffer(buffer, 0);
2383         } else {
2384                 r = HAMMER_ALIST_BLOCK_NONE;
2385                 *fullp = 0;
2386         }
2387         return(r);
2388 }
2389
2390 static void
2391 buffer_alist_free(void *info, int32_t blk, int32_t radix,
2392                  int32_t base_blk, int32_t count, int32_t *emptyp)
2393 {
2394         hammer_cluster_t cluster = info;
2395         hammer_buffer_t buffer;
2396         int32_t buf_no;
2397         int error = 0;
2398
2399         buf_no = blk / HAMMER_FSBUF_MAXBLKS;
2400         buffer = hammer_get_buffer(cluster, buf_no, 0, &error);
2401         if (buffer) {
2402                 KKASSERT(buffer->ondisk->head.buf_type != 0);
2403                 hammer_modify_buffer(buffer);
2404                 hammer_alist_free(&buffer->alist, base_blk, count);
2405                 hammer_modify_buffer_done(buffer);
2406                 *emptyp = hammer_alist_isempty(&buffer->alist);
2407                 /* XXX don't bother updating the buffer is completely empty? */
2408                 hammer_rel_buffer(buffer, 0);
2409         } else {
2410                 *emptyp = 0;
2411         }
2412 }
2413
2414 static void
2415 buffer_alist_print(void *info, int32_t blk, int32_t radix, int tab)
2416 {
2417 }
2418
2419 /*
2420  * A-LIST - super-cluster recursion into a cluster and cluster recursion
2421  * into a filesystem buffer.  A-List's are mostly self-contained entities,
2422  * but callbacks must be installed to recurse from one A-List to another.
2423  *
2424  * Implementing these callbacks allows us to operate a multi-layered A-List
2425  * as a single entity.
2426  */
2427 static int
2428 super_alist_init(void *info, int32_t blk, int32_t radix)
2429 {
2430         hammer_volume_t volume = info;
2431         hammer_supercl_t supercl;
2432         int32_t scl_no;
2433         int error = 0;
2434
2435         /*
2436          * Calculate the super-cluster number containing the cluster (blk)
2437          * and obtain the super-cluster buffer.
2438          */
2439         scl_no = blk / HAMMER_SCL_MAXCLUSTERS;
2440         supercl = hammer_get_supercl(volume, scl_no, &error, 1);
2441         if (supercl)
2442                 hammer_rel_supercl(supercl, 0);
2443         return (error);
2444 }
2445
2446 static int
2447 super_alist_destroy(void *info, int32_t blk, int32_t radix)
2448 {
2449         return(0);
2450 }
2451
2452 static int
2453 super_alist_alloc_fwd(void *info, int32_t blk, int32_t radix,
2454                       int32_t count, int32_t atblk, int32_t *fullp)
2455 {
2456         hammer_volume_t volume = info;
2457         hammer_supercl_t supercl;
2458         int32_t scl_no;
2459         int32_t r;
2460         int error = 0;
2461
2462         scl_no = blk / HAMMER_SCL_MAXCLUSTERS;
2463         supercl = hammer_get_supercl(volume, scl_no, &error, 0);
2464         if (supercl) {
2465                 hammer_modify_supercl(supercl);
2466                 r = hammer_alist_alloc_fwd(&supercl->alist, count, atblk - blk);
2467                 if (r != HAMMER_ALIST_BLOCK_NONE)
2468                         r += blk;
2469                 hammer_modify_supercl_done(supercl);
2470                 *fullp = hammer_alist_isfull(&supercl->alist);
2471                 hammer_rel_supercl(supercl, 0);
2472         } else {
2473                 r = HAMMER_ALIST_BLOCK_NONE;
2474                 *fullp = 0;
2475         }
2476         return(r);
2477 }
2478
2479 static int
2480 super_alist_alloc_rev(void *info, int32_t blk, int32_t radix,
2481                       int32_t count, int32_t atblk, int32_t *fullp)
2482 {
2483         hammer_volume_t volume = info;
2484         hammer_supercl_t supercl;
2485         int32_t scl_no;
2486         int32_t r;
2487         int error = 0;
2488
2489         scl_no = blk / HAMMER_SCL_MAXCLUSTERS;
2490         supercl = hammer_get_supercl(volume, scl_no, &error, 0);
2491         if (supercl) {
2492                 hammer_modify_supercl(supercl);
2493                 r = hammer_alist_alloc_rev(&supercl->alist, count, atblk - blk);
2494                 if (r != HAMMER_ALIST_BLOCK_NONE)
2495                         r += blk;
2496                 hammer_modify_supercl_done(supercl);
2497                 *fullp = hammer_alist_isfull(&supercl->alist);
2498                 hammer_rel_supercl(supercl, 0);
2499         } else { 
2500                 r = HAMMER_ALIST_BLOCK_NONE;
2501                 *fullp = 0;
2502         }
2503         return(r);
2504 }
2505
2506 static void
2507 super_alist_free(void *info, int32_t blk, int32_t radix,
2508                  int32_t base_blk, int32_t count, int32_t *emptyp)
2509 {
2510         hammer_volume_t volume = info;
2511         hammer_supercl_t supercl;
2512         int32_t scl_no;
2513         int error = 0;
2514
2515         scl_no = blk / HAMMER_SCL_MAXCLUSTERS;
2516         supercl = hammer_get_supercl(volume, scl_no, &error, 0);
2517         if (supercl) {
2518                 hammer_modify_supercl(supercl);
2519                 hammer_alist_free(&supercl->alist, base_blk, count);
2520                 hammer_modify_supercl_done(supercl);
2521                 *emptyp = hammer_alist_isempty(&supercl->alist);
2522                 hammer_rel_supercl(supercl, 0);
2523         } else {
2524                 *emptyp = 0;
2525         }
2526 }
2527
2528 static void
2529 super_alist_print(void *info, int32_t blk, int32_t radix, int tab)
2530 {
2531 }
2532
2533 void
2534 hammer_init_alist_config(void)
2535 {
2536         hammer_alist_config_t config;
2537
2538         hammer_alist_template(&Buf_alist_config, HAMMER_FSBUF_MAXBLKS,
2539                               1, HAMMER_FSBUF_METAELMS);
2540         hammer_alist_template(&Vol_normal_alist_config, HAMMER_VOL_MAXCLUSTERS,
2541                               1, HAMMER_VOL_METAELMS_1LYR);
2542         hammer_alist_template(&Vol_super_alist_config,
2543                           HAMMER_VOL_MAXSUPERCLUSTERS * HAMMER_SCL_MAXCLUSTERS,
2544                               HAMMER_SCL_MAXCLUSTERS, HAMMER_VOL_METAELMS_2LYR);
2545         hammer_alist_template(&Supercl_alist_config, HAMMER_VOL_MAXCLUSTERS,
2546                               1, HAMMER_SUPERCL_METAELMS);
2547         hammer_alist_template(&Clu_master_alist_config, HAMMER_CLU_MAXBUFFERS,
2548                               1, HAMMER_CLU_MASTER_METAELMS);
2549         hammer_alist_template(&Clu_slave_alist_config,
2550                               HAMMER_CLU_MAXBUFFERS * HAMMER_FSBUF_MAXBLKS,
2551                               HAMMER_FSBUF_MAXBLKS, HAMMER_CLU_SLAVE_METAELMS);
2552
2553         config = &Vol_super_alist_config;
2554         config->bl_radix_init = super_alist_init;
2555         config->bl_radix_destroy = super_alist_destroy;
2556         config->bl_radix_alloc_fwd = super_alist_alloc_fwd;
2557         config->bl_radix_alloc_rev = super_alist_alloc_rev;
2558         config->bl_radix_free = super_alist_free;
2559         config->bl_radix_print = super_alist_print;
2560
2561         config = &Clu_slave_alist_config;
2562         config->bl_radix_init = buffer_alist_init;
2563         config->bl_radix_destroy = buffer_alist_destroy;
2564         config->bl_radix_alloc_fwd = buffer_alist_alloc_fwd;
2565         config->bl_radix_alloc_rev = buffer_alist_alloc_rev;
2566         config->bl_radix_free = buffer_alist_free;
2567         config->bl_radix_print = buffer_alist_print;
2568 }
2569