HAMMER 4/many - more core infrastructure
sys/vfs/hammer/hammer_ondisk.c
1 /*
2  * Copyright (c) 2007 The DragonFly Project.  All rights reserved.
3  * 
4  * This code is derived from software contributed to The DragonFly Project
5  * by Matthew Dillon <dillon@backplane.com>
6  * 
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions
9  * are met:
10  * 
11  * 1. Redistributions of source code must retain the above copyright
12  *    notice, this list of conditions and the following disclaimer.
13  * 2. Redistributions in binary form must reproduce the above copyright
14  *    notice, this list of conditions and the following disclaimer in
15  *    the documentation and/or other materials provided with the
16  *    distribution.
17  * 3. Neither the name of The DragonFly Project nor the names of its
18  *    contributors may be used to endorse or promote products derived
19  *    from this software without specific, prior written permission.
20  * 
21  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
22  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
23  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
24  * FOR A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE
25  * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
26  * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING,
27  * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
28  * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
29  * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
30  * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
31  * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
32  * SUCH DAMAGE.
33  * 
34  * $DragonFly: src/sys/vfs/hammer/hammer_ondisk.c,v 1.5 2007/11/20 07:16:28 dillon Exp $
35  */
36 /*
37  * Manage HAMMER's on-disk structures.  These routines are primarily
38  * responsible for interfacing with the kernel's I/O subsystem and for
39  * managing in-memory structures.
40  */
41
42 #include "hammer.h"
43 #include <sys/fcntl.h>
44 #include <sys/nlookup.h>
45 #include <sys/buf.h>
46 #include <sys/buf2.h>
47
48 static void hammer_free_volume(hammer_volume_t volume);
49 static int hammer_load_volume(hammer_volume_t volume);
50 static int hammer_load_supercl(hammer_supercl_t supercl, int isnew);
51 static int hammer_load_cluster(hammer_cluster_t cluster, int isnew);
52 static int hammer_load_buffer(hammer_buffer_t buffer, u_int64_t buf_type);
53 static void hammer_remove_node_clist(hammer_buffer_t buffer,
54                         hammer_node_t node);
55 static void initbuffer(hammer_alist_t live, hammer_fsbuf_head_t head,
56                         u_int64_t type);
57 static void alloc_new_buffer(hammer_cluster_t cluster,
58                         hammer_alist_t live, u_int64_t type, int32_t nelements,
59                         int32_t start,
60                         int *errorp, struct hammer_buffer **bufferp);
61 #if 0
62 static void readhammerbuf(hammer_volume_t vol, void *data,
63                         int64_t offset);
64 static void writehammerbuf(hammer_volume_t vol, const void *data,
65                         int64_t offset);
66 #endif
67 static int64_t calculate_cluster_offset(hammer_volume_t vol, int32_t clu_no);
68 static int64_t calculate_supercl_offset(hammer_volume_t vol, int32_t scl_no);
69
70 struct hammer_alist_config Buf_alist_config;
71 struct hammer_alist_config Vol_normal_alist_config;
72 struct hammer_alist_config Vol_super_alist_config;
73 struct hammer_alist_config Supercl_alist_config;
74 struct hammer_alist_config Clu_master_alist_config;
75 struct hammer_alist_config Clu_slave_alist_config;
76
77 /*
78  * Red-Black tree support for various structures
79  */
80 static int
81 hammer_ino_rb_compare(hammer_inode_t ip1, hammer_inode_t ip2)
82 {
83         if (ip1->obj_id < ip2->obj_id)
84                 return(-1);
85         if (ip1->obj_id > ip2->obj_id)
86                 return(1);
87         if (ip1->obj_asof < ip2->obj_asof)
88                 return(-1);
89         if (ip1->obj_asof > ip2->obj_asof)
90                 return(1);
91         return(0);
92 }
93
94 static int
95 hammer_inode_info_cmp(hammer_inode_info_t info, hammer_inode_t ip)
96 {
97         if (info->obj_id < ip->obj_id)
98                 return(-1);
99         if (info->obj_id > ip->obj_id)
100                 return(1);
101         if (info->obj_asof < ip->obj_asof)
102                 return(-1);
103         if (info->obj_asof > ip->obj_asof)
104                 return(1);
105         return(0);
106 }
107
108 static int
109 hammer_vol_rb_compare(hammer_volume_t vol1, hammer_volume_t vol2)
110 {
111         if (vol1->vol_no < vol2->vol_no)
112                 return(-1);
113         if (vol1->vol_no > vol2->vol_no)
114                 return(1);
115         return(0);
116 }
117
118 static int
119 hammer_scl_rb_compare(hammer_supercl_t cl1, hammer_supercl_t cl2)
120 {
121         if (cl1->scl_no < cl2->scl_no)
122                 return(-1);
123         if (cl1->scl_no > cl2->scl_no)
124                 return(1);
125         return(0);
126 }
127
128 static int
129 hammer_clu_rb_compare(hammer_cluster_t cl1, hammer_cluster_t cl2)
130 {
131         if (cl1->clu_no < cl2->clu_no)
132                 return(-1);
133         if (cl1->clu_no > cl2->clu_no)
134                 return(1);
135         return(0);
136 }
137
138 static int
139 hammer_buf_rb_compare(hammer_buffer_t buf1, hammer_buffer_t buf2)
140 {
141         if (buf1->buf_no < buf2->buf_no)
142                 return(-1);
143         if (buf1->buf_no > buf2->buf_no)
144                 return(1);
145         return(0);
146 }
147
148 static int
149 hammer_nod_rb_compare(hammer_node_t node1, hammer_node_t node2)
150 {
151         if (node1->node_offset < node2->node_offset)
152                 return(-1);
153         if (node1->node_offset > node2->node_offset)
154                 return(1);
155         return(0);
156 }
157
158 /*
159  * Note: The lookup function for hammer_ino_rb_tree winds up being named
160  * hammer_ino_rb_tree_RB_LOOKUP_INFO(root, info).  The other lookup
161  * functions are normal, e.g. hammer_clu_rb_tree_RB_LOOKUP(root, clu_no).
162  */
163 RB_GENERATE(hammer_ino_rb_tree, hammer_inode, rb_node, hammer_ino_rb_compare);
164 RB_GENERATE_XLOOKUP(hammer_ino_rb_tree, INFO, hammer_inode, rb_node,
165                 hammer_inode_info_cmp, hammer_inode_info_t);
166 RB_GENERATE2(hammer_vol_rb_tree, hammer_volume, rb_node,
167              hammer_vol_rb_compare, int32_t, vol_no);
168 RB_GENERATE2(hammer_scl_rb_tree, hammer_supercl, rb_node,
169              hammer_scl_rb_compare, int32_t, scl_no);
170 RB_GENERATE2(hammer_clu_rb_tree, hammer_cluster, rb_node,
171              hammer_clu_rb_compare, int32_t, clu_no);
172 RB_GENERATE2(hammer_buf_rb_tree, hammer_buffer, rb_node,
173              hammer_buf_rb_compare, int32_t, buf_no);
174 RB_GENERATE2(hammer_nod_rb_tree, hammer_node, rb_node,
175              hammer_nod_rb_compare, int32_t, node_offset);
176
177 /************************************************************************
178  *                              VOLUMES                                 *
179  ************************************************************************
180  *
181  * Load a HAMMER volume by name.  Returns 0 on success or a positive error
182  * code on failure.  Volumes must be loaded at mount time; get_volume() will
183  * not load a new volume.
184  *
185  * Calls made to hammer_load_volume() are single-threaded.
186  */
187 int
188 hammer_install_volume(struct hammer_mount *hmp, const char *volname)
189 {
190         struct mount *mp;
191         hammer_volume_t volume;
192         struct hammer_volume_ondisk *ondisk;
193         struct nlookupdata nd;
194         struct buf *bp = NULL;
195         int error;
196         int ronly;
197
198         mp = hmp->mp;
199         ronly = ((mp->mnt_flag & MNT_RDONLY) ? 1 : 0);
200
201         /*
202          * Allocate a volume structure
203          */
204         volume = kmalloc(sizeof(*volume), M_HAMMER, M_WAITOK|M_ZERO);
205         volume->vol_name = kstrdup(volname, M_HAMMER);
206         volume->hmp = hmp;
207         volume->io.type = HAMMER_STRUCTURE_VOLUME;
208         volume->io.offset = 0LL;
209
210         /*
211          * Get the device vnode
212          */
213         error = nlookup_init(&nd, volume->vol_name, UIO_SYSSPACE, NLC_FOLLOW);
214         if (error == 0)
215                 error = nlookup(&nd);
216         if (error == 0)
217                 error = cache_vref(&nd.nl_nch, nd.nl_cred, &volume->devvp);
218         nlookup_done(&nd);
219         if (error == 0) {
220                 vn_isdisk(volume->devvp, &error);
221         }
222         if (error == 0) {
223                 vn_lock(volume->devvp, LK_EXCLUSIVE | LK_RETRY);
224                 error = VOP_OPEN(volume->devvp, (ronly ? FREAD : FREAD|FWRITE),
225                                  FSCRED, NULL);
226                 vn_unlock(volume->devvp);
227         }
228         if (error) {
229                 hammer_free_volume(volume);
230                 return(error);
231         }
232
233         /*
234          * Extract the volume number from the volume header and do various
235          * sanity checks.
236          */
237         error = bread(volume->devvp, 0LL, HAMMER_BUFSIZE, &bp);
238         if (error)
239                 goto late_failure;
240         ondisk = (void *)bp->b_data;
241         if (ondisk->head.buf_type != HAMMER_FSBUF_VOLUME) {
242                 kprintf("hammer_mount: volume %s has an invalid header\n",
243                         volume->vol_name);
244                 error = EFTYPE;
245                 goto late_failure;
246         }
247         volume->vol_no = ondisk->vol_no;
248         volume->cluster_base = ondisk->vol_clo_beg;
249         volume->vol_clsize = ondisk->vol_clsize;
250         volume->vol_flags = ondisk->vol_flags;
251         RB_INIT(&volume->rb_clus_root);
252         RB_INIT(&volume->rb_scls_root);
253
254         if (RB_EMPTY(&hmp->rb_vols_root)) {
255                 hmp->fsid = ondisk->vol_fsid;
256         } else if (bcmp(&hmp->fsid, &ondisk->vol_fsid, sizeof(uuid_t))) {
257                 kprintf("hammer_mount: volume %s's fsid does not match "
258                         "other volumes\n", volume->vol_name);
259                 error = EFTYPE;
260                 goto late_failure;
261         }
262
263         /*
264          * Insert the volume structure into the red-black tree.
265          */
266         if (RB_INSERT(hammer_vol_rb_tree, &hmp->rb_vols_root, volume)) {
267                 kprintf("hammer_mount: volume %s has a duplicate vol_no %d\n",
268                         volume->vol_name, volume->vol_no);
269                 error = EEXIST;
270         }
271
272         /*
273          * Set the root volume and load the root cluster.  HAMMER special
274          * cases rootvol and rootcl and will not deallocate the structures.
275          * We do not hold a ref because this would prevent related I/O
276          * from being flushed.
277          */
278         if (error == 0 && ondisk->vol_rootvol == ondisk->vol_no) {
279                 hmp->rootvol = volume;
280                 hmp->rootcl = hammer_get_cluster(volume,
281                                                  ondisk->vol0_root_clu_no,
282                                                  &error, 0);
283                 hammer_rel_cluster(hmp->rootcl, 0);
284                 hmp->fsid_udev = dev2udev(vn_todev(volume->devvp));
285         }
286 late_failure:
287         if (bp)
288                 brelse(bp);
289         if (error) {
290                 /*vinvalbuf(volume->devvp, V_SAVE, 0, 0);*/
291                 VOP_CLOSE(volume->devvp, ronly ? FREAD : FREAD|FWRITE);
292                 hammer_free_volume(volume);
293         }
294         return (error);
295 }
296
297 /*
298  * Unload and free a HAMMER volume.  Called via RB_SCAN, so it must return
299  * >= 0 for the scan to continue.
300  */
301 int
302 hammer_unload_volume(hammer_volume_t volume, void *data __unused)
303 {
304         struct hammer_mount *hmp = volume->hmp;
305         hammer_cluster_t rootcl;
306         int ronly = ((hmp->mp->mnt_flag & MNT_RDONLY) ? 1 : 0);
307
308         /*
309          * Sync clusters, sync volume
310          */
311
312
313         /*
314          * Clean up the root cluster, which is held unlocked in the root
315          * volume.
316          */
317         if (hmp->rootvol == volume) {
318                 if ((rootcl = hmp->rootcl) != NULL)
319                         hmp->rootcl = NULL;
320                 hmp->rootvol = NULL;
321         }
322
323         /*
324          * Unload clusters and super-clusters.  Unloading a super-cluster
325          * also unloads related clusters, but the filesystem may not be
326          * using super-clusters so unload clusters anyway.
327          */
328         RB_SCAN(hammer_clu_rb_tree, &volume->rb_clus_root, NULL,
329                         hammer_unload_cluster, NULL);
330         RB_SCAN(hammer_scl_rb_tree, &volume->rb_scls_root, NULL,
331                         hammer_unload_supercl, NULL);
332
333         /*
334          * Release our buffer and flush anything left in the buffer cache.
335          */
336         hammer_io_release(&volume->io, 1);
337
338         /*
339          * There should be no references on the volume.
340          */
341         KKASSERT(volume->io.lock.refs == 0);
342
343         volume->ondisk = NULL;
344         if (volume->devvp) {
345                 if (ronly) {
346                         vinvalbuf(volume->devvp, 0, 0, 0);
347                         VOP_CLOSE(volume->devvp, FREAD);
348                 } else {
349                         vinvalbuf(volume->devvp, V_SAVE, 0, 0);
350                         VOP_CLOSE(volume->devvp, FREAD|FWRITE);
351                 }
352         }
353
354         /*
355          * Destroy the structure
356          */
357         RB_REMOVE(hammer_vol_rb_tree, &hmp->rb_vols_root, volume);
358         hammer_free_volume(volume);
359         return(0);
360 }
361
362 static
363 void
364 hammer_free_volume(hammer_volume_t volume)
365 {
366         if (volume->vol_name) {
367                 kfree(volume->vol_name, M_HAMMER);
368                 volume->vol_name = NULL;
369         }
370         if (volume->devvp) {
371                 vrele(volume->devvp);
372                 volume->devvp = NULL;
373         }
374         kfree(volume, M_HAMMER);
375 }
376
377 /*
378  * Get a HAMMER volume.  The volume must already exist.
379  */
380 hammer_volume_t
381 hammer_get_volume(struct hammer_mount *hmp, int32_t vol_no, int *errorp)
382 {
383         struct hammer_volume *volume;
384
385         /*
386          * Locate the volume structure
387          */
388         volume = RB_LOOKUP(hammer_vol_rb_tree, &hmp->rb_vols_root, vol_no);
389         if (volume == NULL) {
390                 *errorp = ENOENT;
391                 return(NULL);
392         }
393         hammer_ref(&volume->io.lock);
394
395         /*
396          * Deal with on-disk info
397          */
398         if (volume->ondisk == NULL) {
399                 *errorp = hammer_load_volume(volume);
400                 if (*errorp) {
401                         hammer_rel_volume(volume, 1);
402                         volume = NULL;
403                 }
404         } else {
405                 *errorp = 0;
406         }
407         return(volume);
408 }
409
410 hammer_volume_t
411 hammer_get_root_volume(struct hammer_mount *hmp, int *errorp)
412 {
413         hammer_volume_t volume;
414
415         volume = hmp->rootvol;
416         KKASSERT(volume != NULL);
417         hammer_ref(&volume->io.lock);
418
419         /*
420          * Deal with on-disk info
421          */
422         if (volume->ondisk == NULL) {
423                 *errorp = hammer_load_volume(volume);
424                 if (*errorp) {
425                         hammer_rel_volume(volume, 1);
426                         volume = NULL;
427                 }
428         } else {
429                 *errorp = 0;
430         }
431         return (volume);
432 }
433
434 /*
435  * Load a volume's on-disk information.  The volume must be referenced and
436  * not locked.  We temporarily acquire an exclusive lock to interlock
437  * against releases or multiple get's.
438  */
439 static int
440 hammer_load_volume(hammer_volume_t volume)
441 {
442         struct hammer_volume_ondisk *ondisk;
443         int error;
444
445         hammer_lock_ex(&volume->io.lock);
446         if (volume->ondisk == NULL) {
447                 error = hammer_io_read(volume->devvp, &volume->io);
448                 if (error) {
449                         hammer_unlock(&volume->io.lock);
450                         return (error);
451                 }
452                 volume->ondisk = ondisk = (void *)volume->io.bp->b_data;
453
454                 /*
455                  * Configure the volume's A-lists.  These are used to
456                  * allocate clusters.
457                  */
458                 if (volume->vol_flags & HAMMER_VOLF_USINGSUPERCL) {
459                         volume->alist.config = &Vol_super_alist_config;
460                         volume->alist.meta = ondisk->vol_almeta.super;
461                         volume->alist.info = volume;
462                 } else {
463                         volume->alist.config = &Vol_normal_alist_config;
464                         volume->alist.meta = ondisk->vol_almeta.normal;
465                         volume->alist.info = NULL;
466                 }
467                 hammer_alist_init(&volume->alist);
468         } else {
469                 error = 0;
470         }
471         hammer_unlock(&volume->io.lock);
472         return(0);
473 }
474
475 /*
476  * Release a volume.  Call hammer_io_release on the last reference.  We have
477  * to acquire an exclusive lock to interlock against volume->ondisk tests
478  * in hammer_load_volume().
479  */
480 void
481 hammer_rel_volume(hammer_volume_t volume, int flush)
482 {
483         if (hammer_islastref(&volume->io.lock)) {
484                 hammer_lock_ex(&volume->io.lock);
485                 if (hammer_islastref(&volume->io.lock)) {
486                         volume->ondisk = NULL;
487                         hammer_io_release(&volume->io, flush);
488                 }
489                 hammer_unlock(&volume->io.lock);
490         }
491         hammer_unref(&volume->io.lock);
492 }
493
494 /************************************************************************
495  *                              SUPER-CLUSTERS                          *
496  ************************************************************************
497  *
498  * Manage super-clusters.  Note that a supercl holds a reference to its
499  * associated volume.
500  */
501 hammer_supercl_t
502 hammer_get_supercl(hammer_volume_t volume, int32_t scl_no,
503                    int *errorp, int isnew)
504 {
505         hammer_supercl_t supercl;
506
507         /*
508          * Locate and lock the super-cluster structure, creating one
509          * if necessary.
510          */
511 again:
512         supercl = RB_LOOKUP(hammer_scl_rb_tree, &volume->rb_scls_root, scl_no);
513         if (supercl == NULL) {
514                 supercl = kmalloc(sizeof(*supercl), M_HAMMER, M_WAITOK|M_ZERO);
515                 supercl->scl_no = scl_no;
516                 supercl->volume = volume;
517                 supercl->io.offset = calculate_supercl_offset(volume, scl_no);
518                 supercl->io.type = HAMMER_STRUCTURE_SUPERCL;
519                 hammer_ref(&supercl->io.lock);
520
521                 /*
522                  * Insert the super-cluster into the RB tree and handle late
523                  * collisions.
524                  */
525                 if (RB_INSERT(hammer_scl_rb_tree, &volume->rb_scls_root, supercl)) {
526                         hammer_unref(&supercl->io.lock);
527                         kfree(supercl, M_HAMMER);
528                         goto again;
529                 }
530                 hammer_ref(&volume->io.lock);
531         } else {
532                 hammer_ref(&supercl->io.lock);
533         }
534
535         /*
536          * Deal with on-disk info
537          */
538         if (supercl->ondisk == NULL || isnew) {
539                 *errorp = hammer_load_supercl(supercl, isnew);
540                 if (*errorp) {
541                         hammer_rel_supercl(supercl, 1);
542                         supercl = NULL;
543                 }
544         } else {
545                 *errorp = 0;
546         }
547         return(supercl);
548 }
549
550 static int
551 hammer_load_supercl(hammer_supercl_t supercl, int isnew)
552 {
553         struct hammer_supercl_ondisk *ondisk;
554         hammer_volume_t volume = supercl->volume;
555         int error;
556
557         hammer_lock_ex(&supercl->io.lock);
558         if (supercl->ondisk == NULL) {
559                 if (isnew)
560                         error = hammer_io_new(volume->devvp, &supercl->io);
561                 else
562                         error = hammer_io_read(volume->devvp, &supercl->io);
563                 if (error) {
564                         hammer_unlock(&supercl->io.lock);
565                         return (error);
566                 }
567                 supercl->ondisk = ondisk = (void *)supercl->io.bp->b_data;
568
569                 supercl->alist.config = &Supercl_alist_config;
570                 supercl->alist.meta = ondisk->scl_meta;
571                 supercl->alist.info = NULL;
572
573                 /*
574                  * If this is a new super-cluster we have to initialize
575                  * various ondisk structural elements.  The caller is
576                  * responsible for the remainder.
577                  */
578                 if (isnew) {
579                         struct hammer_alist_live dummy;
580
581                         dummy.config = &Buf_alist_config;
582                         dummy.meta = ondisk->head.buf_almeta;
583                         dummy.info = NULL;
584                         initbuffer(&dummy, &ondisk->head, HAMMER_FSBUF_SUPERCL);
585                         hammer_alist_init(&supercl->alist);
586                 }
587         } else if (isnew) {
588                 error = hammer_io_new(volume->devvp, &supercl->io);
589         } else {
590                 error = 0;
591         }
592         hammer_unlock(&supercl->io.lock);
593         return (error);
594 }
595
596 /*
597  * NOTE: Called from RB_SCAN, must return >= 0 for scan to continue.
598  */
599 int
600 hammer_unload_supercl(hammer_supercl_t supercl, void *data __unused)
601 {
602         KKASSERT(supercl->io.lock.refs == 0);
603         hammer_ref(&supercl->io.lock);
604         hammer_io_release(&supercl->io, 1);
605         hammer_rel_supercl(supercl, 1);
606         return(0);
607 }
608
609 /*
610  * Release a super-cluster.  We have to deal with several places where
611  * another thread can ref the super-cluster.
612  *
613  * Only destroy the structure itself if the related buffer cache buffer
614  * was disassociated from it.  This ties the management of the structure
615  * to the buffer cache subsystem.
616  */
617 void
618 hammer_rel_supercl(hammer_supercl_t supercl, int flush)
619 {
620         hammer_volume_t volume;
621
622         if (hammer_islastref(&supercl->io.lock)) {
623                 hammer_lock_ex(&supercl->io.lock);
624                 if (hammer_islastref(&supercl->io.lock)) {
625                         supercl->ondisk = NULL;
626                         hammer_io_release(&supercl->io, flush);
627                         if (supercl->io.bp == NULL &&
628                             hammer_islastref(&supercl->io.lock)) {
629                                 volume = supercl->volume;
630                                 RB_REMOVE(hammer_scl_rb_tree,
631                                           &volume->rb_scls_root, supercl);
632                                 supercl->volume = NULL; /* sanity */
633                                 kfree(supercl, M_HAMMER);
634                                 hammer_rel_volume(volume, 0);
635                                 return;
636                         }
637                 }
638                 hammer_unlock(&supercl->io.lock);
639         }
640         hammer_unref(&supercl->io.lock);
641 }
642
643 /************************************************************************
644  *                              CLUSTERS                                *
645  ************************************************************************
646  *
647  * Manage clusters.  Note that a cluster holds a reference to its
648  * associated volume.
649  */
650 hammer_cluster_t
651 hammer_get_cluster(hammer_volume_t volume, int32_t clu_no,
652                    int *errorp, int isnew)
653 {
654         hammer_cluster_t cluster;
655
656 again:
657         cluster = RB_LOOKUP(hammer_clu_rb_tree, &volume->rb_clus_root, clu_no);
658         if (cluster == NULL) {
659                 cluster = kmalloc(sizeof(*cluster), M_HAMMER, M_WAITOK|M_ZERO);
660                 cluster->clu_no = clu_no;
661                 cluster->volume = volume;
662                 cluster->io.offset = calculate_cluster_offset(volume, clu_no);
663                 RB_INIT(&cluster->rb_bufs_root);
664                 RB_INIT(&cluster->rb_nods_root);
665                 cluster->io.type = HAMMER_STRUCTURE_CLUSTER;
666                 hammer_ref(&cluster->io.lock);
667
668                 /*
669                  * Insert the cluster into the RB tree and handle late
670                  * collisions.
671                  */
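                /*
                 * A collision can occur because the kmalloc() above may have
                 * blocked, allowing another thread to look up and insert the
                 * same cluster first.  If so, discard ours and retry.
                 */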
672                 if (RB_INSERT(hammer_clu_rb_tree, &volume->rb_clus_root, cluster)) {
673                         hammer_unref(&cluster->io.lock);
674                         kfree(cluster, M_HAMMER);
675                         goto again;
676                 }
677                 hammer_ref(&volume->io.lock);
678         } else {
679                 hammer_ref(&cluster->io.lock);
680         }
681
682         /*
683          * Deal with on-disk info
684          */
685         if (cluster->ondisk == NULL || isnew) {
686                 *errorp = hammer_load_cluster(cluster, isnew);
687                 if (*errorp) {
688                         hammer_rel_cluster(cluster, 1);
689                         cluster = NULL;
690                 }
691         } else {
692                 *errorp = 0;
693         }
694         return (cluster);
695 }
696
697 hammer_cluster_t
698 hammer_get_root_cluster(struct hammer_mount *hmp, int *errorp)
699 {
700         hammer_cluster_t cluster;
701
702         cluster = hmp->rootcl;
703         KKASSERT(cluster != NULL);
704         hammer_ref(&cluster->io.lock);
705
706         /*
707          * Deal with on-disk info
708          */
709         if (cluster->ondisk == NULL) {
710                 *errorp = hammer_load_cluster(cluster, 0);
711                 if (*errorp) {
712                         hammer_rel_cluster(cluster, 1);
713                         cluster = NULL;
714                 }
715         } else {
716                 *errorp = 0;
717         }
718         return (cluster);
719 }
720
721 static
722 int
723 hammer_load_cluster(hammer_cluster_t cluster, int isnew)
724 {
725         hammer_volume_t volume = cluster->volume;
726         struct hammer_cluster_ondisk *ondisk;
727         int error;
728
729         /*
730          * Load the cluster's on-disk info
731          */
732         hammer_lock_ex(&cluster->io.lock);
733         if (cluster->ondisk == NULL) {
734                 if (isnew)
735                         error = hammer_io_new(volume->devvp, &cluster->io);
736                 else
737                         error = hammer_io_read(volume->devvp, &cluster->io);
738                 if (error) {
739                         hammer_unlock(&cluster->io.lock);
740                         return (error);
741                 }
742                 cluster->ondisk = ondisk = (void *)cluster->io.bp->b_data;
743
744                 cluster->alist_master.config = &Clu_master_alist_config;
745                 cluster->alist_master.meta = ondisk->clu_master_meta;
746                 cluster->alist_btree.config = &Clu_slave_alist_config;
747                 cluster->alist_btree.meta = ondisk->clu_btree_meta;
748                 cluster->alist_btree.info = cluster;
749                 cluster->alist_record.config = &Clu_slave_alist_config;
750                 cluster->alist_record.meta = ondisk->clu_record_meta;
751                 cluster->alist_record.info = cluster;
752                 cluster->alist_mdata.config = &Clu_slave_alist_config;
753                 cluster->alist_mdata.meta = ondisk->clu_mdata_meta;
754                 cluster->alist_mdata.info = cluster;
755
756                 cluster->clu_btree_beg = ondisk->clu_btree_beg;
757                 cluster->clu_btree_end = ondisk->clu_btree_end;
758
759                 /*
760                  * If this is a new cluster we have to initialize
761                  * various ondisk structural elements.  The caller is
762                  * responsible for the remainder.
763                  */
764                 if (isnew) {
765                         struct hammer_alist_live dummy;
766
767                         dummy.config = &Buf_alist_config;
768                         dummy.meta = ondisk->head.buf_almeta;
769                         dummy.info = NULL;
770                         initbuffer(&dummy, &ondisk->head, HAMMER_FSBUF_CLUSTER);
771
772                         hammer_alist_init(&cluster->alist_master);
773                         hammer_alist_init(&cluster->alist_btree);
774                         hammer_alist_init(&cluster->alist_record);
775                         hammer_alist_init(&cluster->alist_mdata);
776                 }
777         } else if (isnew) {
778                 error = hammer_io_new(volume->devvp, &cluster->io);
779         } else {
780                 error = 0;
781         }
782         hammer_unlock(&cluster->io.lock);
783         return (error);
784 }
785
786 /*
787  * NOTE: Called from RB_SCAN, must return >= 0 for scan to continue.
788  */
789 int
790 hammer_unload_cluster(hammer_cluster_t cluster, void *data __unused)
791 {
792         hammer_ref(&cluster->io.lock);
793         RB_SCAN(hammer_buf_rb_tree, &cluster->rb_bufs_root, NULL,
794                         hammer_unload_buffer, NULL);
795         KKASSERT(cluster->io.lock.refs == 1);
796         hammer_io_release(&cluster->io, 1);
797         hammer_rel_cluster(cluster, 1);
798         return(0);
799 }
800
801 /*
802  * Reference a cluster that is either already referenced or reachable via
803  * a specially handled pointer (aka rootcl).
804  */
805 int
806 hammer_ref_cluster(hammer_cluster_t cluster)
807 {
808         int error;
809
810         KKASSERT(cluster != NULL);
811         hammer_ref(&cluster->io.lock);
812
813         /*
814          * Deal with on-disk info
815          */
816         if (cluster->ondisk == NULL) {
817                 error = hammer_load_cluster(cluster, 0);
818                 if (error)
819                         hammer_rel_cluster(cluster, 1);
820         } else {
821                 error = 0;
822         }
823         return(error);
824 }
825
826 /*
827  * Release a cluster.  We have to deal with several places where
828  * another thread can ref the cluster.
829  *
830  * Only destroy the structure itself if the related buffer cache buffer
831  * was disassociated from it.  This ties the management of the structure
832  * to the buffer cache subsystem.
833  */
834 void
835 hammer_rel_cluster(hammer_cluster_t cluster, int flush)
836 {
837         hammer_node_t node;
838         hammer_volume_t volume;
839
840         if (hammer_islastref(&cluster->io.lock)) {
841                 hammer_lock_ex(&cluster->io.lock);
842                 if (hammer_islastref(&cluster->io.lock)) {
843                         cluster->ondisk = NULL;
844                         hammer_io_release(&cluster->io, flush);
845
846                         /*
847                          * Clean out the B-Tree node cache, if any, then
848                          * clean up the volume ref and free the cluster.
849                          *
850                          * If the cluster acquires a new reference while we
851                          * are trying to clean it out, abort the cleaning.
852                          *
853                          * There really shouldn't be any nodes at this point
854                          * but we allow a node with no buffer association
855                          * so handle the case.
856                          */
857                         while (cluster->io.bp == NULL &&
858                                hammer_islastref(&cluster->io.lock) &&
859                                (node = RB_ROOT(&cluster->rb_nods_root)) != NULL
860                         ) {
861                                 KKASSERT(node->lock.refs == 0);
862                                 hammer_flush_node(node);
863                         }
864                         if (cluster->io.bp == NULL &&
865                             hammer_islastref(&cluster->io.lock)) {
866                                 volume = cluster->volume;
867                                 RB_REMOVE(hammer_clu_rb_tree,
868                                           &volume->rb_clus_root, cluster);
869                                 cluster->volume = NULL; /* sanity */
870                                 kfree(cluster, M_HAMMER);
871                                 hammer_rel_volume(volume, 0);
872                                 return;
873                         }
874                 }
875                 hammer_unlock(&cluster->io.lock);
876         }
877         hammer_unref(&cluster->io.lock);
878 }
879
880 /************************************************************************
881  *                              BUFFERS                                 *
882  ************************************************************************
883  *
884  * Manage buffers.  Note that a buffer holds a reference to its associated
885  * cluster, and its cluster will hold a reference to the cluster's volume.
886  *
887  * A non-zero buf_type indicates that a new buffer should be created and
888  * zero'd.
889  */
890 hammer_buffer_t
891 hammer_get_buffer(hammer_cluster_t cluster, int32_t buf_no,
892                   u_int64_t buf_type, int *errorp)
893 {
894         hammer_buffer_t buffer;
895
896         /*
897          * Find the buffer.  Note that buffer 0 corresponds to the cluster
898          * header and should never be requested.
899          */
900         KKASSERT(buf_no != 0);
901
902         /*
903          * Locate and lock the buffer structure, creating one if necessary.
904          */
905 again:
906         buffer = RB_LOOKUP(hammer_buf_rb_tree, &cluster->rb_bufs_root, buf_no);
907         if (buffer == NULL) {
908                 buffer = kmalloc(sizeof(*buffer), M_HAMMER, M_WAITOK|M_ZERO);
909                 buffer->buf_no = buf_no;
910                 buffer->cluster = cluster;
911                 buffer->volume = cluster->volume;
912                 buffer->io.offset = cluster->io.offset +
913                                     (buf_no * HAMMER_BUFSIZE);
914                 buffer->io.type = HAMMER_STRUCTURE_BUFFER;
915                 TAILQ_INIT(&buffer->clist);
916                 hammer_ref(&buffer->io.lock);
917
918                 /*
919                  * Insert the buffer into the RB tree and handle late
920                  * collisions.
921                  */
922                 if (RB_INSERT(hammer_buf_rb_tree, &cluster->rb_bufs_root, buffer)) {
923                         hammer_unref(&buffer->io.lock);
924                         kfree(buffer, M_HAMMER);
925                         goto again;
926                 }
927                 hammer_ref(&cluster->io.lock);
928         } else {
929                 hammer_ref(&buffer->io.lock);
930         }
931
932         /*
933          * Deal with on-disk info
934          */
935         if (buffer->ondisk == NULL || buf_type) {
936                 *errorp = hammer_load_buffer(buffer, buf_type);
937                 if (*errorp) {
938                         hammer_rel_buffer(buffer, 1);
939                         buffer = NULL;
940                 }
941         } else {
942                 *errorp = 0;
943         }
944         return(buffer);
945 }
946
947 static int
948 hammer_load_buffer(hammer_buffer_t buffer, u_int64_t buf_type)
949 {
950         hammer_volume_t volume;
951         hammer_fsbuf_ondisk_t ondisk;
952         int error;
953
954         /*
955          * Load the buffer's on-disk info
956          */
957         volume = buffer->volume;
958         hammer_lock_ex(&buffer->io.lock);
959         if (buffer->ondisk == NULL) {
960                 if (buf_type) {
961                         error = hammer_io_new(volume->devvp, &buffer->io);
962                 } else {
963                         error = hammer_io_read(volume->devvp, &buffer->io);
964                 }
965                 if (error) {
966                         hammer_unlock(&buffer->io.lock);
967                         return (error);
968                 }
969                 buffer->ondisk = ondisk = (void *)buffer->io.bp->b_data;
970                 buffer->alist.config = &Buf_alist_config;
971                 buffer->alist.meta = ondisk->head.buf_almeta;
972
973                 if (buf_type) {
974                         initbuffer(&buffer->alist, &ondisk->head, buf_type);
975                 }
976                 buffer->buf_type = ondisk->head.buf_type;
977         } else if (buf_type) {
978                 error = hammer_io_new(volume->devvp, &buffer->io);
979         } else {
980                 error = 0;
981         }
982         hammer_unlock(&buffer->io.lock);
983         return (error);
984 }
985
986 /*
987  * NOTE: Called from RB_SCAN, must return >= 0 for scan to continue.
988  */
989 int
990 hammer_unload_buffer(hammer_buffer_t buffer, void *data __unused)
991 {
992         hammer_ref(&buffer->io.lock);
993         hammer_flush_buffer_nodes(buffer);
994         hammer_io_release(&buffer->io, 1);
995         KKASSERT(buffer->io.lock.refs == 1);
996         hammer_rel_buffer(buffer, 1);
997         return(0);
998 }
999
1000 /*
1001  * Reference a buffer that is either already referenced or reachable via
1002  * a specially handled pointer (aka cursor->buffer).
1003  */
1004 int
1005 hammer_ref_buffer(hammer_buffer_t buffer)
1006 {
1007         int error;
1008
1009         hammer_ref(&buffer->io.lock);
1010         if (buffer->ondisk == NULL) {
1011                 error = hammer_load_buffer(buffer, 0);
1012                 if (error) {
1013                         hammer_rel_buffer(buffer, 1);
1014                         /*
1015                          * NOTE: buffer pointer can become stale after
1016                          * the above release.
1017                          */
1018                 } else {
1019                         KKASSERT(buffer->buf_type ==
1020                                  buffer->ondisk->head.buf_type);
1021                 }
1022         } else {
1023                 error = 0;
1024         }
1025         return(error);
1026 }
1027
1028 /*
1029  * Release a buffer.  We have to deal with several places where
1030  * another thread can ref the buffer.
1031  *
1032  * Only destroy the structure itself if the related buffer cache buffer
1033  * was disassociated from it.  This ties the management of the structure
1034  * to the buffer cache subsystem.  buffer->ondisk determines whether the
1035  * embedded io is referenced or not.
1036  */
1037 void
1038 hammer_rel_buffer(hammer_buffer_t buffer, int flush)
1039 {
1040         hammer_cluster_t cluster;
1041         hammer_node_t node;
1042
1043         if (hammer_islastref(&buffer->io.lock)) {
1044                 hammer_lock_ex(&buffer->io.lock);
1045                 if (hammer_islastref(&buffer->io.lock)) {
1046                         buffer->ondisk = NULL;
1047                         hammer_io_release(&buffer->io, flush);
1048
1049                         /*
1050                          * Clean out the B-Tree node cache, if any, then
1051                          * clean up the cluster ref and free the buffer.
1052                          *
1053                          * If the buffer acquires a new reference while we
1054                          * are trying to clean it out, abort the cleaning.
1055                          */
1056                         while (buffer->io.bp == NULL &&
1057                                hammer_islastref(&buffer->io.lock) &&
1058                                (node = TAILQ_FIRST(&buffer->clist)) != NULL
1059                         ) {
1060                                 KKASSERT(node->lock.refs == 0);
1061                                 hammer_flush_node(node);
1062                         }
1063                         if (buffer->io.bp == NULL &&
1064                             hammer_islastref(&buffer->io.lock)) {
1065                                 cluster = buffer->cluster;
1066                                 RB_REMOVE(hammer_buf_rb_tree,
1067                                           &cluster->rb_bufs_root, buffer);
1068                                 buffer->cluster = NULL; /* sanity */
1069                                 kfree(buffer, M_HAMMER);
1070                                 hammer_rel_cluster(cluster, 0);
1071                                 return;
1072                         }
1073                 }
1074                 hammer_unlock(&buffer->io.lock);
1075         }
1076         hammer_unref(&buffer->io.lock);
1077 }
1078
1079 /*
1080  * Flush passively cached B-Tree nodes associated with this buffer.
1081  *
1082  * NOTE: The buffer is referenced and locked.
1083  */
1084 void
1085 hammer_flush_buffer_nodes(hammer_buffer_t buffer)
1086 {
1087         hammer_node_t node;
1088
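        /*
         * save_scan records the iteration point so hammer_remove_node_clist()
         * can adjust it if the next node is removed while hammer_flush_node()
         * blocks.
         */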
1089         node = TAILQ_FIRST(&buffer->clist);
1090         while (node) {
1091                 buffer->save_scan = TAILQ_NEXT(node, entry);
1092                 if (node->lock.refs == 0)
1093                         hammer_flush_node(node);
1094                 node = buffer->save_scan;
1095         }
1096 }
1097
1098 /************************************************************************
1099  *                              NODES                                   *
1100  ************************************************************************
1101  *
1102  * Manage B-Tree nodes.  B-Tree nodes represent the primary indexing
1103  * method used by the HAMMER filesystem.
1104  *
1105  * Unlike other HAMMER structures, a hammer_node can be PASSIVELY
1106  * associated with its buffer.  It can have an active buffer reference
1107  * even when the node itself has no references.  The node also passively
1108  * associates itself with its cluster without holding any cluster refs.
1109  * The cluster ref is indirectly maintained by the active buffer ref when
1110  * a node is acquired.
1111  *
1112  * A hammer_node can also be passively associated with other HAMMER
1113  * structures, such as inodes, while retaining 0 references.  These
1114  * associations can be cleared backwards using a pointer-to-pointer in
1115  * the hammer_node.
1116  *
1117  * This allows the HAMMER implementation to cache hammer_node's long-term
1118  * and short-cut a great deal of the infrastructure's complexity.  In
1119  * most cases a cached node can be reacquired without having to dip into
1120  * either the buffer or cluster management code.
1121  *
1122  * The caller must pass a referenced cluster on call and will retain
1123  * ownership of the reference on return.  The node will acquire its own
1124  * additional references, if necessary.
1125  */
1126 hammer_node_t
1127 hammer_get_node(hammer_cluster_t cluster, int32_t node_offset, int *errorp)
1128 {
1129         hammer_node_t node;
1130
1131         /*
1132          * Locate the structure, allocating one if necessary.
1133          */
1134 again:
1135         node = RB_LOOKUP(hammer_nod_rb_tree, &cluster->rb_nods_root,
1136                          node_offset);
1137         if (node == NULL) {
1138                 node = kmalloc(sizeof(*node), M_HAMMER, M_WAITOK|M_ZERO);
1139                 node->node_offset = node_offset;
1140                 node->cluster = cluster;
1141                 if (RB_INSERT(hammer_nod_rb_tree, &cluster->rb_nods_root,
1142                               node)) {
1143                         kfree(node, M_HAMMER);
1144                         goto again;
1145                 }
1146         }
1147         *errorp = hammer_ref_node(node);
1148         if (*errorp) {
1149                 /*
1150                  * NOTE: The node pointer may be stale on error return.
1151                  * In fact, it's probably been destroyed.
1152                  */
1153                 node = NULL;
1154         }
1155         return(node);
1156 }
1157
1158 /*
1159  * Reference the node to prevent disassociations, then associate and
1160  * load the related buffer.  This routine can also be called to reference
1161  * a node from a cache pointer.
1162  *
1163  * NOTE: Because the caller does not have a ref on the node, the caller's
1164  * node pointer will be stale if an error is returned.  We may also wind
1165  * up clearing the related cache pointers.
1166  *
1167  * NOTE: The cluster is indirectly referenced by our buffer ref.
1168  */
1169 int
1170 hammer_ref_node(hammer_node_t node)
1171 {
1172         hammer_buffer_t buffer;
1173         int32_t buf_no;
1174         int error;
1175
1176         hammer_ref(&node->lock);
1177         error = 0;
1178         if (node->ondisk == NULL) {
1179                 hammer_lock_ex(&node->lock);
1180                 if (node->ondisk == NULL) {
1181                         /*
1182                          * This is a little confusing but the gist is that
1183                          * node->buffer determines whether the node is on
1184                          * the buffer's clist and node->ondisk determines
1185                          * whether the buffer is referenced.
1186                          */
1187                         if ((buffer = node->buffer) != NULL) {
1188                                 error = hammer_ref_buffer(buffer);
1189                         } else {
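                                /*
                                 * node_offset is a cluster-relative byte
                                 * offset, so the containing buffer is simply
                                 * node_offset / HAMMER_BUFSIZE.
                                 */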
1190                                 buf_no = node->node_offset / HAMMER_BUFSIZE;
1191                                 buffer = hammer_get_buffer(node->cluster,
1192                                                            buf_no, 0, &error);
1193                                 if (buffer) {
1194                                         KKASSERT(error == 0);
1195                                         TAILQ_INSERT_TAIL(&buffer->clist,
1196                                                           node, entry);
1197                                         node->buffer = buffer;
1198                                 }
1199                         }
1200                         if (error == 0) {
1201                                 node->ondisk = (void *)((char *)buffer->ondisk +
1202                                        (node->node_offset & HAMMER_BUFMASK));
1203                         }
1204                 }
1205                 hammer_unlock(&node->lock);
1206         }
1207         if (error)
1208                 hammer_rel_node(node);
1209         return (error);
1210 }
1211
1212 /*
1213  * Release a hammer_node.  The node retains a passive association with
1214  * its cluster, buffer and caches.
1215  *
1216  * However, to avoid cluttering up kernel memory with tons of B-Tree
1217  * node cache structures we destroy the node if no passive cache or
1218  * (instantiated) buffer references exist.
1219  */
1220 void
1221 hammer_rel_node(hammer_node_t node)
1222 {
1223         hammer_cluster_t cluster;
1224         hammer_buffer_t buffer;
1225
1226         if (hammer_islastref(&node->lock)) {
1227                 cluster = node->cluster;
1228                 /*
1229                  * Clutter control, this case only occurs after a failed
1230                  * load since otherwise ondisk will be non-NULL.
1231                  */
1232                 if (node->cache1 == NULL && node->cache2 == NULL && 
1233                     node->ondisk == NULL) {
1234                         RB_REMOVE(hammer_nod_rb_tree, &cluster->rb_nods_root,
1235                                   node);
1236                         if ((buffer = node->buffer) != NULL) {
1237                                 node->buffer = NULL;
1238                                 hammer_remove_node_clist(buffer, node);
1239                         }
1240                         kfree(node, M_HAMMER);
1241                         return;
1242                 }
1243
1244                 /*
1245                  * node->ondisk determines whether we have a buffer reference
1246                  * to get rid of or not.  Only get rid of the reference if
1247                  * the kernel tried to flush the buffer.
1248                  *
1249                  * NOTE: Once unref'd the node can be physically destroyed,
1250                          * so our node is stale afterwards.
1251                  *
1252                  * This case occurs if the node still has cache references.
1253                  * We could remove the references and free the structure
1254                  * but for now we allow them (and the node structure) to
1255                  * remain intact.
1256                  */
1257                 if (node->ondisk && hammer_io_checkflush(&node->buffer->io)) {
1258                         buffer = node->buffer;
1259                         node->buffer = NULL;
1260                         node->ondisk = NULL;
1261                         hammer_remove_node_clist(buffer, node);
1262                         hammer_unref(&node->lock);
1263                         hammer_rel_buffer(buffer, 0);
1264                 } else {
1265                         hammer_unref(&node->lock);
1266                 }
1267         } else {
1268                 hammer_unref(&node->lock);
1269         }
1270 }
1271
1272 /*
1273  * Cache-and-release a hammer_node.  Kinda like catching and releasing a
1274  * fish, but keeping an eye on him.  The node is passively cached in *cache.
1275  *
1276  * NOTE!  HAMMER may NULL *cache at any time, even after you have
1277  * referenced the node!
1278  */
1279 void
1280 hammer_cache_node(hammer_node_t node, struct hammer_node **cache)
1281 {
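        /*
         * cache1 is the most recently used slot.  If the passed cache pointer
         * already occupies cache2, just swap the two slots.  Otherwise clear
         * the old cache2 back-pointer, shift cache1 down, and install the new
         * pointer in cache1.
         */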
1282         if (node->cache1 != cache) {
1283                 if (node->cache2 == cache) {
1284                         struct hammer_node **tmp;
1285                         tmp = node->cache1;
1286                         node->cache1 = node->cache2;
1287                         node->cache2 = tmp;
1288                 } else {
1289                         if (node->cache2)
1290                                 *node->cache2 = NULL;
1291                         node->cache2 = node->cache1;
1292                         node->cache1 = cache;
1293                         *cache = node;
1294                 }
1295         }
1296 }
1297
1298 void
1299 hammer_uncache_node(struct hammer_node **cache)
1300 {
1301         hammer_node_t node;
1302
1303         if ((node = *cache) != NULL) {
1304                 *cache = NULL;
1305                 if (node->cache1 == cache) {
1306                         node->cache1 = node->cache2;
1307                         node->cache2 = NULL;
1308                 } else if (node->cache2 == cache) {
1309                         node->cache2 = NULL;
1310                 } else {
1311                         panic("hammer_uncache_node: missing cache linkage");
1312                 }
1313                 if (node->cache1 == NULL && node->cache2 == NULL &&
1314                     node->lock.refs == 0) {
1315                         hammer_flush_node(node);
1316                 }
1317         }
1318 }
1319
1320 /*
1321  * Remove a node's cache references and destroy the node if it has no
1322  * references.  This is typically called from the buffer handling code.
1323  *
1324  * The node may have an active buffer reference (ondisk != NULL) even
1325  * if the node itself has no references.
1326  *
1327  * Note that a caller iterating through nodes via a buffer must have its
1328  * own reference on the buffer or our hammer_rel_buffer() call below may
1329  * rip it out from under the caller.
1330  */
1331 void
1332 hammer_flush_node(hammer_node_t node)
1333 {
1334         hammer_buffer_t buffer;
1335
1336         if (node->cache1)
1337                 *node->cache1 = NULL;
1338         if (node->cache2)
1339                 *node->cache2 = NULL;
1340         if (node->lock.refs == 0) {
1341                 RB_REMOVE(hammer_nod_rb_tree, &node->cluster->rb_nods_root,
1342                           node);
1343                 if ((buffer = node->buffer) != NULL) {
1344                         node->buffer = NULL;
1345                         hammer_remove_node_clist(buffer, node);
1346                         if (node->ondisk) {
1347                                 node->ondisk = NULL;
1348                                 hammer_rel_buffer(buffer, 0);
1349                         }
1350                 }
1351                 kfree(node, M_HAMMER);
1352         }
1353 }
1354
1355 /*
1356  * Remove a node from the buffer's clist.  Adjust save_scan as appropriate.
1357  * This is in its own little routine to properly handle interactions with
1358  * save_scan, so it is possible to block while scanning a buffer's node list.
1359  */
1360 static
1361 void
1362 hammer_remove_node_clist(hammer_buffer_t buffer, hammer_node_t node)
1363 {
1364         if (buffer->save_scan == node)
1365                 buffer->save_scan = TAILQ_NEXT(node, entry);
1366         TAILQ_REMOVE(&buffer->clist, node, entry);
1367 }
1368
1369 /************************************************************************
1370  *                              A-LIST ALLOCATORS                       *
1371  ************************************************************************/
1372
1373 /*
1374  * Allocate HAMMER elements - btree nodes, data storage, and record elements
1375  *
1376  * The passed *bufferp should be initialized to NULL.  On successive calls
1377  * *bufferp caches the most recent buffer used until put away by the caller.
1378  * Note that previously returned pointers using the cached buffer become
1379  * invalid on successive calls which reuse *bufferp.
1380  *
1381  * All allocations first attempt to use the block found at the specified
1382  * iterator.  If that fails the first available block is used.  If that
1383  * fails a new buffer is allocated and associated with the buffer type
1384  * A-list and the element is allocated out of the new buffer.
1385  */
1386
1387 hammer_node_t
1388 hammer_alloc_btree(hammer_cluster_t cluster, int *errorp)
1389 {
1390         hammer_buffer_t buffer;
1391         hammer_alist_t live;
1392         hammer_node_t node;
1393         int32_t elm_no;
1394         int32_t buf_no;
1395         int32_t node_offset;
1396
1397         /*
1398          * Allocate a B-Tree element
1399          */
1400         buffer = NULL;
1401         live = &cluster->alist_btree;
1402         elm_no = hammer_alist_alloc_fwd(live, 1, cluster->ondisk->idx_index);
1403         if (elm_no == HAMMER_ALIST_BLOCK_NONE)
1404                 elm_no = hammer_alist_alloc_fwd(live, 1, 0);
1405         if (elm_no == HAMMER_ALIST_BLOCK_NONE) {
1406                 alloc_new_buffer(cluster, live,
1407                                  HAMMER_FSBUF_BTREE, HAMMER_BTREE_NODES,
1408                                  cluster->ondisk->idx_index, errorp, &buffer);
1409                 elm_no = hammer_alist_alloc(live, 1);
1410                 if (elm_no == HAMMER_ALIST_BLOCK_NONE) {
1411                         *errorp = ENOSPC;
1412                         if (buffer)
1413                                 hammer_rel_buffer(buffer, 0);
1414                         return(NULL);
1415                 }
1416         }
1417         cluster->ondisk->idx_index = elm_no;
1418
1419         /*
1420          * Load and return the B-Tree element
1421          */
1422         buf_no = elm_no / HAMMER_FSBUF_MAXBLKS;
1423         node_offset = buf_no * HAMMER_BUFSIZE +
1424                       offsetof(union hammer_fsbuf_ondisk,
1425                                btree.nodes[elm_no & HAMMER_FSBUF_BLKMASK]);
1426         node = hammer_get_node(cluster, node_offset, errorp);
1427         if (node) {
1428                 bzero(node->ondisk, sizeof(*node->ondisk));
1429         } else {
1430                 hammer_alist_free(live, elm_no, 1);
1431                 /* hammer_get_node() failed; node is already NULL */
1432                 node = NULL;
1433         }
1434         if (buffer)
1435                 hammer_rel_buffer(buffer, 0);
1436         return(node);
1437 }
1438
1439 void *
1440 hammer_alloc_data(hammer_cluster_t cluster, int32_t bytes,
1441                   int *errorp, struct hammer_buffer **bufferp)
1442 {
1443         hammer_buffer_t buffer;
1444         hammer_alist_t live;
1445         int32_t elm_no;
1446         int32_t buf_no;
1447         int32_t nblks;
1448         void *item;
1449
1450         /*
1451          * Allocate a data element
1452          */
1453         nblks = (bytes + HAMMER_DATA_BLKMASK) / HAMMER_DATA_BLKSIZE;
1454         live = &cluster->alist_mdata;
1455         elm_no = hammer_alist_alloc_fwd(live, nblks, cluster->ondisk->idx_data);
1456         if (elm_no == HAMMER_ALIST_BLOCK_NONE)
1457                 elm_no = hammer_alist_alloc_fwd(live, nblks, 0);
1458         if (elm_no == HAMMER_ALIST_BLOCK_NONE) {
1459                 alloc_new_buffer(cluster, live,
1460                                  HAMMER_FSBUF_DATA, HAMMER_DATA_NODES,
1461                                  cluster->ondisk->idx_data, errorp, bufferp);
1462                 elm_no = hammer_alist_alloc(live, nblks);
1463                 if (elm_no == HAMMER_ALIST_BLOCK_NONE) {
1464                         *errorp = ENOSPC;
1465                         return(NULL);
1466                 }
1467         }
1468         cluster->ondisk->idx_data = elm_no;
1469
1470         /*
1471          * Load and return the B-Tree element
1472          * Load and return the data element
1473         buf_no = elm_no / HAMMER_FSBUF_MAXBLKS;
1474         buffer = *bufferp;
1475         if (buffer == NULL || buffer->cluster != cluster ||
1476             buffer->buf_no != buf_no) {
1477                 if (buffer)
1478                         hammer_rel_buffer(buffer, 0);
1479                 buffer = hammer_get_buffer(cluster, buf_no, 0, errorp);
1480                 *bufferp = buffer;
1481         }
1482         KKASSERT(buffer->ondisk->head.buf_type == HAMMER_FSBUF_DATA);
1483         item = &buffer->ondisk->data.data[elm_no & HAMMER_FSBUF_BLKMASK];
1484         bzero(item, nblks * HAMMER_DATA_BLKSIZE);
1485         *errorp = 0;
1486         return(item);
1487 }
1488
1489 void *
1490 hammer_alloc_record(hammer_cluster_t cluster,
1491                     int *errorp, struct hammer_buffer **bufferp)
1492 {
1493         hammer_buffer_t buffer;
1494         hammer_alist_t live;
1495         int32_t elm_no;
1496         int32_t buf_no;
1497         void *item;
1498
1499         /*
1500          * Allocate a record element
1501          */
1502         live = &cluster->alist_record;
1503         elm_no = hammer_alist_alloc_rev(live, 1, cluster->ondisk->idx_record);
1504         if (elm_no == HAMMER_ALIST_BLOCK_NONE)
1505                 elm_no = hammer_alist_alloc_rev(live, 1,HAMMER_ALIST_BLOCK_MAX);
1506         if (elm_no == HAMMER_ALIST_BLOCK_NONE) {
1507                 alloc_new_buffer(cluster, live,
1508                                  HAMMER_FSBUF_RECORDS, HAMMER_RECORD_NODES,
1509                                  cluster->ondisk->idx_record, errorp, bufferp);
1510                 elm_no = hammer_alist_alloc(live, 1);
1511                 if (elm_no == HAMMER_ALIST_BLOCK_NONE) {
1512                         *errorp = ENOSPC;
1513                         return(NULL);
1514                 }
1515         }
1516         cluster->ondisk->idx_record = elm_no;
1517
1518         /*
1519          * Load and return the B-Tree element
1520          * Load and return the record element
1521         buf_no = elm_no / HAMMER_FSBUF_MAXBLKS;
1522         buffer = *bufferp;
1523         if (buffer == NULL || buffer->cluster != cluster ||
1524             buffer->buf_no != buf_no) {
1525                 if (buffer)
1526                         hammer_rel_buffer(buffer, 0);
1527                 buffer = hammer_get_buffer(cluster, buf_no, 0, errorp);
1528                 *bufferp = buffer;
1529         }
1530         KKASSERT(buffer->ondisk->head.buf_type == HAMMER_FSBUF_RECORDS);
1531         item = &buffer->ondisk->record.recs[elm_no & HAMMER_FSBUF_BLKMASK];
1532         bzero(item, sizeof(union hammer_record_ondisk));
1533         *errorp = 0;
1534         return(item);
1535 }
1536
1537 /*
1538  * Free HAMMER elements based on either a hammer_buffer and element pointer
1539  * or a cluster-relative byte offset.
1540  */
1541 void
1542 hammer_free_btree_ptr(hammer_buffer_t buffer, hammer_node_ondisk_t node)
1543 {
1544         int32_t elm_no;
1545         hammer_alist_t live;
1546
1547         elm_no = node - &buffer->ondisk->btree.nodes[0];
1548         KKASSERT(elm_no >= 0 && elm_no < HAMMER_BTREE_NODES);
1549         elm_no += buffer->buf_no * HAMMER_FSBUF_MAXBLKS;
1550         live = &buffer->cluster->alist_btree;
1551         hammer_alist_free(live, elm_no, 1);
1552 }
1553
1554 void
1555 hammer_free_data_ptr(hammer_buffer_t buffer, void *data, int bytes)
1556 {
1557         int32_t elm_no;
1558         int32_t nblks;
1559         hammer_alist_t live;
1560
1561         elm_no = ((char *)data - (char *)buffer->ondisk->data.data) /
1562                  HAMMER_DATA_BLKSIZE;
1563         KKASSERT(elm_no >= 0 && elm_no < HAMMER_DATA_NODES);
1564         elm_no += buffer->buf_no * HAMMER_FSBUF_MAXBLKS;
1565         nblks = (bytes + HAMMER_DATA_BLKMASK) / HAMMER_DATA_BLKSIZE;
1566         live = &buffer->cluster->alist_mdata;
1567         hammer_alist_free(live, elm_no, nblks);
1568 }
1569
1570 void
1571 hammer_free_record_ptr(hammer_buffer_t buffer, union hammer_record_ondisk *rec)
1572 {
1573         int32_t elm_no;
1574         hammer_alist_t live;
1575
1576         elm_no = rec - &buffer->ondisk->record.recs[0];
1577         KKASSERT(elm_no >= 0 && elm_no < HAMMER_RECORD_NODES);
1578         elm_no += buffer->buf_no * HAMMER_FSBUF_MAXBLKS;
1579         live = &buffer->cluster->alist_record;
1580         hammer_alist_free(live, elm_no, 1);
1581 }
1582
1583 void
1584 hammer_free_btree(hammer_cluster_t cluster, int32_t bclu_offset)
1585 {
1586         const int32_t blksize = sizeof(struct hammer_node_ondisk);
1587         int32_t fsbuf_offset = bclu_offset & HAMMER_BUFMASK;
1588         hammer_alist_t live;
1589         int32_t elm_no;
1590
1591         elm_no = bclu_offset / HAMMER_BUFSIZE * HAMMER_FSBUF_MAXBLKS;
1592         fsbuf_offset -= offsetof(union hammer_fsbuf_ondisk, btree.nodes[0]);
1593         live = &cluster->alist_btree;
1594         KKASSERT(fsbuf_offset >= 0 && fsbuf_offset % blksize == 0);
1595         elm_no += fsbuf_offset / blksize;
1596         hammer_alist_free(live, elm_no, 1);
1597 }
1598
1599 void
1600 hammer_free_data(hammer_cluster_t cluster, int32_t bclu_offset, int32_t bytes)
1601 {
1602         const int32_t blksize = HAMMER_DATA_BLKSIZE;
1603         int32_t fsbuf_offset = bclu_offset & HAMMER_BUFMASK;
1604         hammer_alist_t live;
1605         int32_t elm_no;
1606         int32_t nblks;
1607
1608         elm_no = bclu_offset / HAMMER_BUFSIZE * HAMMER_FSBUF_MAXBLKS;
1609         fsbuf_offset -= offsetof(union hammer_fsbuf_ondisk, data.data[0][0]);
1610         live = &cluster->alist_mdata;
1611         nblks = (bytes + HAMMER_DATA_BLKMASK) / HAMMER_DATA_BLKSIZE;
1612         KKASSERT(fsbuf_offset >= 0 && fsbuf_offset % blksize == 0);
1613         elm_no += fsbuf_offset / blksize;
1614         hammer_alist_free(live, elm_no, nblks);
1615 }
1616
1617 void
1618 hammer_free_record(hammer_cluster_t cluster, int32_t bclu_offset)
1619 {
1620         const int32_t blksize = sizeof(union hammer_record_ondisk);
1621         int32_t fsbuf_offset = bclu_offset & HAMMER_BUFMASK;
1622         hammer_alist_t live;
1623         int32_t elm_no;
1624
1625         elm_no = bclu_offset / HAMMER_BUFSIZE * HAMMER_FSBUF_MAXBLKS;
1626         fsbuf_offset -= offsetof(union hammer_fsbuf_ondisk, record.recs[0]);
1627         live = &cluster->alist_record;
1628         KKASSERT(fsbuf_offset >= 0 && fsbuf_offset % blksize == 0);
1629         elm_no += fsbuf_offset / blksize;
1630         hammer_alist_free(live, elm_no, 1);
1631 }
1632
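/*
 * Worked example (sketch) of the element numbering the free routines
 * above invert: element k of filesystem buffer b within a cluster has
 *
 *      elm_no      = b * HAMMER_FSBUF_MAXBLKS + k
 *      bclu_offset = b * HAMMER_BUFSIZE +
 *                    offsetof(union hammer_fsbuf_ondisk, record.recs[k])
 *
 * (record.recs[] stands in for btree.nodes[] or data.data[] for the
 * other buffer types).  The byte-offset forms recover b from
 * bclu_offset / HAMMER_BUFSIZE and k from the remaining fsbuf-relative
 * offset divided by the element size.
 */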
1633
1634 /*
1635  * Allocate a new filesystem buffer and assign it to the specified
1636  * filesystem buffer type.  The new buffer will be added to the
1637  * type-specific A-list and initialized.
1638  */
1639 static void
1640 alloc_new_buffer(hammer_cluster_t cluster, hammer_alist_t live,
1641                  u_int64_t type, int32_t nelements,
1642                  int start, int *errorp, struct hammer_buffer **bufferp)
1643 {
1644         hammer_buffer_t buffer;
1645         int32_t buf_no;
1646
1647         start = start / HAMMER_FSBUF_MAXBLKS;   /* convert to buf_no */
1648
1649         if (type == HAMMER_FSBUF_RECORDS) {
1650                 buf_no = hammer_alist_alloc_rev(&cluster->alist_master,
1651                                                 1, start);
1652                 if (buf_no == HAMMER_ALIST_BLOCK_NONE) {
1653                         buf_no = hammer_alist_alloc_rev(&cluster->alist_master,
1654                                                 1, HAMMER_ALIST_BLOCK_MAX);
1655                 }
1656         } else {
1657                 buf_no = hammer_alist_alloc_fwd(&cluster->alist_master,
1658                                                 1, start);
1659                 if (buf_no == HAMMER_ALIST_BLOCK_NONE) {
1660                         buf_no = hammer_alist_alloc_fwd(&cluster->alist_master,
1661                                                 1, 0);
1662                 }
1663         }
1664         KKASSERT(buf_no != HAMMER_ALIST_BLOCK_NONE); /* XXX */
1665
1666         /*
1667          * The new buffer must be initialized (type != 0) regardless of
1668          * whether we already have it cached or not, so don't try to
1669          * optimize the cached buffer check.  Just call hammer_get_buffer().
1670          */
1671         buffer = hammer_get_buffer(cluster, buf_no, type, errorp);
1672         if (*bufferp)
1673                 hammer_rel_buffer(*bufferp, 0);
1674         *bufferp = buffer;
1675 }
1676
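/*
 * Design note (inferred from the allocation directions above): record
 * buffers are carved from the top of the cluster's master A-list
 * (reverse allocation) while B-Tree and data buffers are carved from
 * the bottom (forward allocation), so the two classes of buffers grow
 * toward each other within the cluster.
 */
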
1677 #if 0
1678
1679 /*
1680  * Flush various tracking structures to disk
1681  */
1686 void
1687 flush_all_volumes(void)
1688 {
1689         hammer_volume_t vol;
1690
1691         for (vol = VolBase; vol; vol = vol->next)
1692                 flush_volume(vol);
1693 }
1694
1695 void
1696 flush_volume(hammer_volume_t vol)
1697 {
1698         hammer_supercl_t supercl;
1699         hammer_cluster_t cl;
1700
1701         for (supercl = vol->supercl_base; supercl; supercl = supercl->next)
1702                 flush_supercl(supercl);
1703         for (cl = vol->cluster_base; cl; cl = cl->next)
1704                 flush_cluster(cl);
1705         writehammerbuf(vol, vol->ondisk, 0);
1706 }
1707
1708 void
1709 flush_supercl(hammer_supercl_t supercl)
1710 {
1711         int64_t supercl_offset;
1712
1713         supercl_offset = supercl->scl_offset;
1714         writehammerbuf(supercl->volume, supercl->ondisk, supercl_offset);
1715 }
1716
1717 void
1718 flush_cluster(hammer_cluster_t cl)
1719 {
1720         hammer_buffer_t buf;
1721         int64_t cluster_offset;
1722
1723         for (buf = cl->buffer_base; buf; buf = buf->next)
1724                 flush_buffer(buf);
1725         cluster_offset = cl->clu_offset;
1726         writehammerbuf(cl->volume, cl->ondisk, cluster_offset);
1727 }
1728
1729 void
1730 flush_buffer(hammer_buffer_t buf)
1731 {
1732         int64_t buffer_offset;
1733
1734         buffer_offset = buf->buf_offset + buf->cluster->clu_offset;
1735         writehammerbuf(buf->volume, buf->ondisk, buffer_offset);
1736 }
1737
1738 #endif
1739
1740 /*
1741  * Generic buffer initialization
1742  */
1743 static void
1744 initbuffer(hammer_alist_t live, hammer_fsbuf_head_t head, u_int64_t type)
1745 {
1746         head->buf_type = type;
1747         hammer_alist_init(live);
1748 }
1749
1750 /*
1751  * Calculate the cluster's offset in the volume.  This calculation is
1752  * slightly more complex when using superclusters because superclusters
1753  * are grouped in blocks of 16, followed by 16 x N clusters where N
1754  * is the number of clusters a supercluster can manage.
1755  */
1756 static int64_t
1757 calculate_cluster_offset(hammer_volume_t volume, int32_t clu_no)
1758 {
1759         int32_t scl_group;
1760         int64_t scl_group_size;
1761         int64_t off;
1762
1763         if (volume->vol_flags & HAMMER_VOLF_USINGSUPERCL) {
1764                 scl_group = clu_no / HAMMER_VOL_SUPERCLUSTER_GROUP /
1765                             HAMMER_SCL_MAXCLUSTERS;
1766                 scl_group_size = 
1767                             ((int64_t)HAMMER_BUFSIZE *
1768                              HAMMER_VOL_SUPERCLUSTER_GROUP) +
1769                             ((int64_t)HAMMER_VOL_SUPERCLUSTER_GROUP *
1770                              volume->vol_clsize * HAMMER_SCL_MAXCLUSTERS);
1771                 scl_group_size += 
1772                             HAMMER_VOL_SUPERCLUSTER_GROUP * HAMMER_BUFSIZE;
1773
1774                 off = volume->cluster_base +
1775                       scl_group * scl_group_size +
1776                       (HAMMER_BUFSIZE * HAMMER_VOL_SUPERCLUSTER_GROUP) +
1777                       ((int64_t)clu_no % ((int64_t)HAMMER_SCL_MAXCLUSTERS *
1778                        HAMMER_VOL_SUPERCLUSTER_GROUP)) *
1779                       volume->vol_clsize;
1780         } else {
1781                 off = volume->cluster_base +
1782                       (int64_t)clu_no * volume->vol_clsize;
1783         }
1784         return(off);
1785 }
1786
1787 /*
1788  * Calculate a super-cluster's offset in the volume.
1789  */
1790 static int64_t
1791 calculate_supercl_offset(hammer_volume_t volume, int32_t scl_no)
1792 {
1793         int64_t off;
1794         int32_t scl_group;
1795         int64_t scl_group_size;
1796
1797         KKASSERT (volume->vol_flags & HAMMER_VOLF_USINGSUPERCL);
1798         scl_group = scl_no / HAMMER_VOL_SUPERCLUSTER_GROUP;
1799         if (scl_group) {
1800                 scl_group_size = 
1801                             ((int64_t)HAMMER_BUFSIZE *
1802                              HAMMER_VOL_SUPERCLUSTER_GROUP) +
1803                             ((int64_t)HAMMER_VOL_SUPERCLUSTER_GROUP *
1804                              volume->vol_clsize * HAMMER_SCL_MAXCLUSTERS);
1805                 scl_group_size += 
1806                             HAMMER_VOL_SUPERCLUSTER_GROUP * HAMMER_BUFSIZE;
1807                 off = volume->cluster_base + (scl_group * scl_group_size) +
1808                       (scl_no % HAMMER_VOL_SUPERCLUSTER_GROUP) * HAMMER_BUFSIZE;
1809         } else {
1810                 off = volume->cluster_base + (scl_no * HAMMER_BUFSIZE);
1811         }
1812         return(off);
1813 }
1814
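/*
 * Worked example (sketch): with HAMMER_VOL_SUPERCLUSTER_GROUP == 16
 * (the grouping described above), superclusters 0-15 are laid out back
 * to back at cluster_base, one HAMMER_BUFSIZE each, so supercluster 17
 * lands in scl_group 1 at
 * cluster_base + 1 * scl_group_size + 1 * HAMMER_BUFSIZE.
 */
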
1815 /*
1816  * A-LIST SUPPORT
1817  *
1818  * Setup the parameters for the various A-lists we use in hammer.  The
1819  * supercluster A-list must be chained to the cluster A-list and cluster
1820  * slave A-lists are chained to buffer A-lists.
1821  *
1822  * See hammer_init_alist_config() below.
1823  */
1824
1825 /*
1826  * A-LIST - cluster recursion into a filesystem buffer
1827  */
1828 static int
1829 buffer_alist_init(void *info, int32_t blk, int32_t radix)
1830 {
1831         hammer_cluster_t cluster = info;
1832         hammer_buffer_t buffer;
1833         int32_t buf_no;
1834         int error = 0;
1835
1836         /*
1837          * Calculate the buffer number, initialize based on the buffer type.
1838          * The buffer has already been allocated so assert that it has been
1839          * initialized.
1840          */
1841         buf_no = blk / HAMMER_FSBUF_MAXBLKS;
1842         buffer = hammer_get_buffer(cluster, buf_no, 0, &error);
1843         if (buffer)
1844                 hammer_rel_buffer(buffer, 0);
1845         return (error);
1846 }
1847
1848 static int
1849 buffer_alist_destroy(void *info, int32_t blk, int32_t radix)
1850 {
1851         return (0);
1852 }
1853
1854 static int
1855 buffer_alist_alloc_fwd(void *info, int32_t blk, int32_t radix,
1856                       int32_t count, int32_t atblk, int32_t *fullp)
1857 {
1858         hammer_cluster_t cluster = info;
1859         hammer_buffer_t buffer;
1860         int32_t buf_no;
1861         int32_t r;
1862         int error = 0;
1863
1864         buf_no = blk / HAMMER_FSBUF_MAXBLKS;
1865         buffer = hammer_get_buffer(cluster, buf_no, 0, &error);
1866         if (buffer) {
1867                 KKASSERT(buffer->ondisk->head.buf_type != 0);
1868
1869                 r = hammer_alist_alloc_fwd(&buffer->alist, count, atblk - blk);
1870                 if (r != HAMMER_ALIST_BLOCK_NONE)
1871                         r += blk;
1872                 *fullp = hammer_alist_isfull(&buffer->alist);
1873                 hammer_rel_buffer(buffer, 0);
1874         } else {
1875                 r = HAMMER_ALIST_BLOCK_NONE;
                     *fullp = 0;
1876         }
1877         return(r);
1878 }
1879
1880 static int
1881 buffer_alist_alloc_rev(void *info, int32_t blk, int32_t radix,
1882                       int32_t count, int32_t atblk, int32_t *fullp)
1883 {
1884         hammer_cluster_t cluster = info;
1885         hammer_buffer_t buffer;
1886         int32_t buf_no;
1887         int32_t r;
1888         int error = 0;
1889
1890         buf_no = blk / HAMMER_FSBUF_MAXBLKS;
1891         buffer = hammer_get_buffer(cluster, buf_no, 0, &error);
1892         if (buffer) {
1893                 KKASSERT(buffer->ondisk->head.buf_type != 0);
1894
1895                 r = hammer_alist_alloc_rev(&buffer->alist, count, atblk - blk);
1896                 if (r != HAMMER_ALIST_BLOCK_NONE)
1897                         r += blk;
1898                 *fullp = hammer_alist_isfull(&buffer->alist);
1899                 hammer_rel_buffer(buffer, 0);
1900         } else {
1901                 r = HAMMER_ALIST_BLOCK_NONE;
1902                 *fullp = 0;
1903         }
1904         return(r);
1905 }
1906
1907 static void
1908 buffer_alist_free(void *info, int32_t blk, int32_t radix,
1909                  int32_t base_blk, int32_t count, int32_t *emptyp)
1910 {
1911         hammer_cluster_t cluster = info;
1912         hammer_buffer_t buffer;
1913         int32_t buf_no;
1914         int error = 0;
1915
1916         buf_no = blk / HAMMER_FSBUF_MAXBLKS;
1917         buffer = hammer_get_buffer(cluster, buf_no, 0, &error);
1918         if (buffer) {
1919                 KKASSERT(buffer->ondisk->head.buf_type != 0);
1920                 hammer_alist_free(&buffer->alist, base_blk, count);
1921                 *emptyp = hammer_alist_isempty(&buffer->alist);
1922                 hammer_rel_buffer(buffer, 0);
1923         } else {
1924                 *emptyp = 0;
1925         }
1926 }
1927
1928 static void
1929 buffer_alist_print(void *info, int32_t blk, int32_t radix, int tab)
1930 {
1931 }
1932
1933 /*
1934  * A-LIST - super-cluster recursion into a cluster and cluster recursion
1935  * into a filesystem buffer.  A-Lists are mostly self-contained entities,
1936  * but callbacks must be installed to recurse from one A-List to another.
1937  *
1938  * Implementing these callbacks allows us to operate a multi-layered A-List
1939  * as a single entity.
1940  */
1941 static int
1942 super_alist_init(void *info, int32_t blk, int32_t radix)
1943 {
1944         hammer_volume_t volume = info;
1945         hammer_supercl_t supercl;
1946         int32_t scl_no;
1947         int error = 0;
1948
1949         /*
1950          * Calculate the super-cluster number containing the cluster (blk)
1951          * and obtain the super-cluster buffer.
1952          */
1953         scl_no = blk / HAMMER_SCL_MAXCLUSTERS;
1954         supercl = hammer_get_supercl(volume, scl_no, &error, 1);
1955         if (supercl)
1956                 hammer_rel_supercl(supercl, 0);
1957         return (error);
1958 }
1959
1960 static int
1961 super_alist_destroy(void *info, int32_t blk, int32_t radix)
1962 {
1963         return(0);
1964 }
1965
1966 static int
1967 super_alist_alloc_fwd(void *info, int32_t blk, int32_t radix,
1968                       int32_t count, int32_t atblk, int32_t *fullp)
1969 {
1970         hammer_volume_t volume = info;
1971         hammer_supercl_t supercl;
1972         int32_t scl_no;
1973         int32_t r;
1974         int error = 0;
1975
1976         scl_no = blk / HAMMER_SCL_MAXCLUSTERS;
1977         supercl = hammer_get_supercl(volume, scl_no, &error, 1);
1978         if (supercl) {
1979                 r = hammer_alist_alloc_fwd(&supercl->alist, count, atblk - blk);
1980                 if (r != HAMMER_ALIST_BLOCK_NONE)
1981                         r += blk;
1982                 *fullp = hammer_alist_isfull(&supercl->alist);
1983                 hammer_rel_supercl(supercl, 0);
1984         } else {
1985                 r = HAMMER_ALIST_BLOCK_NONE;
1986                 *fullp = 0;
1987         }
1988         return(r);
1989 }
1990
1991 static int
1992 super_alist_alloc_rev(void *info, int32_t blk, int32_t radix,
1993                       int32_t count, int32_t atblk, int32_t *fullp)
1994 {
1995         hammer_volume_t volume = info;
1996         hammer_supercl_t supercl;
1997         int32_t scl_no;
1998         int32_t r;
1999         int error = 0;
2000
2001         scl_no = blk / HAMMER_SCL_MAXCLUSTERS;
2002         supercl = hammer_get_supercl(volume, scl_no, &error, 1);
2003         if (supercl) {
2004                 r = hammer_alist_alloc_rev(&supercl->alist, count, atblk - blk);
2005                 if (r != HAMMER_ALIST_BLOCK_NONE)
2006                         r += blk;
2007                 *fullp = hammer_alist_isfull(&supercl->alist);
2008                 hammer_rel_supercl(supercl, 0);
2009         } else { 
2010                 r = HAMMER_ALIST_BLOCK_NONE;
2011                 *fullp = 0;
2012         }
2013         return(r);
2014 }
2015
2016 static void
2017 super_alist_free(void *info, int32_t blk, int32_t radix,
2018                  int32_t base_blk, int32_t count, int32_t *emptyp)
2019 {
2020         hammer_volume_t volume = info;
2021         hammer_supercl_t supercl;
2022         int32_t scl_no;
2023         int error = 0;
2024
2025         scl_no = blk / HAMMER_SCL_MAXCLUSTERS;
2026         supercl = hammer_get_supercl(volume, scl_no, &error, 1);
2027         if (supercl) {
2028                 hammer_alist_free(&supercl->alist, base_blk, count);
2029                 *emptyp = hammer_alist_isempty(&supercl->alist);
2030                 hammer_rel_supercl(supercl, 0);
2031         } else {
2032                 *emptyp = 0;
2033         }
2034 }
2035
2036 static void
2037 super_alist_print(void *info, int32_t blk, int32_t radix, int tab)
2038 {
2039 }
2040
2041 void
2042 hammer_init_alist_config(void)
2043 {
2044         hammer_alist_config_t config;
2045
2046         hammer_alist_template(&Buf_alist_config, HAMMER_FSBUF_MAXBLKS,
2047                               1, HAMMER_FSBUF_METAELMS);
2048         hammer_alist_template(&Vol_normal_alist_config, HAMMER_VOL_MAXCLUSTERS,
2049                               1, HAMMER_VOL_METAELMS_1LYR);
2050         hammer_alist_template(&Vol_super_alist_config,
2051                               HAMMER_VOL_MAXSUPERCLUSTERS,
2052                               HAMMER_SCL_MAXCLUSTERS, HAMMER_VOL_METAELMS_2LYR);
2053         hammer_alist_template(&Supercl_alist_config, HAMMER_VOL_MAXCLUSTERS,
2054                               1, HAMMER_SUPERCL_METAELMS);
2055         hammer_alist_template(&Clu_master_alist_config, HAMMER_CLU_MAXBUFFERS,
2056                               1, HAMMER_CLU_MASTER_METAELMS);
2057         hammer_alist_template(&Clu_slave_alist_config, HAMMER_CLU_MAXBUFFERS,
2058                               HAMMER_FSBUF_MAXBLKS, HAMMER_CLU_SLAVE_METAELMS);
2059
2060         config = &Vol_super_alist_config;
2061         config->bl_radix_init = super_alist_init;
2062         config->bl_radix_destroy = super_alist_destroy;
2063         config->bl_radix_alloc_fwd = super_alist_alloc_fwd;
2064         config->bl_radix_alloc_rev = super_alist_alloc_rev;
2065         config->bl_radix_free = super_alist_free;
2066         config->bl_radix_print = super_alist_print;
2067
2068         config = &Clu_slave_alist_config;
2069         config->bl_radix_init = buffer_alist_init;
2070         config->bl_radix_destroy = buffer_alist_destroy;
2071         config->bl_radix_alloc_fwd = buffer_alist_alloc_fwd;
2072         config->bl_radix_alloc_rev = buffer_alist_alloc_rev;
2073         config->bl_radix_free = buffer_alist_free;
2074         config->bl_radix_print = buffer_alist_print;
2075 }
2076
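/*
 * Sketch (not compiled) of how the chained configuration above is
 * exercised: an allocation against a cluster-level A-list that uses
 * Clu_slave_alist_config (presumably cluster->alist_btree and friends)
 * recurses through the bl_radix_alloc_fwd callback installed here into
 * the per-buffer A-list that holds the actual element bitmap.
 */
#if 0
        hammer_alist_t live = &cluster->alist_btree;
        int32_t elm_no;

        elm_no = hammer_alist_alloc_fwd(live, 1, 0);
        /* -> buffer_alist_alloc_fwd() -> buffer->alist bitmap */
#endif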