HAMMER 6/many - memory->disk flush, single-cluster sync to disk, more vnops.
[dragonfly.git] / sys / vfs / hammer / hammer_ondisk.c
1 /*
2  * Copyright (c) 2007 The DragonFly Project.  All rights reserved.
3  * 
4  * This code is derived from software contributed to The DragonFly Project
5  * by Matthew Dillon <dillon@backplane.com>
6  * 
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions
9  * are met:
10  * 
11  * 1. Redistributions of source code must retain the above copyright
12  *    notice, this list of conditions and the following disclaimer.
13  * 2. Redistributions in binary form must reproduce the above copyright
14  *    notice, this list of conditions and the following disclaimer in
15  *    the documentation and/or other materials provided with the
16  *    distribution.
17  * 3. Neither the name of The DragonFly Project nor the names of its
18  *    contributors may be used to endorse or promote products derived
19  *    from this software without specific, prior written permission.
20  * 
21  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
22  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
23  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
24  * FOR A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE
25  * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
26  * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING,
27  * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
28  * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
29  * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
30  * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
31  * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
32  * SUCH DAMAGE.
33  * 
34  * $DragonFly: src/sys/vfs/hammer/hammer_ondisk.c,v 1.6 2007/11/26 05:03:11 dillon Exp $
35  */
36 /*
37  * Manage HAMMER's on-disk structures.  These routines are primarily
38  * responsible for interfacing with the kernel's I/O subsystem and for
39  * managing in-memory structures.
40  */
41
42 #include "hammer.h"
43 #include <sys/fcntl.h>
44 #include <sys/nlookup.h>
45 #include <sys/buf.h>
46 #include <sys/buf2.h>
47
48 static void hammer_free_volume(hammer_volume_t volume);
49 static int hammer_load_volume(hammer_volume_t volume);
50 static int hammer_load_supercl(hammer_supercl_t supercl, int isnew);
51 static int hammer_load_cluster(hammer_cluster_t cluster, int isnew);
52 static int hammer_load_buffer(hammer_buffer_t buffer, u_int64_t buf_type);
53 static void hammer_remove_node_clist(hammer_buffer_t buffer,
54                         hammer_node_t node);
55 static void initbuffer(hammer_alist_t live, hammer_fsbuf_head_t head,
56                         u_int64_t type);
57 static void alloc_new_buffer(hammer_cluster_t cluster,
58                         hammer_alist_t live, u_int64_t type, int32_t nelements,
59                         int32_t start,
60                         int *errorp, struct hammer_buffer **bufferp);
61 #if 0
62 static void readhammerbuf(hammer_volume_t vol, void *data,
63                         int64_t offset);
64 static void writehammerbuf(hammer_volume_t vol, const void *data,
65                         int64_t offset);
66 #endif
67 static int64_t calculate_cluster_offset(hammer_volume_t vol, int32_t clu_no);
68 static int64_t calculate_supercl_offset(hammer_volume_t vol, int32_t scl_no);
69
70 struct hammer_alist_config Buf_alist_config;
71 struct hammer_alist_config Vol_normal_alist_config;
72 struct hammer_alist_config Vol_super_alist_config;
73 struct hammer_alist_config Supercl_alist_config;
74 struct hammer_alist_config Clu_master_alist_config;
75 struct hammer_alist_config Clu_slave_alist_config;
76
77 /*
78  * Red-Black tree support for various structures
79  */
80 static int
81 hammer_ino_rb_compare(hammer_inode_t ip1, hammer_inode_t ip2)
82 {
83         if (ip1->obj_id < ip2->obj_id)
84                 return(-1);
85         if (ip1->obj_id > ip2->obj_id)
86                 return(1);
87         if (ip1->obj_asof < ip2->obj_asof)
88                 return(-1);
89         if (ip1->obj_asof > ip2->obj_asof)
90                 return(1);
91         return(0);
92 }
93
94 static int
95 hammer_inode_info_cmp(hammer_inode_info_t info, hammer_inode_t ip)
96 {
97         if (info->obj_id < ip->obj_id)
98                 return(-1);
99         if (info->obj_id > ip->obj_id)
100                 return(1);
101         if (info->obj_asof < ip->obj_asof)
102                 return(-1);
103         if (info->obj_asof > ip->obj_asof)
104                 return(1);
105         return(0);
106 }
107
108 static int
109 hammer_vol_rb_compare(hammer_volume_t vol1, hammer_volume_t vol2)
110 {
111         if (vol1->vol_no < vol2->vol_no)
112                 return(-1);
113         if (vol1->vol_no > vol2->vol_no)
114                 return(1);
115         return(0);
116 }
117
118 static int
119 hammer_scl_rb_compare(hammer_supercl_t cl1, hammer_supercl_t cl2)
120 {
121         if (cl1->scl_no < cl2->scl_no)
122                 return(-1);
123         if (cl1->scl_no > cl2->scl_no)
124                 return(1);
125         return(0);
126 }
127
128 static int
129 hammer_clu_rb_compare(hammer_cluster_t cl1, hammer_cluster_t cl2)
130 {
131         if (cl1->clu_no < cl2->clu_no)
132                 return(-1);
133         if (cl1->clu_no > cl2->clu_no)
134                 return(1);
135         return(0);
136 }
137
138 static int
139 hammer_buf_rb_compare(hammer_buffer_t buf1, hammer_buffer_t buf2)
140 {
141         if (buf1->buf_no < buf2->buf_no)
142                 return(-1);
143         if (buf1->buf_no > buf2->buf_no)
144                 return(1);
145         return(0);
146 }
147
148 static int
149 hammer_nod_rb_compare(hammer_node_t node1, hammer_node_t node2)
150 {
151         if (node1->node_offset < node2->node_offset)
152                 return(-1);
153         if (node1->node_offset > node2->node_offset)
154                 return(1);
155         return(0);
156 }
157
158 /*
159  * Note: The lookup function for hammer_ino_rb_tree winds up being named
160  * hammer_ino_rb_tree_RB_LOOKUP_INFO(root, info).  The other lookup
161  * functions are normal, e.g. hammer_clu_rb_tree_RB_LOOKUP(root, clu_no).
162  */
163 RB_GENERATE(hammer_ino_rb_tree, hammer_inode, rb_node, hammer_ino_rb_compare);
164 RB_GENERATE_XLOOKUP(hammer_ino_rb_tree, INFO, hammer_inode, rb_node,
165                 hammer_inode_info_cmp, hammer_inode_info_t);
166 RB_GENERATE2(hammer_vol_rb_tree, hammer_volume, rb_node,
167              hammer_vol_rb_compare, int32_t, vol_no);
168 RB_GENERATE2(hammer_scl_rb_tree, hammer_supercl, rb_node,
169              hammer_scl_rb_compare, int32_t, scl_no);
170 RB_GENERATE2(hammer_clu_rb_tree, hammer_cluster, rb_node,
171              hammer_clu_rb_compare, int32_t, clu_no);
172 RB_GENERATE2(hammer_buf_rb_tree, hammer_buffer, rb_node,
173              hammer_buf_rb_compare, int32_t, buf_no);
174 RB_GENERATE2(hammer_nod_rb_tree, hammer_node, rb_node,
175              hammer_nod_rb_compare, int32_t, node_offset);
176
177 /************************************************************************
178  *                              VOLUMES                                 *
179  ************************************************************************
180  *
181  * Load a HAMMER volume by name.  Returns 0 on success or a positive error
182  * code on failure.  Volumes must be loaded at mount time, get_volume() will
183  * not load a new volume.
184  *
185  * Calls made to hammer_load_volume() or single-threaded
186  */
187 int
188 hammer_install_volume(struct hammer_mount *hmp, const char *volname)
189 {
190         struct mount *mp;
191         hammer_volume_t volume;
192         struct hammer_volume_ondisk *ondisk;
193         struct nlookupdata nd;
194         struct buf *bp = NULL;
195         int error;
196         int ronly;
197
198         mp = hmp->mp;
199         ronly = ((mp->mnt_flag & MNT_RDONLY) ? 1 : 0);
200
201         /*
202          * Allocate a volume structure
203          */
204         volume = kmalloc(sizeof(*volume), M_HAMMER, M_WAITOK|M_ZERO);
205         volume->vol_name = kstrdup(volname, M_HAMMER);
206         volume->hmp = hmp;
207         volume->io.type = HAMMER_STRUCTURE_VOLUME;
208         volume->io.offset = 0LL;
209
210         /*
211          * Get the device vnode
212          */
213         error = nlookup_init(&nd, volume->vol_name, UIO_SYSSPACE, NLC_FOLLOW);
214         if (error == 0)
215                 error = nlookup(&nd);
216         if (error == 0)
217                 error = cache_vref(&nd.nl_nch, nd.nl_cred, &volume->devvp);
218         nlookup_done(&nd);
219         if (error == 0) {
220                 vn_isdisk(volume->devvp, &error);
221         }
222         if (error == 0) {
223                 vn_lock(volume->devvp, LK_EXCLUSIVE | LK_RETRY);
224                 error = VOP_OPEN(volume->devvp, (ronly ? FREAD : FREAD|FWRITE),
225                                  FSCRED, NULL);
226                 vn_unlock(volume->devvp);
227         }
228         if (error) {
229                 hammer_free_volume(volume);
230                 return(error);
231         }
232
233         /*
234          * Extract the volume number from the volume header and do various
235          * sanity checks.
236          */
237         error = bread(volume->devvp, 0LL, HAMMER_BUFSIZE, &bp);
238         if (error)
239                 goto late_failure;
240         ondisk = (void *)bp->b_data;
241         if (ondisk->head.buf_type != HAMMER_FSBUF_VOLUME) {
242                 kprintf("hammer_mount: volume %s has an invalid header\n",
243                         volume->vol_name);
244                 error = EFTYPE;
245                 goto late_failure;
246         }
247         volume->vol_no = ondisk->vol_no;
248         volume->cluster_base = ondisk->vol_clo_beg;
249         volume->vol_clsize = ondisk->vol_clsize;
250         volume->vol_flags = ondisk->vol_flags;
251         RB_INIT(&volume->rb_clus_root);
252         RB_INIT(&volume->rb_scls_root);
253
254         if (RB_EMPTY(&hmp->rb_vols_root)) {
255                 hmp->fsid = ondisk->vol_fsid;
256         } else if (bcmp(&hmp->fsid, &ondisk->vol_fsid, sizeof(uuid_t))) {
257                 kprintf("hammer_mount: volume %s's fsid does not match "
258                         "other volumes\n", volume->vol_name);
259                 error = EFTYPE;
260                 goto late_failure;
261         }
262
263         /*
264          * Insert the volume structure into the red-black tree.
265          */
266         if (RB_INSERT(hammer_vol_rb_tree, &hmp->rb_vols_root, volume)) {
267                 kprintf("hammer_mount: volume %s has a duplicate vol_no %d\n",
268                         volume->vol_name, volume->vol_no);
269                 error = EEXIST;
270         }
271
272         /*
273          * Set the root volume and load the root cluster.  HAMMER special
274          * cases rootvol and rootcl and will not deallocate the structures.
275          * We do not hold a ref because this would prevent related I/O
276          * from being flushed.
277          */
278         if (error == 0 && ondisk->vol_rootvol == ondisk->vol_no) {
279                 hmp->rootvol = volume;
280                 hmp->rootcl = hammer_get_cluster(volume,
281                                                  ondisk->vol0_root_clu_no,
282                                                  &error, 0);
283                 hammer_rel_cluster(hmp->rootcl, 0);
284                 hmp->fsid_udev = dev2udev(vn_todev(volume->devvp));
285         }
286 late_failure:
287         if (bp)
288                 brelse(bp);
289         if (error) {
290                 /*vinvalbuf(volume->devvp, V_SAVE, 0, 0);*/
291                 VOP_CLOSE(volume->devvp, ronly ? FREAD : FREAD|FWRITE);
292                 hammer_free_volume(volume);
293         }
294         return (error);
295 }
296
297 /*
298  * Unload and free a HAMMER volume.  Must return >= 0 to continue scan
299  * so returns -1 on failure.
300  */
301 int
302 hammer_unload_volume(hammer_volume_t volume, void *data __unused)
303 {
304         struct hammer_mount *hmp = volume->hmp;
305         hammer_cluster_t rootcl;
306         int ronly = ((hmp->mp->mnt_flag & MNT_RDONLY) ? 1 : 0);
307
308         /*
309          * Sync clusters, sync volume
310          */
311
312
313         /*
314          * Clean up the root cluster, which is held unlocked in the root
315          * volume.
316          */
317         if (hmp->rootvol == volume) {
318                 if ((rootcl = hmp->rootcl) != NULL)
319                         hmp->rootcl = NULL;
320                 hmp->rootvol = NULL;
321         }
322
323         /*
324          * Unload clusters and super-clusters.  Unloading a super-cluster
325          * also unloads related clusters, but the filesystem may not be
326          * using super-clusters so unload clusters anyway.
327          */
328         RB_SCAN(hammer_clu_rb_tree, &volume->rb_clus_root, NULL,
329                         hammer_unload_cluster, NULL);
330         RB_SCAN(hammer_scl_rb_tree, &volume->rb_scls_root, NULL,
331                         hammer_unload_supercl, NULL);
332
333         /*
334          * Release our buffer and flush anything left in the buffer cache.
335          */
336         hammer_io_release(&volume->io, 1);
337
338         /*
339          * There should be no references on the volume.
340          */
341         KKASSERT(volume->io.lock.refs == 0);
342
343         volume->ondisk = NULL;
344         if (volume->devvp) {
345                 if (ronly) {
346                         vinvalbuf(volume->devvp, 0, 0, 0);
347                         VOP_CLOSE(volume->devvp, FREAD);
348                 } else {
349                         vinvalbuf(volume->devvp, V_SAVE, 0, 0);
350                         VOP_CLOSE(volume->devvp, FREAD|FWRITE);
351                 }
352         }
353
354         /*
355          * Destroy the structure
356          */
357         RB_REMOVE(hammer_vol_rb_tree, &hmp->rb_vols_root, volume);
358         hammer_free_volume(volume);
359         return(0);
360 }
361
362 static
363 void
364 hammer_free_volume(hammer_volume_t volume)
365 {
366         if (volume->vol_name) {
367                 kfree(volume->vol_name, M_HAMMER);
368                 volume->vol_name = NULL;
369         }
370         if (volume->devvp) {
371                 vrele(volume->devvp);
372                 volume->devvp = NULL;
373         }
374         kfree(volume, M_HAMMER);
375 }
376
377 /*
378  * Get a HAMMER volume.  The volume must already exist.
379  */
380 hammer_volume_t
381 hammer_get_volume(struct hammer_mount *hmp, int32_t vol_no, int *errorp)
382 {
383         struct hammer_volume *volume;
384
385         /*
386          * Locate the volume structure
387          */
388         volume = RB_LOOKUP(hammer_vol_rb_tree, &hmp->rb_vols_root, vol_no);
389         if (volume == NULL) {
390                 *errorp = ENOENT;
391                 return(NULL);
392         }
393         hammer_ref(&volume->io.lock);
394
395         /*
396          * Deal with on-disk info
397          */
398         if (volume->ondisk == NULL) {
399                 *errorp = hammer_load_volume(volume);
400                 if (*errorp) {
401                         hammer_rel_volume(volume, 1);
402                         volume = NULL;
403                 }
404         } else {
405                 *errorp = 0;
406         }
407         return(volume);
408 }
409
410 hammer_volume_t
411 hammer_get_root_volume(struct hammer_mount *hmp, int *errorp)
412 {
413         hammer_volume_t volume;
414
415         volume = hmp->rootvol;
416         KKASSERT(volume != NULL);
417         hammer_ref(&volume->io.lock);
418
419         /*
420          * Deal with on-disk info
421          */
422         if (volume->ondisk == NULL) {
423                 *errorp = hammer_load_volume(volume);
424                 if (*errorp) {
425                         hammer_rel_volume(volume, 1);
426                         volume = NULL;
427                 }
428         } else {
429                 *errorp = 0;
430         }
431         return (volume);
432 }
433
434 /*
435  * Load a volume's on-disk information.  The volume must be referenced and
436  * not locked.  We temporarily acquire an exclusive lock to interlock
437  * against releases or multiple get's.
438  */
439 static int
440 hammer_load_volume(hammer_volume_t volume)
441 {
442         struct hammer_volume_ondisk *ondisk;
443         int error;
444
445         hammer_lock_ex(&volume->io.lock);
446         if (volume->ondisk == NULL) {
447                 error = hammer_io_read(volume->devvp, &volume->io);
448                 if (error) {
449                         hammer_unlock(&volume->io.lock);
450                         return (error);
451                 }
452                 volume->ondisk = ondisk = (void *)volume->io.bp->b_data;
453
454                 /*
455                  * Configure the volume's A-lists.  These are used to
456                  * allocate clusters.
457                  */
458                 if (volume->vol_flags & HAMMER_VOLF_USINGSUPERCL) {
459                         volume->alist.config = &Vol_super_alist_config;
460                         volume->alist.meta = ondisk->vol_almeta.super;
461                         volume->alist.info = volume;
462                 } else {
463                         volume->alist.config = &Vol_normal_alist_config;
464                         volume->alist.meta = ondisk->vol_almeta.normal;
465                         volume->alist.info = NULL;
466                 }
467                 hammer_alist_init(&volume->alist);
468         } else {
469                 error = 0;
470         }
471         hammer_unlock(&volume->io.lock);
472         return(0);
473 }
474
475 /*
476  * Release a volume.  Call hammer_io_release on the last reference.  We have
477  * to acquire an exclusive lock to interlock against volume->ondisk tests
478  * in hammer_load_volume().
479  */
480 void
481 hammer_rel_volume(hammer_volume_t volume, int flush)
482 {
483         if (hammer_islastref(&volume->io.lock)) {
484                 hammer_lock_ex(&volume->io.lock);
485                 if (hammer_islastref(&volume->io.lock)) {
486                         volume->ondisk = NULL;
487                         hammer_io_release(&volume->io, flush);
488                 }
489                 hammer_unlock(&volume->io.lock);
490         }
491         hammer_unref(&volume->io.lock);
492 }
493
494 /************************************************************************
495  *                              SUPER-CLUSTERS                          *
496  ************************************************************************
497  *
498  * Manage super-clusters.  Note that a supercl holds a reference to its
499  * associated volume.
500  */
/*
 * Get a referenced super-cluster, instantiating the in-memory structure
 * if necessary.  Returns NULL with *errorp set on failure.  isnew != 0
 * indicates the caller is creating the super-cluster rather than reading
 * an existing one from disk.
 */
hammer_supercl_t
hammer_get_supercl(hammer_volume_t volume, int32_t scl_no,
		   int *errorp, int isnew)
{
	hammer_supercl_t supercl;

	/*
	 * Locate and lock the super-cluster structure, creating one
	 * if necessary.
	 */
again:
	supercl = RB_LOOKUP(hammer_scl_rb_tree, &volume->rb_scls_root, scl_no);
	if (supercl == NULL) {
		supercl = kmalloc(sizeof(*supercl), M_HAMMER, M_WAITOK|M_ZERO);
		supercl->scl_no = scl_no;
		supercl->volume = volume;
		supercl->io.offset = calculate_supercl_offset(volume, scl_no);
		supercl->io.type = HAMMER_STRUCTURE_SUPERCL;
		hammer_ref(&supercl->io.lock);

		/*
		 * Insert the cluster into the RB tree and handle late
		 * collisions.  If another thread beat us to the insert
		 * (kmalloc can block), discard our copy and retry.
		 */
		if (RB_INSERT(hammer_scl_rb_tree, &volume->rb_scls_root, supercl)) {
			hammer_unref(&supercl->io.lock);
			kfree(supercl, M_HAMMER);
			goto again;
		}
		/* each supercl holds a reference on its volume */
		hammer_ref(&volume->io.lock);
	} else {
		hammer_ref(&supercl->io.lock);
	}

	/*
	 * Deal with on-disk info.  A new super-cluster is always pushed
	 * through hammer_load_supercl() so it gets initialized.
	 */
	if (supercl->ondisk == NULL || isnew) {
		*errorp = hammer_load_supercl(supercl, isnew);
		if (*errorp) {
			hammer_rel_supercl(supercl, 1);
			supercl = NULL;
		}
	} else {
		*errorp = 0;
	}
	return(supercl);
}
549
/*
 * Load a super-cluster's on-disk info and set up its A-list.  Called with
 * the supercl referenced; an exclusive lock interlocks concurrent loads
 * and releases.  Returns 0 on success or an errno.
 */
static int
hammer_load_supercl(hammer_supercl_t supercl, int isnew)
{
	struct hammer_supercl_ondisk *ondisk;
	hammer_volume_t volume = supercl->volume;
	int error;

	hammer_lock_ex(&supercl->io.lock);
	if (supercl->ondisk == NULL) {
		/* a new supercl gets a fresh buffer instead of a disk read */
		if (isnew)
			error = hammer_io_new(volume->devvp, &supercl->io);
		else
			error = hammer_io_read(volume->devvp, &supercl->io);
		if (error) {
			hammer_unlock(&supercl->io.lock);
			return (error);
		}
		supercl->ondisk = ondisk = (void *)supercl->io.bp->b_data;

		supercl->alist.config = &Supercl_alist_config;
		supercl->alist.meta = ondisk->scl_meta;
		supercl->alist.info = NULL;

		/*
		 * If this is a new super-cluster we have to initialize
		 * various ondisk structural elements.  The caller is
		 * responsible for the remainder.
		 */
		if (isnew) {
			struct hammer_alist_live dummy;

			dummy.config = &Buf_alist_config;
			dummy.meta = ondisk->head.buf_almeta;
			dummy.info = NULL;
			initbuffer(&dummy, &ondisk->head, HAMMER_FSBUF_SUPERCL);
			hammer_alist_init(&supercl->alist);
		}
	} else if (isnew) {
		/* ondisk already resident; just reinstantiate a new buffer */
		error = hammer_io_new(volume->devvp, &supercl->io);
	} else {
		error = 0;
	}
	hammer_unlock(&supercl->io.lock);
	return (error);
}
595
596 /*
597  * NOTE: Called from RB_SCAN, must return >= 0 for scan to continue.
598  */
599 int
600 hammer_unload_supercl(hammer_supercl_t supercl, void *data __unused)
601 {
602         KKASSERT(supercl->io.lock.refs == 0);
603         hammer_ref(&supercl->io.lock);
604         hammer_io_release(&supercl->io, 1);
605         hammer_rel_supercl(supercl, 1);
606         return(0);
607 }
608
609 /*
610  * Release a super-cluster.  We have to deal with several places where
611  * another thread can ref the super-cluster.
612  *
613  * Only destroy the structure itself if the related buffer cache buffer
614  * was disassociated from it.  This ties the management of the structure
615  * to the buffer cache subsystem.
616  */
617 void
618 hammer_rel_supercl(hammer_supercl_t supercl, int flush)
619 {
620         hammer_volume_t volume;
621
622         if (hammer_islastref(&supercl->io.lock)) {
623                 hammer_lock_ex(&supercl->io.lock);
624                 if (hammer_islastref(&supercl->io.lock)) {
625                         supercl->ondisk = NULL;
626                         hammer_io_release(&supercl->io, flush);
627                         if (supercl->io.bp == NULL &&
628                             hammer_islastref(&supercl->io.lock)) {
629                                 volume = supercl->volume;
630                                 RB_REMOVE(hammer_scl_rb_tree,
631                                           &volume->rb_scls_root, supercl);
632                                 supercl->volume = NULL; /* sanity */
633                                 kfree(supercl, M_HAMMER);
634                                 hammer_rel_volume(volume, 0);
635                                 return;
636                         }
637                 }
638                 hammer_unlock(&supercl->io.lock);
639         }
640         hammer_unref(&supercl->io.lock);
641 }
642
643 /************************************************************************
644  *                              CLUSTERS                                *
645  ************************************************************************
646  *
647  * Manage clusters.  Note that a cluster holds a reference to its
648  * associated volume.
649  */
/*
 * Get a referenced cluster, instantiating the in-memory structure if
 * necessary.  Returns NULL with *errorp set on failure.  isnew != 0
 * indicates the caller is creating the cluster rather than reading an
 * existing one from disk.
 */
hammer_cluster_t
hammer_get_cluster(hammer_volume_t volume, int32_t clu_no,
		   int *errorp, int isnew)
{
	hammer_cluster_t cluster;

again:
	cluster = RB_LOOKUP(hammer_clu_rb_tree, &volume->rb_clus_root, clu_no);
	if (cluster == NULL) {
		cluster = kmalloc(sizeof(*cluster), M_HAMMER, M_WAITOK|M_ZERO);
		cluster->clu_no = clu_no;
		cluster->volume = volume;
		cluster->io.offset = calculate_cluster_offset(volume, clu_no);
		RB_INIT(&cluster->rb_bufs_root);
		RB_INIT(&cluster->rb_nods_root);
		cluster->io.type = HAMMER_STRUCTURE_CLUSTER;
		hammer_ref(&cluster->io.lock);

		/*
		 * Insert the cluster into the RB tree and handle late
		 * collisions.  If another thread beat us to the insert
		 * (kmalloc can block), discard our copy and retry.
		 */
		if (RB_INSERT(hammer_clu_rb_tree, &volume->rb_clus_root, cluster)) {
			hammer_unref(&cluster->io.lock);
			kfree(cluster, M_HAMMER);
			goto again;
		}
		/* each cluster holds a reference on its volume */
		hammer_ref(&volume->io.lock);
	} else {
		hammer_ref(&cluster->io.lock);
	}

	/*
	 * Deal with on-disk info.  A new cluster is always pushed through
	 * hammer_load_cluster() so it gets initialized.
	 */
	if (cluster->ondisk == NULL || isnew) {
		*errorp = hammer_load_cluster(cluster, isnew);
		if (*errorp) {
			hammer_rel_cluster(cluster, 1);
			cluster = NULL;
		}
	} else {
		*errorp = 0;
	}
	return (cluster);
}
696
697 hammer_cluster_t
698 hammer_get_root_cluster(struct hammer_mount *hmp, int *errorp)
699 {
700         hammer_cluster_t cluster;
701
702         cluster = hmp->rootcl;
703         KKASSERT(cluster != NULL);
704         hammer_ref(&cluster->io.lock);
705
706         /*
707          * Deal with on-disk info
708          */
709         if (cluster->ondisk == NULL) {
710                 *errorp = hammer_load_cluster(cluster, 0);
711                 if (*errorp) {
712                         hammer_rel_cluster(cluster, 1);
713                         cluster = NULL;
714                 }
715         } else {
716                 *errorp = 0;
717         }
718         return (cluster);
719 }
720
/*
 * Load a cluster's on-disk info and set up its A-lists.  Called with the
 * cluster referenced; an exclusive lock interlocks concurrent loads and
 * releases.  Returns 0 on success or an errno.
 */
static
int
hammer_load_cluster(hammer_cluster_t cluster, int isnew)
{
	hammer_volume_t volume = cluster->volume;
	struct hammer_cluster_ondisk *ondisk;
	int error;

	/*
	 * Load the cluster's on-disk info
	 */
	hammer_lock_ex(&cluster->io.lock);
	if (cluster->ondisk == NULL) {
		/* a new cluster gets a fresh buffer instead of a disk read */
		if (isnew)
			error = hammer_io_new(volume->devvp, &cluster->io);
		else
			error = hammer_io_read(volume->devvp, &cluster->io);
		if (error) {
			hammer_unlock(&cluster->io.lock);
			return (error);
		}
		cluster->ondisk = ondisk = (void *)cluster->io.bp->b_data;

		/*
		 * Hook up the master A-list plus the three slave A-lists
		 * (B-Tree nodes, records, misc data), all backed by the
		 * ondisk metadata.
		 */
		cluster->alist_master.config = &Clu_master_alist_config;
		cluster->alist_master.meta = ondisk->clu_master_meta;
		cluster->alist_btree.config = &Clu_slave_alist_config;
		cluster->alist_btree.meta = ondisk->clu_btree_meta;
		cluster->alist_btree.info = cluster;
		cluster->alist_record.config = &Clu_slave_alist_config;
		cluster->alist_record.meta = ondisk->clu_record_meta;
		cluster->alist_record.info = cluster;
		cluster->alist_mdata.config = &Clu_slave_alist_config;
		cluster->alist_mdata.meta = ondisk->clu_mdata_meta;
		cluster->alist_mdata.info = cluster;

		/* cache the cluster's B-Tree key range in the structure */
		cluster->clu_btree_beg = ondisk->clu_btree_beg;
		cluster->clu_btree_end = ondisk->clu_btree_end;

		/*
		 * If this is a new cluster we have to initialize
		 * various ondisk structural elements.  The caller is
		 * responsible for the remainder.
		 */
		if (isnew) {
			struct hammer_alist_live dummy;

			dummy.config = &Buf_alist_config;
			dummy.meta = ondisk->head.buf_almeta;
			dummy.info = NULL;
			initbuffer(&dummy, &ondisk->head, HAMMER_FSBUF_CLUSTER);

			hammer_alist_init(&cluster->alist_master);
			hammer_alist_init(&cluster->alist_btree);
			hammer_alist_init(&cluster->alist_record);
			hammer_alist_init(&cluster->alist_mdata);
		}
	} else if (isnew) {
		/* ondisk already resident; just reinstantiate a new buffer */
		error = hammer_io_new(volume->devvp, &cluster->io);
	} else {
		error = 0;
	}
	hammer_unlock(&cluster->io.lock);
	return (error);
}
785
786 /*
787  * NOTE: Called from RB_SCAN, must return >= 0 for scan to continue.
788  */
789 int
790 hammer_unload_cluster(hammer_cluster_t cluster, void *data __unused)
791 {
792         hammer_ref(&cluster->io.lock);
793         RB_SCAN(hammer_buf_rb_tree, &cluster->rb_bufs_root, NULL,
794                         hammer_unload_buffer, NULL);
795         KKASSERT(cluster->io.lock.refs == 1);
796         hammer_io_release(&cluster->io, 1);
797         hammer_rel_cluster(cluster, 1);
798         return(0);
799 }
800
801 /*
802  * Reference a cluster that is either already referenced or via a specially
803  * handled pointer (aka rootcl).
804  */
805 int
806 hammer_ref_cluster(hammer_cluster_t cluster)
807 {
808         int error;
809
810         KKASSERT(cluster != NULL);
811         hammer_ref(&cluster->io.lock);
812
813         /*
814          * Deal with on-disk info
815          */
816         if (cluster->ondisk == NULL) {
817                 error = hammer_load_cluster(cluster, 0);
818                 if (error)
819                         hammer_rel_cluster(cluster, 1);
820         } else {
821                 error = 0;
822         }
823         return(error);
824 }
825
826 /*
827  * Release a cluster.  We have to deal with several places where
828  * another thread can ref the cluster.
829  *
830  * Only destroy the structure itself if the related buffer cache buffer
831  * was disassociated from it.  This ties the management of the structure
832  * to the buffer cache subsystem.
833  */
834 void
835 hammer_rel_cluster(hammer_cluster_t cluster, int flush)
836 {
837         hammer_node_t node;
838         hammer_volume_t volume;
839
840         if (hammer_islastref(&cluster->io.lock)) {
841                 hammer_lock_ex(&cluster->io.lock);
842                 if (hammer_islastref(&cluster->io.lock)) {
843                         cluster->ondisk = NULL;
844                         hammer_io_release(&cluster->io, flush);
845
846                         /*
847                          * Clean out the B-Tree node cache, if any, then
848                          * clean up the volume ref and free the cluster.
849                          *
850                          * If the cluster acquires a new reference while we
851                          * are trying to clean it out, abort the cleaning.
852                          *
853                          * There really shouldn't be any nodes at this point
854                          * but we allow a node with no buffer association
855                          * so handle the case.
856                          */
857                         while (cluster->io.bp == NULL &&
858                                hammer_islastref(&cluster->io.lock) &&
859                                (node = RB_ROOT(&cluster->rb_nods_root)) != NULL
860                         ) {
861                                 KKASSERT(node->lock.refs == 0);
862                                 hammer_flush_node(node);
863                         }
864                         if (cluster->io.bp == NULL &&
865                             hammer_islastref(&cluster->io.lock)) {
866                                 volume = cluster->volume;
867                                 RB_REMOVE(hammer_clu_rb_tree,
868                                           &volume->rb_clus_root, cluster);
869                                 cluster->volume = NULL; /* sanity */
870                                 kfree(cluster, M_HAMMER);
871                                 hammer_rel_volume(volume, 0);
872                                 return;
873                         }
874                 }
875                 hammer_unlock(&cluster->io.lock);
876         }
877         hammer_unref(&cluster->io.lock);
878 }
879
880 /************************************************************************
881  *                              BUFFERS                                 *
882  ************************************************************************
883  *
884  * Manage buffers.  Note that a buffer holds a reference to its associated
885  * cluster, and its cluster will hold a reference to the cluster's volume.
886  *
887  * A non-zero buf_type indicates that a new buffer should be created and
888  * zero'd.
889  */
890 hammer_buffer_t
891 hammer_get_buffer(hammer_cluster_t cluster, int32_t buf_no,
892                   u_int64_t buf_type, int *errorp)
893 {
894         hammer_buffer_t buffer;
895
896         /*
897          * Find the buffer.  Note that buffer 0 corresponds to the cluster
898          * header and should never be requested.
899          */
900         KKASSERT(buf_no != 0);
901
902         /*
903          * Locate and lock the buffer structure, creating one if necessary.
904          */
905 again:
906         buffer = RB_LOOKUP(hammer_buf_rb_tree, &cluster->rb_bufs_root, buf_no);
907         if (buffer == NULL) {
908                 buffer = kmalloc(sizeof(*cluster), M_HAMMER, M_WAITOK|M_ZERO);
909                 buffer->buf_no = buf_no;
910                 buffer->cluster = cluster;
911                 buffer->volume = cluster->volume;
912                 buffer->io.offset = cluster->io.offset +
913                                     (buf_no * HAMMER_BUFSIZE);
914                 buffer->io.type = HAMMER_STRUCTURE_BUFFER;
915                 TAILQ_INIT(&buffer->clist);
916                 hammer_ref(&buffer->io.lock);
917
918                 /*
919                  * Insert the cluster into the RB tree and handle late
920                  * collisions.
921                  */
922                 if (RB_INSERT(hammer_buf_rb_tree, &cluster->rb_bufs_root, buffer)) {
923                         hammer_unref(&buffer->io.lock);
924                         kfree(buffer, M_HAMMER);
925                         goto again;
926                 }
927                 hammer_ref(&cluster->io.lock);
928         } else {
929                 hammer_ref(&buffer->io.lock);
930         }
931
932         /*
933          * Deal with on-disk info
934          */
935         if (buffer->ondisk == NULL || buf_type) {
936                 *errorp = hammer_load_buffer(buffer, buf_type);
937                 if (*errorp) {
938                         hammer_rel_buffer(buffer, 1);
939                         buffer = NULL;
940                 }
941         } else {
942                 *errorp = 0;
943         }
944         return(buffer);
945 }
946
/*
 * Instantiate a buffer's on-disk image and embedded A-list.  A non-zero
 * buf_type obtains a new device buffer and initializes it to that type;
 * a zero buf_type reads the existing image from the media.
 *
 * Called with the buffer referenced; returns 0 on success or an errno.
 */
static int
hammer_load_buffer(hammer_buffer_t buffer, u_int64_t buf_type)
{
	hammer_volume_t volume;
	hammer_fsbuf_ondisk_t ondisk;
	int error;

	/*
	 * Load the buffer's on-disk info
	 */
	volume = buffer->volume;
	hammer_lock_ex(&buffer->io.lock);
	if (buffer->ondisk == NULL) {
		if (buf_type) {
			error = hammer_io_new(volume->devvp, &buffer->io);
		} else {
			error = hammer_io_read(volume->devvp, &buffer->io);
		}
		if (error) {
			hammer_unlock(&buffer->io.lock);
			return (error);
		}
		/* ondisk points directly into the buffer cache data */
		buffer->ondisk = ondisk = (void *)buffer->io.bp->b_data;
		buffer->alist.config = &Buf_alist_config;
		buffer->alist.meta = ondisk->head.buf_almeta;

		if (buf_type) {
			initbuffer(&buffer->alist, &ondisk->head, buf_type);
		}
		buffer->buf_type = ondisk->head.buf_type;
	} else if (buf_type) {
		/* already resident; buf_type forces a new device buffer */
		error = hammer_io_new(volume->devvp, &buffer->io);
	} else {
		error = 0;
	}
	hammer_unlock(&buffer->io.lock);
	return (error);
}
985
986 /*
987  * NOTE: Called from RB_SCAN, must return >= 0 for scan to continue.
988  */
989 int
990 hammer_unload_buffer(hammer_buffer_t buffer, void *data __unused)
991 {
992         hammer_ref(&buffer->io.lock);
993         hammer_flush_buffer_nodes(buffer);
994         hammer_io_release(&buffer->io, 1);
995         KKASSERT(buffer->io.lock.refs == 1);
996         hammer_rel_buffer(buffer, 1);
997         return(0);
998 }
999
1000 /*
1001  * Reference a buffer that is either already referenced or via a specially
1002  * handled pointer (aka cursor->buffer).
1003  */
1004 int
1005 hammer_ref_buffer(hammer_buffer_t buffer)
1006 {
1007         int error;
1008
1009         hammer_ref(&buffer->io.lock);
1010         if (buffer->ondisk == NULL) {
1011                 error = hammer_load_buffer(buffer, 0);
1012                 if (error) {
1013                         hammer_rel_buffer(buffer, 1);
1014                         /*
1015                          * NOTE: buffer pointer can become stale after
1016                          * the above release.
1017                          */
1018                 } else {
1019                         KKASSERT(buffer->buf_type ==
1020                                  buffer->ondisk->head.buf_type);
1021                 }
1022         } else {
1023                 error = 0;
1024         }
1025         return(error);
1026 }
1027
1028 /*
1029  * Release a buffer.  We have to deal with several places where
1030  * another thread can ref the buffer.
1031  *
1032  * Only destroy the structure itself if the related buffer cache buffer
1033  * was disassociated from it.  This ties the management of the structure
1034  * to the buffer cache subsystem.  buffer->ondisk determines whether the
1035  * embedded io is referenced or not.
1036  */
1037 void
1038 hammer_rel_buffer(hammer_buffer_t buffer, int flush)
1039 {
1040         hammer_cluster_t cluster;
1041         hammer_node_t node;
1042
1043         if (hammer_islastref(&buffer->io.lock)) {
1044                 hammer_lock_ex(&buffer->io.lock);
1045                 if (hammer_islastref(&buffer->io.lock)) {
1046                         buffer->ondisk = NULL;
1047                         hammer_io_release(&buffer->io, flush);
1048
1049                         /*
1050                          * Clean out the B-Tree node cache, if any, then
1051                          * clean up the cluster ref and free the buffer.
1052                          *
1053                          * If the buffer acquires a new reference while we
1054                          * are trying to clean it out, abort the cleaning.
1055                          */
1056                         while (buffer->io.bp == NULL &&
1057                                hammer_islastref(&buffer->io.lock) &&
1058                                (node = TAILQ_FIRST(&buffer->clist)) != NULL
1059                         ) {
1060                                 KKASSERT(node->lock.refs == 0);
1061                                 hammer_flush_node(node);
1062                         }
1063                         if (buffer->io.bp == NULL &&
1064                             hammer_islastref(&buffer->io.lock)) {
1065                                 cluster = buffer->cluster;
1066                                 RB_REMOVE(hammer_buf_rb_tree,
1067                                           &cluster->rb_bufs_root, buffer);
1068                                 buffer->cluster = NULL; /* sanity */
1069                                 kfree(buffer, M_HAMMER);
1070                                 hammer_rel_cluster(cluster, 0);
1071                                 return;
1072                         }
1073                 }
1074                 hammer_unlock(&buffer->io.lock);
1075         }
1076         hammer_unref(&buffer->io.lock);
1077 }
1078
1079 /*
1080  * Flush passively cached B-Tree nodes associated with this buffer.
1081  *
1082  * NOTE: The buffer is referenced and locked.
1083  */
1084 void
1085 hammer_flush_buffer_nodes(hammer_buffer_t buffer)
1086 {
1087         hammer_node_t node;
1088
1089         node = TAILQ_FIRST(&buffer->clist);
1090         while (node) {
1091                 buffer->save_scan = TAILQ_NEXT(node, entry);
1092                 if (node->lock.refs == 0)
1093                         hammer_flush_node(node);
1094                 node = buffer->save_scan;
1095         }
1096 }
1097
1098 /************************************************************************
1099  *                              NODES                                   *
1100  ************************************************************************
1101  *
1102  * Manage B-Tree nodes.  B-Tree nodes represent the primary indexing
1103  * method used by the HAMMER filesystem.
1104  *
1105  * Unlike other HAMMER structures, a hammer_node can be PASSIVELY
1106  * associated with its buffer.  It can have an active buffer reference
1107  * even when the node itself has no references.  The node also passively
1108  * associates itself with its cluster without holding any cluster refs.
1109  * The cluster ref is indirectly maintained by the active buffer ref when
1110  * a node is acquired.
1111  *
1112  * A hammer_node can also be passively associated with other HAMMER
1113  * structures, such as inodes, while retaining 0 references.  These
1114  * associations can be cleared backwards using a pointer-to-pointer in
1115  * the hammer_node.
1116  *
1117  * This allows the HAMMER implementation to cache hammer_node's long-term
1118  * and short-cut a great deal of the infrastructure's complexity.  In
1119  * most cases a cached node can be reacquired without having to dip into
1120  * either the buffer or cluster management code.
1121  *
1122  * The caller must pass a referenced cluster on call and will retain
1123  * ownership of the reference on return.  The node will acquire its own
1124  * additional references, if necessary.
1125  */
/*
 * Lookup or create the hammer_node at the given cluster-relative offset
 * and reference it.  Returns NULL with *errorp set on failure.
 */
hammer_node_t
hammer_get_node(hammer_cluster_t cluster, int32_t node_offset, int *errorp)
{
	hammer_node_t node;

	/*
	 * Locate the structure, allocating one if necessary.
	 */
again:
	node = RB_LOOKUP(hammer_nod_rb_tree, &cluster->rb_nods_root,
			 node_offset);
	if (node == NULL) {
		node = kmalloc(sizeof(*node), M_HAMMER, M_WAITOK|M_ZERO);
		node->node_offset = node_offset;
		node->cluster = cluster;
		/*
		 * If another thread inserted the same offset while we
		 * allocated, discard our copy and retry the lookup.
		 */
		if (RB_INSERT(hammer_nod_rb_tree, &cluster->rb_nods_root,
			      node)) {
			kfree(node, M_HAMMER);
			goto again;
		}
	}
	*errorp = hammer_ref_node(node);
	if (*errorp) {
		/*
		 * NOTE: The node pointer may be stale on error return.
		 * In fact, its probably been destroyed.
		 */
		node = NULL;
	}
	return(node);
}
1157
1158 /*
1159  * Reference the node to prevent disassociations, then associate and
1160  * load the related buffer.  This routine can also be called to reference
1161  * a node from a cache pointer.
1162  *
1163  * NOTE: Because the caller does not have a ref on the node, the caller's
1164  * node pointer will be stale if an error is returned.  We may also wind
1165  * up clearing the related cache pointers.
1166  *
1167  * NOTE: The cluster is indirectly referenced by our buffer ref.
1168  */
1169 int
1170 hammer_ref_node(hammer_node_t node)
1171 {
1172         hammer_buffer_t buffer;
1173         int32_t buf_no;
1174         int error;
1175
1176         hammer_ref(&node->lock);
1177         error = 0;
1178         if (node->ondisk == NULL) {
1179                 hammer_lock_ex(&node->lock);
1180                 if (node->ondisk == NULL) {
1181                         /*
1182                          * This is a little confusing but the jist is that
1183                          * node->buffer determines whether the node is on
1184                          * the buffer's clist and node->ondisk determines
1185                          * whether the buffer is referenced.
1186                          */
1187                         if ((buffer = node->buffer) != NULL) {
1188                                 error = hammer_ref_buffer(buffer);
1189                         } else {
1190                                 buf_no = node->node_offset / HAMMER_BUFSIZE;
1191                                 buffer = hammer_get_buffer(node->cluster,
1192                                                            buf_no, 0, &error);
1193                                 if (buffer) {
1194                                         KKASSERT(error == 0);
1195                                         TAILQ_INSERT_TAIL(&buffer->clist,
1196                                                           node, entry);
1197                                         node->buffer = buffer;
1198                                 }
1199                         }
1200                         if (error == 0) {
1201                                 node->ondisk = (void *)((char *)buffer->ondisk +
1202                                        (node->node_offset & HAMMER_BUFMASK));
1203                         }
1204                 }
1205                 hammer_unlock(&node->lock);
1206         }
1207         if (error)
1208                 hammer_rel_node(node);
1209         return (error);
1210 }
1211
1212 /*
1213  * Release a hammer_node.  The node retains a passive association with
1214  * its cluster, buffer and caches.
1215  *
1216  * However, to avoid cluttering up kernel memory with tons of B-Tree
1217  * node cache structures we destroy the node if no passive cache or
1218  * (instantiated) buffer references exist.
1219  */
1220 void
1221 hammer_rel_node(hammer_node_t node)
1222 {
1223         hammer_cluster_t cluster;
1224         hammer_buffer_t buffer;
1225
1226         if (hammer_islastref(&node->lock)) {
1227                 cluster = node->cluster;
1228                 /*
1229                  * Clutter control, this case only occurs after a failed
1230                  * load since otherwise ondisk will be non-NULL.
1231                  */
1232                 if (node->cache1 == NULL && node->cache2 == NULL && 
1233                     node->ondisk == NULL) {
1234                         RB_REMOVE(hammer_nod_rb_tree, &cluster->rb_nods_root,
1235                                   node);
1236                         if ((buffer = node->buffer) != NULL) {
1237                                 node->buffer = NULL;
1238                                 hammer_remove_node_clist(buffer, node);
1239                         }
1240                         kfree(node, M_HAMMER);
1241                         return;
1242                 }
1243
1244                 /*
1245                  * node->ondisk determines whether we have a buffer reference
1246                  * to get rid of or not.  Only get rid of the reference if
1247                  * the kernel tried to flush the buffer.
1248                  *
1249                  * NOTE: Once unref'd the node can be physically destroyed,
1250                  * so our node is stale afterwords.
1251                  *
1252                  * This case occurs if the node still has cache references.
1253                  * We could remove the references and free the structure
1254                  * but for now we allow them (and the node structure) to
1255                  * remain intact.
1256                  */
1257                 if (node->ondisk && hammer_io_checkflush(&node->buffer->io)) {
1258                         buffer = node->buffer;
1259                         node->buffer = NULL;
1260                         node->ondisk = NULL;
1261                         hammer_remove_node_clist(buffer, node);
1262                         hammer_unref(&node->lock);
1263                         hammer_rel_buffer(buffer, 0);
1264                 } else {
1265                         hammer_unref(&node->lock);
1266                 }
1267         } else {
1268                 hammer_unref(&node->lock);
1269         }
1270 }
1271
1272 /*
1273  * Cache-and-release a hammer_node.  Kinda like catching and releasing a
1274  * fish, but keeping an eye on him.  The node is passively cached in *cache.
1275  *
1276  * NOTE!  HAMMER may NULL *cache at any time, even after you have
1277  * referenced the node!
1278  */
1279 void
1280 hammer_cache_node(hammer_node_t node, struct hammer_node **cache)
1281 {
1282         if (node->cache1 != cache) {
1283                 if (node->cache2 == cache) {
1284                         struct hammer_node **tmp;
1285                         tmp = node->cache1;
1286                         node->cache1 = node->cache2;
1287                         node->cache2 = tmp;
1288                 } else {
1289                         if (node->cache2)
1290                                 *node->cache2 = NULL;
1291                         node->cache2 = node->cache1;
1292                         node->cache1 = cache;
1293                         *cache = node;
1294                 }
1295         }
1296 }
1297
1298 void
1299 hammer_uncache_node(struct hammer_node **cache)
1300 {
1301         hammer_node_t node;
1302
1303         if ((node = *cache) != NULL) {
1304                 *cache = NULL;
1305                 if (node->cache1 == cache) {
1306                         node->cache1 = node->cache2;
1307                         node->cache2 = NULL;
1308                 } else if (node->cache2 == cache) {
1309                         node->cache2 = NULL;
1310                 } else {
1311                         panic("hammer_uncache_node: missing cache linkage");
1312                 }
1313                 if (node->cache1 == NULL && node->cache2 == NULL &&
1314                     node->lock.refs == 0) {
1315                         hammer_flush_node(node);
1316                 }
1317         }
1318 }
1319
1320 /*
1321  * Remove a node's cache references and destroy the node if it has no
1322  * references.  This is typically called from the buffer handling code.
1323  *
1324  * The node may have an active buffer reference (ondisk != NULL) even
1325  * if the node itself has no references.
1326  *
1327  * Note that a caller iterating through nodes via a buffer must have its
1328  * own reference on the buffer or our hammer_rel_buffer() call below may
1329  * rip it out from under the caller.
1330  */
1331 void
1332 hammer_flush_node(hammer_node_t node)
1333 {
1334         hammer_buffer_t buffer;
1335
1336         if (node->cache1)
1337                 *node->cache1 = NULL;
1338         if (node->cache2)
1339                 *node->cache2 = NULL;
1340         if (node->lock.refs == 0) {
1341                 RB_REMOVE(hammer_nod_rb_tree, &node->cluster->rb_nods_root,
1342                           node);
1343                 if ((buffer = node->buffer) != NULL) {
1344                         node->buffer = NULL;
1345                         hammer_remove_node_clist(buffer, node);
1346                         if (node->ondisk) {
1347                                 node->ondisk = NULL;
1348                                 hammer_rel_buffer(buffer, 0);
1349                         }
1350                 }
1351                 kfree(node, M_HAMMER);
1352         }
1353 }
1354
1355 /*
1356  * Remove a node from the buffer's clist.  Adjust save_scan as appropriate.
1357  * This is in its own little routine to properly handle interactions with
1358  * save_scan, so it is possible to block while scanning a buffer's node list.
1359  */
1360 static
1361 void
1362 hammer_remove_node_clist(hammer_buffer_t buffer, hammer_node_t node)
1363 {
1364         if (buffer->save_scan == node)
1365                 buffer->save_scan = TAILQ_NEXT(node, entry);
1366         TAILQ_REMOVE(&buffer->clist, node, entry);
1367 }
1368
1369 /************************************************************************
1370  *                              A-LIST ALLOCATORS                       *
1371  ************************************************************************/
1372
1373 /*
1374  * Allocate HAMMER elements - btree nodes, data storage, and record elements
1375  *
1376  * The passed *bufferp should be initialized to NULL.  On successive calls
1377  * *bufferp caches the most recent buffer used until put away by the caller.
1378  * Note that previously returned pointers using the cached buffer become
1379  * invalid on successive calls which reuse *bufferp.
1380  *
1381  * All allocations first attempt to use the block found at the specified
1382  * iterator.  If that fails the first available block is used.  If that
1383  * fails a new buffer is allocated and associated with the buffer type
1384  * A-list and the element is allocated out of the new buffer.
1385  */
1386
1387 hammer_node_t
1388 hammer_alloc_btree(hammer_cluster_t cluster, int *errorp)
1389 {
1390         hammer_buffer_t buffer;
1391         hammer_alist_t live;
1392         hammer_node_t node;
1393         int32_t elm_no;
1394         int32_t buf_no;
1395         int32_t node_offset;
1396
1397         /*
1398          * Allocate a B-Tree element
1399          */
1400         buffer = NULL;
1401         live = &cluster->alist_btree;
1402         elm_no = hammer_alist_alloc_fwd(live, 1, cluster->ondisk->idx_index);
1403         if (elm_no == HAMMER_ALIST_BLOCK_NONE)
1404                 elm_no = hammer_alist_alloc_fwd(live, 1, 0);
1405         if (elm_no == HAMMER_ALIST_BLOCK_NONE) {
1406                 alloc_new_buffer(cluster, live,
1407                                  HAMMER_FSBUF_BTREE, HAMMER_BTREE_NODES,
1408                                  cluster->ondisk->idx_index, errorp, &buffer);
1409                 elm_no = hammer_alist_alloc(live, 1);
1410                 if (elm_no == HAMMER_ALIST_BLOCK_NONE) {
1411                         *errorp = ENOSPC;
1412                         if (buffer)
1413                                 hammer_rel_buffer(buffer, 0);
1414                         hammer_modify_cluster(cluster);
1415                         return(NULL);
1416                 }
1417         }
1418         cluster->ondisk->idx_index = elm_no;
1419         hammer_modify_cluster(cluster);
1420
1421         /*
1422          * Load and return the B-Tree element
1423          */
1424         buf_no = elm_no / HAMMER_FSBUF_MAXBLKS;
1425         node_offset = buf_no * HAMMER_BUFSIZE +
1426                       offsetof(union hammer_fsbuf_ondisk,
1427                                btree.nodes[elm_no & HAMMER_FSBUF_BLKMASK]);
1428         node = hammer_get_node(cluster, node_offset, errorp);
1429         if (node) {
1430                 bzero(node->ondisk, sizeof(*node->ondisk));
1431         } else {
1432                 hammer_alist_free(live, elm_no, 1);
1433                 hammer_rel_node(node);
1434                 node = NULL;
1435         }
1436         if (buffer)
1437                 hammer_rel_buffer(buffer, 0);
1438         return(node);
1439 }
1440
1441 void *
1442 hammer_alloc_data(hammer_cluster_t cluster, int32_t bytes,
1443                   int *errorp, struct hammer_buffer **bufferp)
1444 {
1445         hammer_buffer_t buffer;
1446         hammer_alist_t live;
1447         int32_t elm_no;
1448         int32_t buf_no;
1449         int32_t nblks;
1450         void *item;
1451
1452         /*
1453          * Deal with large data blocks.  The blocksize is HAMMER_BUFSIZE
1454          * for these allocations.
1455          */
1456         if ((bytes & HAMMER_BUFMASK) == 0) {
1457                 nblks = bytes / HAMMER_BUFSIZE;
1458                 /* only one block allowed for now (so buffer can hold it) */
1459                 KKASSERT(nblks == 1);
1460
1461                 buf_no = hammer_alist_alloc_fwd(&cluster->alist_master,
1462                                                 nblks,
1463                                                 cluster->ondisk->idx_ldata);
1464                 if (buf_no == HAMMER_ALIST_BLOCK_NONE) {
1465                         buf_no = hammer_alist_alloc_fwd(&cluster->alist_master,
1466                                                 nblks,
1467                                                 0);
1468                 }
1469                 hammer_modify_cluster(cluster);
1470                 if (buf_no == HAMMER_ALIST_BLOCK_NONE) {
1471                         *errorp = ENOSPC;
1472                         return(NULL);
1473                 }
1474                 cluster->ondisk->idx_ldata = buf_no;
1475                 buffer = *bufferp;
1476                 *bufferp = hammer_get_buffer(cluster, buf_no, -1, errorp);
1477                 if (buffer)
1478                         hammer_rel_buffer(buffer, 0);
1479                 buffer = *bufferp;
1480                 kprintf("allocate large buffer %p (%d)\n", buffer, buf_no);
1481                 return(buffer->ondisk);
1482         }
1483
1484         /*
1485          * Allocate a data element.  The block size is HAMMER_DATA_BLKSIZE
1486          * (64 bytes) for these allocations.
1487          */
1488         nblks = (bytes + HAMMER_DATA_BLKMASK) & ~HAMMER_DATA_BLKMASK;
1489         live = &cluster->alist_mdata;
1490         elm_no = hammer_alist_alloc_fwd(live, nblks, cluster->ondisk->idx_data);
1491         if (elm_no == HAMMER_ALIST_BLOCK_NONE)
1492                 elm_no = hammer_alist_alloc_fwd(live, 1, 0);
1493         if (elm_no == HAMMER_ALIST_BLOCK_NONE) {
1494                 alloc_new_buffer(cluster, live,
1495                                  HAMMER_FSBUF_DATA, HAMMER_DATA_NODES,
1496                                  cluster->ondisk->idx_data, errorp, bufferp);
1497                 elm_no = hammer_alist_alloc(live, nblks);
1498                 if (elm_no == HAMMER_ALIST_BLOCK_NONE) {
1499                         *errorp = ENOSPC;
1500                         hammer_modify_cluster(cluster);
1501                         return(NULL);
1502                 }
1503         }
1504         cluster->ondisk->idx_index = elm_no;
1505         hammer_modify_cluster(cluster);
1506
1507         /*
1508          * Load and return the B-Tree element
1509          */
1510         buf_no = elm_no / HAMMER_FSBUF_MAXBLKS;
1511         buffer = *bufferp;
1512         if (buffer == NULL || buffer->cluster != cluster ||
1513             buffer->buf_no != buf_no) {
1514                 if (buffer)
1515                         hammer_rel_buffer(buffer, 0);
1516                 buffer = hammer_get_buffer(cluster, buf_no, 0, errorp);
1517                 *bufferp = buffer;
1518         }
1519         KKASSERT(buffer->ondisk->head.buf_type == HAMMER_FSBUF_DATA);
1520         item = &buffer->ondisk->data.data[elm_no & HAMMER_FSBUF_BLKMASK];
1521         bzero(item, nblks * HAMMER_DATA_BLKSIZE);
1522         *errorp = 0;
1523         return(item);
1524 }
1525
/*
 * Allocate a record element from the cluster and return a pointer to
 * the zero'd record.  Records are allocated in the reverse direction
 * (alloc_rev), unlike B-Tree nodes and data which allocate forwards.
 *
 * *bufferp caches the most recently used buffer across calls; the
 * caller initializes it to NULL and eventually releases it.
 */
void *
hammer_alloc_record(hammer_cluster_t cluster,
		    int *errorp, struct hammer_buffer **bufferp)
{
	hammer_buffer_t buffer;
	hammer_alist_t live;
	int32_t elm_no;
	int32_t buf_no;
	void *item;

	/*
	 * Allocate a record element.  Try the cached iterator first,
	 * then a full reverse scan, then allocate a new record-typed
	 * buffer and retry.
	 */
	live = &cluster->alist_record;
	elm_no = hammer_alist_alloc_rev(live, 1, cluster->ondisk->idx_record);
	if (elm_no == HAMMER_ALIST_BLOCK_NONE)
		elm_no = hammer_alist_alloc_rev(live, 1,HAMMER_ALIST_BLOCK_MAX);
	if (elm_no == HAMMER_ALIST_BLOCK_NONE) {
		alloc_new_buffer(cluster, live,
				 HAMMER_FSBUF_RECORDS, HAMMER_RECORD_NODES,
				 cluster->ondisk->idx_record, errorp, bufferp);
		elm_no = hammer_alist_alloc(live, 1);
		if (elm_no == HAMMER_ALIST_BLOCK_NONE) {
			*errorp = ENOSPC;
			hammer_modify_cluster(cluster);
			return(NULL);
		}
	}
	cluster->ondisk->idx_record = elm_no;
	hammer_modify_cluster(cluster);

	/*
	 * Load and return the record element
	 */
	buf_no = elm_no / HAMMER_FSBUF_MAXBLKS;
	buffer = *bufferp;
	if (buffer == NULL || buffer->cluster != cluster ||
	    buffer->buf_no != buf_no) {
		if (buffer)
			hammer_rel_buffer(buffer, 0);
		buffer = hammer_get_buffer(cluster, buf_no, 0, errorp);
		*bufferp = buffer;
	}
	KKASSERT(buffer->ondisk->head.buf_type != 0);
	item = &buffer->ondisk->record.recs[elm_no & HAMMER_FSBUF_BLKMASK];
	bzero(item, sizeof(union hammer_record_ondisk));
	*errorp = 0;
	return(item);
}
1575
1576 /*
1577  * Free HAMMER elements based on either a hammer_buffer and element pointer
1578  * or a cluster-relative byte offset.
1579  */
1580 void
1581 hammer_free_btree_ptr(hammer_buffer_t buffer, hammer_node_ondisk_t node)
1582 {
1583         int32_t elm_no;
1584         hammer_alist_t live;
1585
1586         elm_no = node - &buffer->ondisk->btree.nodes[0];
1587         KKASSERT(elm_no >= 0 && elm_no < HAMMER_BTREE_NODES);
1588         elm_no += buffer->buf_no * HAMMER_FSBUF_MAXBLKS;
1589         live = &buffer->cluster->alist_btree;
1590         hammer_alist_free(live, elm_no, 1);
1591         hammer_modify_cluster(buffer->cluster);
1592 }
1593
1594 void
1595 hammer_free_data_ptr(hammer_buffer_t buffer, void *data, int bytes)
1596 {
1597         int32_t elm_no;
1598         int32_t nblks;
1599         hammer_alist_t live;
1600
1601         if ((bytes & HAMMER_BUFMASK) == 0) {
1602                 nblks = bytes / HAMMER_BUFSIZE;
1603                 KKASSERT(nblks == 1 && data == (void *)buffer->ondisk);
1604                 hammer_alist_free(&buffer->cluster->alist_master,
1605                                   buffer->buf_no, nblks);
1606                 hammer_modify_cluster(buffer->cluster);
1607                 return;
1608         }
1609
1610         elm_no = ((char *)data - (char *)buffer->ondisk->data.data) /
1611                  HAMMER_DATA_BLKSIZE;
1612         KKASSERT(elm_no >= 0 && elm_no < HAMMER_DATA_NODES);
1613         elm_no += buffer->buf_no * HAMMER_FSBUF_MAXBLKS;
1614         nblks = (bytes + HAMMER_DATA_BLKMASK) & ~HAMMER_DATA_BLKMASK;
1615         live = &buffer->cluster->alist_mdata;
1616         hammer_alist_free(live, elm_no, nblks);
1617         hammer_modify_cluster(buffer->cluster);
1618 }
1619
1620 void
1621 hammer_free_record_ptr(hammer_buffer_t buffer, union hammer_record_ondisk *rec)
1622 {
1623         int32_t elm_no;
1624         hammer_alist_t live;
1625
1626         elm_no = rec - &buffer->ondisk->record.recs[0];
1627         KKASSERT(elm_no >= 0 && elm_no < HAMMER_BTREE_NODES);
1628         elm_no += buffer->buf_no * HAMMER_FSBUF_MAXBLKS;
1629         live = &buffer->cluster->alist_record;
1630         hammer_alist_free(live, elm_no, 1);
1631         hammer_modify_cluster(buffer->cluster);
1632 }
1633
1634 void
1635 hammer_free_btree(hammer_cluster_t cluster, int32_t bclu_offset)
1636 {
1637         const int32_t blksize = sizeof(struct hammer_node_ondisk);
1638         int32_t fsbuf_offset = bclu_offset & HAMMER_BUFMASK;
1639         hammer_alist_t live;
1640         int32_t elm_no;
1641
1642         elm_no = bclu_offset / HAMMER_BUFSIZE * HAMMER_FSBUF_MAXBLKS;
1643         fsbuf_offset -= offsetof(union hammer_fsbuf_ondisk, btree.nodes[0]);
1644         live = &cluster->alist_btree;
1645         KKASSERT(fsbuf_offset >= 0 && fsbuf_offset % blksize == 0);
1646         elm_no += fsbuf_offset / blksize;
1647         hammer_alist_free(live, elm_no, 1);
1648         hammer_modify_cluster(cluster);
1649 }
1650
1651 void
1652 hammer_free_data(hammer_cluster_t cluster, int32_t bclu_offset, int32_t bytes)
1653 {
1654         const int32_t blksize = HAMMER_DATA_BLKSIZE;
1655         int32_t fsbuf_offset = bclu_offset & HAMMER_BUFMASK;
1656         hammer_alist_t live;
1657         int32_t elm_no;
1658         int32_t buf_no;
1659         int32_t nblks;
1660
1661         if ((bytes & HAMMER_BUFMASK) == 0) {
1662                 nblks = bytes / HAMMER_BUFSIZE;
1663                 KKASSERT(nblks == 1 && (bclu_offset & HAMMER_BUFMASK) == 0);
1664                 buf_no = bclu_offset / HAMMER_BUFSIZE;
1665                 hammer_alist_free(&cluster->alist_master, buf_no, nblks);
1666                 hammer_modify_cluster(cluster);
1667                 return;
1668         }
1669
1670         elm_no = bclu_offset / HAMMER_BUFSIZE * HAMMER_FSBUF_MAXBLKS;
1671         fsbuf_offset -= offsetof(union hammer_fsbuf_ondisk, data.data[0][0]);
1672         live = &cluster->alist_mdata;
1673         nblks = (bytes + HAMMER_DATA_BLKMASK) & ~HAMMER_DATA_BLKMASK;
1674         KKASSERT(fsbuf_offset >= 0 && fsbuf_offset % blksize == 0);
1675         elm_no += fsbuf_offset / blksize;
1676         hammer_alist_free(live, elm_no, nblks);
1677         hammer_modify_cluster(cluster);
1678 }
1679
1680 void
1681 hammer_free_record(hammer_cluster_t cluster, int32_t bclu_offset)
1682 {
1683         const int32_t blksize = sizeof(union hammer_record_ondisk);
1684         int32_t fsbuf_offset = bclu_offset & HAMMER_BUFMASK;
1685         hammer_alist_t live;
1686         int32_t elm_no;
1687
1688         elm_no = bclu_offset / HAMMER_BUFSIZE * HAMMER_FSBUF_MAXBLKS;
1689         fsbuf_offset -= offsetof(union hammer_fsbuf_ondisk, record.recs[0]);
1690         live = &cluster->alist_record;
1691         KKASSERT(fsbuf_offset >= 0 && fsbuf_offset % blksize == 0);
1692         elm_no += fsbuf_offset / blksize;
1693         hammer_alist_free(live, elm_no, 1);
1694         hammer_modify_cluster(cluster);
1695 }
1696
1697
1698 /*
1699  * Allocate a new filesystem buffer and assign it to the specified
1700  * filesystem buffer type.  The new buffer will be added to the
1701  * type-specific A-list and initialized.
1702  */
1703 static void
1704 alloc_new_buffer(hammer_cluster_t cluster, hammer_alist_t live,
1705                  u_int64_t type, int32_t nelements,
1706                  int start, int *errorp, struct hammer_buffer **bufferp)
1707 {
1708         hammer_buffer_t buffer;
1709         int32_t buf_no;
1710
1711         start = start / HAMMER_FSBUF_MAXBLKS;   /* convert to buf_no */
1712
1713         if (type == HAMMER_FSBUF_RECORDS) {
1714                 buf_no = hammer_alist_alloc_rev(&cluster->alist_master,
1715                                                 1, start);
1716                 if (buf_no == HAMMER_ALIST_BLOCK_NONE) {
1717                         buf_no = hammer_alist_alloc_rev(&cluster->alist_master,
1718                                                 1, HAMMER_ALIST_BLOCK_MAX);
1719                 }
1720         } else {
1721                 buf_no = hammer_alist_alloc_fwd(&cluster->alist_master,
1722                                                 1, start);
1723                 if (buf_no == HAMMER_ALIST_BLOCK_NONE) {
1724                         buf_no = hammer_alist_alloc_fwd(&cluster->alist_master,
1725                                                 1, 0);
1726                 }
1727         }
1728         KKASSERT(buf_no != HAMMER_ALIST_BLOCK_NONE); /* XXX */
1729         hammer_modify_cluster(cluster);
1730
1731         /*
1732          * The new buffer must be initialized (type != 0) regardless of
1733          * whether we already have it cached or not, so don't try to
1734          * optimize the cached buffer check.  Just call hammer_get_buffer().
1735          */
1736         buffer = hammer_get_buffer(cluster, buf_no, type, errorp);
1737         if (*bufferp)
1738                 hammer_rel_buffer(*bufferp, 0);
1739         *bufferp = buffer;
1740 }
1741
1742 #if 0
1743
1744 /*
1745  * Flush various tracking structures to disk
1746  */
1747
1748 /*
1749  * Flush various tracking structures to disk
1750  */
/*
 * Flush every known volume to disk.
 *
 * NOTE(review): compiled out (#if 0); VolBase and the vol->next linkage
 * appear to be leftovers from the user-space utility code -- verify
 * before re-enabling.
 */
void
flush_all_volumes(void)
{
        hammer_volume_t vol;

        for (vol = VolBase; vol; vol = vol->next)
                flush_volume(vol);
}
1759
/*
 * Flush one volume: its superclusters and clusters first, then the
 * volume header itself (at volume-relative offset 0).
 *
 * NOTE(review): compiled out (#if 0); relies on supercl->next/cl->next
 * linkages not present in the kernel structures -- verify before
 * re-enabling.
 */
void
flush_volume(hammer_volume_t vol)
{
        hammer_supercl_t supercl;
        hammer_cluster_t cl;

        for (supercl = vol->supercl_base; supercl; supercl = supercl->next)
                flush_supercl(supercl);
        for (cl = vol->cluster_base; cl; cl = cl->next)
                flush_cluster(cl);
        writehammerbuf(vol, vol->ondisk, 0);
}
1772
/*
 * Write a super-cluster's on-disk image back to its volume at the
 * super-cluster's recorded offset.  (Compiled out, see #if 0.)
 */
void
flush_supercl(hammer_supercl_t supercl)
{
        int64_t supercl_offset;

        supercl_offset = supercl->scl_offset;
        writehammerbuf(supercl->volume, supercl->ondisk, supercl_offset);
}
1781
/*
 * Flush one cluster: its filesystem buffers first, then the cluster
 * header at the cluster's recorded volume offset.  (Compiled out,
 * see #if 0.)
 */
void
flush_cluster(hammer_cluster_t cl)
{
        hammer_buffer_t buf;
        int64_t cluster_offset;

        for (buf = cl->buffer_base; buf; buf = buf->next)
                flush_buffer(buf);
        cluster_offset = cl->clu_offset;
        writehammerbuf(cl->volume, cl->ondisk, cluster_offset);
}
1793
/*
 * Write a single filesystem buffer back to its volume.  The volume
 * offset is the buffer's cluster-relative offset plus the cluster's
 * offset within the volume.  (Compiled out, see #if 0.)
 */
void
flush_buffer(hammer_buffer_t buf)
{
        int64_t buffer_offset;

        buffer_offset = buf->buf_offset + buf->cluster->clu_offset;
        writehammerbuf(buf->volume, buf->ondisk, buffer_offset);
}
1802
1803 #endif
1804
1805 /*
1806  * Generic buffer initialization
1807  */
1808 static void
1809 initbuffer(hammer_alist_t live, hammer_fsbuf_head_t head, u_int64_t type)
1810 {
1811         head->buf_type = type;
1812         hammer_alist_init(live);
1813 }
1814
1815 /*
1816  * Calculate the cluster's offset in the volume.  This calculation is
1817  * slightly more complex when using superclusters because superclusters
1818  * are grouped in blocks of 16, followed by 16 x N clusters where N
1819  * is the number of clusters a supercluster can manage.
1820  */
1821 static int64_t
1822 calculate_cluster_offset(hammer_volume_t volume, int32_t clu_no)
1823 {
1824         int32_t scl_group;
1825         int64_t scl_group_size;
1826         int64_t off;
1827
1828         if (volume->vol_flags & HAMMER_VOLF_USINGSUPERCL) {
1829                 scl_group = clu_no / HAMMER_VOL_SUPERCLUSTER_GROUP /
1830                             HAMMER_SCL_MAXCLUSTERS;
1831                 scl_group_size = 
1832                             ((int64_t)HAMMER_BUFSIZE *
1833                              HAMMER_VOL_SUPERCLUSTER_GROUP) +
1834                             ((int64_t)HAMMER_VOL_SUPERCLUSTER_GROUP *
1835                              volume->vol_clsize * HAMMER_SCL_MAXCLUSTERS);
1836                 scl_group_size += 
1837                             HAMMER_VOL_SUPERCLUSTER_GROUP * HAMMER_BUFSIZE;
1838
1839                 off = volume->cluster_base +
1840                       scl_group * scl_group_size +
1841                       (HAMMER_BUFSIZE * HAMMER_VOL_SUPERCLUSTER_GROUP) +
1842                       ((int64_t)clu_no % ((int64_t)HAMMER_SCL_MAXCLUSTERS *
1843                        HAMMER_VOL_SUPERCLUSTER_GROUP))
1844                       * HAMMER_BUFSIZE;
1845         } else {
1846                 off = volume->cluster_base +
1847                       (int64_t)clu_no * volume->vol_clsize;
1848         }
1849         return(off);
1850 }
1851
1852 /*
1853  * Calculate a super-cluster's offset in the volume.
1854  */
1855 static int64_t
1856 calculate_supercl_offset(hammer_volume_t volume, int32_t scl_no)
1857 {
1858         int64_t off;
1859         int32_t scl_group;
1860         int64_t scl_group_size;
1861
1862         KKASSERT (volume->vol_flags & HAMMER_VOLF_USINGSUPERCL);
1863         scl_group = scl_no / HAMMER_VOL_SUPERCLUSTER_GROUP;
1864         if (scl_group) {
1865                 scl_group_size = 
1866                             ((int64_t)HAMMER_BUFSIZE *
1867                              HAMMER_VOL_SUPERCLUSTER_GROUP) +
1868                             ((int64_t)HAMMER_VOL_SUPERCLUSTER_GROUP *
1869                              volume->vol_clsize * HAMMER_SCL_MAXCLUSTERS);
1870                 scl_group_size += 
1871                             HAMMER_VOL_SUPERCLUSTER_GROUP * HAMMER_BUFSIZE;
1872                 off = volume->cluster_base + (scl_group * scl_group_size) +
1873                       (scl_no % HAMMER_VOL_SUPERCLUSTER_GROUP) * HAMMER_BUFSIZE;
1874         } else {
1875                 off = volume->cluster_base + (scl_no * HAMMER_BUFSIZE);
1876         }
1877         return(off);
1878 }
1879
1880 /*
1881  * A-LIST SUPPORT
1882  *
1883  * Setup the parameters for the various A-lists we use in hammer.  The
1884  * supercluster A-list must be chained to the cluster A-list and cluster
1885  * slave A-lists are chained to buffer A-lists.
1886  *
1887  * See hammer_init_alist_config() below.
1888  */
1889
1890 /*
1891  * A-LIST - cluster recursion into a filesystem buffer
1892  */
1893 static int
1894 buffer_alist_init(void *info, int32_t blk, int32_t radix)
1895 {
1896         hammer_cluster_t cluster = info;
1897         hammer_buffer_t buffer;
1898         int32_t buf_no;
1899         int error = 0;
1900
1901         /*
1902          * Calculate the buffer number, initialize based on the buffer type.
1903          * The buffer has already been allocated so assert that it has been
1904          * initialized.
1905          */
1906         buf_no = blk / HAMMER_FSBUF_MAXBLKS;
1907         buffer = hammer_get_buffer(cluster, buf_no, 0, &error);
1908         if (buffer)
1909                 hammer_rel_buffer(buffer, 0);
1910         return (error);
1911 }
1912
/*
 * Nothing needs to be torn down when a buffer-level a-list layer is
 * destroyed.
 */
static int
buffer_alist_destroy(void *info, int32_t blk, int32_t radix)
{
        return (0);
}
1918
1919 static int
1920 buffer_alist_alloc_fwd(void *info, int32_t blk, int32_t radix,
1921                       int32_t count, int32_t atblk, int32_t *fullp)
1922 {
1923         hammer_cluster_t cluster = info;
1924         hammer_buffer_t buffer;
1925         int32_t buf_no;
1926         int32_t r;
1927         int error = 0;
1928
1929         buf_no = blk / HAMMER_FSBUF_MAXBLKS;
1930         buffer = hammer_get_buffer(cluster, buf_no, 0, &error);
1931         if (buffer) {
1932                 KKASSERT(buffer->ondisk->head.buf_type != 0);
1933
1934                 r = hammer_alist_alloc_fwd(&buffer->alist, count, atblk - blk);
1935                 if (r != HAMMER_ALIST_BLOCK_NONE)
1936                         r += blk;
1937                 *fullp = hammer_alist_isfull(&buffer->alist);
1938                 hammer_modify_buffer(buffer);
1939                 hammer_rel_buffer(buffer, 0);
1940         } else {
1941                 r = HAMMER_ALIST_BLOCK_NONE;
1942         }
1943         return(r);
1944 }
1945
1946 static int
1947 buffer_alist_alloc_rev(void *info, int32_t blk, int32_t radix,
1948                       int32_t count, int32_t atblk, int32_t *fullp)
1949 {
1950         hammer_cluster_t cluster = info;
1951         hammer_buffer_t buffer;
1952         int32_t buf_no;
1953         int32_t r;
1954         int error = 0;
1955
1956         buf_no = blk / HAMMER_FSBUF_MAXBLKS;
1957         buffer = hammer_get_buffer(cluster, buf_no, 0, &error);
1958         if (buffer) {
1959                 KKASSERT(buffer->ondisk->head.buf_type != 0);
1960
1961                 r = hammer_alist_alloc_rev(&buffer->alist, count, atblk - blk);
1962                 if (r != HAMMER_ALIST_BLOCK_NONE)
1963                         r += blk;
1964                 *fullp = hammer_alist_isfull(&buffer->alist);
1965                 hammer_modify_buffer(buffer);
1966                 hammer_rel_buffer(buffer, 0);
1967         } else {
1968                 r = HAMMER_ALIST_BLOCK_NONE;
1969                 *fullp = 0;
1970         }
1971         return(r);
1972 }
1973
1974 static void
1975 buffer_alist_free(void *info, int32_t blk, int32_t radix,
1976                  int32_t base_blk, int32_t count, int32_t *emptyp)
1977 {
1978         hammer_cluster_t cluster = info;
1979         hammer_buffer_t buffer;
1980         int32_t buf_no;
1981         int error = 0;
1982
1983         buf_no = blk / HAMMER_FSBUF_MAXBLKS;
1984         buffer = hammer_get_buffer(cluster, buf_no, 0, &error);
1985         if (buffer) {
1986                 KKASSERT(buffer->ondisk->head.buf_type != 0);
1987                 hammer_alist_free(&buffer->alist, base_blk, count);
1988                 *emptyp = hammer_alist_isempty(&buffer->alist);
1989                 hammer_modify_buffer(buffer);
1990                 hammer_rel_buffer(buffer, 0);
1991         } else {
1992                 *emptyp = 0;
1993         }
1994 }
1995
/*
 * Print callback for the buffer-level a-list layer (intentionally a
 * no-op).
 */
static void
buffer_alist_print(void *info, int32_t blk, int32_t radix, int tab)
{
}
2000
2001 /*
2002  * A-LIST - super-cluster recursion into a cluster and cluster recursion
2003  * into a filesystem buffer.  A-List's are mostly self-contained entities,
2004  * but callbacks must be installed to recurse from one A-List to another.
2005  *
2006  * Implementing these callbacks allows us to operate a multi-layered A-List
2007  * as a single entity.
2008  */
2009 static int
2010 super_alist_init(void *info, int32_t blk, int32_t radix)
2011 {
2012         hammer_volume_t volume = info;
2013         hammer_supercl_t supercl;
2014         int32_t scl_no;
2015         int error = 0;
2016
2017         /*
2018          * Calculate the super-cluster number containing the cluster (blk)
2019          * and obtain the super-cluster buffer.
2020          */
2021         scl_no = blk / HAMMER_SCL_MAXCLUSTERS;
2022         supercl = hammer_get_supercl(volume, scl_no, &error, 1);
2023         if (supercl)
2024                 hammer_rel_supercl(supercl, 0);
2025         return (error);
2026 }
2027
/*
 * Nothing needs to be torn down when a super-cluster a-list layer is
 * destroyed.
 */
static int
super_alist_destroy(void *info, int32_t blk, int32_t radix)
{
        return(0);
}
2033
2034 static int
2035 super_alist_alloc_fwd(void *info, int32_t blk, int32_t radix,
2036                       int32_t count, int32_t atblk, int32_t *fullp)
2037 {
2038         hammer_volume_t volume = info;
2039         hammer_supercl_t supercl;
2040         int32_t scl_no;
2041         int32_t r;
2042         int error = 0;
2043
2044         scl_no = blk / HAMMER_SCL_MAXCLUSTERS;
2045         supercl = hammer_get_supercl(volume, scl_no, &error, 1);
2046         if (supercl) {
2047                 r = hammer_alist_alloc_fwd(&supercl->alist, count, atblk - blk);
2048                 if (r != HAMMER_ALIST_BLOCK_NONE)
2049                         r += blk;
2050                 *fullp = hammer_alist_isfull(&supercl->alist);
2051                 hammer_modify_supercl(supercl);
2052                 hammer_rel_supercl(supercl, 0);
2053         } else {
2054                 r = HAMMER_ALIST_BLOCK_NONE;
2055                 *fullp = 0;
2056         }
2057         return(r);
2058 }
2059
2060 static int
2061 super_alist_alloc_rev(void *info, int32_t blk, int32_t radix,
2062                       int32_t count, int32_t atblk, int32_t *fullp)
2063 {
2064         hammer_volume_t volume = info;
2065         hammer_supercl_t supercl;
2066         int32_t scl_no;
2067         int32_t r;
2068         int error = 0;
2069
2070         scl_no = blk / HAMMER_SCL_MAXCLUSTERS;
2071         supercl = hammer_get_supercl(volume, scl_no, &error, 1);
2072         if (supercl) {
2073                 r = hammer_alist_alloc_rev(&supercl->alist, count, atblk - blk);
2074                 if (r != HAMMER_ALIST_BLOCK_NONE)
2075                         r += blk;
2076                 *fullp = hammer_alist_isfull(&supercl->alist);
2077                 hammer_modify_supercl(supercl);
2078                 hammer_rel_supercl(supercl, 0);
2079         } else { 
2080                 r = HAMMER_ALIST_BLOCK_NONE;
2081                 *fullp = 0;
2082         }
2083         return(r);
2084 }
2085
2086 static void
2087 super_alist_free(void *info, int32_t blk, int32_t radix,
2088                  int32_t base_blk, int32_t count, int32_t *emptyp)
2089 {
2090         hammer_volume_t volume = info;
2091         hammer_supercl_t supercl;
2092         int32_t scl_no;
2093         int error = 0;
2094
2095         scl_no = blk / HAMMER_SCL_MAXCLUSTERS;
2096         supercl = hammer_get_supercl(volume, scl_no, &error, 1);
2097         if (supercl) {
2098                 hammer_alist_free(&supercl->alist, base_blk, count);
2099                 *emptyp = hammer_alist_isempty(&supercl->alist);
2100                 hammer_modify_supercl(supercl);
2101                 hammer_rel_supercl(supercl, 0);
2102         } else {
2103                 *emptyp = 0;
2104         }
2105 }
2106
/*
 * Print callback for the super-cluster a-list layer (intentionally a
 * no-op).
 */
static void
super_alist_print(void *info, int32_t blk, int32_t radix, int tab)
{
}
2111
2112 void
2113 hammer_init_alist_config(void)
2114 {
2115         hammer_alist_config_t config;
2116
2117         hammer_alist_template(&Buf_alist_config, HAMMER_FSBUF_MAXBLKS,
2118                               1, HAMMER_FSBUF_METAELMS);
2119         hammer_alist_template(&Vol_normal_alist_config, HAMMER_VOL_MAXCLUSTERS,
2120                               1, HAMMER_VOL_METAELMS_1LYR);
2121         hammer_alist_template(&Vol_super_alist_config,
2122                               HAMMER_VOL_MAXSUPERCLUSTERS,
2123                               HAMMER_SCL_MAXCLUSTERS, HAMMER_VOL_METAELMS_2LYR);
2124         hammer_alist_template(&Supercl_alist_config, HAMMER_VOL_MAXCLUSTERS,
2125                               1, HAMMER_SUPERCL_METAELMS);
2126         hammer_alist_template(&Clu_master_alist_config, HAMMER_CLU_MAXBUFFERS,
2127                               1, HAMMER_CLU_MASTER_METAELMS);
2128         hammer_alist_template(&Clu_slave_alist_config, HAMMER_CLU_MAXBUFFERS,
2129                               HAMMER_FSBUF_MAXBLKS, HAMMER_CLU_SLAVE_METAELMS);
2130
2131         config = &Vol_super_alist_config;
2132         config->bl_radix_init = super_alist_init;
2133         config->bl_radix_destroy = super_alist_destroy;
2134         config->bl_radix_alloc_fwd = super_alist_alloc_fwd;
2135         config->bl_radix_alloc_rev = super_alist_alloc_rev;
2136         config->bl_radix_free = super_alist_free;
2137         config->bl_radix_print = super_alist_print;
2138
2139         config = &Clu_slave_alist_config;
2140         config->bl_radix_init = buffer_alist_init;
2141         config->bl_radix_destroy = buffer_alist_destroy;
2142         config->bl_radix_alloc_fwd = buffer_alist_alloc_fwd;
2143         config->bl_radix_alloc_rev = buffer_alist_alloc_rev;
2144         config->bl_radix_free = buffer_alist_free;
2145         config->bl_radix_print = buffer_alist_print;
2146 }
2147