HAMMER 9/many - btree removal cases, mount nohistory
1 /*
2  * Copyright (c) 2007 The DragonFly Project.  All rights reserved.
3  * 
4  * This code is derived from software contributed to The DragonFly Project
5  * by Matthew Dillon <dillon@backplane.com>
6  * 
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions
9  * are met:
10  * 
11  * 1. Redistributions of source code must retain the above copyright
12  *    notice, this list of conditions and the following disclaimer.
13  * 2. Redistributions in binary form must reproduce the above copyright
14  *    notice, this list of conditions and the following disclaimer in
15  *    the documentation and/or other materials provided with the
16  *    distribution.
17  * 3. Neither the name of The DragonFly Project nor the names of its
18  *    contributors may be used to endorse or promote products derived
19  *    from this software without specific, prior written permission.
20  * 
21  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
22  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
23  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
24  * FOR A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE
25  * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
26  * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING,
27  * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
28  * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
29  * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
30  * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
31  * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
32  * SUCH DAMAGE.
33  * 
34  * $DragonFly: src/sys/vfs/hammer/hammer_ondisk.c,v 1.9 2007/11/30 00:16:56 dillon Exp $
35  */
36 /*
37  * Manage HAMMER's on-disk structures.  These routines are primarily
38  * responsible for interfacing with the kernel's I/O subsystem and for
39  * managing in-memory structures.
40  */
41
42 #include "hammer.h"
43 #include <sys/fcntl.h>
44 #include <sys/nlookup.h>
45 #include <sys/buf.h>
46 #include <sys/buf2.h>
47
48 static void hammer_free_volume(hammer_volume_t volume);
49 static int hammer_load_volume(hammer_volume_t volume);
50 static int hammer_load_supercl(hammer_supercl_t supercl, int isnew);
51 static int hammer_load_cluster(hammer_cluster_t cluster, int isnew);
52 static int hammer_load_buffer(hammer_buffer_t buffer, u_int64_t buf_type);
53 static void hammer_remove_node_clist(hammer_buffer_t buffer,
54                         hammer_node_t node);
55 static void initbuffer(hammer_alist_t live, hammer_fsbuf_head_t head,
56                         u_int64_t type);
57 static void alloc_new_buffer(hammer_cluster_t cluster,
58                         hammer_alist_t live, u_int64_t type, int32_t nelements,
59                         int32_t start,
60                         int *errorp, struct hammer_buffer **bufferp);
61 #if 0
62 static void readhammerbuf(hammer_volume_t vol, void *data,
63                         int64_t offset);
64 static void writehammerbuf(hammer_volume_t vol, const void *data,
65                         int64_t offset);
66 #endif
67 static int64_t calculate_cluster_offset(hammer_volume_t vol, int32_t clu_no);
68 static int64_t calculate_supercl_offset(hammer_volume_t vol, int32_t scl_no);
69
70 struct hammer_alist_config Buf_alist_config;
71 struct hammer_alist_config Vol_normal_alist_config;
72 struct hammer_alist_config Vol_super_alist_config;
73 struct hammer_alist_config Supercl_alist_config;
74 struct hammer_alist_config Clu_master_alist_config;
75 struct hammer_alist_config Clu_slave_alist_config;
76
77 /*
78  * Red-Black tree support for various structures
79  */
80 static int
81 hammer_ino_rb_compare(hammer_inode_t ip1, hammer_inode_t ip2)
82 {
83         if (ip1->obj_id < ip2->obj_id)
84                 return(-1);
85         if (ip1->obj_id > ip2->obj_id)
86                 return(1);
87         if (ip1->obj_asof < ip2->obj_asof)
88                 return(-1);
89         if (ip1->obj_asof > ip2->obj_asof)
90                 return(1);
91         return(0);
92 }
93
94 static int
95 hammer_inode_info_cmp(hammer_inode_info_t info, hammer_inode_t ip)
96 {
97         if (info->obj_id < ip->obj_id)
98                 return(-1);
99         if (info->obj_id > ip->obj_id)
100                 return(1);
101         if (info->obj_asof < ip->obj_asof)
102                 return(-1);
103         if (info->obj_asof > ip->obj_asof)
104                 return(1);
105         return(0);
106 }
107
108 static int
109 hammer_vol_rb_compare(hammer_volume_t vol1, hammer_volume_t vol2)
110 {
111         if (vol1->vol_no < vol2->vol_no)
112                 return(-1);
113         if (vol1->vol_no > vol2->vol_no)
114                 return(1);
115         return(0);
116 }
117
118 static int
119 hammer_scl_rb_compare(hammer_supercl_t cl1, hammer_supercl_t cl2)
120 {
121         if (cl1->scl_no < cl2->scl_no)
122                 return(-1);
123         if (cl1->scl_no > cl2->scl_no)
124                 return(1);
125         return(0);
126 }
127
128 static int
129 hammer_clu_rb_compare(hammer_cluster_t cl1, hammer_cluster_t cl2)
130 {
131         if (cl1->clu_no < cl2->clu_no)
132                 return(-1);
133         if (cl1->clu_no > cl2->clu_no)
134                 return(1);
135         return(0);
136 }
137
138 static int
139 hammer_buf_rb_compare(hammer_buffer_t buf1, hammer_buffer_t buf2)
140 {
141         if (buf1->buf_no < buf2->buf_no)
142                 return(-1);
143         if (buf1->buf_no > buf2->buf_no)
144                 return(1);
145         return(0);
146 }
147
148 static int
149 hammer_nod_rb_compare(hammer_node_t node1, hammer_node_t node2)
150 {
151         if (node1->node_offset < node2->node_offset)
152                 return(-1);
153         if (node1->node_offset > node2->node_offset)
154                 return(1);
155         return(0);
156 }
157
158 /*
159  * Note: The lookup function for hammer_ino_rb_tree winds up being named
160  * hammer_ino_rb_tree_RB_LOOKUP_INFO(root, info).  The other lookup
161  * functions are normal, e.g. hammer_clu_rb_tree_RB_LOOKUP(root, clu_no).
162  */
163 RB_GENERATE(hammer_ino_rb_tree, hammer_inode, rb_node, hammer_ino_rb_compare);
164 RB_GENERATE_XLOOKUP(hammer_ino_rb_tree, INFO, hammer_inode, rb_node,
165                 hammer_inode_info_cmp, hammer_inode_info_t);
166 RB_GENERATE2(hammer_vol_rb_tree, hammer_volume, rb_node,
167              hammer_vol_rb_compare, int32_t, vol_no);
168 RB_GENERATE2(hammer_scl_rb_tree, hammer_supercl, rb_node,
169              hammer_scl_rb_compare, int32_t, scl_no);
170 RB_GENERATE2(hammer_clu_rb_tree, hammer_cluster, rb_node,
171              hammer_clu_rb_compare, int32_t, clu_no);
172 RB_GENERATE2(hammer_buf_rb_tree, hammer_buffer, rb_node,
173              hammer_buf_rb_compare, int32_t, buf_no);
174 RB_GENERATE2(hammer_nod_rb_tree, hammer_node, rb_node,
175              hammer_nod_rb_compare, int32_t, node_offset);
176
177 /************************************************************************
178  *                              VOLUMES                                 *
179  ************************************************************************
180  *
181  * Load a HAMMER volume by name.  Returns 0 on success or a positive error
182  * code on failure.  Volumes must be loaded at mount time; get_volume() will
183  * not load a new volume.
184  *
185  * Calls made to hammer_load_volume() are single-threaded.
186  */
187 int
188 hammer_install_volume(struct hammer_mount *hmp, const char *volname)
189 {
190         struct mount *mp;
191         hammer_volume_t volume;
192         struct hammer_volume_ondisk *ondisk;
193         struct nlookupdata nd;
194         struct buf *bp = NULL;
195         int error;
196         int ronly;
197
198         mp = hmp->mp;
199         ronly = ((mp->mnt_flag & MNT_RDONLY) ? 1 : 0);
200
201         /*
202          * Allocate a volume structure
203          */
204         volume = kmalloc(sizeof(*volume), M_HAMMER, M_WAITOK|M_ZERO);
205         volume->vol_name = kstrdup(volname, M_HAMMER);
206         volume->hmp = hmp;
207         volume->io.type = HAMMER_STRUCTURE_VOLUME;
208         volume->io.offset = 0LL;
209
210         /*
211          * Get the device vnode
212          */
213         error = nlookup_init(&nd, volume->vol_name, UIO_SYSSPACE, NLC_FOLLOW);
214         if (error == 0)
215                 error = nlookup(&nd);
216         if (error == 0)
217                 error = cache_vref(&nd.nl_nch, nd.nl_cred, &volume->devvp);
218         nlookup_done(&nd);
219         if (error == 0) {
220                 vn_isdisk(volume->devvp, &error);
221         }
222         if (error == 0) {
223                 vn_lock(volume->devvp, LK_EXCLUSIVE | LK_RETRY);
224                 error = VOP_OPEN(volume->devvp, (ronly ? FREAD : FREAD|FWRITE),
225                                  FSCRED, NULL);
226                 vn_unlock(volume->devvp);
227         }
228         if (error) {
229                 hammer_free_volume(volume);
230                 return(error);
231         }
232
233         /*
234          * Extract the volume number from the volume header and do various
235          * sanity checks.
236          */
237         error = bread(volume->devvp, 0LL, HAMMER_BUFSIZE, &bp);
238         if (error)
239                 goto late_failure;
240         ondisk = (void *)bp->b_data;
241         if (ondisk->head.buf_type != HAMMER_FSBUF_VOLUME) {
242                 kprintf("hammer_mount: volume %s has an invalid header\n",
243                         volume->vol_name);
244                 error = EFTYPE;
245                 goto late_failure;
246         }
247         volume->vol_no = ondisk->vol_no;
248         volume->cluster_base = ondisk->vol_clo_beg;
249         volume->vol_clsize = ondisk->vol_clsize;
250         volume->vol_flags = ondisk->vol_flags;
251         RB_INIT(&volume->rb_clus_root);
252         RB_INIT(&volume->rb_scls_root);
253
254         if (RB_EMPTY(&hmp->rb_vols_root)) {
255                 hmp->fsid = ondisk->vol_fsid;
256         } else if (bcmp(&hmp->fsid, &ondisk->vol_fsid, sizeof(uuid_t))) {
257                 kprintf("hammer_mount: volume %s's fsid does not match "
258                         "other volumes\n", volume->vol_name);
259                 error = EFTYPE;
260                 goto late_failure;
261         }
262
263         /*
264          * Insert the volume structure into the red-black tree.
265          */
266         if (RB_INSERT(hammer_vol_rb_tree, &hmp->rb_vols_root, volume)) {
267                 kprintf("hammer_mount: volume %s has a duplicate vol_no %d\n",
268                         volume->vol_name, volume->vol_no);
269                 error = EEXIST;
270         }
271
272         /*
273          * Set the root volume and load the root cluster.  HAMMER special
274          * cases rootvol and rootcl and will not deallocate the structures.
275          * We do not hold a ref because this would prevent related I/O
276          * from being flushed.
277          */
278         if (error == 0 && ondisk->vol_rootvol == ondisk->vol_no) {
279                 hmp->rootvol = volume;
280                 hmp->rootcl = hammer_get_cluster(volume,
281                                                  ondisk->vol0_root_clu_no,
282                                                  &error, 0);
283                 hammer_rel_cluster(hmp->rootcl, 0);
284                 hmp->fsid_udev = dev2udev(vn_todev(volume->devvp));
285         }
286 late_failure:
287         if (bp)
288                 brelse(bp);
289         if (error) {
290                 /*vinvalbuf(volume->devvp, V_SAVE, 0, 0);*/
291                 VOP_CLOSE(volume->devvp, ronly ? FREAD : FREAD|FWRITE);
292                 hammer_free_volume(volume);
293         }
294         return (error);
295 }
296
297 /*
298  * Unload and free a HAMMER volume.  Called from RB_SCAN; must return >= 0
299  * for the scan to continue (-1 would abort the scan on failure).
300  */
301 int
302 hammer_unload_volume(hammer_volume_t volume, void *data __unused)
303 {
304         struct hammer_mount *hmp = volume->hmp;
305         hammer_cluster_t rootcl;
306         int ronly = ((hmp->mp->mnt_flag & MNT_RDONLY) ? 1 : 0);
307
308         /*
309          * Sync clusters, sync volume
310          */
311
312
313         /*
314          * Clean up the root cluster, which is held unlocked in the root
315          * volume.
316          */
317         if (hmp->rootvol == volume) {
318                 if ((rootcl = hmp->rootcl) != NULL)
319                         hmp->rootcl = NULL;
320                 hmp->rootvol = NULL;
321         }
322
323         /*
324          * Unload clusters and super-clusters.  Unloading a super-cluster
325          * also unloads related clusters, but the filesystem may not be
326          * using super-clusters so unload clusters anyway.
327          */
328         RB_SCAN(hammer_clu_rb_tree, &volume->rb_clus_root, NULL,
329                         hammer_unload_cluster, NULL);
330         RB_SCAN(hammer_scl_rb_tree, &volume->rb_scls_root, NULL,
331                         hammer_unload_supercl, NULL);
332
333         /*
334          * Release our buffer and flush anything left in the buffer cache.
335          */
336         hammer_io_release(&volume->io, 1);
337
338         /*
339          * There should be no references on the volume.
340          */
341         KKASSERT(volume->io.lock.refs == 0);
342
343         volume->ondisk = NULL;
344         if (volume->devvp) {
345                 if (ronly) {
346                         vinvalbuf(volume->devvp, 0, 0, 0);
347                         VOP_CLOSE(volume->devvp, FREAD);
348                 } else {
349                         vinvalbuf(volume->devvp, V_SAVE, 0, 0);
350                         VOP_CLOSE(volume->devvp, FREAD|FWRITE);
351                 }
352         }
353
354         /*
355          * Destroy the structure
356          */
357         RB_REMOVE(hammer_vol_rb_tree, &hmp->rb_vols_root, volume);
358         hammer_free_volume(volume);
359         return(0);
360 }
361
362 static
363 void
364 hammer_free_volume(hammer_volume_t volume)
365 {
366         if (volume->vol_name) {
367                 kfree(volume->vol_name, M_HAMMER);
368                 volume->vol_name = NULL;
369         }
370         if (volume->devvp) {
371                 vrele(volume->devvp);
372                 volume->devvp = NULL;
373         }
374         kfree(volume, M_HAMMER);
375 }
376
377 /*
378  * Get a HAMMER volume.  The volume must already exist.
379  */
380 hammer_volume_t
381 hammer_get_volume(struct hammer_mount *hmp, int32_t vol_no, int *errorp)
382 {
383         struct hammer_volume *volume;
384
385         /*
386          * Locate the volume structure
387          */
388         volume = RB_LOOKUP(hammer_vol_rb_tree, &hmp->rb_vols_root, vol_no);
389         if (volume == NULL) {
390                 *errorp = ENOENT;
391                 return(NULL);
392         }
393         hammer_ref(&volume->io.lock);
394
395         /*
396          * Deal with on-disk info
397          */
398         if (volume->ondisk == NULL) {
399                 *errorp = hammer_load_volume(volume);
400                 if (*errorp) {
401                         hammer_rel_volume(volume, 1);
402                         volume = NULL;
403                 }
404         } else {
405                 *errorp = 0;
406         }
407         return(volume);
408 }
409
410 hammer_volume_t
411 hammer_get_root_volume(struct hammer_mount *hmp, int *errorp)
412 {
413         hammer_volume_t volume;
414
415         volume = hmp->rootvol;
416         KKASSERT(volume != NULL);
417         hammer_ref(&volume->io.lock);
418
419         /*
420          * Deal with on-disk info
421          */
422         if (volume->ondisk == NULL) {
423                 *errorp = hammer_load_volume(volume);
424                 if (*errorp) {
425                         hammer_rel_volume(volume, 1);
426                         volume = NULL;
427                 }
428         } else {
429                 *errorp = 0;
430         }
431         return (volume);
432 }
433
434 /*
435  * Load a volume's on-disk information.  The volume must be referenced and
436  * not locked.  We temporarily acquire an exclusive lock to interlock
438  * against releases or multiple gets.
438  */
439 static int
440 hammer_load_volume(hammer_volume_t volume)
441 {
442         struct hammer_volume_ondisk *ondisk;
443         int error;
444
445         hammer_lock_ex(&volume->io.lock);
446         if (volume->ondisk == NULL) {
447                 error = hammer_io_read(volume->devvp, &volume->io);
448                 if (error) {
449                         hammer_unlock(&volume->io.lock);
450                         return (error);
451                 }
452                 volume->ondisk = ondisk = (void *)volume->io.bp->b_data;
453
454                 /*
455                  * Configure the volume's A-lists.  These are used to
456                  * allocate clusters.
457                  */
458                 if (volume->vol_flags & HAMMER_VOLF_USINGSUPERCL) {
459                         volume->alist.config = &Vol_super_alist_config;
460                         volume->alist.meta = ondisk->vol_almeta.super;
461                         volume->alist.info = volume;
462                 } else {
463                         volume->alist.config = &Vol_normal_alist_config;
464                         volume->alist.meta = ondisk->vol_almeta.normal;
465                         volume->alist.info = NULL;
466                 }
467                 hammer_alist_init(&volume->alist);
468         } else {
469                 error = 0;
470         }
471         hammer_unlock(&volume->io.lock);
472         return(0);
473 }
474
475 /*
476  * Release a volume.  Call hammer_io_release on the last reference.  We have
477  * to acquire an exclusive lock to interlock against volume->ondisk tests
478  * in hammer_load_volume().
479  */
480 void
481 hammer_rel_volume(hammer_volume_t volume, int flush)
482 {
483         if (hammer_islastref(&volume->io.lock)) {
484                 hammer_lock_ex(&volume->io.lock);
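                /*
                 * Re-test islastref after acquiring the lock; another thread
                 * may have gained a reference while we were blocked.
                 */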
485                 if (hammer_islastref(&volume->io.lock)) {
486                         volume->ondisk = NULL;
487                         hammer_io_release(&volume->io, flush);
488                 }
489                 hammer_unlock(&volume->io.lock);
490         }
491         hammer_unref(&volume->io.lock);
492 }
493
494 /************************************************************************
495  *                              SUPER-CLUSTERS                          *
496  ************************************************************************
497  *
498  * Manage super-clusters.  Note that a supercl holds a reference to its
499  * associated volume.
500  */
501 hammer_supercl_t
502 hammer_get_supercl(hammer_volume_t volume, int32_t scl_no,
503                    int *errorp, int isnew)
504 {
505         hammer_supercl_t supercl;
506
507         /*
508          * Locate and lock the super-cluster structure, creating one
509          * if necessary.
510          */
511 again:
512         supercl = RB_LOOKUP(hammer_scl_rb_tree, &volume->rb_scls_root, scl_no);
513         if (supercl == NULL) {
514                 supercl = kmalloc(sizeof(*supercl), M_HAMMER, M_WAITOK|M_ZERO);
515                 supercl->scl_no = scl_no;
516                 supercl->volume = volume;
517                 supercl->io.offset = calculate_supercl_offset(volume, scl_no);
518                 supercl->io.type = HAMMER_STRUCTURE_SUPERCL;
519                 hammer_ref(&supercl->io.lock);
520
521                 /*
522                  * Insert the super-cluster into the RB tree and handle late
523                  * collisions.
524                  */
525                 if (RB_INSERT(hammer_scl_rb_tree, &volume->rb_scls_root, supercl)) {
526                         hammer_unref(&supercl->io.lock);
527                         kfree(supercl, M_HAMMER);
528                         goto again;
529                 }
530                 hammer_ref(&volume->io.lock);
531         } else {
532                 hammer_ref(&supercl->io.lock);
533         }
534
535         /*
536          * Deal with on-disk info
537          */
538         if (supercl->ondisk == NULL || isnew) {
539                 *errorp = hammer_load_supercl(supercl, isnew);
540                 if (*errorp) {
541                         hammer_rel_supercl(supercl, 1);
542                         supercl = NULL;
543                 }
544         } else {
545                 *errorp = 0;
546         }
547         return(supercl);
548 }
549
550 static int
551 hammer_load_supercl(hammer_supercl_t supercl, int isnew)
552 {
553         struct hammer_supercl_ondisk *ondisk;
554         hammer_volume_t volume = supercl->volume;
555         int error;
556
557         hammer_lock_ex(&supercl->io.lock);
558         if (supercl->ondisk == NULL) {
559                 if (isnew)
560                         error = hammer_io_new(volume->devvp, &supercl->io);
561                 else
562                         error = hammer_io_read(volume->devvp, &supercl->io);
563                 if (error) {
564                         hammer_unlock(&supercl->io.lock);
565                         return (error);
566                 }
567                 supercl->ondisk = ondisk = (void *)supercl->io.bp->b_data;
568
569                 supercl->alist.config = &Supercl_alist_config;
570                 supercl->alist.meta = ondisk->scl_meta;
571                 supercl->alist.info = NULL;
572         } else if (isnew) {
573                 error = hammer_io_new(volume->devvp, &supercl->io);
574         } else {
575                 error = 0;
576         }
577         if (error == 0 && isnew) {
578                 /*
579                  * If this is a new super-cluster we have to initialize
580                  * various ondisk structural elements.  The caller is
581                  * responsible for the remainder.
582                  */
583                 struct hammer_alist_live dummy;
584
585                 ondisk = supercl->ondisk;
586                 dummy.config = &Buf_alist_config;
587                 dummy.meta = ondisk->head.buf_almeta;
588                 dummy.info = NULL;
589                 initbuffer(&dummy, &ondisk->head, HAMMER_FSBUF_SUPERCL);
590                 hammer_alist_init(&supercl->alist);
591         }
592         hammer_unlock(&supercl->io.lock);
593         return (error);
594 }
595
596 /*
597  * NOTE: Called from RB_SCAN, must return >= 0 for scan to continue.
598  */
599 int
600 hammer_unload_supercl(hammer_supercl_t supercl, void *data __unused)
601 {
602         KKASSERT(supercl->io.lock.refs == 0);
603         hammer_ref(&supercl->io.lock);
604         hammer_io_release(&supercl->io, 1);
605         hammer_rel_supercl(supercl, 1);
606         return(0);
607 }
608
609 /*
610  * Release a super-cluster.  We have to deal with several places where
611  * another thread can ref the super-cluster.
612  *
613  * Only destroy the structure itself if the related buffer cache buffer
614  * was disassociated from it.  This ties the management of the structure
615  * to the buffer cache subsystem.
616  */
617 void
618 hammer_rel_supercl(hammer_supercl_t supercl, int flush)
619 {
620         hammer_volume_t volume;
621
622         if (hammer_islastref(&supercl->io.lock)) {
623                 hammer_lock_ex(&supercl->io.lock);
624                 if (hammer_islastref(&supercl->io.lock)) {
625                         hammer_io_release(&supercl->io, flush);
626                         if (supercl->io.bp == NULL &&
627                             hammer_islastref(&supercl->io.lock)) {
628                                 volume = supercl->volume;
629                                 RB_REMOVE(hammer_scl_rb_tree,
630                                           &volume->rb_scls_root, supercl);
631                                 supercl->volume = NULL; /* sanity */
632                                 kfree(supercl, M_HAMMER);
633                                 hammer_rel_volume(volume, 0);
634                                 return;
635                         }
636                 }
637                 hammer_unlock(&supercl->io.lock);
638         }
639         hammer_unref(&supercl->io.lock);
640 }
641
642 /************************************************************************
643  *                              CLUSTERS                                *
644  ************************************************************************
645  *
646  * Manage clusters.  Note that a cluster holds a reference to its
647  * associated volume.
648  */
649 hammer_cluster_t
650 hammer_get_cluster(hammer_volume_t volume, int32_t clu_no,
651                    int *errorp, int isnew)
652 {
653         hammer_cluster_t cluster;
654
655 again:
656         cluster = RB_LOOKUP(hammer_clu_rb_tree, &volume->rb_clus_root, clu_no);
657         if (cluster == NULL) {
658                 cluster = kmalloc(sizeof(*cluster), M_HAMMER, M_WAITOK|M_ZERO);
659                 cluster->clu_no = clu_no;
660                 cluster->volume = volume;
661                 cluster->io.offset = calculate_cluster_offset(volume, clu_no);
662                 RB_INIT(&cluster->rb_bufs_root);
663                 RB_INIT(&cluster->rb_nods_root);
664                 cluster->io.type = HAMMER_STRUCTURE_CLUSTER;
665                 hammer_ref(&cluster->io.lock);
666
667                 /*
668                  * Insert the cluster into the RB tree and handle late
669                  * collisions.
670                  */
671                 if (RB_INSERT(hammer_clu_rb_tree, &volume->rb_clus_root, cluster)) {
672                         hammer_unref(&cluster->io.lock);
673                         kfree(cluster, M_HAMMER);
674                         goto again;
675                 }
676                 hammer_ref(&volume->io.lock);
677         } else {
678                 hammer_ref(&cluster->io.lock);
679         }
680
681         /*
682          * Deal with on-disk info
683          */
684         if (cluster->ondisk == NULL || isnew) {
685                 *errorp = hammer_load_cluster(cluster, isnew);
686                 if (*errorp) {
687                         hammer_rel_cluster(cluster, 1);
688                         cluster = NULL;
689                 }
690         } else {
691                 *errorp = 0;
692         }
693         return (cluster);
694 }
695
696 hammer_cluster_t
697 hammer_get_root_cluster(struct hammer_mount *hmp, int *errorp)
698 {
699         hammer_cluster_t cluster;
700
701         cluster = hmp->rootcl;
702         KKASSERT(cluster != NULL);
703         hammer_ref(&cluster->io.lock);
704
705         /*
706          * Deal with on-disk info
707          */
708         if (cluster->ondisk == NULL) {
709                 *errorp = hammer_load_cluster(cluster, 0);
710                 if (*errorp) {
711                         hammer_rel_cluster(cluster, 1);
712                         cluster = NULL;
713                 }
714         } else {
715                 *errorp = 0;
716         }
717         return (cluster);
718 }
719
720 static
721 int
722 hammer_load_cluster(hammer_cluster_t cluster, int isnew)
723 {
724         hammer_volume_t volume = cluster->volume;
725         struct hammer_cluster_ondisk *ondisk;
726         int error;
727
728         /*
729          * Load the cluster's on-disk info
730          */
731         hammer_lock_ex(&cluster->io.lock);
732         if (cluster->ondisk == NULL) {
733                 if (isnew)
734                         error = hammer_io_new(volume->devvp, &cluster->io);
735                 else
736                         error = hammer_io_read(volume->devvp, &cluster->io);
737                 if (error) {
738                         hammer_unlock(&cluster->io.lock);
739                         return (error);
740                 }
741                 cluster->ondisk = ondisk = (void *)cluster->io.bp->b_data;
742
743                 cluster->alist_master.config = &Clu_master_alist_config;
744                 cluster->alist_master.meta = ondisk->clu_master_meta;
745                 cluster->alist_btree.config = &Clu_slave_alist_config;
746                 cluster->alist_btree.meta = ondisk->clu_btree_meta;
747                 cluster->alist_btree.info = cluster;
748                 cluster->alist_record.config = &Clu_slave_alist_config;
749                 cluster->alist_record.meta = ondisk->clu_record_meta;
750                 cluster->alist_record.info = cluster;
751                 cluster->alist_mdata.config = &Clu_slave_alist_config;
752                 cluster->alist_mdata.meta = ondisk->clu_mdata_meta;
753                 cluster->alist_mdata.info = cluster;
754
755                 cluster->clu_btree_beg = ondisk->clu_btree_beg;
756                 cluster->clu_btree_end = ondisk->clu_btree_end;
757         } else if (isnew) {
758                 error = hammer_io_new(volume->devvp, &cluster->io);
759         } else {
760                 error = 0;
761         }
762         if (error == 0 && isnew) {
763                 /*
764                  * If this is a new cluster we have to initialize
765                  * various ondisk structural elements.  The caller is
766                  * responsible for the remainder.
767                  */
768                 struct hammer_alist_live dummy;
769
770                 ondisk = cluster->ondisk;
771
772                 dummy.config = &Buf_alist_config;
773                 dummy.meta = ondisk->head.buf_almeta;
774                 dummy.info = NULL;
775                 initbuffer(&dummy, &ondisk->head, HAMMER_FSBUF_CLUSTER);
776
777                 hammer_alist_init(&cluster->alist_master);
778                 hammer_alist_init(&cluster->alist_btree);
779                 hammer_alist_init(&cluster->alist_record);
780                 hammer_alist_init(&cluster->alist_mdata);
781         }
782         hammer_unlock(&cluster->io.lock);
783         return (error);
784 }
785
786 /*
787  * NOTE: Called from RB_SCAN, must return >= 0 for scan to continue.
788  */
789 int
790 hammer_unload_cluster(hammer_cluster_t cluster, void *data __unused)
791 {
792         hammer_ref(&cluster->io.lock);
793         RB_SCAN(hammer_buf_rb_tree, &cluster->rb_bufs_root, NULL,
794                         hammer_unload_buffer, NULL);
795         KKASSERT(cluster->io.lock.refs == 1);
796         hammer_io_release(&cluster->io, 1);
797         hammer_rel_cluster(cluster, 1);
798         return(0);
799 }
800
801 /*
802  * Reference a cluster that is either already referenced or reachable via a
803  * specially handled pointer (aka rootcl).
804  */
805 int
806 hammer_ref_cluster(hammer_cluster_t cluster)
807 {
808         int error;
809
810         KKASSERT(cluster != NULL);
811         hammer_ref(&cluster->io.lock);
812
813         /*
814          * Deal with on-disk info
815          */
816         if (cluster->ondisk == NULL) {
817                 error = hammer_load_cluster(cluster, 0);
818                 if (error)
819                         hammer_rel_cluster(cluster, 1);
820         } else {
821                 error = 0;
822         }
823         return(error);
824 }
825
826 /*
827  * Release a cluster.  We have to deal with several places where
828  * another thread can ref the cluster.
829  *
830  * Only destroy the structure itself if the related buffer cache buffer
831  * was disassociated from it.  This ties the management of the structure
832  * to the buffer cache subsystem.
833  */
834 void
835 hammer_rel_cluster(hammer_cluster_t cluster, int flush)
836 {
837         hammer_node_t node;
838         hammer_volume_t volume;
839
840         if (hammer_islastref(&cluster->io.lock)) {
841                 hammer_lock_ex(&cluster->io.lock);
842                 if (hammer_islastref(&cluster->io.lock)) {
843                         hammer_io_release(&cluster->io, flush);
844
845                         /*
846                          * Clean out the B-Tree node cache, if any, then
847                          * clean up the volume ref and free the cluster.
848                          *
849                          * If the cluster acquires a new reference while we
850                          * are trying to clean it out, abort the cleaning.
851                          *
852                          * There really shouldn't be any nodes at this point
853                          * but we allow a node with no buffer association
854                          * so handle the case.
855                          */
856                         while (cluster->io.bp == NULL &&
857                                hammer_islastref(&cluster->io.lock) &&
858                                (node = RB_ROOT(&cluster->rb_nods_root)) != NULL
859                         ) {
860                                 KKASSERT(node->lock.refs == 0);
861                                 hammer_flush_node(node);
862                         }
863                         if (cluster->io.bp == NULL &&
864                             hammer_islastref(&cluster->io.lock)) {
865                                 volume = cluster->volume;
866                                 RB_REMOVE(hammer_clu_rb_tree,
867                                           &volume->rb_clus_root, cluster);
868                                 cluster->volume = NULL; /* sanity */
869                                 kfree(cluster, M_HAMMER);
870                                 hammer_rel_volume(volume, 0);
871                                 return;
872                         }
873                 }
874                 hammer_unlock(&cluster->io.lock);
875         }
876         hammer_unref(&cluster->io.lock);
877 }
878
879 /************************************************************************
880  *                              BUFFERS                                 *
881  ************************************************************************
882  *
883  * Manage buffers.  Note that a buffer holds a reference to its associated
884  * cluster, and its cluster will hold a reference to the cluster's volume.
885  *
886  * A non-zero buf_type indicates that a new buffer should be created and
887  * zero'd.
888  */
889 hammer_buffer_t
890 hammer_get_buffer(hammer_cluster_t cluster, int32_t buf_no,
891                   u_int64_t buf_type, int *errorp)
892 {
893         hammer_buffer_t buffer;
894
895         /*
896          * Find the buffer.  Note that buffer 0 corresponds to the cluster
897          * header and should never be requested.
898          */
899         KKASSERT(buf_no >= cluster->ondisk->clu_start / HAMMER_BUFSIZE &&
900                  buf_no < cluster->ondisk->clu_limit / HAMMER_BUFSIZE);
901
902         /*
903          * Locate and lock the buffer structure, creating one if necessary.
904          */
905 again:
906         buffer = RB_LOOKUP(hammer_buf_rb_tree, &cluster->rb_bufs_root, buf_no);
907         if (buffer == NULL) {
908                 buffer = kmalloc(sizeof(*buffer), M_HAMMER, M_WAITOK|M_ZERO);
909                 buffer->buf_no = buf_no;
910                 buffer->cluster = cluster;
911                 buffer->volume = cluster->volume;
912                 buffer->io.offset = cluster->io.offset +
913                                     (buf_no * HAMMER_BUFSIZE);
914                 buffer->io.type = HAMMER_STRUCTURE_BUFFER;
915                 TAILQ_INIT(&buffer->clist);
916                 hammer_ref(&buffer->io.lock);
917
918                 /*
919                  * Insert the buffer into the RB tree and handle late
920                  * collisions.
921                  */
922                 if (RB_INSERT(hammer_buf_rb_tree, &cluster->rb_bufs_root, buffer)) {
923                         hammer_unref(&buffer->io.lock);
924                         kfree(buffer, M_HAMMER);
925                         goto again;
926                 }
927                 hammer_ref(&cluster->io.lock);
928         } else {
929                 hammer_ref(&buffer->io.lock);
930         }
931
932         /*
933          * Deal with on-disk info
934          */
935         if (buffer->ondisk == NULL || buf_type) {
936                 *errorp = hammer_load_buffer(buffer, buf_type);
937                 if (*errorp) {
938                         hammer_rel_buffer(buffer, 1);
939                         buffer = NULL;
940                 }
941         } else {
942                 *errorp = 0;
943         }
944         return(buffer);
945 }
946
947 static int
948 hammer_load_buffer(hammer_buffer_t buffer, u_int64_t buf_type)
949 {
950         hammer_volume_t volume;
951         hammer_fsbuf_ondisk_t ondisk;
952         int error;
953
954         /*
955          * Load the buffer's on-disk info
956          */
957         volume = buffer->volume;
958         hammer_lock_ex(&buffer->io.lock);
959         if (buffer->ondisk == NULL) {
960                 if (buf_type) {
961                         error = hammer_io_new(volume->devvp, &buffer->io);
962                 } else {
963                         error = hammer_io_read(volume->devvp, &buffer->io);
964                 }
965                 if (error) {
966                         hammer_unlock(&buffer->io.lock);
967                         return (error);
968                 }
969                 buffer->ondisk = ondisk = (void *)buffer->io.bp->b_data;
970                 buffer->alist.config = &Buf_alist_config;
971                 buffer->alist.meta = ondisk->head.buf_almeta;
972                 buffer->buf_type = ondisk->head.buf_type;
973         } else if (buf_type) {
974                 error = hammer_io_new(volume->devvp, &buffer->io);
975         } else {
976                 error = 0;
977         }
978         if (error == 0 && buf_type) {
979                 ondisk = buffer->ondisk;
980                 initbuffer(&buffer->alist, &ondisk->head, buf_type);
981                 buffer->buf_type = ondisk->head.buf_type;
982         }
983         hammer_unlock(&buffer->io.lock);
984         return (error);
985 }
986
987 /*
988  * NOTE: Called from RB_SCAN, must return >= 0 for scan to continue.
989  */
990 int
991 hammer_unload_buffer(hammer_buffer_t buffer, void *data __unused)
992 {
993         hammer_ref(&buffer->io.lock);
994         hammer_flush_buffer_nodes(buffer);
995         hammer_io_release(&buffer->io, 1);
996         KKASSERT(buffer->io.lock.refs == 1);
997         hammer_rel_buffer(buffer, 1);
998         return(0);
999 }
1000
1001 /*
1002  * Reference a buffer that is either already referenced or reachable via a
1003  * specially handled pointer (aka cursor->buffer).
1004  */
1005 int
1006 hammer_ref_buffer(hammer_buffer_t buffer)
1007 {
1008         int error;
1009
1010         hammer_ref(&buffer->io.lock);
1011         if (buffer->ondisk == NULL) {
1012                 error = hammer_load_buffer(buffer, 0);
1013                 if (error) {
1014                         hammer_rel_buffer(buffer, 1);
1015                         /*
1016                          * NOTE: buffer pointer can become stale after
1017                          * the above release.
1018                          */
1019                 } else {
1020                         KKASSERT(buffer->buf_type ==
1021                                  buffer->ondisk->head.buf_type);
1022                 }
1023         } else {
1024                 error = 0;
1025         }
1026         return(error);
1027 }
1028
1029 /*
1030  * Release a buffer.  We have to deal with several places where
1031  * another thread can ref the buffer.
1032  *
1033  * Only destroy the structure itself if the related buffer cache buffer
1034  * was disassociated from it.  This ties the management of the structure
1035  * to the buffer cache subsystem.  buffer->ondisk determines whether the
1036  * embedded io is referenced or not.
1037  */
1038 void
1039 hammer_rel_buffer(hammer_buffer_t buffer, int flush)
1040 {
1041         hammer_cluster_t cluster;
1042         hammer_node_t node;
1043
1044         if (hammer_islastref(&buffer->io.lock)) {
1045                 hammer_lock_ex(&buffer->io.lock);
1046                 if (hammer_islastref(&buffer->io.lock)) {
1047                         hammer_io_release(&buffer->io, flush);
1048
1049                         /*
1050                          * Clean out the B-Tree node cache, if any, then
1051                          * clean up the cluster ref and free the buffer.
1052                          *
1053                          * If the buffer acquires a new reference while we
1054                          * are trying to clean it out, abort the cleaning.
1055                          */
1056                         while (buffer->io.bp == NULL &&
1057                                hammer_islastref(&buffer->io.lock) &&
1058                                (node = TAILQ_FIRST(&buffer->clist)) != NULL
1059                         ) {
1060                                 KKASSERT(node->lock.refs == 0);
1061                                 hammer_flush_node(node);
1062                         }
1063                         if (buffer->io.bp == NULL &&
1064                             hammer_islastref(&buffer->io.lock)) {
1065                                 cluster = buffer->cluster;
1066                                 RB_REMOVE(hammer_buf_rb_tree,
1067                                           &cluster->rb_bufs_root, buffer);
1068                                 buffer->cluster = NULL; /* sanity */
1069                                 kfree(buffer, M_HAMMER);
1070                                 hammer_rel_cluster(cluster, 0);
1071                                 return;
1072                         }
1073                 }
1074                 hammer_unlock(&buffer->io.lock);
1075         }
1076         hammer_unref(&buffer->io.lock);
1077 }
1078
1079 /*
1080  * Flush passively cached B-Tree nodes associated with this buffer.
1081  *
1082  * NOTE: The buffer is referenced and locked.
1083  */
1084 void
1085 hammer_flush_buffer_nodes(hammer_buffer_t buffer)
1086 {
1087         hammer_node_t node;
1088
1089         node = TAILQ_FIRST(&buffer->clist);
1090         while (node) {
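                /*
                 * save_scan lets hammer_remove_node_clist() adjust our
                 * iteration point if nodes are removed while we block
                 * in hammer_flush_node().
                 */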
1091                 buffer->save_scan = TAILQ_NEXT(node, entry);
1092                 if (node->lock.refs == 0)
1093                         hammer_flush_node(node);
1094                 node = buffer->save_scan;
1095         }
1096 }
1097
1098 /************************************************************************
1099  *                              NODES                                   *
1100  ************************************************************************
1101  *
1102  * Manage B-Tree nodes.  B-Tree nodes represent the primary indexing
1103  * method used by the HAMMER filesystem.
1104  *
1105  * Unlike other HAMMER structures, a hammer_node can be PASSIVELY
1106  * associated with its buffer.  It can have an active buffer reference
1107  * even when the node itself has no references.  The node also passively
1108  * associates itself with its cluster without holding any cluster refs.
1109  * The cluster ref is indirectly maintained by the active buffer ref when
1110  * a node is acquired.
1111  *
1112  * A hammer_node can also be passively associated with other HAMMER
1113  * structures, such as inodes, while retaining 0 references.  These
1114  * associations can be cleared backwards using a pointer-to-pointer in
1115  * the hammer_node.
1116  *
1117  * This allows the HAMMER implementation to cache hammer_nodes long-term
1118  * and short-cut a great deal of the infrastructure's complexity.  In
1119  * most cases a cached node can be reacquired without having to dip into
1120  * either the buffer or cluster management code.
1121  *
1122  * The caller must pass a referenced cluster on call and will retain
1123  * ownership of the reference on return.  The node will acquire its own
1124  * additional references, if necessary.
1125  */
1126 hammer_node_t
1127 hammer_get_node(hammer_cluster_t cluster, int32_t node_offset, int *errorp)
1128 {
1129         hammer_node_t node;
1130
1131         /*
1132          * Locate the structure, allocating one if necessary.
1133          */
1134 again:
1135         node = RB_LOOKUP(hammer_nod_rb_tree, &cluster->rb_nods_root,
1136                          node_offset);
1137         if (node == NULL) {
1138                 node = kmalloc(sizeof(*node), M_HAMMER, M_WAITOK|M_ZERO);
1139                 node->node_offset = node_offset;
1140                 node->cluster = cluster;
1141                 if (RB_INSERT(hammer_nod_rb_tree, &cluster->rb_nods_root,
1142                               node)) {
1143                         kfree(node, M_HAMMER);
1144                         goto again;
1145                 }
1146         }
1147         *errorp = hammer_ref_node(node);
1148         if (*errorp) {
1149                 /*
1150                  * NOTE: The node pointer may be stale on error return.
1151                  * In fact, it's probably been destroyed.
1152                  */
1153                 node = NULL;
1154         }
1155         return(node);
1156 }
1157
1158 /*
1159  * Reference the node to prevent disassociations, then associate and
1160  * load the related buffer.  This routine can also be called to reference
1161  * a node from a cache pointer.
1162  *
1163  * NOTE: Because the caller does not have a ref on the node, the caller's
1164  * node pointer will be stale if an error is returned.  We may also wind
1165  * up clearing the related cache pointers.
1166  *
1167  * NOTE: The cluster is indirectly referenced by our buffer ref.
1168  */
1169 int
1170 hammer_ref_node(hammer_node_t node)
1171 {
1172         hammer_buffer_t buffer;
1173         int32_t buf_no;
1174         int error;
1175
1176         hammer_ref(&node->lock);
1177         error = 0;
1178         if (node->ondisk == NULL) {
1179                 hammer_lock_ex(&node->lock);
1180                 if (node->ondisk == NULL) {
1181                         /*
1182                          * This is a little confusing but the gist is that
1183                          * node->buffer determines whether the node is on
1184                          * the buffer's clist and node->ondisk determines
1185                          * whether the buffer is referenced.
1186                          */
1187                         if ((buffer = node->buffer) != NULL) {
1188                                 error = hammer_ref_buffer(buffer);
1189                         } else {
1190                                 buf_no = node->node_offset / HAMMER_BUFSIZE;
1191                                 buffer = hammer_get_buffer(node->cluster,
1192                                                            buf_no, 0, &error);
1193                                 if (buffer) {
1194                                         KKASSERT(error == 0);
1195                                         TAILQ_INSERT_TAIL(&buffer->clist,
1196                                                           node, entry);
1197                                         node->buffer = buffer;
1198                                 }
1199                         }
1200                         if (error == 0) {
1201                                 node->ondisk = (void *)((char *)buffer->ondisk +
1202                                        (node->node_offset & HAMMER_BUFMASK));
1203                         }
1204                 }
1205                 hammer_unlock(&node->lock);
1206         }
1207         if (error)
1208                 hammer_rel_node(node);
1209         return (error);
1210 }
1211
1212 /*
1213  * Release a hammer_node.  The node retains a passive association with
1214  * its cluster, buffer and caches.
1215  *
1216  * However, to avoid cluttering up kernel memory with tons of B-Tree
1217  * node cache structures we destroy the node if no passive cache or
1218  * (instantiated) buffer references exist.
1219  */
1220 void
1221 hammer_rel_node(hammer_node_t node)
1222 {
1223         hammer_cluster_t cluster;
1224         hammer_buffer_t buffer;
1225
1226         if (hammer_islastref(&node->lock)) {
1227                 cluster = node->cluster;
1228                 /*
1229                  * Clutter control: this case only occurs after a failed
1230                  * load since otherwise ondisk will be non-NULL.
1231                  */
1232                 if (node->cache1 == NULL && node->cache2 == NULL && 
1233                     node->ondisk == NULL) {
1234                         RB_REMOVE(hammer_nod_rb_tree, &cluster->rb_nods_root,
1235                                   node);
1236                         if ((buffer = node->buffer) != NULL) {
1237                                 node->buffer = NULL;
1238                                 hammer_remove_node_clist(buffer, node);
1239                         }
1240                         kfree(node, M_HAMMER);
1241                         return;
1242                 }
1243
1244                 /*
1245                  * node->ondisk determines whether we have a buffer reference
1246                  * to get rid of or not.  Only get rid of the reference if
1247                  * the kernel tried to flush the buffer.
1248                  *
1249                  * NOTE: Once unref'd the node can be physically destroyed,
1250                          * so our node is stale afterwards.
1251                  *
1252                  * This case occurs if the node still has cache references.
1253                  * We could remove the references and free the structure
1254                  * but for now we allow them (and the node structure) to
1255                  * remain intact.
1256                  */
1257                 if (node->ondisk && hammer_io_checkflush(&node->buffer->io)) {
1258                         buffer = node->buffer;
1259                         node->buffer = NULL;
1260                         node->ondisk = NULL;
1261                         hammer_remove_node_clist(buffer, node);
1262                         hammer_unref(&node->lock);
1263                         hammer_rel_buffer(buffer, 0);
1264                 } else {
1265                         hammer_unref(&node->lock);
1266                 }
1267         } else {
1268                 hammer_unref(&node->lock);
1269         }
1270 }
1271
1272 /*
1273  * Cache-and-release a hammer_node.  Kinda like catching and releasing a
1274  * fish, but keeping an eye on him.  The node is passively cached in *cache.
1275  *
1276  * NOTE!  HAMMER may NULL *cache at any time, even after you have
1277  * referenced the node!
1278  */
1279 void
1280 hammer_cache_node(hammer_node_t node, struct hammer_node **cache)
1281 {
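        /*
         * node->cache1 is the most recently used backlink and node->cache2
         * the previous one; a hit on cache2 promotes it by swapping.
         */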
1282         if (node->cache1 != cache) {
1283                 if (node->cache2 == cache) {
1284                         struct hammer_node **tmp;
1285                         tmp = node->cache1;
1286                         node->cache1 = node->cache2;
1287                         node->cache2 = tmp;
1288                 } else {
1289                         if (node->cache2)
1290                                 *node->cache2 = NULL;
1291                         node->cache2 = node->cache1;
1292                         node->cache1 = cache;
1293                         *cache = node;
1294                 }
1295         }
1296 }
1297
1298 void
1299 hammer_uncache_node(struct hammer_node **cache)
1300 {
1301         hammer_node_t node;
1302
1303         if ((node = *cache) != NULL) {
1304                 *cache = NULL;
1305                 if (node->cache1 == cache) {
1306                         node->cache1 = node->cache2;
1307                         node->cache2 = NULL;
1308                 } else if (node->cache2 == cache) {
1309                         node->cache2 = NULL;
1310                 } else {
1311                         panic("hammer_uncache_node: missing cache linkage");
1312                 }
1313                 if (node->cache1 == NULL && node->cache2 == NULL &&
1314                     node->lock.refs == 0) {
1315                         hammer_flush_node(node);
1316                 }
1317         }
1318 }
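
/*
 * Illustrative sketch: one way a caller might use the passive node cache
 * described above.  The holder structure, its 'cache' field, and the
 * function name are hypothetical; real callers embed their own cache
 * pointers.
 */
#if 0
struct my_holder {
        struct hammer_node *cache;      /* passive cache, may be NULLed out */
};

static void
example_cache_usage(hammer_cluster_t cluster, int32_t node_offset,
                    struct my_holder *holder)
{
        hammer_node_t node;
        int error;

        node = hammer_get_node(cluster, node_offset, &error);
        if (node == NULL)
                return;
        /* ... use node->ondisk ... */
        hammer_cache_node(node, &holder->cache);        /* passive cache */
        hammer_rel_node(node);

        /*
         * Later: try to reacquire via the cache without going through the
         * buffer/cluster layers.  HAMMER may have cleared the cache pointer
         * at any time, so it must be re-tested.
         */
        if ((node = holder->cache) != NULL && hammer_ref_node(node) == 0) {
                /* ... use node->ondisk ... */
                hammer_rel_node(node);
        }
        hammer_uncache_node(&holder->cache);
}
#endif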
1319
1320 /*
1321  * Remove a node's cache references and destroy the node if it has no
1322  * references.  This is typically called from the buffer handling code.
1323  *
1324  * The node may have an active buffer reference (ondisk != NULL) even
1325  * if the node itself has no references.
1326  *
1327  * Note that a caller iterating through nodes via a buffer must have its
1328  * own reference on the buffer or our hammer_rel_buffer() call below may
1329  * rip it out from under the caller.
1330  */
1331 void
1332 hammer_flush_node(hammer_node_t node)
1333 {
1334         hammer_buffer_t buffer;
1335
1336         if (node->cache1)
1337                 *node->cache1 = NULL;
1338         if (node->cache2)
1339                 *node->cache2 = NULL;
1340         if (node->lock.refs == 0) {
1341                 RB_REMOVE(hammer_nod_rb_tree, &node->cluster->rb_nods_root,
1342                           node);
1343                 if ((buffer = node->buffer) != NULL) {
1344                         node->buffer = NULL;
1345                         hammer_remove_node_clist(buffer, node);
1346                         if (node->ondisk) {
1347                                 node->ondisk = NULL;
1348                                 hammer_rel_buffer(buffer, 0);
1349                         }
1350                 }
1351                 kfree(node, M_HAMMER);
1352         }
1353 }
1354
1355 /*
1356  * Remove a node from the buffer's clist.  Adjust save_scan as appropriate.
1357  * This is in its own little routine to properly handle interactions with
1358  * save_scan, so it is possible to block while scanning a buffer's node list.
1359  */
1360 static
1361 void
1362 hammer_remove_node_clist(hammer_buffer_t buffer, hammer_node_t node)
1363 {
1364         if (buffer->save_scan == node)
1365                 buffer->save_scan = TAILQ_NEXT(node, entry);
1366         TAILQ_REMOVE(&buffer->clist, node, entry);
1367 }
1368
1369 /************************************************************************
1370  *                              A-LIST ALLOCATORS                       *
1371  ************************************************************************/
1372
1373 /*
1374  * Allocate HAMMER elements - btree nodes, data storage, and record elements
1375  *
1376  * The passed *bufferp should be initialized to NULL.  On successive calls
1377  * *bufferp caches the most recent buffer used until put away by the caller.
1378  * Note that previously returned pointers using the cached buffer become
1379  * invalid on successive calls which reuse *bufferp.
1380  *
1381  * All allocations first attempt to use the block found at the specified
1382  * iterator.  If that fails the first available block is used.  If that
1383  * fails a new buffer is allocated and associated with the buffer type
1384  * A-list and the element is allocated out of the new buffer.
1385  */
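
/*
 * Illustrative sketch (the function name and sizes are hypothetical): the
 * *bufferp protocol described above.  The caller starts with a NULL buffer
 * pointer, lets the allocator cache the most recently used buffer in it,
 * and puts the buffer away when done.
 */
#if 0
static void
example_alloc_data(hammer_cluster_t cluster)
{
        struct hammer_buffer *buffer = NULL;    /* must start out NULL */
        void *data;
        int error;
        int i;

        for (i = 0; i < 8; ++i) {
                /*
                 * Pointers returned by earlier iterations may become
                 * invalid here if the cached buffer is replaced.
                 */
                data = hammer_alloc_data(cluster, 128, &error, &buffer);
                if (data == NULL)
                        break;
                bzero(data, 128);
        }
        if (buffer)
                hammer_rel_buffer(buffer, 0);   /* put the buffer away */
}
#endif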
1386
1387 hammer_node_t
1388 hammer_alloc_btree(hammer_cluster_t cluster, int *errorp)
1389 {
1390         hammer_buffer_t buffer;
1391         hammer_alist_t live;
1392         hammer_node_t node;
1393         int32_t elm_no;
1394         int32_t buf_no;
1395         int32_t node_offset;
1396
1397         /*
1398          * Allocate a B-Tree element
1399          */
1400         buffer = NULL;
1401         live = &cluster->alist_btree;
1402         elm_no = hammer_alist_alloc_fwd(live, 1, cluster->ondisk->idx_index);
1403         if (elm_no == HAMMER_ALIST_BLOCK_NONE)
1404                 elm_no = hammer_alist_alloc_fwd(live, 1, 0);
1405         if (elm_no == HAMMER_ALIST_BLOCK_NONE) {
1406                 alloc_new_buffer(cluster, live,
1407                                  HAMMER_FSBUF_BTREE, HAMMER_BTREE_NODES,
1408                                  cluster->ondisk->idx_index, errorp, &buffer);
1409                 elm_no = hammer_alist_alloc(live, 1);
1410                 if (elm_no == HAMMER_ALIST_BLOCK_NONE) {
1411                         *errorp = ENOSPC;
1412                         if (buffer)
1413                                 hammer_rel_buffer(buffer, 0);
1414                         hammer_modify_cluster(cluster);
1415                         return(NULL);
1416                 }
1417         }
1418         cluster->ondisk->idx_index = elm_no;
1419         KKASSERT((elm_no & HAMMER_FSBUF_BLKMASK) < HAMMER_BTREE_NODES);
1420         hammer_modify_cluster(cluster);
1421
1422         /*
1423          * Load and return the B-Tree element
1424          */
1425         buf_no = elm_no / HAMMER_FSBUF_MAXBLKS;
1426         node_offset = buf_no * HAMMER_BUFSIZE +
1427                       offsetof(union hammer_fsbuf_ondisk,
1428                                btree.nodes[elm_no & HAMMER_FSBUF_BLKMASK]);
1429         node = hammer_get_node(cluster, node_offset, errorp);
1430         if (node) {
1431                 bzero(node->ondisk, sizeof(*node->ondisk));
1432         } else {
1433                 hammer_alist_free(live, elm_no, 1);
1436         }
1437         if (buffer)
1438                 hammer_rel_buffer(buffer, 0);
1439         return(node);
1440 }
1441
1442 void *
1443 hammer_alloc_data(hammer_cluster_t cluster, int32_t bytes,
1444                   int *errorp, struct hammer_buffer **bufferp)
1445 {
1446         hammer_buffer_t buffer;
1447         hammer_alist_t live;
1448         int32_t elm_no;
1449         int32_t buf_no;
1450         int32_t nblks;
1451         void *item;
1452
1453         /*
1454          * Deal with large data blocks.  The blocksize is HAMMER_BUFSIZE
1455          * for these allocations.
1456          */
1457         if ((bytes & HAMMER_BUFMASK) == 0) {
1458                 nblks = bytes / HAMMER_BUFSIZE;
1459                 /* only one block allowed for now (so buffer can hold it) */
1460                 KKASSERT(nblks == 1);
1461
1462                 buf_no = hammer_alist_alloc_fwd(&cluster->alist_master,
1463                                                 nblks,
1464                                                 cluster->ondisk->idx_ldata);
1465                 if (buf_no == HAMMER_ALIST_BLOCK_NONE) {
1466                         buf_no = hammer_alist_alloc_fwd(&cluster->alist_master,
1467                                                 nblks,
1468                                                 0);
1469                 }
1470                 hammer_modify_cluster(cluster);
1471                 if (buf_no == HAMMER_ALIST_BLOCK_NONE) {
1472                         *errorp = ENOSPC;
1473                         return(NULL);
1474                 }
1475                 cluster->ondisk->idx_ldata = buf_no;
1476                 buffer = *bufferp;
1477                 *bufferp = hammer_get_buffer(cluster, buf_no, -1, errorp);
1478                 if (buffer)
1479                         hammer_rel_buffer(buffer, 0);
1480                 buffer = *bufferp;
1481                 return(buffer->ondisk);
1482         }
1483
1484         /*
1485          * Allocate a data element.  The block size is HAMMER_DATA_BLKSIZE
1486          * (64 bytes) for these allocations.
1487          */
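        /*
         * Worked example of the rounding below, using the 64 byte
         * HAMMER_DATA_BLKSIZE noted above: a 100 byte request rounds
         * up to 128 bytes, i.e. nblks = 2 elements in the mdata A-list.
         */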
1488         nblks = (bytes + HAMMER_DATA_BLKMASK) & ~HAMMER_DATA_BLKMASK;
1489         nblks /= HAMMER_DATA_BLKSIZE;
1490         live = &cluster->alist_mdata;
1491         elm_no = hammer_alist_alloc_fwd(live, nblks, cluster->ondisk->idx_data);
1492         if (elm_no == HAMMER_ALIST_BLOCK_NONE)
1493                 elm_no = hammer_alist_alloc_fwd(live, nblks, 0);
1494         if (elm_no == HAMMER_ALIST_BLOCK_NONE) {
1495                 alloc_new_buffer(cluster, live,
1496                                  HAMMER_FSBUF_DATA, HAMMER_DATA_NODES,
1497                                  cluster->ondisk->idx_data, errorp, bufferp);
1498                 elm_no = hammer_alist_alloc(live, nblks);
1499                 if (elm_no == HAMMER_ALIST_BLOCK_NONE) {
1500                         *errorp = ENOSPC;
1501                         hammer_modify_cluster(cluster);
1502                         return(NULL);
1503                 }
1504         }
1505         cluster->ondisk->idx_data = elm_no;
1506         hammer_modify_cluster(cluster);
1507
1508         /*
1509          * Load and return the data element
1510          */
1511         buf_no = elm_no / HAMMER_FSBUF_MAXBLKS;
1512         buffer = *bufferp;
1513         if (buffer == NULL || buffer->cluster != cluster ||
1514             buffer->buf_no != buf_no) {
1515                 if (buffer)
1516                         hammer_rel_buffer(buffer, 0);
1517                 buffer = hammer_get_buffer(cluster, buf_no, 0, errorp);
1518                 *bufferp = buffer;
1519         }
1520         KKASSERT(buffer->ondisk->head.buf_type == HAMMER_FSBUF_DATA);
1521         KKASSERT((elm_no & HAMMER_FSBUF_BLKMASK) < HAMMER_DATA_NODES);
1522         item = &buffer->ondisk->data.data[elm_no & HAMMER_FSBUF_BLKMASK];
1523         bzero(item, nblks * HAMMER_DATA_BLKSIZE);
1524         *errorp = 0;
1525         return(item);
1526 }
1527
1528 void *
1529 hammer_alloc_record(hammer_cluster_t cluster,
1530                     int *errorp, struct hammer_buffer **bufferp)
1531 {
1532         hammer_buffer_t buffer;
1533         hammer_alist_t live;
1534         int32_t elm_no;
1535         int32_t buf_no;
1536         void *item;
1537
1538         /*
1539          * Allocate a record element
1540          */
1541         live = &cluster->alist_record;
1542         kprintf("IDX_RECORD %d\n", cluster->ondisk->idx_record);
1543         elm_no = hammer_alist_alloc_rev(live, 1, cluster->ondisk->idx_record);
1544         if (elm_no == HAMMER_ALIST_BLOCK_NONE)
1545                 elm_no = hammer_alist_alloc_rev(live, 1, HAMMER_ALIST_BLOCK_MAX);
1546         kprintf("hammer_alloc_record elm %08x\n", elm_no);
1547         if (elm_no == HAMMER_ALIST_BLOCK_NONE) {
1548                 alloc_new_buffer(cluster, live,
1549                                  HAMMER_FSBUF_RECORDS, HAMMER_RECORD_NODES,
1550                                  cluster->ondisk->idx_record, errorp, bufferp);
1551                 elm_no = hammer_alist_alloc_rev(live, 1, HAMMER_ALIST_BLOCK_MAX);
1552                 kprintf("hammer_alloc_record elm again %08x\n", elm_no);
1553                 if (elm_no == HAMMER_ALIST_BLOCK_NONE) {
1554                         *errorp = ENOSPC;
1555                         hammer_modify_cluster(cluster);
1556                         return(NULL);
1557                 }
1558         }
1559         cluster->ondisk->idx_record = elm_no;
1560         hammer_modify_cluster(cluster);
1561
1562         /*
1563          * Load and return the record element
1564          */
1565         buf_no = elm_no / HAMMER_FSBUF_MAXBLKS;
1566         buffer = *bufferp;
1567         if (buffer == NULL || buffer->cluster != cluster ||
1568             buffer->buf_no != buf_no) {
1569                 if (buffer)
1570                         hammer_rel_buffer(buffer, 0);
1571                 buffer = hammer_get_buffer(cluster, buf_no, 0, errorp);
1572                 *bufferp = buffer;
1573         }
1574         KKASSERT(buffer->ondisk->head.buf_type == HAMMER_FSBUF_RECORDS);
1575         KKASSERT((elm_no & HAMMER_FSBUF_BLKMASK) < HAMMER_RECORD_NODES);
1576         item = &buffer->ondisk->record.recs[elm_no & HAMMER_FSBUF_BLKMASK];
1577         bzero(item, sizeof(union hammer_record_ondisk));
1578         *errorp = 0;
1579         return(item);
1580 }
1581
1582 void
1583 hammer_free_data_ptr(hammer_buffer_t buffer, void *data, int bytes)
1584 {
1585         int32_t elm_no;
1586         int32_t nblks;
1587         hammer_alist_t live;
1588
1589         if ((bytes & HAMMER_BUFMASK) == 0) {
1590                 nblks = bytes / HAMMER_BUFSIZE;
1591                 KKASSERT(nblks == 1 && data == (void *)buffer->ondisk);
1592                 hammer_alist_free(&buffer->cluster->alist_master,
1593                                   buffer->buf_no, nblks);
1594                 hammer_modify_cluster(buffer->cluster);
1595                 return;
1596         }
1597
1598         elm_no = ((char *)data - (char *)buffer->ondisk->data.data) /
1599                  HAMMER_DATA_BLKSIZE;
1600         KKASSERT(elm_no >= 0 && elm_no < HAMMER_DATA_NODES);
1601         elm_no += buffer->buf_no * HAMMER_FSBUF_MAXBLKS;
1602         nblks = (bytes + HAMMER_DATA_BLKMASK) & ~HAMMER_DATA_BLKMASK;
1603         nblks /= HAMMER_DATA_BLKSIZE;
1604         live = &buffer->cluster->alist_mdata;
1605         hammer_alist_free(live, elm_no, nblks);
1606         hammer_modify_cluster(buffer->cluster);
1607 }
1608
1609 void
1610 hammer_free_record_ptr(hammer_buffer_t buffer, union hammer_record_ondisk *rec)
1611 {
1612         int32_t elm_no;
1613         hammer_alist_t live;
1614
1615         elm_no = rec - &buffer->ondisk->record.recs[0];
1616         KKASSERT(elm_no >= 0 && elm_no < HAMMER_RECORD_NODES);
1617         elm_no += buffer->buf_no * HAMMER_FSBUF_MAXBLKS;
1618         live = &buffer->cluster->alist_record;
1619         hammer_alist_free(live, elm_no, 1);
1620         hammer_modify_cluster(buffer->cluster);
1621 }
1622
1623 void
1624 hammer_free_btree(hammer_cluster_t cluster, int32_t bclu_offset)
1625 {
1626         const int32_t blksize = sizeof(struct hammer_node_ondisk);
1627         int32_t fsbuf_offset = bclu_offset & HAMMER_BUFMASK;
1628         hammer_alist_t live;
1629         int32_t elm_no;
1630
1631         elm_no = bclu_offset / HAMMER_BUFSIZE * HAMMER_FSBUF_MAXBLKS;
1632         fsbuf_offset -= offsetof(union hammer_fsbuf_ondisk, btree.nodes[0]);
1633         live = &cluster->alist_btree;
1634         KKASSERT(fsbuf_offset >= 0 && fsbuf_offset % blksize == 0);
1635         elm_no += fsbuf_offset / blksize;
1636         hammer_alist_free(live, elm_no, 1);
1637         hammer_modify_cluster(cluster);
1638 }
1639
1640 void
1641 hammer_free_data(hammer_cluster_t cluster, int32_t bclu_offset, int32_t bytes)
1642 {
1643         const int32_t blksize = HAMMER_DATA_BLKSIZE;
1644         int32_t fsbuf_offset = bclu_offset & HAMMER_BUFMASK;
1645         hammer_alist_t live;
1646         int32_t elm_no;
1647         int32_t buf_no;
1648         int32_t nblks;
1649
1650         if ((bytes & HAMMER_BUFMASK) == 0) {
1651                 nblks = bytes / HAMMER_BUFSIZE;
1652                 KKASSERT(nblks == 1 && (bclu_offset & HAMMER_BUFMASK) == 0);
1653                 buf_no = bclu_offset / HAMMER_BUFSIZE;
1654                 hammer_alist_free(&cluster->alist_master, buf_no, nblks);
1655                 hammer_modify_cluster(cluster);
1656                 return;
1657         }
1658
1659         elm_no = bclu_offset / HAMMER_BUFSIZE * HAMMER_FSBUF_MAXBLKS;
1660         fsbuf_offset -= offsetof(union hammer_fsbuf_ondisk, data.data[0][0]);
1661         live = &cluster->alist_mdata;
1662         nblks = (bytes + HAMMER_DATA_BLKMASK) & ~HAMMER_DATA_BLKMASK;
1663         nblks /= HAMMER_DATA_BLKSIZE;
1664         KKASSERT(fsbuf_offset >= 0 && fsbuf_offset % blksize == 0);
1665         elm_no += fsbuf_offset / blksize;
1666         hammer_alist_free(live, elm_no, nblks);
1667         hammer_modify_cluster(cluster);
1668 }
1669
1670 void
1671 hammer_free_record(hammer_cluster_t cluster, int32_t bclu_offset)
1672 {
1673         const int32_t blksize = sizeof(union hammer_record_ondisk);
1674         int32_t fsbuf_offset = bclu_offset & HAMMER_BUFMASK;
1675         hammer_alist_t live;
1676         int32_t elm_no;
1677
1678         elm_no = bclu_offset / HAMMER_BUFSIZE * HAMMER_FSBUF_MAXBLKS;
1679         fsbuf_offset -= offsetof(union hammer_fsbuf_ondisk, record.recs[0]);
1680         live = &cluster->alist_record;
1681         KKASSERT(fsbuf_offset >= 0 && fsbuf_offset % blksize == 0);
1682         elm_no += fsbuf_offset / blksize;
1683         hammer_alist_free(live, elm_no, 1);
1684         hammer_modify_cluster(cluster);
1685 }
1686
1687
1688 /*
1689  * Allocate a new filesystem buffer and assign it to the specified
1690  * filesystem buffer type.  The new buffer will be added to the
1691  * type-specific A-list and initialized.
1692  */
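/*
 * Note on the index math (descriptive only): each filesystem buffer
 * covers HAMMER_FSBUF_MAXBLKS element slots in the type-specific
 * A-list, so buffer buf_no owns element indices starting at
 * buf_no * HAMMER_FSBUF_MAXBLKS.  That is why the element iterator
 * passed in as 'start' is divided by HAMMER_FSBUF_MAXBLKS below and
 * why the new buffer's elements are meta-freed starting at
 * buf_no * HAMMER_FSBUF_MAXBLKS.
 */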
1693 static void
1694 alloc_new_buffer(hammer_cluster_t cluster, hammer_alist_t live,
1695                  u_int64_t type, int32_t nelements,
1696                  int start, int *errorp, struct hammer_buffer **bufferp)
1697 {
1698         hammer_buffer_t buffer;
1699         int32_t buf_no;
1700
1701         start = start / HAMMER_FSBUF_MAXBLKS;   /* convert to buf_no */
1702
1703         if (type == HAMMER_FSBUF_RECORDS) {
1704                 buf_no = hammer_alist_alloc_rev(&cluster->alist_master,
1705                                                 1, start);
1706                 if (buf_no == HAMMER_ALIST_BLOCK_NONE) {
1707                         buf_no = hammer_alist_alloc_rev(&cluster->alist_master,
1708                                                 1, HAMMER_ALIST_BLOCK_MAX);
1709                 }
1710         } else {
1711                 buf_no = hammer_alist_alloc_fwd(&cluster->alist_master,
1712                                                 1, start);
1713                 if (buf_no == HAMMER_ALIST_BLOCK_NONE) {
1714                         buf_no = hammer_alist_alloc_fwd(&cluster->alist_master,
1715                                                 1, 0);
1716                 }
1717         }
1718         KKASSERT(buf_no != HAMMER_ALIST_BLOCK_NONE); /* XXX */
1719         hammer_modify_cluster(cluster);
1720
1721         /*
1722          * The new buffer must be initialized (type != 0) regardless of
1723          * whether we already have it cached or not, so don't try to
1724          * optimize the cached buffer check.  Just call hammer_get_buffer().
1725          */
1726         buffer = hammer_get_buffer(cluster, buf_no, type, errorp);
1727         if (*bufferp)
1728                 hammer_rel_buffer(*bufferp, 0);
1729         *bufferp = buffer;
1730
1731         /*
1732          * Finally, do a meta-free of the buffer's elements.
1733          */
1734         if (buffer) {
1735                 kprintf("alloc_new_buffer buf_no %d type %016llx nelms %d\n",
1736                         buf_no, type, nelements);
1737                 hammer_alist_free(live, buf_no * HAMMER_FSBUF_MAXBLKS,
1738                                   nelements);
1739         }
1740 }
1741
1742 #if 0
1743
1748 /*
1749  * Flush various tracking structures to disk
1750  */
1751 void
1752 flush_all_volumes(void)
1753 {
1754         hammer_volume_t vol;
1755
1756         for (vol = VolBase; vol; vol = vol->next)
1757                 flush_volume(vol);
1758 }
1759
1760 void
1761 flush_volume(hammer_volume_t vol)
1762 {
1763         hammer_supercl_t supercl;
1764         hammer_cluster_t cl;
1765
1766         for (supercl = vol->supercl_base; supercl; supercl = supercl->next)
1767                 flush_supercl(supercl);
1768         for (cl = vol->cluster_base; cl; cl = cl->next)
1769                 flush_cluster(cl);
1770         writehammerbuf(vol, vol->ondisk, 0);
1771 }
1772
1773 void
1774 flush_supercl(hammer_supercl_t supercl)
1775 {
1776         int64_t supercl_offset;
1777
1778         supercl_offset = supercl->scl_offset;
1779         writehammerbuf(supercl->volume, supercl->ondisk, supercl_offset);
1780 }
1781
1782 void
1783 flush_cluster(hammer_cluster_t cl)
1784 {
1785         hammer_buffer_t buf;
1786         int64_t cluster_offset;
1787
1788         for (buf = cl->buffer_base; buf; buf = buf->next)
1789                 flush_buffer(buf);
1790         cluster_offset = cl->clu_offset;
1791         writehammerbuf(cl->volume, cl->ondisk, cluster_offset);
1792 }
1793
1794 void
1795 flush_buffer(hammer_buffer_t buf)
1796 {
1797         int64_t buffer_offset;
1798
1799         buffer_offset = buf->buf_offset + buf->cluster->clu_offset;
1800         writehammerbuf(buf->volume, buf->ondisk, buffer_offset);
1801 }
1802
1803 #endif
1804
1805 /*
1806  * Generic buffer initialization
1807  */
1808 static void
1809 initbuffer(hammer_alist_t live, hammer_fsbuf_head_t head, u_int64_t type)
1810 {
1811         head->buf_type = type;
1812         hammer_alist_init(live);
1813 }
1814
1815 /*
1816  * Calculate the cluster's offset in the volume.  This calculation is
1817  * slightly more complex when using superclusters because superclusters
1818  * are grouped in blocks of 16, followed by 16 x N clusters where N
1819  * is the number of clusters a supercluster can manage.
1820  */
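/*
 * Rough layout sketch (descriptive only), with G standing for
 * HAMMER_VOL_SUPERCLUSTER_GROUP (16) and N for HAMMER_SCL_MAXCLUSTERS:
 * each group starts with the group's supercluster buffers and is
 * followed by its G x N clusters, so a cluster lands at approximately
 *
 *	cluster_base
 *	  + scl_group * scl_group_size		(skip whole groups)
 *	  + G * HAMMER_BUFSIZE			(skip this group's superclusters)
 *	  + (clu_no % (G * N)) * vol_clsize	(position within the group)
 */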
1821 static int64_t
1822 calculate_cluster_offset(hammer_volume_t volume, int32_t clu_no)
1823 {
1824         int32_t scl_group;
1825         int64_t scl_group_size;
1826         int64_t off;
1827
1828         if (volume->vol_flags & HAMMER_VOLF_USINGSUPERCL) {
1829                 scl_group = clu_no / HAMMER_VOL_SUPERCLUSTER_GROUP /
1830                             HAMMER_SCL_MAXCLUSTERS;
1831                 scl_group_size = 
1832                             ((int64_t)HAMMER_BUFSIZE *
1833                              HAMMER_VOL_SUPERCLUSTER_GROUP) +
1834                             ((int64_t)HAMMER_VOL_SUPERCLUSTER_GROUP *
1835                              volume->vol_clsize * HAMMER_SCL_MAXCLUSTERS);
1836                 scl_group_size += 
1837                             HAMMER_VOL_SUPERCLUSTER_GROUP * HAMMER_BUFSIZE;
1838
1839                 off = volume->cluster_base +
1840                       scl_group * scl_group_size +
1841                       (HAMMER_BUFSIZE * HAMMER_VOL_SUPERCLUSTER_GROUP) +
1842                       ((int64_t)clu_no % ((int64_t)HAMMER_SCL_MAXCLUSTERS *
1843                        HAMMER_VOL_SUPERCLUSTER_GROUP))
1844                       * volume->vol_clsize;
1845         } else {
1846                 off = volume->cluster_base +
1847                       (int64_t)clu_no * volume->vol_clsize;
1848         }
1849         return(off);
1850 }
1851
1852 /*
1853  * Calculate a super-cluster's offset in the volume.
1854  */
1855 static int64_t
1856 calculate_supercl_offset(hammer_volume_t volume, int32_t scl_no)
1857 {
1858         int64_t off;
1859         int32_t scl_group;
1860         int64_t scl_group_size;
1861
1862         KKASSERT (volume->vol_flags & HAMMER_VOLF_USINGSUPERCL);
1863         scl_group = scl_no / HAMMER_VOL_SUPERCLUSTER_GROUP;
1864         if (scl_group) {
1865                 scl_group_size = 
1866                             ((int64_t)HAMMER_BUFSIZE *
1867                              HAMMER_VOL_SUPERCLUSTER_GROUP) +
1868                             ((int64_t)HAMMER_VOL_SUPERCLUSTER_GROUP *
1869                              volume->vol_clsize * HAMMER_SCL_MAXCLUSTERS);
1870                 scl_group_size += 
1871                             HAMMER_VOL_SUPERCLUSTER_GROUP * HAMMER_BUFSIZE;
1872                 off = volume->cluster_base + (scl_group * scl_group_size) +
1873                       (scl_no % HAMMER_VOL_SUPERCLUSTER_GROUP) * HAMMER_BUFSIZE;
1874         } else {
1875                 off = volume->cluster_base + (scl_no * HAMMER_BUFSIZE);
1876         }
1877         return(off);
1878 }
1879
1880 /*
1881  * A-LIST SUPPORT
1882  *
1883  * Set up the parameters for the various A-lists we use in HAMMER.  The
1884  * supercluster A-list must be chained to the cluster A-list and cluster
1885  * slave A-lists are chained to buffer A-lists.
1886  *
1887  * See hammer_init_alist_config() below.
1888  */
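/*
 * How the chaining works in practice (descriptive note based on the
 * callbacks below): a block number 'blk' in a chained parent A-list is
 * translated to the child that owns it -- buf_no = blk /
 * HAMMER_FSBUF_MAXBLKS for a cluster slave list, scl_no = blk /
 * HAMMER_SCL_MAXCLUSTERS for the volume's super list -- and the request
 * is reissued against the child's own A-list in child-relative terms
 * (atblk - blk), with the result biased back by +blk on return.
 */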
1889
1890 /*
1891  * A-LIST - cluster recursion into a filesystem buffer
1892  */
1893 static int
1894 buffer_alist_init(void *info, int32_t blk, int32_t radix)
1895 {
1896         hammer_cluster_t cluster = info;
1897         hammer_buffer_t buffer;
1898         int32_t buf_no;
1899         int error = 0;
1900
1901         /*
1902          * Calculate the buffer number, initialize based on the buffer type.
1903          * The buffer has already been allocated so assert that it has been
1904          * initialized.
1905          */
1906         buf_no = blk / HAMMER_FSBUF_MAXBLKS;
1907         buffer = hammer_get_buffer(cluster, buf_no, 0, &error);
1908         if (buffer)
1909                 hammer_rel_buffer(buffer, 0);
1910         return (error);
1911 }
1912
1913 static int
1914 buffer_alist_destroy(void *info, int32_t blk, int32_t radix)
1915 {
1916         return (0);
1917 }
1918
1919 /*
1920  * Note: atblk can be negative and atblk - blk can go negative.
1921  */
1922 static int
1923 buffer_alist_alloc_fwd(void *info, int32_t blk, int32_t radix,
1924                       int32_t count, int32_t atblk, int32_t *fullp)
1925 {
1926         hammer_cluster_t cluster = info;
1927         hammer_buffer_t buffer;
1928         int32_t buf_no;
1929         int32_t r;
1930         int error = 0;
1931
1932         buf_no = blk / HAMMER_FSBUF_MAXBLKS;
1933         buffer = hammer_get_buffer(cluster, buf_no, 0, &error);
1934         if (buffer) {
1935                 KKASSERT(buffer->ondisk->head.buf_type != 0);
1936
1937                 r = hammer_alist_alloc_fwd(&buffer->alist, count, atblk - blk);
1938                 if (r != HAMMER_ALIST_BLOCK_NONE)
1939                         r += blk;
1940                 *fullp = hammer_alist_isfull(&buffer->alist);
1941                 hammer_modify_buffer(buffer);
1942                 hammer_rel_buffer(buffer, 0);
1943         } else {
1944                 r = HAMMER_ALIST_BLOCK_NONE;
                     *fullp = 0;
1945         }
1946         return(r);
1947 }
1948
1949 static int
1950 buffer_alist_alloc_rev(void *info, int32_t blk, int32_t radix,
1951                       int32_t count, int32_t atblk, int32_t *fullp)
1952 {
1953         hammer_cluster_t cluster = info;
1954         hammer_buffer_t buffer;
1955         int32_t buf_no;
1956         int32_t r;
1957         int error = 0;
1958
1959         buf_no = blk / HAMMER_FSBUF_MAXBLKS;
1960         buffer = hammer_get_buffer(cluster, buf_no, 0, &error);
1961         if (buffer) {
1962                 KKASSERT(buffer->ondisk->head.buf_type != 0);
1963
1964                 r = hammer_alist_alloc_rev(&buffer->alist, count, atblk - blk);
1965                 if (r != HAMMER_ALIST_BLOCK_NONE)
1966                         r += blk;
1967                 *fullp = hammer_alist_isfull(&buffer->alist);
1968                 hammer_modify_buffer(buffer);
1969                 hammer_rel_buffer(buffer, 0);
1970         } else {
1971                 r = HAMMER_ALIST_BLOCK_NONE;
1972                 *fullp = 0;
1973         }
1974         return(r);
1975 }
1976
1977 static void
1978 buffer_alist_free(void *info, int32_t blk, int32_t radix,
1979                  int32_t base_blk, int32_t count, int32_t *emptyp)
1980 {
1981         hammer_cluster_t cluster = info;
1982         hammer_buffer_t buffer;
1983         int32_t buf_no;
1984         int error = 0;
1985
1986         buf_no = blk / HAMMER_FSBUF_MAXBLKS;
1987         buffer = hammer_get_buffer(cluster, buf_no, 0, &error);
1988         if (buffer) {
1989                 KKASSERT(buffer->ondisk->head.buf_type != 0);
1990                 hammer_alist_free(&buffer->alist, base_blk, count);
1991                 *emptyp = hammer_alist_isempty(&buffer->alist);
1992                 hammer_modify_buffer(buffer);
1993                 hammer_rel_buffer(buffer, 0);
1994         } else {
1995                 *emptyp = 0;
1996         }
1997 }
1998
1999 static void
2000 buffer_alist_print(void *info, int32_t blk, int32_t radix, int tab)
2001 {
2002 }
2003
2004 /*
2005  * A-LIST - super-cluster recursion into a cluster and cluster recursion
2006  * into a filesystem buffer.  A-Lists are mostly self-contained entities,
2007  * but callbacks must be installed to recurse from one A-List to another.
2008  *
2009  * Implementing these callbacks allows us to operate a multi-layered A-List
2010  * as a single entity.
2011  */
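/*
 * The *fullp and *emptyp outputs of these callbacks report whether the
 * child A-list became completely full or completely empty, which lets
 * the parent layer mark the corresponding meta-block and avoid
 * rescanning sub-lists that have no free elements.
 */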
2012 static int
2013 super_alist_init(void *info, int32_t blk, int32_t radix)
2014 {
2015         hammer_volume_t volume = info;
2016         hammer_supercl_t supercl;
2017         int32_t scl_no;
2018         int error = 0;
2019
2020         /*
2021          * Calculate the super-cluster number containing the cluster (blk)
2022          * and obtain the super-cluster buffer.
2023          */
2024         scl_no = blk / HAMMER_SCL_MAXCLUSTERS;
2025         supercl = hammer_get_supercl(volume, scl_no, &error, 1);
2026         if (supercl)
2027                 hammer_rel_supercl(supercl, 0);
2028         return (error);
2029 }
2030
2031 static int
2032 super_alist_destroy(void *info, int32_t blk, int32_t radix)
2033 {
2034         return(0);
2035 }
2036
2037 static int
2038 super_alist_alloc_fwd(void *info, int32_t blk, int32_t radix,
2039                       int32_t count, int32_t atblk, int32_t *fullp)
2040 {
2041         hammer_volume_t volume = info;
2042         hammer_supercl_t supercl;
2043         int32_t scl_no;
2044         int32_t r;
2045         int error = 0;
2046
2047         scl_no = blk / HAMMER_SCL_MAXCLUSTERS;
2048         supercl = hammer_get_supercl(volume, scl_no, &error, 1);
2049         if (supercl) {
2050                 r = hammer_alist_alloc_fwd(&supercl->alist, count, atblk - blk);
2051                 if (r != HAMMER_ALIST_BLOCK_NONE)
2052                         r += blk;
2053                 *fullp = hammer_alist_isfull(&supercl->alist);
2054                 hammer_modify_supercl(supercl);
2055                 hammer_rel_supercl(supercl, 0);
2056         } else {
2057                 r = HAMMER_ALIST_BLOCK_NONE;
2058                 *fullp = 0;
2059         }
2060         return(r);
2061 }
2062
2063 static int
2064 super_alist_alloc_rev(void *info, int32_t blk, int32_t radix,
2065                       int32_t count, int32_t atblk, int32_t *fullp)
2066 {
2067         hammer_volume_t volume = info;
2068         hammer_supercl_t supercl;
2069         int32_t scl_no;
2070         int32_t r;
2071         int error = 0;
2072
2073         scl_no = blk / HAMMER_SCL_MAXCLUSTERS;
2074         supercl = hammer_get_supercl(volume, scl_no, &error, 1);
2075         if (supercl) {
2076                 r = hammer_alist_alloc_rev(&supercl->alist, count, atblk - blk);
2077                 if (r != HAMMER_ALIST_BLOCK_NONE)
2078                         r += blk;
2079                 *fullp = hammer_alist_isfull(&supercl->alist);
2080                 hammer_modify_supercl(supercl);
2081                 hammer_rel_supercl(supercl, 0);
2082         } else { 
2083                 r = HAMMER_ALIST_BLOCK_NONE;
2084                 *fullp = 0;
2085         }
2086         return(r);
2087 }
2088
2089 static void
2090 super_alist_free(void *info, int32_t blk, int32_t radix,
2091                  int32_t base_blk, int32_t count, int32_t *emptyp)
2092 {
2093         hammer_volume_t volume = info;
2094         hammer_supercl_t supercl;
2095         int32_t scl_no;
2096         int error = 0;
2097
2098         scl_no = blk / HAMMER_SCL_MAXCLUSTERS;
2099         supercl = hammer_get_supercl(volume, scl_no, &error, 1);
2100         if (supercl) {
2101                 hammer_alist_free(&supercl->alist, base_blk, count);
2102                 *emptyp = hammer_alist_isempty(&supercl->alist);
2103                 hammer_modify_supercl(supercl);
2104                 hammer_rel_supercl(supercl, 0);
2105         } else {
2106                 *emptyp = 0;
2107         }
2108 }
2109
2110 static void
2111 super_alist_print(void *info, int32_t blk, int32_t radix, int tab)
2112 {
2113 }
2114
2115 void
2116 hammer_init_alist_config(void)
2117 {
2118         hammer_alist_config_t config;
2119
2120         hammer_alist_template(&Buf_alist_config, HAMMER_FSBUF_MAXBLKS,
2121                               1, HAMMER_FSBUF_METAELMS);
2122         hammer_alist_template(&Vol_normal_alist_config, HAMMER_VOL_MAXCLUSTERS,
2123                               1, HAMMER_VOL_METAELMS_1LYR);
2124         hammer_alist_template(&Vol_super_alist_config,
2125                           HAMMER_VOL_MAXSUPERCLUSTERS * HAMMER_SCL_MAXCLUSTERS,
2126                               HAMMER_SCL_MAXCLUSTERS, HAMMER_VOL_METAELMS_2LYR);
2127         hammer_alist_template(&Supercl_alist_config, HAMMER_VOL_MAXCLUSTERS,
2128                               1, HAMMER_SUPERCL_METAELMS);
2129         hammer_alist_template(&Clu_master_alist_config, HAMMER_CLU_MAXBUFFERS,
2130                               1, HAMMER_CLU_MASTER_METAELMS);
2131         hammer_alist_template(&Clu_slave_alist_config,
2132                               HAMMER_CLU_MAXBUFFERS * HAMMER_FSBUF_MAXBLKS,
2133                               HAMMER_FSBUF_MAXBLKS, HAMMER_CLU_SLAVE_METAELMS);
2134
2135         config = &Vol_super_alist_config;
2136         config->bl_radix_init = super_alist_init;
2137         config->bl_radix_destroy = super_alist_destroy;
2138         config->bl_radix_alloc_fwd = super_alist_alloc_fwd;
2139         config->bl_radix_alloc_rev = super_alist_alloc_rev;
2140         config->bl_radix_free = super_alist_free;
2141         config->bl_radix_print = super_alist_print;
2142
2143         config = &Clu_slave_alist_config;
2144         config->bl_radix_init = buffer_alist_init;
2145         config->bl_radix_destroy = buffer_alist_destroy;
2146         config->bl_radix_alloc_fwd = buffer_alist_alloc_fwd;
2147         config->bl_radix_alloc_rev = buffer_alist_alloc_rev;
2148         config->bl_radix_free = buffer_alist_free;
2149         config->bl_radix_print = buffer_alist_print;
2150 }
2151