1 /*
2  * Copyright (c) 2007 The DragonFly Project.  All rights reserved.
3  * 
4  * This code is derived from software contributed to The DragonFly Project
5  * by Matthew Dillon <dillon@backplane.com>
6  * 
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions
9  * are met:
10  * 
11  * 1. Redistributions of source code must retain the above copyright
12  *    notice, this list of conditions and the following disclaimer.
13  * 2. Redistributions in binary form must reproduce the above copyright
14  *    notice, this list of conditions and the following disclaimer in
15  *    the documentation and/or other materials provided with the
16  *    distribution.
17  * 3. Neither the name of The DragonFly Project nor the names of its
18  *    contributors may be used to endorse or promote products derived
19  *    from this software without specific, prior written permission.
20  * 
21  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
22  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
23  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
24  * FOR A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE
25  * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
26  * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING,
27  * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
28  * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
29  * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
30  * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
31  * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
32  * SUCH DAMAGE.
33  * 
34  * $DragonFly: src/sys/vfs/hammer/hammer_ondisk.c,v 1.22 2008/01/18 07:02:41 dillon Exp $
35  */
36 /*
37  * Manage HAMMER's on-disk structures.  These routines are primarily
38  * responsible for interfacing with the kernel's I/O subsystem and for
39  * managing in-memory structures.
40  */
41
42 #include "hammer.h"
43 #include <sys/fcntl.h>
44 #include <sys/nlookup.h>
45 #include <sys/buf.h>
46 #include <sys/buf2.h>
47
48 static void hammer_free_volume(hammer_volume_t volume);
49 static int hammer_load_volume(hammer_volume_t volume);
50 static int hammer_load_supercl(hammer_supercl_t supercl,
51                         hammer_alloc_state_t isnew);
52 static int hammer_load_cluster(hammer_cluster_t cluster,
53                         hammer_alloc_state_t isnew);
54 static int hammer_load_buffer(hammer_buffer_t buffer, u_int64_t buf_type);
55 static int hammer_load_node(hammer_node_t node);
56 static void alloc_new_buffer(hammer_cluster_t cluster, u_int64_t type,
57                         hammer_alist_t live,
58                         int32_t start, int *errorp,
59                         struct hammer_buffer **bufferp);
60 #if 0
61 static void readhammerbuf(hammer_volume_t vol, void *data,
62                         int64_t offset);
63 static void writehammerbuf(hammer_volume_t vol, const void *data,
64                         int64_t offset);
65 #endif
66 static int64_t calculate_cluster_offset(hammer_volume_t vol, int32_t clu_no);
67 static int64_t calculate_supercl_offset(hammer_volume_t vol, int32_t scl_no);
68 static int32_t hammer_alloc_master(hammer_cluster_t cluster, int nblks,
69                         int32_t start, int isfwd);
70 static void hammer_adjust_stats(hammer_cluster_t cluster,
71                         u_int64_t buf_type, int nblks);
72
73 struct hammer_alist_config Buf_alist_config;
74 struct hammer_alist_config Vol_normal_alist_config;
75 struct hammer_alist_config Vol_super_alist_config;
76 struct hammer_alist_config Supercl_alist_config;
77 struct hammer_alist_config Clu_master_alist_config;
78 struct hammer_alist_config Clu_slave_alist_config;
79
80 /*
81  * Red-Black tree support for various structures
82  */
83 static int
84 hammer_ino_rb_compare(hammer_inode_t ip1, hammer_inode_t ip2)
85 {
86         if (ip1->obj_id < ip2->obj_id)
87                 return(-1);
88         if (ip1->obj_id > ip2->obj_id)
89                 return(1);
90         if (ip1->obj_asof < ip2->obj_asof)
91                 return(-1);
92         if (ip1->obj_asof > ip2->obj_asof)
93                 return(1);
94         return(0);
95 }
96
97 static int
98 hammer_inode_info_cmp(hammer_inode_info_t info, hammer_inode_t ip)
99 {
100         if (info->obj_id < ip->obj_id)
101                 return(-1);
102         if (info->obj_id > ip->obj_id)
103                 return(1);
104         if (info->obj_asof < ip->obj_asof)
105                 return(-1);
106         if (info->obj_asof > ip->obj_asof)
107                 return(1);
108         return(0);
109 }
110
111 static int
112 hammer_vol_rb_compare(hammer_volume_t vol1, hammer_volume_t vol2)
113 {
114         if (vol1->vol_no < vol2->vol_no)
115                 return(-1);
116         if (vol1->vol_no > vol2->vol_no)
117                 return(1);
118         return(0);
119 }
120
121 static int
122 hammer_scl_rb_compare(hammer_supercl_t cl1, hammer_supercl_t cl2)
123 {
124         if (cl1->scl_no < cl2->scl_no)
125                 return(-1);
126         if (cl1->scl_no > cl2->scl_no)
127                 return(1);
128         return(0);
129 }
130
131 static int
132 hammer_clu_rb_compare(hammer_cluster_t cl1, hammer_cluster_t cl2)
133 {
134         if (cl1->clu_no < cl2->clu_no)
135                 return(-1);
136         if (cl1->clu_no > cl2->clu_no)
137                 return(1);
138         return(0);
139 }
140
141 static int
142 hammer_buf_rb_compare(hammer_buffer_t buf1, hammer_buffer_t buf2)
143 {
144         if (buf1->buf_no < buf2->buf_no)
145                 return(-1);
146         if (buf1->buf_no > buf2->buf_no)
147                 return(1);
148         return(0);
149 }
150
151 static int
152 hammer_nod_rb_compare(hammer_node_t node1, hammer_node_t node2)
153 {
154         if (node1->node_offset < node2->node_offset)
155                 return(-1);
156         if (node1->node_offset > node2->node_offset)
157                 return(1);
158         return(0);
159 }
160
161 /*
162  * Note: The lookup function for hammer_ino_rb_tree winds up being named
163  * hammer_ino_rb_tree_RB_LOOKUP_INFO(root, info).  The other lookup
164  * functions are normal, e.g. hammer_clu_rb_tree_RB_LOOKUP(root, clu_no).
165  */
166 RB_GENERATE(hammer_ino_rb_tree, hammer_inode, rb_node, hammer_ino_rb_compare);
167 RB_GENERATE_XLOOKUP(hammer_ino_rb_tree, INFO, hammer_inode, rb_node,
168                 hammer_inode_info_cmp, hammer_inode_info_t);
169 RB_GENERATE2(hammer_vol_rb_tree, hammer_volume, rb_node,
170              hammer_vol_rb_compare, int32_t, vol_no);
171 RB_GENERATE2(hammer_scl_rb_tree, hammer_supercl, rb_node,
172              hammer_scl_rb_compare, int32_t, scl_no);
173 RB_GENERATE2(hammer_clu_rb_tree, hammer_cluster, rb_node,
174              hammer_clu_rb_compare, int32_t, clu_no);
175 RB_GENERATE2(hammer_buf_rb_tree, hammer_buffer, rb_node,
176              hammer_buf_rb_compare, int32_t, buf_no);
177 RB_GENERATE2(hammer_nod_rb_tree, hammer_node, rb_node,
178              hammer_nod_rb_compare, int32_t, node_offset);
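
/*
 * Illustrative sketch only (never compiled): the generated lookup entry
 * points described in the note above.  The rb_inos_root field name and the
 * wrapper functions below are assumptions for illustration, not part of
 * this file.
 */
#if 0
static hammer_inode_t
example_lookup_inode(struct hammer_mount *hmp, hammer_inode_info_t info)
{
	/* INFO-style lookup produced by RB_GENERATE_XLOOKUP above */
	return(hammer_ino_rb_tree_RB_LOOKUP_INFO(&hmp->rb_inos_root, info));
}

static hammer_cluster_t
example_lookup_cluster(hammer_volume_t volume, int32_t clu_no)
{
	/* normal keyed lookup produced by RB_GENERATE2 above */
	return(hammer_clu_rb_tree_RB_LOOKUP(&volume->rb_clus_root, clu_no));
}
#endif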
179
180 /************************************************************************
181  *                              VOLUMES                                 *
182  ************************************************************************
183  *
184  * Load a HAMMER volume by name.  Returns 0 on success or a positive error
185  * code on failure.  Volumes must be loaded at mount time; get_volume() will
186  * not load a new volume.
187  *
188  * Calls made to hammer_load_volume() are single-threaded.
189  */
190 int
191 hammer_install_volume(struct hammer_mount *hmp, const char *volname)
192 {
193         struct mount *mp;
194         hammer_volume_t volume;
195         struct hammer_volume_ondisk *ondisk;
196         struct nlookupdata nd;
197         struct buf *bp = NULL;
198         int error;
199         int ronly;
200
201         mp = hmp->mp;
202         ronly = ((mp->mnt_flag & MNT_RDONLY) ? 1 : 0);
203
204         /*
205          * Allocate a volume structure
206          */
207         ++hammer_count_volumes;
208         volume = kmalloc(sizeof(*volume), M_HAMMER, M_WAITOK|M_ZERO);
209         volume->vol_name = kstrdup(volname, M_HAMMER);
210         volume->hmp = hmp;
211         hammer_io_init(&volume->io, HAMMER_STRUCTURE_VOLUME);
212         volume->io.offset = 0LL;
213
214         /*
215          * Get the device vnode
216          */
217         error = nlookup_init(&nd, volume->vol_name, UIO_SYSSPACE, NLC_FOLLOW);
218         if (error == 0)
219                 error = nlookup(&nd);
220         if (error == 0)
221                 error = cache_vref(&nd.nl_nch, nd.nl_cred, &volume->devvp);
222         nlookup_done(&nd);
223         if (error == 0) {
224                 vn_isdisk(volume->devvp, &error);
225         }
226         if (error == 0) {
227                 vn_lock(volume->devvp, LK_EXCLUSIVE | LK_RETRY);
228                 error = VOP_OPEN(volume->devvp, (ronly ? FREAD : FREAD|FWRITE),
229                                  FSCRED, NULL);
230                 vn_unlock(volume->devvp);
231         }
232         if (error) {
233                 hammer_free_volume(volume);
234                 return(error);
235         }
236
237         /*
238          * Extract the volume number from the volume header and do various
239          * sanity checks.
240          */
241         error = bread(volume->devvp, 0LL, HAMMER_BUFSIZE, &bp);
242         if (error)
243                 goto late_failure;
244         ondisk = (void *)bp->b_data;
245         if (ondisk->head.buf_type != HAMMER_FSBUF_VOLUME) {
246                 kprintf("hammer_mount: volume %s has an invalid header\n",
247                         volume->vol_name);
248                 error = EFTYPE;
249                 goto late_failure;
250         }
251         volume->vol_no = ondisk->vol_no;
252         volume->cluster_base = ondisk->vol_clo_beg;
253         volume->vol_clsize = ondisk->vol_clsize;
254         volume->vol_flags = ondisk->vol_flags;
255         volume->nblocks = ondisk->vol_nblocks; 
256         RB_INIT(&volume->rb_clus_root);
257         RB_INIT(&volume->rb_scls_root);
258
259         hmp->mp->mnt_stat.f_blocks += volume->nblocks;
260
261         if (RB_EMPTY(&hmp->rb_vols_root)) {
262                 hmp->fsid = ondisk->vol_fsid;
263         } else if (bcmp(&hmp->fsid, &ondisk->vol_fsid, sizeof(uuid_t))) {
264                 kprintf("hammer_mount: volume %s's fsid does not match "
265                         "other volumes\n", volume->vol_name);
266                 error = EFTYPE;
267                 goto late_failure;
268         }
269
270         /*
271          * Insert the volume structure into the red-black tree.
272          */
273         if (RB_INSERT(hammer_vol_rb_tree, &hmp->rb_vols_root, volume)) {
274                 kprintf("hammer_mount: volume %s has a duplicate vol_no %d\n",
275                         volume->vol_name, volume->vol_no);
276                 error = EEXIST;
277         }
278
279         /*
280          * Set the root volume and load the root cluster.  HAMMER special
281          * cases rootvol and rootcl and will not deallocate the structures.
282          * We do not hold a ref because this would prevent related I/O
283          * from being flushed.
284          */
285         if (error == 0 && ondisk->vol_rootvol == ondisk->vol_no) {
286                 hmp->rootvol = volume;
287                 if (bp) {
288                         brelse(bp);
289                         bp = NULL;
290                 }
291                 hammer_ref_volume(volume);
292                 hmp->rootcl = hammer_get_cluster(volume,
293                                                  ondisk->vol0_root_clu_no,
294                                                  &error, 0);
295                 hammer_rel_cluster(hmp->rootcl, 0);
296                 hammer_rel_volume(volume, 0);
297                 hmp->fsid_udev = dev2udev(vn_todev(volume->devvp));
298         }
299 late_failure:
300         if (bp)
301                 brelse(bp);
302         if (error) {
303                 /*vinvalbuf(volume->devvp, V_SAVE, 0, 0);*/
304                 VOP_CLOSE(volume->devvp, ronly ? FREAD : FREAD|FWRITE);
305                 hammer_free_volume(volume);
306         }
307         return (error);
308 }
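
/*
 * Sketch (not compiled) of how a mount routine might drive the function
 * above: install each volume named in the mount arguments and stop on the
 * first failure.  The argument layout (ac/av) is hypothetical.
 */
#if 0
static int
example_install_volumes(struct hammer_mount *hmp, int ac, const char **av)
{
	int error = 0;
	int i;

	for (i = 0; i < ac && error == 0; ++i)
		error = hammer_install_volume(hmp, av[i]);
	return(error);
}
#endif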
309
310 /*
311  * Unload and free a HAMMER volume.  Must return >= 0 for the scan to
312  * continue, so return -1 on failure.
313  */
314 int
315 hammer_unload_volume(hammer_volume_t volume, void *data __unused)
316 {
317         struct hammer_mount *hmp = volume->hmp;
318         hammer_cluster_t rootcl;
319         int ronly = ((hmp->mp->mnt_flag & MNT_RDONLY) ? 1 : 0);
320
321         /*
322          * Sync clusters, sync volume
323          */
324
325         hmp->mp->mnt_stat.f_blocks -= volume->nblocks;
326
327         /*
328          * Clean up the root cluster, which is held unlocked in the root
329          * volume.
330          */
331         if (hmp->rootvol == volume) {
332                 if ((rootcl = hmp->rootcl) != NULL)
333                         hmp->rootcl = NULL;
334                 hmp->rootvol = NULL;
335         }
336
337         /*
338          * Unload clusters and super-clusters.  Unloading a super-cluster
339          * also unloads related clusters, but the filesystem may not be
340          * using super-clusters so unload clusters anyway.
341          */
342         RB_SCAN(hammer_clu_rb_tree, &volume->rb_clus_root, NULL,
343                         hammer_unload_cluster, NULL);
344         RB_SCAN(hammer_scl_rb_tree, &volume->rb_scls_root, NULL,
345                         hammer_unload_supercl, NULL);
346         hammer_io_waitdep(&volume->io);
347
348         /*
349          * Release our buffer and flush anything left in the buffer cache.
350          */
351         hammer_io_release(&volume->io, 2);
352
353         /*
354          * There should be no references on the volume, no clusters, and
355          * no super-clusters.
356          */
357         KKASSERT(volume->io.lock.refs == 0);
358         KKASSERT(RB_EMPTY(&volume->rb_clus_root));
359         KKASSERT(RB_EMPTY(&volume->rb_scls_root));
360
361         volume->ondisk = NULL;
362         if (volume->devvp) {
363                 if (ronly) {
364                         vinvalbuf(volume->devvp, 0, 0, 0);
365                         VOP_CLOSE(volume->devvp, FREAD);
366                 } else {
367                         vinvalbuf(volume->devvp, V_SAVE, 0, 0);
368                         VOP_CLOSE(volume->devvp, FREAD|FWRITE);
369                 }
370         }
371
372         /*
373          * Destroy the structure
374          */
375         RB_REMOVE(hammer_vol_rb_tree, &hmp->rb_vols_root, volume);
376         hammer_free_volume(volume);
377         return(0);
378 }
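
/*
 * Sketch (not compiled): hammer_unload_volume() is intended to be driven
 * from RB_SCAN at unmount time, roughly along these lines.
 */
#if 0
static void
example_unmount_volumes(struct hammer_mount *hmp)
{
	RB_SCAN(hammer_vol_rb_tree, &hmp->rb_vols_root, NULL,
		hammer_unload_volume, NULL);
}
#endif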
379
380 static
381 void
382 hammer_free_volume(hammer_volume_t volume)
383 {
384         if (volume->vol_name) {
385                 kfree(volume->vol_name, M_HAMMER);
386                 volume->vol_name = NULL;
387         }
388         if (volume->devvp) {
389                 vrele(volume->devvp);
390                 volume->devvp = NULL;
391         }
392         --hammer_count_volumes;
393         kfree(volume, M_HAMMER);
394 }
395
396 /*
397  * Get a HAMMER volume.  The volume must already exist.
398  */
399 hammer_volume_t
400 hammer_get_volume(struct hammer_mount *hmp, int32_t vol_no, int *errorp)
401 {
402         struct hammer_volume *volume;
403
404         /*
405          * Locate the volume structure
406          */
407         volume = RB_LOOKUP(hammer_vol_rb_tree, &hmp->rb_vols_root, vol_no);
408         if (volume == NULL) {
409                 *errorp = ENOENT;
410                 return(NULL);
411         }
412         hammer_ref(&volume->io.lock);
413
414         /*
415          * Deal with on-disk info
416          */
417         if (volume->ondisk == NULL) {
418                 *errorp = hammer_load_volume(volume);
419                 if (*errorp) {
420                         hammer_rel_volume(volume, 1);
421                         volume = NULL;
422                 }
423         } else {
424                 *errorp = 0;
425         }
426         return(volume);
427 }
428
429 int
430 hammer_ref_volume(hammer_volume_t volume)
431 {
432         int error;
433
434         hammer_ref(&volume->io.lock);
435
436         /*
437          * Deal with on-disk info
438          */
439         if (volume->ondisk == NULL) {
440                 error = hammer_load_volume(volume);
441                 if (error)
442                         hammer_rel_volume(volume, 1);
443         } else {
444                 error = 0;
445         }
446         return (error);
447 }
448
449 hammer_volume_t
450 hammer_get_root_volume(struct hammer_mount *hmp, int *errorp)
451 {
452         hammer_volume_t volume;
453
454         volume = hmp->rootvol;
455         KKASSERT(volume != NULL);
456         hammer_ref(&volume->io.lock);
457
458         /*
459          * Deal with on-disk info
460          */
461         if (volume->ondisk == NULL) {
462                 *errorp = hammer_load_volume(volume);
463                 if (*errorp) {
464                         hammer_rel_volume(volume, 1);
465                         volume = NULL;
466                 }
467         } else {
468                 *errorp = 0;
469         }
470         return (volume);
471 }
472
473 /*
474  * Load a volume's on-disk information.  The volume must be referenced and
475  * not locked.  We temporarily acquire an exclusive lock to interlock
476  * against releases or multiple gets.
477  */
478 static int
479 hammer_load_volume(hammer_volume_t volume)
480 {
481         struct hammer_volume_ondisk *ondisk;
482         int error;
483
484         hammer_lock_ex(&volume->io.lock);
485         if (volume->ondisk == NULL) {
486                 error = hammer_io_read(volume->devvp, &volume->io);
487                 if (error) {
488                         hammer_unlock(&volume->io.lock);
489                         return (error);
490                 }
491                 volume->ondisk = ondisk = (void *)volume->io.bp->b_data;
492
493                 /*
494                  * Configure the volume's A-lists.  These are used to
495                  * allocate clusters.
496                  */
497                 if (volume->vol_flags & HAMMER_VOLF_USINGSUPERCL) {
498                         volume->alist.config = &Vol_super_alist_config;
499                         volume->alist.meta = ondisk->vol_almeta.super;
500                         volume->alist.info = volume;
501                 } else {
502                         volume->alist.config = &Vol_normal_alist_config;
503                         volume->alist.meta = ondisk->vol_almeta.normal;
504                         volume->alist.info = NULL;
505                 }
506         } else {
507                 error = 0;
508         }
509         hammer_unlock(&volume->io.lock);
510         return(error);
511 }
512
513 /*
514  * Release a volume.  Call hammer_io_release on the last reference.  We have
515  * to acquire an exclusive lock to interlock against volume->ondisk tests
516  * in hammer_load_volume(), and hammer_io_release() also expects an exclusive
517  * lock to be held.
518  *
519  * Volumes are not unloaded from memory during normal operation.
520  */
521 void
522 hammer_rel_volume(hammer_volume_t volume, int flush)
523 {
524         if (volume->io.lock.refs == 1) {
525                 hammer_lock_ex(&volume->io.lock);
526                 if (volume->io.lock.refs == 1) {
527                         volume->ondisk = NULL;
528                         hammer_io_release(&volume->io, flush);
529                 } else if (flush) {
530                         hammer_io_flush(&volume->io);
531                 }
532                 hammer_unlock(&volume->io.lock);
533         }
534         hammer_unref(&volume->io.lock);
535 }
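
/*
 * Sketch (not compiled) of the reference discipline described above: a
 * get/rel pair brackets any access to volume->ondisk.  The example
 * function itself is hypothetical.
 */
#if 0
static int
example_volume_access(struct hammer_mount *hmp, int32_t vol_no)
{
	hammer_volume_t volume;
	int error;

	volume = hammer_get_volume(hmp, vol_no, &error);
	if (volume == NULL)
		return(error);
	/* volume->ondisk is valid while the reference is held */
	kprintf("vol %d has %lld blocks\n", vol_no,
		(long long)volume->ondisk->vol_nblocks);
	hammer_rel_volume(volume, 0);
	return(0);
}
#endif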
536
537 /************************************************************************
538  *                              SUPER-CLUSTERS                          *
539  ************************************************************************
540  *
541  * Manage super-clusters.  Note that a supercl holds a reference to its
542  * associated volume.
543  */
544 static int
545 hammer_find_supercl(hammer_volume_t volume, int32_t scl_no)
546 {
547         if (RB_LOOKUP(hammer_scl_rb_tree, &volume->rb_scls_root, scl_no))
548                 return(1);
549         return(0);
550 }
551
552 hammer_supercl_t
553 hammer_get_supercl(hammer_volume_t volume, int32_t scl_no,
554                    int *errorp, hammer_alloc_state_t isnew)
555 {
556         hammer_supercl_t supercl;
557
558         /*
559          * Locate and lock the super-cluster structure, creating one
560          * if necessary.
561          */
562 again:
563         supercl = RB_LOOKUP(hammer_scl_rb_tree, &volume->rb_scls_root, scl_no);
564         if (supercl == NULL) {
565                 ++hammer_count_supercls;
566                 supercl = kmalloc(sizeof(*supercl), M_HAMMER, M_WAITOK|M_ZERO);
567                 supercl->scl_no = scl_no;
568                 supercl->volume = volume;
569                 supercl->io.offset = calculate_supercl_offset(volume, scl_no);
570                 hammer_io_init(&supercl->io, HAMMER_STRUCTURE_SUPERCL);
571                 hammer_ref(&supercl->io.lock);
572
573                 /*
574                  * Insert the super-cluster into the RB tree and handle late
575                  * collisions.
576                  */
577                 if (RB_INSERT(hammer_scl_rb_tree, &volume->rb_scls_root, supercl)) {
578                         hammer_unref(&supercl->io.lock);
579                         --hammer_count_supercls;
580                         kfree(supercl, M_HAMMER);
581                         goto again;
582                 }
583                 hammer_ref(&volume->io.lock);
584         } else {
585                 hammer_ref(&supercl->io.lock);
586         }
587
588         /*
589          * Deal with on-disk info
590          */
591         if (supercl->ondisk == NULL || isnew) {
592                 *errorp = hammer_load_supercl(supercl, isnew);
593                 if (*errorp) {
594                         hammer_rel_supercl(supercl, 1);
595                         supercl = NULL;
596                 }
597         } else {
598                 *errorp = 0;
599         }
600         return(supercl);
601 }
602
603 static int
604 hammer_load_supercl(hammer_supercl_t supercl, hammer_alloc_state_t isnew)
605 {
606         struct hammer_supercl_ondisk *ondisk;
607         hammer_volume_t volume = supercl->volume;
608         int error;
609         int64_t nclusters;
610
611         hammer_lock_ex(&supercl->io.lock);
612         if (supercl->ondisk == NULL) {
613                 if (isnew)
614                         error = hammer_io_new(volume->devvp, &supercl->io);
615                 else
616                         error = hammer_io_read(volume->devvp, &supercl->io);
617                 if (error) {
618                         hammer_unlock(&supercl->io.lock);
619                         return (error);
620                 }
621                 supercl->ondisk = ondisk = (void *)supercl->io.bp->b_data;
622
623                 supercl->alist.config = &Supercl_alist_config;
624                 supercl->alist.meta = ondisk->scl_meta;
625                 supercl->alist.info = NULL;
626         } else if (isnew) {
627                 error = hammer_io_new(volume->devvp, &supercl->io);
628         } else {
629                 error = 0;
630         }
631         if (error == 0 && isnew) {
632                 /*
633                  * If this is a new super-cluster we have to initialize
634                  * various ondisk structural elements.  The caller is
635                  * responsible for the remainder.
636                  */
637                 struct hammer_alist_live dummy;
638
639                 hammer_modify_supercl(supercl);
640
641                 ondisk = supercl->ondisk;
642                 dummy.config = &Buf_alist_config;
643                 dummy.meta = ondisk->head.buf_almeta;
644                 dummy.info = NULL;
645                 hammer_initbuffer(&dummy, &ondisk->head, HAMMER_FSBUF_SUPERCL);
646
647                 nclusters = volume->ondisk->vol_nclusters -
648                             ((int64_t)supercl->scl_no * HAMMER_SCL_MAXCLUSTERS);
649                 KKASSERT(nclusters > 0);
650                 if (nclusters > HAMMER_SCL_MAXCLUSTERS)
651                         nclusters = HAMMER_SCL_MAXCLUSTERS;
652                 hammer_alist_init(&supercl->alist, 0, (int32_t)nclusters,
653                                   isnew);
654         }
655         hammer_unlock(&supercl->io.lock);
656         return (error);
657 }
658
659 /*
660  * NOTE: Called from RB_SCAN, must return >= 0 for scan to continue.
661  */
662 int
663 hammer_unload_supercl(hammer_supercl_t supercl, void *data __unused)
664 {
665         KKASSERT(supercl->io.lock.refs == 0);
666         hammer_ref(&supercl->io.lock);
667         hammer_rel_supercl(supercl, 2);
668         return(0);
669 }
670
671 /*
672  * Release a super-cluster.  We have to deal with several places where
673  * another thread can ref the super-cluster.
674  *
675  * Only destroy the structure itself if the related buffer cache buffer
676  * was disassociated from it.  This ties the management of the structure
677  * to the buffer cache subsystem.
678  */
679 void
680 hammer_rel_supercl(hammer_supercl_t supercl, int flush)
681 {
682         hammer_volume_t volume;
683
684         if (supercl->io.lock.refs == 1) {
685                 hammer_lock_ex(&supercl->io.lock);
686                 if (supercl->io.lock.refs == 1) {
687                         hammer_io_release(&supercl->io, flush);
688                         if (supercl->io.bp == NULL &&
689                             supercl->io.lock.refs == 1) {
690                                 volume = supercl->volume;
691                                 RB_REMOVE(hammer_scl_rb_tree,
692                                           &volume->rb_scls_root, supercl);
693                                 supercl->volume = NULL; /* sanity */
694                                 --hammer_count_supercls;
695                                 kfree(supercl, M_HAMMER);
696                                 hammer_rel_volume(volume, 0);
697                                 return;
698                         }
699                 } else if (flush) {
700                         hammer_io_flush(&supercl->io);
701                 }
702                 hammer_unlock(&supercl->io.lock);
703         }
704         hammer_unref(&supercl->io.lock);
705 }
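
/*
 * Sketch (not compiled): typical use of the super-cluster API above.  A
 * newly formatted super-cluster is obtained with an allocation-state hint,
 * an existing one with 0; both are released via hammer_rel_supercl().  The
 * example function is hypothetical.
 */
#if 0
static int
example_touch_supercl(hammer_volume_t volume, int32_t scl_no, int format)
{
	hammer_supercl_t supercl;
	int error;

	supercl = hammer_get_supercl(volume, scl_no, &error,
				     format ? HAMMER_ASTATE_FREE : 0);
	if (supercl == NULL)
		return(error);
	/* ... use supercl->ondisk / supercl->alist while referenced ... */
	hammer_rel_supercl(supercl, 0);
	return(0);
}
#endif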
706
707 /************************************************************************
708  *                              CLUSTERS                                *
709  ************************************************************************
710  *
711  */
712 hammer_cluster_t
713 hammer_get_cluster(hammer_volume_t volume, int32_t clu_no,
714                    int *errorp, hammer_alloc_state_t isnew)
715 {
716         hammer_cluster_t cluster;
717
718 again:
719         cluster = RB_LOOKUP(hammer_clu_rb_tree, &volume->rb_clus_root, clu_no);
720         if (cluster == NULL) {
721                 ++hammer_count_clusters;
722                 cluster = kmalloc(sizeof(*cluster), M_HAMMER, M_WAITOK|M_ZERO);
723                 cluster->clu_no = clu_no;
724                 cluster->volume = volume;
725                 RB_INIT(&cluster->rb_bufs_root);
726                 RB_INIT(&cluster->rb_nods_root);
727                 hammer_io_init(&cluster->io, HAMMER_STRUCTURE_CLUSTER);
728                 cluster->io.offset = calculate_cluster_offset(volume, clu_no);
729                 hammer_ref(&cluster->io.lock);
730
731                 /*
732                  * Insert the cluster into the RB tree and handle late
733                  * collisions.
734                  */
735                 if (RB_INSERT(hammer_clu_rb_tree, &volume->rb_clus_root, cluster)) {
736                         hammer_unref(&cluster->io.lock);
737                         --hammer_count_clusters;
738                         kfree(cluster, M_HAMMER);
739                         goto again;
740                 }
741                 hammer_ref(&volume->io.lock);
742         } else {
743                 hammer_ref(&cluster->io.lock);
744         }
745
746         /*
747          * Deal with on-disk info
748          */
749         if (cluster->ondisk == NULL || isnew) {
750                 *errorp = hammer_load_cluster(cluster, isnew);
751                 if (*errorp) {
752                         hammer_rel_cluster(cluster, 1);
753                         cluster = NULL;
754                 }
755         } else {
756                 *errorp = 0;
757         }
758         return (cluster);
759 }
760
761 hammer_cluster_t
762 hammer_get_root_cluster(struct hammer_mount *hmp, int *errorp)
763 {
764         hammer_cluster_t cluster;
765
766         cluster = hmp->rootcl;
767         KKASSERT(cluster != NULL);
768         hammer_ref(&cluster->io.lock);
769
770         /*
771          * Deal with on-disk info
772          */
773         if (cluster->ondisk == NULL) {
774                 *errorp = hammer_load_cluster(cluster, 0);
775                 if (*errorp) {
776                         hammer_rel_cluster(cluster, 1);
777                         cluster = NULL;
778                 }
779         } else {
780                 *errorp = 0;
781         }
782         return (cluster);
783 }
784
785 static
786 int
787 hammer_load_cluster(hammer_cluster_t cluster, hammer_alloc_state_t isnew)
788 {
789         hammer_volume_t volume = cluster->volume;
790         struct hammer_cluster_ondisk *ondisk;
791         int error;
792
793         /*
794          * Load the cluster's on-disk info
795          */
796         hammer_lock_ex(&cluster->io.lock);
797         if (cluster->ondisk == NULL) {
798                 if (isnew)
799                         error = hammer_io_new(volume->devvp, &cluster->io);
800                 else
801                         error = hammer_io_read(volume->devvp, &cluster->io);
802                 if (error) {
803                         hammer_unlock(&cluster->io.lock);
804                         return (error);
805                 }
806                 cluster->ondisk = ondisk = (void *)cluster->io.bp->b_data;
807
808                 cluster->alist_master.config = &Clu_master_alist_config;
809                 cluster->alist_master.meta = ondisk->clu_master_meta;
810                 cluster->alist_btree.config = &Clu_slave_alist_config;
811                 cluster->alist_btree.meta = ondisk->clu_btree_meta;
812                 cluster->alist_btree.info = cluster;
813                 cluster->alist_record.config = &Clu_slave_alist_config;
814                 cluster->alist_record.meta = ondisk->clu_record_meta;
815                 cluster->alist_record.info = cluster;
816                 cluster->alist_mdata.config = &Clu_slave_alist_config;
817                 cluster->alist_mdata.meta = ondisk->clu_mdata_meta;
818                 cluster->alist_mdata.info = cluster;
819
820                 if (isnew == 0) {
821                         /*
822                          * Load cluster range info for easy access
823                          */
824                         cluster->clu_btree_beg = ondisk->clu_btree_beg;
825                         cluster->clu_btree_end = ondisk->clu_btree_end;
826
827                         /*
828                          * Recover a cluster that was marked open.  This
829                          * can be rather involved and block for a hefty
830                          * chunk of time.
831                          */
832                         /*if (ondisk->clu_flags & HAMMER_CLUF_OPEN)*/
833                                 hammer_recover(cluster);
834                 }
835         } else if (isnew) {
836                 error = hammer_io_new(volume->devvp, &cluster->io);
837         } else {
838                 error = 0;
839         }
840         if (error == 0 && isnew) {
841                 /*
842                  * If this is a new cluster we have to initialize
843                  * various ondisk structural elements.  The caller is
844                  * responsible for the remainder.
845                  */
846                 struct hammer_alist_live dummy;
847                 hammer_node_t croot;
848                 hammer_volume_ondisk_t voldisk;
849                 int32_t nbuffers;
850
851                 hammer_modify_cluster(cluster);
852                 ondisk = cluster->ondisk;
853                 voldisk = volume->ondisk;
854
855                 dummy.config = &Buf_alist_config;
856                 dummy.meta = ondisk->head.buf_almeta;
857                 dummy.info = NULL;
858                 hammer_initbuffer(&dummy, &ondisk->head, HAMMER_FSBUF_CLUSTER);
859
860                 ondisk->vol_fsid = voldisk->vol_fsid;
861                 ondisk->vol_fstype = voldisk->vol_fstype;
862                 ondisk->clu_gen = 1;
863                 ondisk->clu_id = 0;     /* XXX */
864                 ondisk->clu_no = cluster->clu_no;
865                 ondisk->clu_flags = 0;
866                 ondisk->clu_start = HAMMER_BUFSIZE;
867                 KKASSERT(voldisk->vol_clo_end > cluster->io.offset);
868                 if (voldisk->vol_clo_end - cluster->io.offset >
869                     voldisk->vol_clsize) {
870                         ondisk->clu_limit = voldisk->vol_clsize;
871                 } else {
872                         ondisk->clu_limit = (int32_t)(voldisk->vol_clo_end -
873                                                       cluster->io.offset);
874                 }
875                 nbuffers = ondisk->clu_limit / HAMMER_BUFSIZE;
876                 KKASSERT(isnew == HAMMER_ASTATE_FREE);
877                 hammer_alist_init(&cluster->alist_master, 1, nbuffers - 1,
878                                   HAMMER_ASTATE_FREE);
879                 hammer_alist_init(&cluster->alist_btree,
880                                   HAMMER_FSBUF_MAXBLKS,
881                                   (nbuffers - 1) * HAMMER_FSBUF_MAXBLKS,
882                                   HAMMER_ASTATE_ALLOC);
883                 hammer_alist_init(&cluster->alist_record,
884                                   HAMMER_FSBUF_MAXBLKS,
885                                   (nbuffers - 1) * HAMMER_FSBUF_MAXBLKS,
886                                   HAMMER_ASTATE_ALLOC);
887                 hammer_alist_init(&cluster->alist_mdata,
888                                   HAMMER_FSBUF_MAXBLKS,
889                                   (nbuffers - 1) * HAMMER_FSBUF_MAXBLKS,
890                                   HAMMER_ASTATE_ALLOC);
891
892                 ondisk->idx_data = 1 * HAMMER_FSBUF_MAXBLKS;
893                 ondisk->idx_index = 0 * HAMMER_FSBUF_MAXBLKS;
894                 ondisk->idx_record = nbuffers * HAMMER_FSBUF_MAXBLKS;
895
896                 /*
897                  * Initialize the B-Tree.  We don't know what the caller
898                  * intends to do with the cluster so make sure it causes
899                  * an assertion if the caller makes no changes.
900                  */
901                 ondisk->clu_btree_parent_vol_no = -2;
902                 ondisk->clu_btree_parent_clu_no = -2;
903                 ondisk->clu_btree_parent_offset = -2;
904                 ondisk->clu_btree_parent_clu_gen = -2;
905
906                 croot = hammer_alloc_btree(cluster, &error);
907                 if (error == 0) {
908                         hammer_modify_node(croot);
909                         bzero(croot->ondisk, sizeof(*croot->ondisk));
910                         croot->ondisk->count = 0;
911                         croot->ondisk->type = HAMMER_BTREE_TYPE_LEAF;
912                         hammer_modify_cluster(cluster);
913                         ondisk->clu_btree_root = croot->node_offset;
914                         hammer_rel_node(croot);
915                 }
916         }
917         hammer_unlock(&cluster->io.lock);
918         return (error);
919 }
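
/*
 * Sketch (not compiled) of the block addressing implied by the a-list
 * initialization above: the slave a-lists span buffers 1..nbuffers-1 and a
 * returned block number encodes both the buffer and the element within it.
 * The helper below is hypothetical and only illustrates the arithmetic.
 */
#if 0
static void
example_decode_alist_blkno(int32_t blkno, int32_t *buf_nop, int32_t *elm_nop)
{
	*buf_nop = blkno / HAMMER_FSBUF_MAXBLKS;   /* buffer within the cluster */
	*elm_nop = blkno % HAMMER_FSBUF_MAXBLKS;   /* element within that buffer */
}
#endif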
920
921 /*
922  * NOTE: Called from RB_SCAN, must return >= 0 for scan to continue.
923  */
924 int
925 hammer_unload_cluster(hammer_cluster_t cluster, void *data __unused)
926 {
927         hammer_ref(&cluster->io.lock);
928         RB_SCAN(hammer_buf_rb_tree, &cluster->rb_bufs_root, NULL,
929                 hammer_unload_buffer, NULL);
930         hammer_io_waitdep(&cluster->io);
931         KKASSERT(cluster->io.lock.refs == 1);
932         hammer_rel_cluster(cluster, 2);
933         return(0);
934 }
935
936 /*
937  * Update the cluster's synchronization TID, which is used during cluster
938  * recovery.  NOTE: The cluster header is not written out until all related
939  * records have been written out.
940  */
941 void
942 hammer_update_syncid(hammer_cluster_t cluster, hammer_tid_t tid)
943 {
944         hammer_modify_cluster(cluster);
945         if (cluster->ondisk->synchronized_tid < tid)
946                 cluster->ondisk->synchronized_tid = tid;
947 }
948
949 /*
950  * Reference a cluster that is either already referenced or referenced via a
951  * specially handled pointer (aka rootcl).
952  */
953 int
954 hammer_ref_cluster(hammer_cluster_t cluster)
955 {
956         int error;
957
958         KKASSERT(cluster != NULL);
959         hammer_ref(&cluster->io.lock);
960
961         /*
962          * Deal with on-disk info
963          */
964         if (cluster->ondisk == NULL) {
965                 error = hammer_load_cluster(cluster, 0);
966                 if (error)
967                         hammer_rel_cluster(cluster, 1);
968         } else {
969                 error = 0;
970         }
971         return(error);
972 }
973
974 /*
975  * Release a cluster.  We have to deal with several places where
976  * another thread can ref the cluster.
977  *
978  * Only destroy the structure itself if we no longer have an IO or any
979  * hammer buffers associated with the structure.
980  */
981 void
982 hammer_rel_cluster(hammer_cluster_t cluster, int flush)
983 {
984         hammer_volume_t volume;
985
986         if (cluster->io.lock.refs == 1) {
987                 hammer_lock_ex(&cluster->io.lock);
988                 if (cluster->io.lock.refs == 1) {
989                         /*
990                          * Release the I/O.  If we or the kernel wants to
991                          * flush, this will release the bp.  Otherwise the
992                          * bp may be written and flushed passively by the
993                          * kernel later on.
994                          */
995                         hammer_io_release(&cluster->io, flush);
996
997                         /*
998                          * Final cleanup
999                          */
1000                         if (cluster != cluster->volume->hmp->rootcl &&
1001                             cluster->io.bp == NULL &&
1002                             cluster->io.lock.refs == 1 &&
1003                             RB_EMPTY(&cluster->rb_bufs_root)) {
1004                                 KKASSERT(RB_EMPTY(&cluster->rb_nods_root));
1005                                 volume = cluster->volume;
1006                                 RB_REMOVE(hammer_clu_rb_tree,
1007                                           &volume->rb_clus_root, cluster);
1008                                 cluster->volume = NULL; /* sanity */
1009                                 --hammer_count_clusters;
1010                                 kfree(cluster, M_HAMMER);
1011                                 hammer_rel_volume(volume, 0);
1012                                 return;
1013                         }
1014                 } else if (flush) {
1015                         hammer_io_flush(&cluster->io);
1016                 }
1017                 hammer_unlock(&cluster->io.lock);
1018         }
1019         hammer_unref(&cluster->io.lock);
1020 }
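
/*
 * Sketch (not compiled): the same get/use/release pattern applies to
 * clusters.  Passing 0 for isnew means an existing cluster is expected.
 * The example function is hypothetical.
 */
#if 0
static int
example_cluster_access(hammer_volume_t volume, int32_t clu_no)
{
	hammer_cluster_t cluster;
	int error;

	cluster = hammer_get_cluster(volume, clu_no, &error, 0);
	if (cluster == NULL)
		return(error);
	/* cluster->ondisk is valid while the reference is held */
	kprintf("cluster %d gen %d\n", clu_no, (int)cluster->ondisk->clu_gen);
	hammer_rel_cluster(cluster, 0);
	return(0);
}
#endif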
1021
1022 /************************************************************************
1023  *                              BUFFERS                                 *
1024  ************************************************************************
1025  *
1026  * Manage buffers.  Note that a buffer holds a reference to its associated
1027  * cluster, and its cluster will hold a reference to the cluster's volume.
1028  *
1029  * A non-zero buf_type indicates that a new buffer should be created and
1030  * zero'd.
1031  */
1032 hammer_buffer_t
1033 hammer_get_buffer(hammer_cluster_t cluster, int32_t buf_no,
1034                   u_int64_t buf_type, int *errorp)
1035 {
1036         hammer_buffer_t buffer;
1037
1038         /*
1039          * Find the buffer.  Note that buffer 0 corresponds to the cluster
1040          * header and should never be requested.
1041          */
1042         KKASSERT(buf_no >= cluster->ondisk->clu_start / HAMMER_BUFSIZE &&
1043                  buf_no < cluster->ondisk->clu_limit / HAMMER_BUFSIZE);
1044
1045         /*
1046          * Locate and lock the buffer structure, creating one if necessary.
1047          */
1048 again:
1049         buffer = RB_LOOKUP(hammer_buf_rb_tree, &cluster->rb_bufs_root, buf_no);
1050         if (buffer == NULL) {
1051                 ++hammer_count_buffers;
1052                 buffer = kmalloc(sizeof(*buffer), M_HAMMER, M_WAITOK|M_ZERO);
1053                 buffer->buf_no = buf_no;
1054                 buffer->cluster = cluster;
1055                 buffer->volume = cluster->volume;
1056                 hammer_io_init(&buffer->io, HAMMER_STRUCTURE_BUFFER);
1057                 buffer->io.offset = cluster->io.offset +
1058                                     (buf_no * HAMMER_BUFSIZE);
1059                 TAILQ_INIT(&buffer->clist);
1060                 hammer_ref(&buffer->io.lock);
1061
1062                 /*
1063                  * Insert the buffer into the RB tree and handle late
1064                  * collisions.
1065                  */
1066                 if (RB_INSERT(hammer_buf_rb_tree, &cluster->rb_bufs_root, buffer)) {
1067                         hammer_unref(&buffer->io.lock);
1068                         --hammer_count_buffers;
1069                         kfree(buffer, M_HAMMER);
1070                         goto again;
1071                 }
1072                 hammer_ref(&cluster->io.lock);
1073         } else {
1074                 hammer_ref(&buffer->io.lock);
1075         }
1076
1077         /*
1078          * Deal with on-disk info
1079          */
1080         if (buffer->ondisk == NULL || buf_type) {
1081                 *errorp = hammer_load_buffer(buffer, buf_type);
1082                 if (*errorp) {
1083                         hammer_rel_buffer(buffer, 1);
1084                         buffer = NULL;
1085                 }
1086         } else {
1087                 *errorp = 0;
1088         }
1089         return(buffer);
1090 }
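
/*
 * Sketch (not compiled): how a caller uses the buf_type argument described
 * above.  Passing 0 reads and references an existing buffer; passing a
 * non-zero HAMMER_FSBUF_* type would instead format a new, zero'd buffer.
 * The example function is hypothetical.
 */
#if 0
static int
example_read_buffer(hammer_cluster_t cluster, int32_t buf_no)
{
	hammer_buffer_t buffer;
	int error;

	buffer = hammer_get_buffer(cluster, buf_no, 0, &error);
	if (buffer == NULL)
		return(error);
	/* buffer->ondisk is valid while the reference is held */
	hammer_rel_buffer(buffer, 0);
	return(0);
}
#endif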
1091
1092 static int
1093 hammer_load_buffer(hammer_buffer_t buffer, u_int64_t buf_type)
1094 {
1095         hammer_volume_t volume;
1096         hammer_fsbuf_ondisk_t ondisk;
1097         int error;
1098
1099         /*
1100          * Load the buffer's on-disk info
1101          */
1102         volume = buffer->volume;
1103         hammer_lock_ex(&buffer->io.lock);
1104         if (buffer->ondisk == NULL) {
1105                 if (buf_type) {
1106                         error = hammer_io_new(volume->devvp, &buffer->io);
1107                 } else {
1108                         error = hammer_io_read(volume->devvp, &buffer->io);
1109                 }
1110                 if (error) {
1111                         hammer_unlock(&buffer->io.lock);
1112                         return (error);
1113                 }
1114                 buffer->ondisk = ondisk = (void *)buffer->io.bp->b_data;
1115                 buffer->alist.config = &Buf_alist_config;
1116                 buffer->alist.meta = ondisk->head.buf_almeta;
1117                 buffer->buf_type = ondisk->head.buf_type;
1118         } else if (buf_type) {
1119                 error = hammer_io_new(volume->devvp, &buffer->io);
1120         } else {
1121                 error = 0;
1122         }
1123         if (error == 0 && buf_type) {
1124                 hammer_modify_buffer(buffer);
1125                 ondisk = buffer->ondisk;
1126                 hammer_initbuffer(&buffer->alist, &ondisk->head, buf_type);
1127                 buffer->buf_type = ondisk->head.buf_type;
1128         }
1129         hammer_unlock(&buffer->io.lock);
1130         return (error);
1131 }
1132
1133 /*
1134  * NOTE: Called from RB_SCAN, must return >= 0 for scan to continue.
1135  */
1136 int
1137 hammer_unload_buffer(hammer_buffer_t buffer, void *data __unused)
1138 {
1139         hammer_ref(&buffer->io.lock);
1140         hammer_flush_buffer_nodes(buffer);
1141         KKASSERT(buffer->io.lock.refs == 1);
1142         hammer_rel_buffer(buffer, 2);
1143         return(0);
1144 }
1145
1146 /*
1147  * Reference a buffer that is either already referenced or referenced via a
1148  * specially handled pointer (aka cursor->buffer).
1149  */
1150 int
1151 hammer_ref_buffer(hammer_buffer_t buffer)
1152 {
1153         int error;
1154
1155         hammer_ref(&buffer->io.lock);
1156         if (buffer->ondisk == NULL) {
1157                 error = hammer_load_buffer(buffer, 0);
1158                 if (error) {
1159                         hammer_rel_buffer(buffer, 1);
1160                         /*
1161                          * NOTE: buffer pointer can become stale after
1162                          * the above release.
1163                          */
1164                 } else {
1165                         KKASSERT(buffer->buf_type ==
1166                                  buffer->ondisk->head.buf_type);
1167                 }
1168         } else {
1169                 error = 0;
1170         }
1171         return(error);
1172 }
1173
1174 /*
1175  * Release a buffer.  We have to deal with several places where
1176  * another thread can ref the buffer.
1177  *
1178  * Only destroy the structure itself if the related buffer cache buffer
1179  * was disassociated from it.  This ties the management of the structure
1180  * to the buffer cache subsystem.  buffer->ondisk determines whether the
1181  * embedded io is referenced or not.
1182  */
1183 void
1184 hammer_rel_buffer(hammer_buffer_t buffer, int flush)
1185 {
1186         hammer_cluster_t cluster;
1187
1188         if (buffer->io.lock.refs == 1) {
1189                 hammer_lock_ex(&buffer->io.lock);
1190                 if (buffer->io.lock.refs == 1) {
1191                         hammer_io_release(&buffer->io, flush);
1192
1193                         if (buffer->io.bp == NULL &&
1194                             buffer->io.lock.refs == 1) {
1195                                 hammer_flush_buffer_nodes(buffer);
1196                                 KKASSERT(TAILQ_EMPTY(&buffer->clist));
1197                                 cluster = buffer->cluster;
1198                                 RB_REMOVE(hammer_buf_rb_tree,
1199                                           &cluster->rb_bufs_root, buffer);
1200                                 buffer->cluster = NULL; /* sanity */
1201                                 --hammer_count_buffers;
1202                                 kfree(buffer, M_HAMMER);
1203                                 hammer_rel_cluster(cluster, 0);
1204                                 return;
1205                         }
1206                 } else if (flush) {
1207                         hammer_io_flush(&buffer->io);
1208                 }
1209                 hammer_unlock(&buffer->io.lock);
1210         }
1211         hammer_unref(&buffer->io.lock);
1212 }
1213
1214 /************************************************************************
1215  *                              NODES                                   *
1216  ************************************************************************
1217  *
1218  * Manage B-Tree nodes.  B-Tree nodes represent the primary indexing
1219  * method used by the HAMMER filesystem.
1220  *
1221  * Unlike other HAMMER structures, a hammer_node can be PASSIVELY
1222  * associated with its buffer, and will only reference the buffer while
1223  * the node itself is referenced.
1224  *
1225  * A hammer_node can also be passively associated with other HAMMER
1226  * structures, such as inodes, while retaining 0 references.  These
1227  * associations can be cleared backwards using a pointer-to-pointer in
1228  * the hammer_node.
1229  *
1230  * This allows the HAMMER implementation to cache hammer_nodes long-term
1231  * and short-cut a great deal of the infrastructure's complexity.  In
1232  * most cases a cached node can be reacquired without having to dip into
1233  * either the buffer or cluster management code.
1234  *
1235  * The caller must pass a referenced cluster on call and will retain
1236  * ownership of the reference on return.  The node will acquire its own
1237  * additional references, if necessary.
1238  */
1239 hammer_node_t
1240 hammer_get_node(hammer_cluster_t cluster, int32_t node_offset, int *errorp)
1241 {
1242         hammer_node_t node;
1243
1244         /*
1245          * Locate the structure, allocating one if necessary.
1246          */
1247 again:
1248         node = RB_LOOKUP(hammer_nod_rb_tree, &cluster->rb_nods_root,
1249                          node_offset);
1250         if (node == NULL) {
1251                 ++hammer_count_nodes;
1252                 node = kmalloc(sizeof(*node), M_HAMMER, M_WAITOK|M_ZERO);
1253                 node->node_offset = node_offset;
1254                 node->cluster = cluster;
1255                 if (RB_INSERT(hammer_nod_rb_tree, &cluster->rb_nods_root,
1256                               node)) {
1257                         --hammer_count_nodes;
1258                         kfree(node, M_HAMMER);
1259                         goto again;
1260                 }
1261         }
1262         hammer_ref(&node->lock);
1263         *errorp = hammer_load_node(node);
1264         if (*errorp) {
1265                 hammer_rel_node(node);
1266                 node = NULL;
1267         }
1268         return(node);
1269 }
1270
1271 /*
1272  * Reference an already-referenced node.
1273  */
1274 int
1275 hammer_ref_node(hammer_node_t node)
1276 {
1277         int error;
1278
1279         KKASSERT(node->lock.refs > 0);
1280         hammer_ref(&node->lock);
1281         if ((error = hammer_load_node(node)) != 0)
1282                 hammer_rel_node(node);
1283         return(error);
1284 }
1285
1286 /*
1287  * Load a node's on-disk data reference.
1288  */
1289 static int
1290 hammer_load_node(hammer_node_t node)
1291 {
1292         hammer_buffer_t buffer;
1293         int32_t buf_no;
1294         int error;
1295
1296         if (node->ondisk)
1297                 return(0);
1298         error = 0;
1299         hammer_lock_ex(&node->lock);
1300         if (node->ondisk == NULL) {
1301                 /*
1302                  * This is a little confusing but the gist is that
1303                  * node->buffer determines whether the node is on
1304                  * the buffer's clist and node->ondisk determines
1305                  * whether the buffer is referenced.
1306                  */
1307                 if ((buffer = node->buffer) != NULL) {
1308                         error = hammer_ref_buffer(buffer);
1309                 } else {
1310                         buf_no = node->node_offset / HAMMER_BUFSIZE;
1311                         buffer = hammer_get_buffer(node->cluster,
1312                                                    buf_no, 0, &error);
1313                         if (buffer) {
1314                                 KKASSERT(error == 0);
1315                                 TAILQ_INSERT_TAIL(&buffer->clist,
1316                                                   node, entry);
1317                                 node->buffer = buffer;
1318                         }
1319                 }
1320                 if (error == 0) {
1321                         node->ondisk = (void *)((char *)buffer->ondisk +
1322                                (node->node_offset & HAMMER_BUFMASK));
1323                 }
1324         }
1325         hammer_unlock(&node->lock);
1326         return (error);
1327 }
1328
1329 /*
1330  * Safely reference a node, interlock against flushes via the IO subsystem.
1331  */
1332 hammer_node_t
1333 hammer_ref_node_safe(struct hammer_mount *hmp, struct hammer_node **cache,
1334                      int *errorp)
1335 {
1336         hammer_node_t node;
1337
1338         if ((node = *cache) != NULL)
1339                 hammer_ref(&node->lock);
1340         if (node) {
1341                 *errorp = hammer_load_node(node);
1342                 if (*errorp) {
1343                         hammer_rel_node(node);
1344                         node = NULL;
1345                 }
1346         } else {
1347                 *errorp = ENOENT;
1348         }
1349         return(node);
1350 }
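
/*
 * Sketch (not compiled) of the passive caching scheme described at the top
 * of this section: a node is obtained, remembered in a cache pointer, and
 * later reacquired cheaply through hammer_ref_node_safe().  The cache
 * pointer would normally live in a longer-lived structure such as an inode;
 * the example function is hypothetical.
 */
#if 0
static int
example_cache_and_reuse(struct hammer_mount *hmp, hammer_cluster_t cluster,
			int32_t node_offset, struct hammer_node **cache)
{
	hammer_node_t node;
	int error;

	node = hammer_get_node(cluster, node_offset, &error);
	if (node == NULL)
		return(error);
	hammer_cache_node(node, cache);		/* passive association */
	hammer_rel_node(node);			/* drop the active ref */

	node = hammer_ref_node_safe(hmp, cache, &error);  /* reacquire */
	if (node)
		hammer_rel_node(node);
	return(error);
}
#endif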
1351
1352 /*
1353  * Release a hammer_node.  On the last release the node dereferences
1354  * its underlying buffer and may or may not be destroyed.
1355  */
1356 void
1357 hammer_rel_node(hammer_node_t node)
1358 {
1359         hammer_cluster_t cluster;
1360         hammer_buffer_t buffer;
1361         int32_t node_offset;
1362         int flags;
1363
1364         /*
1365          * If this isn't the last ref just decrement the ref count and
1366          * return.
1367          */
1368         if (node->lock.refs > 1) {
1369                 hammer_unref(&node->lock);
1370                 return;
1371         }
1372
1373         /*
1374          * If there is no ondisk info or no buffer the node failed to load,
1375          * remove the last reference and destroy the node.
1376          */
1377         if (node->ondisk == NULL) {
1378                 hammer_unref(&node->lock);
1379                 hammer_flush_node(node);
1380                 /* node is stale now */
1381                 return;
1382         }
1383
1384         /*
1385          * Do final cleanups and then either destroy the node or leave it
1386          * passively cached.  The buffer reference is removed regardless.
1387          */
1388         buffer = node->buffer;
1389         node->ondisk = NULL;
1390
1391         if ((node->flags & (HAMMER_NODE_DELETED|HAMMER_NODE_FLUSH)) == 0) {
1392                 hammer_unref(&node->lock);
1393                 hammer_rel_buffer(buffer, 0);
1394                 return;
1395         }
1396
1397         /*
1398          * Destroy the node.  Record pertinent data because the node
1399          * becomes stale the instant we flush it.
1400          */
1401         flags = node->flags;
1402         node_offset = node->node_offset;
1403         hammer_unref(&node->lock);
1404         hammer_flush_node(node);
1405         /* node is stale */
1406
1407         cluster = buffer->cluster;
1408         if (flags & HAMMER_NODE_DELETED) {
1409                 hammer_free_btree(cluster, node_offset);
1410                 if (node_offset == cluster->ondisk->clu_btree_root) {
1411                         kprintf("FREE CLUSTER %d\n", cluster->clu_no);
1412                         hammer_free_cluster(cluster);
1413                         /*hammer_io_undirty(&cluster->io);*/
1414                 }
1415         }
1416         hammer_rel_buffer(buffer, 0);
1417 }
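
/*
 * Sketch (not compiled): the HAMMER_NODE_DELETED path above is normally
 * driven by marking the node before its final release, roughly like this.
 * The example function is hypothetical.
 */
#if 0
static void
example_delete_node(hammer_node_t node)
{
	node->flags |= HAMMER_NODE_DELETED;
	hammer_rel_node(node);	/* frees the B-Tree node on last release */
}
#endif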
1418
1419 /*
1420  * Passively cache a referenced hammer_node in *cache.  The caller may
1421  * release the node on return.
1422  */
1423 void
1424 hammer_cache_node(hammer_node_t node, struct hammer_node **cache)
1425 {
1426         hammer_node_t old;
1427
1428         /*
1429          * If the node is being deleted, don't cache it!
1430          */
1431         if (node->flags & HAMMER_NODE_DELETED)
1432                 return;
1433
1434         /*
1435          * Cache the node.  If we previously cached a different node we
1436          * have to give HAMMER a chance to destroy it.
1437          */
1438 again:
1439         if (node->cache1 != cache) {
1440                 if (node->cache2 != cache) {
1441                         if ((old = *cache) != NULL) {
1442                                 KKASSERT(node->lock.refs != 0);
1443                                 hammer_uncache_node(cache);
1444                                 goto again;
1445                         }
1446                         if (node->cache2)
1447                                 *node->cache2 = NULL;
1448                         node->cache2 = node->cache1;
1449                         node->cache1 = cache;
1450                         *cache = node;
1451                 } else {
1452                         struct hammer_node **tmp;
1453                         tmp = node->cache1;
1454                         node->cache1 = node->cache2;
1455                         node->cache2 = tmp;
1456                 }
1457         }
1458 }
1459
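#if 0
/*
 * Illustrative sketch only (not compiled): how a hypothetical caller might
 * combine the node caching API above.  The caller holds a reference from
 * hammer_get_node(), registers a passive cache pointer with
 * hammer_cache_node() (which does not add a reference), and may then
 * release its own reference.  The function names below are assumptions
 * made for illustration only.
 */
static hammer_node_t
example_lookup_cached(hammer_cluster_t cluster, int32_t node_offset,
                      struct hammer_node **cache, int *errorp)
{
        hammer_node_t node;

        node = hammer_get_node(cluster, node_offset, errorp);
        if (node) {
                /* remember the node passively for later lookups */
                hammer_cache_node(node, cache);
        }
        return(node);   /* caller still owns one reference */
}

static void
example_teardown_cache(struct hammer_node **cache)
{
        /* drop the passive pointer; may destroy an unreferenced node */
        hammer_uncache_node(cache);
}
#endif
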
1460 void
1461 hammer_uncache_node(struct hammer_node **cache)
1462 {
1463         hammer_node_t node;
1464
1465         if ((node = *cache) != NULL) {
1466                 *cache = NULL;
1467                 if (node->cache1 == cache) {
1468                         node->cache1 = node->cache2;
1469                         node->cache2 = NULL;
1470                 } else if (node->cache2 == cache) {
1471                         node->cache2 = NULL;
1472                 } else {
1473                         panic("hammer_uncache_node: missing cache linkage");
1474                 }
1475                 if (node->cache1 == NULL && node->cache2 == NULL)
1476                         hammer_flush_node(node);
1477         }
1478 }
1479
1480 /*
1481  * Remove a node's cache references and destroy the node if it has no
1482  * other references or backing store.
1483  */
1484 void
1485 hammer_flush_node(hammer_node_t node)
1486 {
1487         hammer_buffer_t buffer;
1488
1489         if (node->cache1)
1490                 *node->cache1 = NULL;
1491         if (node->cache2)
1492                 *node->cache2 = NULL;
1493         if (node->lock.refs == 0 && node->ondisk == NULL) {
1494                 RB_REMOVE(hammer_nod_rb_tree, &node->cluster->rb_nods_root,
1495                           node);
1496                 if ((buffer = node->buffer) != NULL) {
1497                         node->buffer = NULL;
1498                         TAILQ_REMOVE(&buffer->clist, node, entry);
1499                         /* buffer is unreferenced because ondisk is NULL */
1500                 }
1501                 --hammer_count_nodes;
1502                 kfree(node, M_HAMMER);
1503         }
1504 }
1505
1506 /*
1507  * Flush passively cached B-Tree nodes associated with this buffer.
1508  * This is only called when the buffer is about to be destroyed, so
1509  * none of the nodes should have any references.
1510  */
1511 void
1512 hammer_flush_buffer_nodes(hammer_buffer_t buffer)
1513 {
1514         hammer_node_t node;
1515
1516         while ((node = TAILQ_FIRST(&buffer->clist)) != NULL) {
1517                 KKASSERT(node->lock.refs == 0 && node->ondisk == NULL);
1518                 hammer_ref(&node->lock);
1519                 node->flags |= HAMMER_NODE_FLUSH;
1520                 hammer_rel_node(node);
1521         }
1522 }
1523
1524 /************************************************************************
1525  *                              A-LIST ALLOCATORS                       *
1526  ************************************************************************/
1527
1528 /*
1529  * Allocate HAMMER clusters
1530  */
1531 hammer_cluster_t
1532 hammer_alloc_cluster(hammer_mount_t hmp, hammer_cluster_t cluster_hint,
1533                      int *errorp)
1534 {
1535         hammer_volume_t volume;
1536         hammer_cluster_t cluster;
1537         int32_t clu_no;
1538         int32_t clu_hint;
1539         int32_t vol_beg;
1540         int32_t vol_no;
1541
1542         /*
1543          * Figure out our starting volume and hint.
1544          */
1545         if (cluster_hint) {
1546                 vol_beg = cluster_hint->volume->vol_no;
1547                 clu_hint = cluster_hint->clu_no;
1548         } else {
1549                 vol_beg = hmp->volume_iterator;
1550                 clu_hint = -1;
1551         }
1552
1553         /*
1554          * Loop through volumes looking for a free cluster.  If allocating
1555          * a new cluster relative to an existing cluster, try to find a free
1556          * cluster on either side of it (clu_hint >= 0); otherwise just do a
1557          * forward iteration.
1558          */
1559         vol_no = vol_beg;
1560         do {
1561                 volume = hammer_get_volume(hmp, vol_no, errorp);
1562                 kprintf("VOLUME %p %d\n", volume, vol_no);
1563                 if (*errorp) {
1564                         clu_no = HAMMER_ALIST_BLOCK_NONE;
1565                         break;
1566                 }
1567                 hammer_modify_volume(volume);
1568                 if (clu_hint == -1) {
1569                         clu_hint = volume->clu_iterator;
1570                         clu_no = hammer_alist_alloc_fwd(&volume->alist, 1,
1571                                                         clu_hint);
1572                         if (clu_no == HAMMER_ALIST_BLOCK_NONE) {
1573                                 clu_no = hammer_alist_alloc_fwd(&volume->alist,
1574                                                                 1, 0);
1575                         }
1576                 } else {
1577                         clu_no = hammer_alist_alloc_fwd(&volume->alist, 1,
1578                                                         clu_hint);
1579                         if (clu_no == HAMMER_ALIST_BLOCK_NONE) {
1580                                 clu_no = hammer_alist_alloc_rev(&volume->alist,
1581                                                                 1, clu_hint);
1582                         }
1583                 }
1584                 if (clu_no != HAMMER_ALIST_BLOCK_NONE)
1585                         break;
1586                 hammer_rel_volume(volume, 0);
1587                 volume = NULL;
1588                 *errorp = ENOSPC;
1589                 vol_no = (vol_no + 1) % hmp->nvolumes;
1590                 clu_hint = -1;
1591         } while (vol_no != vol_beg);
1592
1593         /*
1594          * Acquire the cluster.  On success this will force *errorp to 0.
1595          */
1596         if (clu_no != HAMMER_ALIST_BLOCK_NONE) {
1597                 kprintf("ALLOC CLUSTER %d:%d\n", volume->vol_no, clu_no);
1598                 cluster = hammer_get_cluster(volume, clu_no, errorp,
1599                                              HAMMER_ASTATE_FREE);
1600                 volume->clu_iterator = clu_no;
1601                 hammer_rel_volume(volume, 0);
1602         } else {
1603                 cluster = NULL;
1604         }
1605         if (cluster)
1606                 hammer_lock_ex(&cluster->io.lock);
1607         return(cluster);
1608 }
1609
1610 void
1611 hammer_init_cluster(hammer_cluster_t cluster, hammer_base_elm_t left_bound, 
1612                     hammer_base_elm_t right_bound)
1613 {
1614         hammer_cluster_ondisk_t ondisk = cluster->ondisk;
1615
1616         hammer_modify_cluster(cluster);
1617         ondisk->clu_btree_beg = *left_bound;
1618         ondisk->clu_btree_end = *right_bound;
1619         cluster->clu_btree_beg = ondisk->clu_btree_beg;
1620         cluster->clu_btree_end = ondisk->clu_btree_end;
1621 }
1622
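#if 0
/*
 * Illustrative sketch only (not compiled): typical pairing of
 * hammer_alloc_cluster() and hammer_init_cluster().  The B-Tree bounds
 * are assumed to come from the caller's split logic and the function
 * name is hypothetical.  The returned cluster is referenced and
 * exclusively locked by hammer_alloc_cluster().
 */
static hammer_cluster_t
example_new_cluster(hammer_mount_t hmp, hammer_cluster_t near_cluster,
                    hammer_base_elm_t left_bound,
                    hammer_base_elm_t right_bound, int *errorp)
{
        hammer_cluster_t ncluster;

        ncluster = hammer_alloc_cluster(hmp, near_cluster, errorp);
        if (ncluster)
                hammer_init_cluster(ncluster, left_bound, right_bound);
        return(ncluster);
}
#endif
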
1623 /*
1624  * Deallocate a cluster
1625  */
1626 void
1627 hammer_free_cluster(hammer_cluster_t cluster)
1628 {
1629         hammer_modify_cluster(cluster);
1630         hammer_alist_free(&cluster->volume->alist, cluster->clu_no, 1);
1631 }
1632
1633 /*
1634  * Allocate HAMMER elements - btree nodes, data storage, and record elements
1635  *
1636  * The passed *bufferp should be initialized to NULL.  On successive calls
1637  * *bufferp caches the most recent buffer used until put away by the caller.
1638  * Note that previously returned pointers using the cached buffer become
1639  * invalid on successive calls which reuse *bufferp.
1640  *
1641  * All allocations first attempt to use the block found at the specified
1642  * iterator.  If that fails, the first available block is used.  If that
1643  * also fails, a new buffer is allocated and associated with the buffer-type
1644  * A-list, and the element is allocated out of the new buffer.
1645  */
1646
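#if 0
/*
 * Illustrative sketch only (not compiled): the *bufferp contract described
 * above.  A hypothetical caller allocating several records initializes the
 * buffer cache pointer to NULL, lets the allocator reuse it across calls,
 * and releases it once when done.  Error handling is abbreviated.
 */
static void
example_alloc_records(hammer_cluster_t cluster, int count, int *errorp)
{
        hammer_buffer_t buffer = NULL;
        void *rec;
        int i;

        for (i = 0; i < count; ++i) {
                rec = hammer_alloc_record(cluster, errorp, &buffer);
                if (rec == NULL)
                        break;
                /*
                 * NOTE: rec points into the cached buffer and becomes
                 * invalid once a later call replaces *bufferp.
                 */
        }
        if (buffer)
                hammer_rel_buffer(buffer, 0);
}
#endif
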
1647 hammer_node_t
1648 hammer_alloc_btree(hammer_cluster_t cluster, int *errorp)
1649 {
1650         hammer_buffer_t buffer;
1651         hammer_alist_t live;
1652         hammer_node_t node;
1653         int32_t elm_no;
1654         int32_t buf_no;
1655         int32_t node_offset;
1656
1657         /*
1658          * Allocate a B-Tree element
1659          */
1660         hammer_modify_cluster(cluster);
1661         buffer = NULL;
1662         live = &cluster->alist_btree;
1663         elm_no = hammer_alist_alloc_fwd(live, 1, cluster->ondisk->idx_index);
1664         if (elm_no == HAMMER_ALIST_BLOCK_NONE)
1665                 elm_no = hammer_alist_alloc_fwd(live, 1, 0);
1666         if (elm_no == HAMMER_ALIST_BLOCK_NONE) {
1667                 alloc_new_buffer(cluster, HAMMER_FSBUF_BTREE, live,
1668                                  cluster->ondisk->idx_index, errorp, &buffer);
1669                 elm_no = hammer_alist_alloc(live, 1);
1670                 if (elm_no == HAMMER_ALIST_BLOCK_NONE) {
1671                         *errorp = ENOSPC;
1672                         if (buffer)
1673                                 hammer_rel_buffer(buffer, 0);
1674                         return(NULL);
1675                 }
1676         }
1677         cluster->ondisk->idx_index = elm_no;
1678         KKASSERT((elm_no & HAMMER_FSBUF_BLKMASK) < HAMMER_BTREE_NODES);
1679
1680         /*
1681          * Load and return the B-Tree element
1682          */
1683         buf_no = elm_no / HAMMER_FSBUF_MAXBLKS;
1684         node_offset = buf_no * HAMMER_BUFSIZE +
1685                       offsetof(union hammer_fsbuf_ondisk,
1686                                btree.nodes[elm_no & HAMMER_FSBUF_BLKMASK]);
1687         node = hammer_get_node(cluster, node_offset, errorp);
1688         if (node) {
1689                 hammer_modify_node(node);
1690                 bzero(node->ondisk, sizeof(*node->ondisk));
1691         } else {
1692                 hammer_alist_free(live, elm_no, 1);
1693                 /* node is already NULL on failure, nothing to release */
1694                 node = NULL;
1695         }
1696         if (buffer)
1697                 hammer_rel_buffer(buffer, 0);
1698         return(node);
1699 }
1700
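#if 0
/*
 * Illustrative sketch only (not compiled): a hypothetical caller obtains a
 * fresh, zero'd B-Tree node from hammer_alloc_btree() and releases it once
 * it is no longer needed.  Real callers fill in the node's ondisk contents
 * while still holding the reference.
 */
static int
example_new_btree_node(hammer_cluster_t cluster)
{
        hammer_node_t node;
        int error;

        node = hammer_alloc_btree(cluster, &error);
        if (node == NULL)
                return(error);
        /* node->ondisk is zero'd and may be initialized here */
        hammer_rel_node(node);
        return(0);
}
#endif
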
1701 void *
1702 hammer_alloc_data(hammer_cluster_t cluster, int32_t bytes,
1703                   int *errorp, struct hammer_buffer **bufferp)
1704 {
1705         hammer_buffer_t buffer;
1706         hammer_alist_t live;
1707         int32_t elm_no;
1708         int32_t buf_no;
1709         int32_t nblks;
1710         void *item;
1711
1712         /*
1713          * Deal with large data blocks.  The blocksize is HAMMER_BUFSIZE
1714          * for these allocations.
1715          */
1716         hammer_modify_cluster(cluster);
1717         if ((bytes & HAMMER_BUFMASK) == 0) {
1718                 nblks = bytes / HAMMER_BUFSIZE;
1719                 /* only one block allowed for now (so buffer can hold it) */
1720                 KKASSERT(nblks == 1);
1721
1722                 buf_no = hammer_alloc_master(cluster, nblks,
1723                                              cluster->ondisk->idx_ldata, 1);
1724                 if (buf_no == HAMMER_ALIST_BLOCK_NONE) {
1725                         *errorp = ENOSPC;
1726                         return(NULL);
1727                 }
1728                 hammer_adjust_stats(cluster, HAMMER_FSBUF_DATA, nblks);
1729                 cluster->ondisk->idx_ldata = buf_no;
1730                 buffer = *bufferp;
1731                 *bufferp = hammer_get_buffer(cluster, buf_no, -1, errorp);
1732                 if (buffer)
1733                         hammer_rel_buffer(buffer, 0);
1734                 buffer = *bufferp;
1735                 return(buffer->ondisk);
1736         }
1737
1738         /*
1739          * Allocate a data element.  The block size is HAMMER_DATA_BLKSIZE
1740          * (64 bytes) for these allocations.
1741          */
1742         nblks = (bytes + HAMMER_DATA_BLKMASK) & ~HAMMER_DATA_BLKMASK;
1743         nblks /= HAMMER_DATA_BLKSIZE;
1744         live = &cluster->alist_mdata;
1745         elm_no = hammer_alist_alloc_fwd(live, nblks, cluster->ondisk->idx_data);
1746         if (elm_no == HAMMER_ALIST_BLOCK_NONE)
1747                 elm_no = hammer_alist_alloc_fwd(live, nblks, 0);
1748         if (elm_no == HAMMER_ALIST_BLOCK_NONE) {
1749                 alloc_new_buffer(cluster, HAMMER_FSBUF_DATA, live,
1750                                  cluster->ondisk->idx_data, errorp, bufferp);
1751                 elm_no = hammer_alist_alloc(live, nblks);
1752                 if (elm_no == HAMMER_ALIST_BLOCK_NONE) {
1753                         *errorp = ENOSPC;
1754                         return(NULL);
1755                 }
1756         }
1757         cluster->ondisk->idx_data = elm_no;
1758
1759         /*
1760          * Load and return the data element
1761          */
1762         buf_no = elm_no / HAMMER_FSBUF_MAXBLKS;
1763         buffer = *bufferp;
1764         if (buffer == NULL || buffer->cluster != cluster ||
1765             buffer->buf_no != buf_no) {
1766                 if (buffer)
1767                         hammer_rel_buffer(buffer, 0);
1768                 buffer = hammer_get_buffer(cluster, buf_no, 0, errorp);
1769                 *bufferp = buffer;
1770         }
1771         KKASSERT(buffer->ondisk->head.buf_type == HAMMER_FSBUF_DATA);
1772         KKASSERT((elm_no & HAMMER_FSBUF_BLKMASK) < HAMMER_DATA_NODES);
1773         hammer_modify_buffer(buffer);
1774         item = &buffer->ondisk->data.data[elm_no & HAMMER_FSBUF_BLKMASK];
1775         bzero(item, nblks * HAMMER_DATA_BLKSIZE);
1776         *errorp = 0;
1777         return(item);
1778 }
1779
1780 void *
1781 hammer_alloc_record(hammer_cluster_t cluster,
1782                     int *errorp, struct hammer_buffer **bufferp)
1783 {
1784         hammer_buffer_t buffer;
1785         hammer_alist_t live;
1786         int32_t elm_no;
1787         int32_t buf_no;
1788         void *item;
1789
1790         /*
1791          * Allocate a record element
1792          */
1793         hammer_modify_cluster(cluster);
1794         live = &cluster->alist_record;
1795         elm_no = hammer_alist_alloc_rev(live, 1, cluster->ondisk->idx_record);
1796         if (elm_no == HAMMER_ALIST_BLOCK_NONE)
1797                 elm_no = hammer_alist_alloc_rev(live, 1,HAMMER_ALIST_BLOCK_MAX);
1798         if (elm_no == HAMMER_ALIST_BLOCK_NONE) {
1799                 alloc_new_buffer(cluster, HAMMER_FSBUF_RECORDS, live,
1800                                  cluster->ondisk->idx_record, errorp, bufferp);
1801                 elm_no = hammer_alist_alloc_rev(live, 1,HAMMER_ALIST_BLOCK_MAX);
1802                 kprintf("hammer_alloc_record elm again %08x\n", elm_no);
1803                 if (elm_no == HAMMER_ALIST_BLOCK_NONE) {
1804                         *errorp = ENOSPC;
1805                         return(NULL);
1806                 }
1807         }
1808         cluster->ondisk->idx_record = elm_no;
1809
1810         /*
1811          * Load and return the record element
1812          */
1813         buf_no = elm_no / HAMMER_FSBUF_MAXBLKS;
1814         buffer = *bufferp;
1815         if (buffer == NULL || buffer->cluster != cluster ||
1816             buffer->buf_no != buf_no) {
1817                 if (buffer)
1818                         hammer_rel_buffer(buffer, 0);
1819                 buffer = hammer_get_buffer(cluster, buf_no, 0, errorp);
1820                 *bufferp = buffer;
1821         }
1822         KKASSERT(buffer->ondisk->head.buf_type == HAMMER_FSBUF_RECORDS);
1823         KASSERT((elm_no & HAMMER_FSBUF_BLKMASK) < HAMMER_RECORD_NODES,
1824                 ("elm_no %d (%d) out of bounds", elm_no, elm_no & HAMMER_FSBUF_BLKMASK));
1825         hammer_modify_buffer(buffer);
1826         item = &buffer->ondisk->record.recs[elm_no & HAMMER_FSBUF_BLKMASK];
1827         bzero(item, sizeof(union hammer_record_ondisk));
1828         *errorp = 0;
1829         return(item);
1830 }
1831
1832 void
1833 hammer_free_data_ptr(hammer_buffer_t buffer, void *data, int bytes)
1834 {
1835         int32_t elm_no;
1836         int32_t nblks;
1837         hammer_alist_t live;
1838
1839         hammer_modify_cluster(buffer->cluster);
1840         if ((bytes & HAMMER_BUFMASK) == 0) {
1841                 nblks = bytes / HAMMER_BUFSIZE;
1842                 KKASSERT(nblks == 1 && data == (void *)buffer->ondisk);
1843                 hammer_alist_free(&buffer->cluster->alist_master,
1844                                   buffer->buf_no, nblks);
1845                 hammer_adjust_stats(buffer->cluster, HAMMER_FSBUF_DATA, -nblks);
1846                 return;
1847         }
1848
1849         elm_no = ((char *)data - (char *)buffer->ondisk->data.data) /
1850                  HAMMER_DATA_BLKSIZE;
1851         KKASSERT(elm_no >= 0 && elm_no < HAMMER_DATA_NODES);
1852         elm_no += buffer->buf_no * HAMMER_FSBUF_MAXBLKS;
1853         nblks = (bytes + HAMMER_DATA_BLKMASK) & ~HAMMER_DATA_BLKMASK;
1854         nblks /= HAMMER_DATA_BLKSIZE;
1855         live = &buffer->cluster->alist_mdata;
1856         hammer_alist_free(live, elm_no, nblks);
1857 }
1858
1859 void
1860 hammer_free_record_ptr(hammer_buffer_t buffer, union hammer_record_ondisk *rec)
1861 {
1862         int32_t elm_no;
1863         hammer_alist_t live;
1864
1865         hammer_modify_cluster(buffer->cluster);
1866         elm_no = rec - &buffer->ondisk->record.recs[0];
1867         KKASSERT(elm_no >= 0 && elm_no < HAMMER_RECORD_NODES);
1868         elm_no += buffer->buf_no * HAMMER_FSBUF_MAXBLKS;
1869         live = &buffer->cluster->alist_record;
1870         hammer_alist_free(live, elm_no, 1);
1871 }
1872
1873 void
1874 hammer_free_btree(hammer_cluster_t cluster, int32_t bclu_offset)
1875 {
1876         const int32_t blksize = sizeof(struct hammer_node_ondisk);
1877         int32_t fsbuf_offset = bclu_offset & HAMMER_BUFMASK;
1878         hammer_alist_t live;
1879         int32_t elm_no;
1880
1881         hammer_modify_cluster(cluster);
1882         elm_no = bclu_offset / HAMMER_BUFSIZE * HAMMER_FSBUF_MAXBLKS;
1883         fsbuf_offset -= offsetof(union hammer_fsbuf_ondisk, btree.nodes[0]);
1884         live = &cluster->alist_btree;
1885         KKASSERT(fsbuf_offset >= 0 && fsbuf_offset % blksize == 0);
1886         elm_no += fsbuf_offset / blksize;
1887         hammer_alist_free(live, elm_no, 1);
1888 }
1889
1890 void
1891 hammer_free_data(hammer_cluster_t cluster, int32_t bclu_offset, int32_t bytes)
1892 {
1893         const int32_t blksize = HAMMER_DATA_BLKSIZE;
1894         int32_t fsbuf_offset = bclu_offset & HAMMER_BUFMASK;
1895         hammer_alist_t live;
1896         int32_t elm_no;
1897         int32_t buf_no;
1898         int32_t nblks;
1899
1900         hammer_modify_cluster(cluster);
1901         if ((bytes & HAMMER_BUFMASK) == 0) {
1902                 nblks = bytes / HAMMER_BUFSIZE;
1903                 KKASSERT(nblks == 1 && (bclu_offset & HAMMER_BUFMASK) == 0);
1904                 buf_no = bclu_offset / HAMMER_BUFSIZE;
1905                 hammer_alist_free(&cluster->alist_master, buf_no, nblks);
1906                 hammer_adjust_stats(cluster, HAMMER_FSBUF_DATA, -nblks);
1907                 return;
1908         }
1909
1910         elm_no = bclu_offset / HAMMER_BUFSIZE * HAMMER_FSBUF_MAXBLKS;
1911         fsbuf_offset -= offsetof(union hammer_fsbuf_ondisk, data.data[0][0]);
1912         live = &cluster->alist_mdata;
1913         nblks = (bytes + HAMMER_DATA_BLKMASK) & ~HAMMER_DATA_BLKMASK;
1914         nblks /= HAMMER_DATA_BLKSIZE;
1915         KKASSERT(fsbuf_offset >= 0 && fsbuf_offset % blksize == 0);
1916         elm_no += fsbuf_offset / blksize;
1917         hammer_alist_free(live, elm_no, nblks);
1918 }
1919
1920 void
1921 hammer_free_record(hammer_cluster_t cluster, int32_t bclu_offset)
1922 {
1923         const int32_t blksize = sizeof(union hammer_record_ondisk);
1924         int32_t fsbuf_offset = bclu_offset & HAMMER_BUFMASK;
1925         hammer_alist_t live;
1926         int32_t elm_no;
1927
1928         hammer_modify_cluster(cluster);
1929         elm_no = bclu_offset / HAMMER_BUFSIZE * HAMMER_FSBUF_MAXBLKS;
1930         fsbuf_offset -= offsetof(union hammer_fsbuf_ondisk, record.recs[0]);
1931         live = &cluster->alist_record;
1932         KKASSERT(fsbuf_offset >= 0 && fsbuf_offset % blksize == 0);
1933         elm_no += fsbuf_offset / blksize;
1934         hammer_alist_free(live, elm_no, 1);
1935 }
1936
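/*
 * Informal restatement (a sketch, not additional code) of the offset
 * arithmetic shared by hammer_free_btree(), hammer_free_data() and
 * hammer_free_record() above.  A cluster-relative byte offset is split as:
 *
 *      buf_no       = bclu_offset / HAMMER_BUFSIZE
 *      fsbuf_offset = bclu_offset & HAMMER_BUFMASK
 *      elm_no       = buf_no * HAMMER_FSBUF_MAXBLKS +
 *                     (fsbuf_offset - offsetof(first element)) / blksize
 *
 * which yields the element index in the type-specific cluster A-list.
 */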
1937
1938 /*
1939  * Allocate a new filesystem buffer and assign it to the specified
1940  * filesystem buffer type.  The new buffer will be added to the
1941  * type-specific A-list and initialized.
1942  *
1943  * buffers used for records will also be added to the clu_record_buf_bitmap.
1944  */
1945 static void
1946 alloc_new_buffer(hammer_cluster_t cluster, u_int64_t type, hammer_alist_t live,
1947                  int start, int *errorp, struct hammer_buffer **bufferp)
1948 {
1949         hammer_buffer_t buffer;
1950         int32_t buf_no;
1951         int32_t base_blk;
1952         int isfwd;
1953
1954         if (*bufferp)
1955                 hammer_rel_buffer(*bufferp, 0);
1956         *bufferp = NULL;
1957
1958         start = start / HAMMER_FSBUF_MAXBLKS;   /* convert to buf_no */
1959         isfwd = (type != HAMMER_FSBUF_RECORDS);
1960         buf_no = hammer_alloc_master(cluster, 1, start, isfwd);
1961         if (buf_no == HAMMER_ALIST_BLOCK_NONE) {
1962                 *errorp = ENOSPC;
1963                 return;
1964         }
1965
1966         /*
1967          * The new buffer must be initialized (type != 0) regardless of
1968          * whether we already have it cached or not, so don't try to
1969          * optimize the cached buffer check.  Just call hammer_get_buffer().
1970          */
1971         buffer = hammer_get_buffer(cluster, buf_no, type, errorp);
1972         *bufferp = buffer;
1973
1974         /*
1975          * Do a meta-free of the buffer's elements into the type-specific
1976          * A-list and update our statistics to reflect the allocation.
1977          */
1978         if (buffer) {
1979 #if 0
1980                 kprintf("alloc_new_buffer buf_no %d type %016llx nelms %d\n",
1981                         buf_no, type, nelements);
1982 #endif
1983                 hammer_modify_buffer(buffer);  /*XXX*/
1984                 hammer_adjust_stats(cluster, type, 1);
1985
1986                 /*
1987                  * Free the buffer to the appropriate slave list so the
1988                  * cluster-based allocator sees it.
1989                  */
1990                 base_blk = buf_no * HAMMER_FSBUF_MAXBLKS;
1991
1992                 switch(type) {
1993                 case HAMMER_FSBUF_BTREE:
1994                         hammer_alist_free(live, base_blk, HAMMER_BTREE_NODES);
1995                         break;
1996                 case HAMMER_FSBUF_DATA:
1997                         hammer_alist_free(live, base_blk, HAMMER_DATA_NODES);
1998                         break;
1999                 case HAMMER_FSBUF_RECORDS:
2000                         hammer_alist_free(live, base_blk, HAMMER_RECORD_NODES);
2001                         break;
2002                 }
2003         }
2004
2005         /*
2006          * And, finally, update clu_record_buf_bitmap for record buffers.
2007          * Since buffers are synced to disk before their associated cluster
2008          * header, a recovery operation will only see synced record buffers
2009          * in the bitmap.  XXX We can't use alist_record for recovery due
2010          * to the way we currently manage it.
2011          */
2012         if (buffer && type == HAMMER_FSBUF_RECORDS) {
2013                 KKASSERT(buf_no >= 0 && buf_no < HAMMER_CLU_MAXBUFFERS);
2014                 hammer_modify_cluster(cluster);
2015                 cluster->ondisk->clu_record_buf_bitmap[buf_no >> 5] |=
2016                         (1 << (buf_no & 31));
2017         }
2018 }
2019
2020 /*
2021  * Sync dirty buffers to the media
2022  */
2023
2024 static int hammer_sync_scan1(struct mount *mp, struct vnode *vp, void *data);
2025 static int hammer_sync_scan2(struct mount *mp, struct vnode *vp, void *data);
2026
2027 int
2028 hammer_sync_hmp(hammer_mount_t hmp, int waitfor)
2029 {
2030         struct hammer_sync_info info;
2031
2032         info.error = 0;
2033         info.waitfor = waitfor;
2034
2035         kprintf("hammer_sync\n");
2036         vmntvnodescan(hmp->mp, VMSC_GETVP|VMSC_NOWAIT,
2037                       hammer_sync_scan1, hammer_sync_scan2, &info);
2038
2039         RB_SCAN(hammer_vol_rb_tree, &hmp->rb_vols_root, NULL,
2040                 hammer_sync_volume, &info);
2041         return(info.error);
2042 }
2043
2044 static int
2045 hammer_sync_scan1(struct mount *mp, struct vnode *vp, void *data)
2046 {
2047         struct hammer_inode *ip;
2048
2049         ip = VTOI(vp);
2050         if (vp->v_type == VNON || ip == NULL ||
2051             ((ip->flags & HAMMER_INODE_MODMASK) == 0 &&
2052              RB_EMPTY(&vp->v_rbdirty_tree))) {
2053                 return(-1);
2054         }
2055         return(0);
2056 }
2057
2058 static int
2059 hammer_sync_scan2(struct mount *mp, struct vnode *vp, void *data)
2060 {
2061         struct hammer_sync_info *info = data;
2062         struct hammer_inode *ip;
2063         int error;
2064
2065         ip = VTOI(vp);
2066         if (vp->v_type == VNON || vp->v_type == VBAD ||
2067             ((ip->flags & HAMMER_INODE_MODMASK) == 0 &&
2068              RB_EMPTY(&vp->v_rbdirty_tree))) {
2069                 return(0);
2070         }
2071         if (vp->v_type != VCHR) {
2072                 error = VOP_FSYNC(vp, info->waitfor);
2073                 if (error)
2074                         info->error = error;
2075         }
2076         return(0);
2077 }
2078
2079 int
2080 hammer_sync_volume(hammer_volume_t volume, void *data)
2081 {
2082         struct hammer_sync_info *info = data;
2083
2084         RB_SCAN(hammer_clu_rb_tree, &volume->rb_clus_root, NULL,
2085                 hammer_sync_cluster, info);
2086         if (hammer_ref_volume(volume) == 0)
2087                 hammer_rel_volume(volume, 1);
2088         return(0);
2089 }
2090
2091 int
2092 hammer_sync_cluster(hammer_cluster_t cluster, void *data)
2093 {
2094         struct hammer_sync_info *info = data;
2095
2096         RB_SCAN(hammer_buf_rb_tree, &cluster->rb_bufs_root, NULL,
2097                 hammer_sync_buffer, info);
2098         /*hammer_io_waitdep(&cluster->io);*/
2099         if (hammer_ref_cluster(cluster) == 0)
2100                 hammer_rel_cluster(cluster, 1);
2101         return(0);
2102 }
2103
2104 int
2105 hammer_sync_buffer(hammer_buffer_t buffer, void *data __unused)
2106 {
2107         if (hammer_ref_buffer(buffer) == 0)
2108                 hammer_rel_buffer(buffer, 1);
2109         return(0);
2110 }
2111
2112 /*
2113  * Generic buffer initialization.  Initialize the A-list into an all-allocated
2114  * state with the free block limit properly set.
2115  *
2116  * Note that alloc_new_buffer() will free the appropriate block range via
2117  * the appropriate cluster alist, so the free count is properly propagated.
2118  */
2119 void
2120 hammer_initbuffer(hammer_alist_t live, hammer_fsbuf_head_t head, u_int64_t type)
2121 {
2122         head->buf_type = type;
2123
2124         switch(type) {
2125         case HAMMER_FSBUF_BTREE:
2126                 hammer_alist_init(live, 0, HAMMER_BTREE_NODES,
2127                                   HAMMER_ASTATE_ALLOC);
2128                 break;
2129         case HAMMER_FSBUF_DATA:
2130                 hammer_alist_init(live, 0, HAMMER_DATA_NODES,
2131                                   HAMMER_ASTATE_ALLOC);
2132                 break;
2133         case HAMMER_FSBUF_RECORDS:
2134                 hammer_alist_init(live, 0, HAMMER_RECORD_NODES,
2135                                   HAMMER_ASTATE_ALLOC);
2136                 break;
2137         default:
2138                 hammer_alist_init(live, 0, 0, HAMMER_ASTATE_ALLOC);
2139                 break;
2140         }
2141 }
2142
2143 /*
2144  * Calculate the cluster's offset in the volume.  This calculation is
2145  * slightly more complex when using superclusters because superclusters
2146  * are grouped in blocks of 16, followed by 16 x N clusters where N
2147  * is the number of clusters a supercluster can manage.
2148  */
2149 static int64_t
2150 calculate_cluster_offset(hammer_volume_t volume, int32_t clu_no)
2151 {
2152         int32_t scl_group;
2153         int64_t scl_group_size;
2154         int64_t off;
2155
2156         if (volume->vol_flags & HAMMER_VOLF_USINGSUPERCL) {
2157                 scl_group = clu_no / HAMMER_VOL_SUPERCLUSTER_GROUP /
2158                             HAMMER_SCL_MAXCLUSTERS;
2159                 scl_group_size = 
2160                             ((int64_t)HAMMER_BUFSIZE *
2161                              HAMMER_VOL_SUPERCLUSTER_GROUP) +
2162                             ((int64_t)HAMMER_VOL_SUPERCLUSTER_GROUP *
2163                              volume->vol_clsize * HAMMER_SCL_MAXCLUSTERS);
2164                 scl_group_size += 
2165                             HAMMER_VOL_SUPERCLUSTER_GROUP * HAMMER_BUFSIZE;
2166
2167                 off = volume->cluster_base +
2168                       scl_group * scl_group_size +
2169                       (HAMMER_BUFSIZE * HAMMER_VOL_SUPERCLUSTER_GROUP) +
2170                       ((int64_t)clu_no % ((int64_t)HAMMER_SCL_MAXCLUSTERS *
2171                        HAMMER_VOL_SUPERCLUSTER_GROUP))
2172                       * volume->vol_clsize;
2173         } else {
2174                 off = volume->cluster_base +
2175                       (int64_t)clu_no * volume->vol_clsize;
2176         }
2177         return(off);
2178 }
2179
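/*
 * Informal restatement of the arithmetic above (a sketch, not additional
 * code).  Writing G = HAMMER_VOL_SUPERCLUSTER_GROUP and
 * N = HAMMER_SCL_MAXCLUSTERS, the supercluster case computes:
 *
 *      scl_group      = clu_no / (G * N)
 *      scl_group_size = 2 * G * HAMMER_BUFSIZE + G * N * vol_clsize
 *      off            = cluster_base
 *                       + scl_group * scl_group_size
 *                       + G * HAMMER_BUFSIZE
 *                       + (clu_no % (G * N)) * vol_clsize
 *
 * i.e. skip the preceding groups, skip the group's supercluster headers,
 * then index the cluster slot within the group.
 */
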
2180 /*
2181  * Calculate a super-cluster's offset in the volume.
2182  */
2183 static int64_t
2184 calculate_supercl_offset(hammer_volume_t volume, int32_t scl_no)
2185 {
2186         int64_t off;
2187         int32_t scl_group;
2188         int64_t scl_group_size;
2189
2190         KKASSERT (volume->vol_flags & HAMMER_VOLF_USINGSUPERCL);
2191         scl_group = scl_no / HAMMER_VOL_SUPERCLUSTER_GROUP;
2192         if (scl_group) {
2193                 scl_group_size = 
2194                             ((int64_t)HAMMER_BUFSIZE *
2195                              HAMMER_VOL_SUPERCLUSTER_GROUP) +
2196                             ((int64_t)HAMMER_VOL_SUPERCLUSTER_GROUP *
2197                              volume->vol_clsize * HAMMER_SCL_MAXCLUSTERS);
2198                 scl_group_size += 
2199                             HAMMER_VOL_SUPERCLUSTER_GROUP * HAMMER_BUFSIZE;
2200                 off = volume->cluster_base + (scl_group * scl_group_size) +
2201                       (scl_no % HAMMER_VOL_SUPERCLUSTER_GROUP) * HAMMER_BUFSIZE;
2202         } else {
2203                 off = volume->cluster_base + (scl_no * HAMMER_BUFSIZE);
2204         }
2205         return(off);
2206 }
2207
2208 /*
2209  * Allocate nblks buffers from the cluster's master alist.
2210  */
2211 static int32_t
2212 hammer_alloc_master(hammer_cluster_t cluster, int nblks,
2213                     int32_t start, int isfwd)
2214 {
2215         int32_t buf_no;
2216
2217         hammer_modify_cluster(cluster);
2218         if (isfwd) {
2219                 buf_no = hammer_alist_alloc_fwd(&cluster->alist_master,
2220                                                 nblks, start);
2221                 if (buf_no == HAMMER_ALIST_BLOCK_NONE) {
2222                         buf_no = hammer_alist_alloc_fwd(&cluster->alist_master,
2223                                                 nblks, 0);
2224                 }
2225         } else {
2226                 buf_no = hammer_alist_alloc_rev(&cluster->alist_master,
2227                                                 nblks, start);
2228                 if (buf_no == HAMMER_ALIST_BLOCK_NONE) {
2229                         buf_no = hammer_alist_alloc_rev(&cluster->alist_master,
2230                                                 nblks, HAMMER_ALIST_BLOCK_MAX);
2231                 }
2232         }
2233
2234         /*
2235          * Recover space from empty record, b-tree, and data a-lists.
2236          */
2237
2238         return(buf_no);
2239 }
2240
2241 /*
2242  * Adjust allocation statistics
2243  */
2244 static void
2245 hammer_adjust_stats(hammer_cluster_t cluster, u_int64_t buf_type, int nblks)
2246 {
2247         hammer_modify_cluster(cluster);
2248         hammer_modify_volume(cluster->volume);
2249         hammer_modify_volume(cluster->volume->hmp->rootvol);
2250
2251         switch(buf_type) {
2252         case HAMMER_FSBUF_BTREE:
2253                 cluster->ondisk->stat_idx_bufs += nblks;
2254                 cluster->volume->ondisk->vol_stat_idx_bufs += nblks;
2255                 cluster->volume->hmp->rootvol->ondisk->vol0_stat_idx_bufs += nblks;
2256                 break;
2257         case HAMMER_FSBUF_DATA:
2258                 cluster->ondisk->stat_data_bufs += nblks;
2259                 cluster->volume->ondisk->vol_stat_data_bufs += nblks;
2260                 cluster->volume->hmp->rootvol->ondisk->vol0_stat_data_bufs += nblks;
2261                 break;
2262         case HAMMER_FSBUF_RECORDS:
2263                 cluster->ondisk->stat_rec_bufs += nblks;
2264                 cluster->volume->ondisk->vol_stat_rec_bufs += nblks;
2265                 cluster->volume->hmp->rootvol->ondisk->vol0_stat_rec_bufs += nblks;
2266                 break;
2267         }
2268 }
2269
2270 /*
2271  * A-LIST SUPPORT
2272  *
2273  * Setup the parameters for the various A-lists we use in hammer.  The
2274  * supercluster A-list must be chained to the cluster A-list and cluster
2275  * slave A-lists are chained to buffer A-lists.
2276  *
2277  * See hammer_init_alist_config() below.
2278  */
2279
2280 /*
2281  * A-LIST - cluster recursion into a filesystem buffer
2282  *
2283  * In the init case the buffer has already been initialized by
2284  * alloc_new_buffer() when it allocated the buffer out of the master
2285  * alist and marked it as free in the slave alist.
2286  *
2287  * Because we use a somewhat odd mechanism to assign buffers to slave
2288  * pools, we can't actually free the buffer back to the master alist in
2289  * buffer_alist_destroy(), but instead must deal with that logic somewhere
2290  * else.
2291  */
2292 static int
2293 buffer_alist_init(void *info, int32_t blk, int32_t radix,
2294                   hammer_alloc_state_t state)
2295 {
2296         return(0);
2297 }
2298
2299 static int
2300 buffer_alist_recover(void *info, int32_t blk, int32_t radix, int32_t count)
2301 {
2302         hammer_cluster_t cluster = info;
2303         hammer_buffer_t buffer;
2304         int32_t buf_no;
2305         int error = 0;
2306
2307         buf_no = blk / HAMMER_FSBUF_MAXBLKS;
2308         buffer = hammer_get_buffer(cluster, buf_no, 0, &error);
2309         if (buffer) {
2310                 hammer_modify_buffer(buffer);
2311                 error = hammer_alist_recover(&buffer->alist, blk, 0, count);
2312                 /* free block count is returned if >= 0 */
2313                 hammer_rel_buffer(buffer, 0);
2314         } else {
2315                 error = -error;
2316         }
2317         return (error);
2318 }
2319
2320 /*
2321  * Note: This routine is only called when freeing the last elements of
2322  * an initialized buffer.  Freeing all elements of the buffer when the
2323  * buffer was not previously initialized does not call this routine.
2324  */
2325 static int
2326 buffer_alist_destroy(void *info, int32_t blk, int32_t radix)
2327 {
2328         hammer_cluster_t cluster = info;
2329         int32_t buf_no;
2330
2331         buf_no = blk / HAMMER_FSBUF_MAXBLKS;
2332         kprintf("destroy buffer %d:%d:%d\n", cluster->volume->vol_no, cluster->clu_no, buf_no);
2333         return (0);
2334 }
2335
2336 /*
2337  * Note: atblk can be negative and atblk - blk can go negative.
2338  */
2339 static int
2340 buffer_alist_alloc_fwd(void *info, int32_t blk, int32_t radix,
2341                       int32_t count, int32_t atblk, int32_t *fullp)
2342 {
2343         hammer_cluster_t cluster = info;
2344         hammer_buffer_t buffer;
2345         int32_t buf_no;
2346         int32_t r;
2347         int error = 0;
2348
2349         buf_no = blk / HAMMER_FSBUF_MAXBLKS;
2350         buffer = hammer_get_buffer(cluster, buf_no, 0, &error);
2351         if (buffer) {
2352                 KKASSERT(buffer->ondisk->head.buf_type != 0);
2353
2354                 hammer_modify_buffer(buffer);
2355                 r = hammer_alist_alloc_fwd(&buffer->alist, count, atblk - blk);
2356                 if (r != HAMMER_ALIST_BLOCK_NONE)
2357                         r += blk;
2358                 *fullp = hammer_alist_isfull(&buffer->alist);
2359                 hammer_rel_buffer(buffer, 0);
2360         } else {
2361                 r = HAMMER_ALIST_BLOCK_NONE;
2362                 *fullp = 1;
2363         }
2364         return(r);
2365 }
2366
2367 static int
2368 buffer_alist_alloc_rev(void *info, int32_t blk, int32_t radix,
2369                       int32_t count, int32_t atblk, int32_t *fullp)
2370 {
2371         hammer_cluster_t cluster = info;
2372         hammer_buffer_t buffer;
2373         int32_t buf_no;
2374         int32_t r;
2375         int error = 0;
2376
2377         buf_no = blk / HAMMER_FSBUF_MAXBLKS;
2378         buffer = hammer_get_buffer(cluster, buf_no, 0, &error);
2379         if (buffer) {
2380                 KKASSERT(buffer->ondisk->head.buf_type != 0);
2381                 hammer_modify_buffer(buffer);
2382                 r = hammer_alist_alloc_rev(&buffer->alist, count, atblk - blk);
2383                 if (r != HAMMER_ALIST_BLOCK_NONE)
2384                         r += blk;
2385                 *fullp = hammer_alist_isfull(&buffer->alist);
2386                 hammer_rel_buffer(buffer, 0);
2387         } else {
2388                 r = HAMMER_ALIST_BLOCK_NONE;
2389                 *fullp = 1;
2390         }
2391         return(r);
2392 }
2393
2394 static void
2395 buffer_alist_free(void *info, int32_t blk, int32_t radix,
2396                  int32_t base_blk, int32_t count, int32_t *emptyp)
2397 {
2398         hammer_cluster_t cluster = info;
2399         hammer_buffer_t buffer;
2400         int32_t buf_no;
2401         int error = 0;
2402
2403         buf_no = blk / HAMMER_FSBUF_MAXBLKS;
2404         buffer = hammer_get_buffer(cluster, buf_no, 0, &error);
2405         if (buffer) {
2406                 KKASSERT(buffer->ondisk->head.buf_type != 0);
2407                 hammer_modify_buffer(buffer);
2408                 hammer_alist_free(&buffer->alist, base_blk, count);
2409                 *emptyp = hammer_alist_isempty(&buffer->alist);
2410                 /* XXX don't bother updating the buffer if it is completely empty? */
2411                 hammer_rel_buffer(buffer, 0);
2412         } else {
2413                 *emptyp = 0;
2414         }
2415 }
2416
2417 static void
2418 buffer_alist_print(void *info, int32_t blk, int32_t radix, int tab)
2419 {
2420 }
2421
2422 /*
2423  * A-LIST - super-cluster recursion into a cluster and cluster recursion
2424  * into a filesystem buffer.  A-List's are mostly self-contained entities,
2425  * but callbacks must be installed to recurse from one A-List to another.
2426  *
2427  * Implementing these callbacks allows us to operate a multi-layered A-List
2428  * as a single entity.
2429  */
2430
2431 /*
2432  * This occurs when allocating a cluster via the volume a-list and the
2433  * entry in the volume a-list indicated all-free.  The underlying supercl
2434  * has not yet been initialized.
2435  */
2436 static int
2437 super_alist_init(void *info, int32_t blk, int32_t radix,
2438                  hammer_alloc_state_t state)
2439 {
2440         hammer_volume_t volume = info;
2441         hammer_supercl_t supercl;
2442         int32_t scl_no;
2443         int error = 0;
2444
2445         /*
2446          * Calculate the super-cluster number containing the cluster (blk)
2447          * and obtain the super-cluster buffer.
2448          */
2449         scl_no = blk / HAMMER_SCL_MAXCLUSTERS;
2450         supercl = hammer_get_supercl(volume, scl_no, &error, state);
2451         if (supercl)
2452                 hammer_rel_supercl(supercl, 0);
2453         return (error);
2454 }
2455
2456 static int
2457 super_alist_recover(void *info, int32_t blk, int32_t radix, int32_t count)
2458 {
2459         hammer_volume_t volume = info;
2460         hammer_supercl_t supercl;
2461         int32_t scl_no;
2462         int error = 0;
2463
2464         /*
2465          * Calculate the super-cluster number containing the cluster (blk)
2466          * and obtain the super-cluster buffer.
2467          */
2468         scl_no = blk / HAMMER_SCL_MAXCLUSTERS;
2469         supercl = hammer_get_supercl(volume, scl_no, &error,
2470                                      HAMMER_ASTATE_NONE);
2471         if (supercl) {
2472                 hammer_modify_supercl(supercl);
2473                 error = hammer_alist_recover(&supercl->alist, blk, 0, count);
2474                 /* free block count is returned if >= 0 */
2475                 hammer_rel_supercl(supercl, 0);
2476         } else {
2477                 error = -error;
2478         }
2479         return (error);
2480 }
2481
2482 /*
2483  * This occurs when freeing a cluster via the volume a-list and the
2484  * supercl is now 100% free.  We can destroy the supercl.
2485  *
2486  * What we actually do is just unset the modify bit so it doesn't get
2487  * written out.
2488  */
2489 static int
2490 super_alist_destroy(void *info, int32_t blk, int32_t radix)
2491 {
2492         hammer_volume_t volume = info;
2493         hammer_supercl_t supercl;
2494         int32_t scl_no;
2495         int error = 0;
2496
2497         /*
2498          * Calculate the super-cluster number containing the cluster (blk)
2499          * and obtain the super-cluster buffer.
2500          */
2501         scl_no = blk / HAMMER_SCL_MAXCLUSTERS;
2502         if (hammer_find_supercl(volume, scl_no)) {
2503                 supercl = hammer_get_supercl(volume, scl_no, &error,
2504                                              HAMMER_ASTATE_FREE);
2505                 if (supercl) {                       /* XXX */
2506                         hammer_io_clear_modify(&supercl->io);
2507                         hammer_rel_supercl(supercl, 0);
2508                 }
2509         }
2510         return (error);
2511 }
2512
2513 static int
2514 super_alist_alloc_fwd(void *info, int32_t blk, int32_t radix,
2515                       int32_t count, int32_t atblk, int32_t *fullp)
2516 {
2517         hammer_volume_t volume = info;
2518         hammer_supercl_t supercl;
2519         int32_t scl_no;
2520         int32_t r;
2521         int error = 0;
2522
2523         scl_no = blk / HAMMER_SCL_MAXCLUSTERS;
2524         supercl = hammer_get_supercl(volume, scl_no, &error, 0);
2525         if (supercl) {
2526                 hammer_modify_supercl(supercl);
2527                 r = hammer_alist_alloc_fwd(&supercl->alist, count, atblk - blk);
2528                 if (r != HAMMER_ALIST_BLOCK_NONE)
2529                         r += blk;
2530                 *fullp = hammer_alist_isfull(&supercl->alist);
2531                 hammer_rel_supercl(supercl, 0);
2532         } else {
2533                 r = HAMMER_ALIST_BLOCK_NONE;
2534                 *fullp = 1;
2535         }
2536         return(r);
2537 }
2538
2539 static int
2540 super_alist_alloc_rev(void *info, int32_t blk, int32_t radix,
2541                       int32_t count, int32_t atblk, int32_t *fullp)
2542 {
2543         hammer_volume_t volume = info;
2544         hammer_supercl_t supercl;
2545         int32_t scl_no;
2546         int32_t r;
2547         int error = 0;
2548
2549         scl_no = blk / HAMMER_SCL_MAXCLUSTERS;
2550         supercl = hammer_get_supercl(volume, scl_no, &error, 0);
2551         if (supercl) {
2552                 hammer_modify_supercl(supercl);
2553                 r = hammer_alist_alloc_rev(&supercl->alist, count, atblk - blk);
2554                 if (r != HAMMER_ALIST_BLOCK_NONE)
2555                         r += blk;
2556                 *fullp = hammer_alist_isfull(&supercl->alist);
2557                 hammer_rel_supercl(supercl, 0);
2558         } else { 
2559                 r = HAMMER_ALIST_BLOCK_NONE;
2560                 *fullp = 1;
2561         }
2562         return(r);
2563 }
2564
2565 static void
2566 super_alist_free(void *info, int32_t blk, int32_t radix,
2567                  int32_t base_blk, int32_t count, int32_t *emptyp)
2568 {
2569         hammer_volume_t volume = info;
2570         hammer_supercl_t supercl;
2571         int32_t scl_no;
2572         int error = 0;
2573
2574         scl_no = blk / HAMMER_SCL_MAXCLUSTERS;
2575         supercl = hammer_get_supercl(volume, scl_no, &error, 0);
2576         if (supercl) {
2577                 hammer_modify_supercl(supercl);
2578                 hammer_alist_free(&supercl->alist, base_blk, count);
2579                 *emptyp = hammer_alist_isempty(&supercl->alist);
2580                 hammer_rel_supercl(supercl, 0);
2581         } else {
2582                 *emptyp = 0;
2583         }
2584 }
2585
2586 static void
2587 super_alist_print(void *info, int32_t blk, int32_t radix, int tab)
2588 {
2589 }
2590
2591 void
2592 hammer_init_alist_config(void)
2593 {
2594         hammer_alist_config_t config;
2595
2596         hammer_alist_template(&Buf_alist_config, HAMMER_FSBUF_MAXBLKS,
2597                               1, HAMMER_FSBUF_METAELMS);
2598         hammer_alist_template(&Vol_normal_alist_config, HAMMER_VOL_MAXCLUSTERS,
2599                               1, HAMMER_VOL_METAELMS_1LYR);
2600         hammer_alist_template(&Vol_super_alist_config,
2601                           HAMMER_VOL_MAXSUPERCLUSTERS * HAMMER_SCL_MAXCLUSTERS,
2602                               HAMMER_SCL_MAXCLUSTERS, HAMMER_VOL_METAELMS_2LYR);
2603         hammer_alist_template(&Supercl_alist_config, HAMMER_VOL_MAXCLUSTERS,
2604                               1, HAMMER_SUPERCL_METAELMS);
2605         hammer_alist_template(&Clu_master_alist_config, HAMMER_CLU_MAXBUFFERS,
2606                               1, HAMMER_CLU_MASTER_METAELMS);
2607         hammer_alist_template(&Clu_slave_alist_config,
2608                               HAMMER_CLU_MAXBUFFERS * HAMMER_FSBUF_MAXBLKS,
2609                               HAMMER_FSBUF_MAXBLKS, HAMMER_CLU_SLAVE_METAELMS);
2610
2611         config = &Vol_super_alist_config;
2612         config->bl_radix_init = super_alist_init;
2613         config->bl_radix_recover = super_alist_recover;
2614         config->bl_radix_destroy = super_alist_destroy;
2615         config->bl_radix_alloc_fwd = super_alist_alloc_fwd;
2616         config->bl_radix_alloc_rev = super_alist_alloc_rev;
2617         config->bl_radix_free = super_alist_free;
2618         config->bl_radix_print = super_alist_print;
2619
2620         config = &Clu_slave_alist_config;
2621         config->bl_radix_init = buffer_alist_init;
2622         config->bl_radix_recover = buffer_alist_recover;
2623         config->bl_radix_destroy = buffer_alist_destroy;
2624         config->bl_radix_alloc_fwd = buffer_alist_alloc_fwd;
2625         config->bl_radix_alloc_rev = buffer_alist_alloc_rev;
2626         config->bl_radix_free = buffer_alist_free;
2627         config->bl_radix_print = buffer_alist_print;
2628 }
2629