Merge from vendor branch FILE:
[dragonfly.git] / sbin / hammer / ondisk.c
1 /*
2  * Copyright (c) 2007 The DragonFly Project.  All rights reserved.
3  * 
4  * This code is derived from software contributed to The DragonFly Project
5  * by Matthew Dillon <dillon@backplane.com>
6  * 
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions
9  * are met:
10  * 
11  * 1. Redistributions of source code must retain the above copyright
12  *    notice, this list of conditions and the following disclaimer.
13  * 2. Redistributions in binary form must reproduce the above copyright
14  *    notice, this list of conditions and the following disclaimer in
15  *    the documentation and/or other materials provided with the
16  *    distribution.
17  * 3. Neither the name of The DragonFly Project nor the names of its
18  *    contributors may be used to endorse or promote products derived
19  *    from this software without specific, prior written permission.
20  * 
21  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
22  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
23  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
24  * FOR A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE
25  * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
26  * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING,
27  * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
28  * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
29  * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
30  * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
31  * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
32  * SUCH DAMAGE.
33  * 
34  * $DragonFly: src/sbin/hammer/ondisk.c,v 1.8 2008/01/17 04:59:48 dillon Exp $
35  */
36
37 #include <sys/types.h>
38 #include <assert.h>
39 #include <stdio.h>
40 #include <stdlib.h>
41 #include <stdarg.h>
42 #include <string.h>
43 #include <unistd.h>
44 #include <err.h>
45 #include <fcntl.h>
46 #include "hammer_util.h"
47
48 static void initbuffer(hammer_alist_t live, hammer_fsbuf_head_t head,
49                         u_int64_t type);
50 static void alloc_new_buffer(struct cluster_info *cluster, hammer_alist_t live,
51                         u_int64_t type, int32_t nelements);
52 #if 0
53 static void readhammerbuf(struct volume_info *vol, void *data,
54                         int64_t offset);
55 #endif
56 static void writehammerbuf(struct volume_info *vol, const void *data,
57                         int64_t offset);
58
59
60 struct hammer_alist_config Buf_alist_config;
61 struct hammer_alist_config Vol_normal_alist_config;
62 struct hammer_alist_config Vol_super_alist_config;
63 struct hammer_alist_config Supercl_alist_config;
64 struct hammer_alist_config Clu_master_alist_config;
65 struct hammer_alist_config Clu_slave_alist_config;
66 uuid_t Hammer_FSType;
67 uuid_t Hammer_FSId;
68 int64_t BootAreaSize;
69 int64_t MemAreaSize;
70 int     UsingSuperClusters;
71 int     NumVolumes;
72 int     RootVolNo = -1;
73 struct volume_list VolList = TAILQ_HEAD_INITIALIZER(VolList);
74
/*
 * Initialize the global a-list templates used by the utilities: one per
 * allocation layer (filesystem buffers, volume cluster maps in both the
 * single-layer and super-cluster two-layer forms, superclusters, and the
 * cluster master/slave lists).  Must run once before any volume, cluster,
 * or buffer structure is set up, since those wire their a-lists to these
 * configs.
 */
void
init_alist_templates(void)
{
	/*
	 * Initialize the alist templates we will be using
	 */
	hammer_alist_template(&Buf_alist_config, HAMMER_FSBUF_MAXBLKS,
			      1, HAMMER_FSBUF_METAELMS);
	hammer_alist_template(&Vol_normal_alist_config, HAMMER_VOL_MAXCLUSTERS,
			      1, HAMMER_VOL_METAELMS_1LYR);
	hammer_alist_template(&Vol_super_alist_config,
			  HAMMER_VOL_MAXSUPERCLUSTERS * HAMMER_SCL_MAXCLUSTERS,
			      HAMMER_SCL_MAXCLUSTERS, HAMMER_VOL_METAELMS_2LYR);
	/* the two-layer volume template needs its super-alist callbacks */
	hammer_super_alist_template(&Vol_super_alist_config);
	hammer_alist_template(&Supercl_alist_config, HAMMER_VOL_MAXCLUSTERS,
			      1, HAMMER_SUPERCL_METAELMS);
	hammer_alist_template(&Clu_master_alist_config, HAMMER_CLU_MAXBUFFERS,
			      1, HAMMER_CLU_MASTER_METAELMS);
	hammer_alist_template(&Clu_slave_alist_config,
			      HAMMER_CLU_MAXBUFFERS * HAMMER_FSBUF_MAXBLKS,
			      HAMMER_FSBUF_MAXBLKS, HAMMER_CLU_SLAVE_METAELMS);
	/* the cluster slave template needs its buffer-alist callbacks */
	hammer_buffer_alist_template(&Clu_slave_alist_config);
}
98
99 /*
100  * Lookup the requested information structure and related on-disk buffer.
101  * Missing structures are created.
102  */
103
104 struct volume_info *
105 setup_volume(int32_t vol_no, const char *filename, int isnew, int oflags)
106 {
107         struct volume_info *vol;
108         struct volume_info *scan;
109         struct hammer_volume_ondisk *ondisk;
110         int n;
111
112         /*
113          * Allocate the volume structure
114          */
115         vol = malloc(sizeof(*vol));
116         bzero(vol, sizeof(*vol));
117         TAILQ_INIT(&vol->cluster_list);
118         TAILQ_INIT(&vol->supercl_list);
119         vol->name = strdup(filename);
120         vol->fd = open(filename, oflags);
121         if (vol->fd < 0) {
122                 free(vol->name);
123                 free(vol);
124                 err(1, "setup_volume: %s: Open failed", filename);
125         }
126
127         /*
128          * Read or initialize the volume header
129          */
130         vol->ondisk = ondisk = malloc(HAMMER_BUFSIZE);
131         if (isnew) {
132                 bzero(ondisk, HAMMER_BUFSIZE);
133                 vol->using_supercl = UsingSuperClusters;
134         } else {
135                 n = pread(vol->fd, ondisk, HAMMER_BUFSIZE, 0);
136                 if (n != HAMMER_BUFSIZE) {
137                         err(1, "setup_volume: %s: Read failed at offset 0",
138                             filename);
139                 }
140                 if (ondisk->vol_flags & HAMMER_VOLF_USINGSUPERCL)
141                         vol->using_supercl = 1;
142                 vol_no = ondisk->vol_no;
143                 if (RootVolNo < 0) {
144                         RootVolNo = ondisk->vol_rootvol;
145                 } else if (RootVolNo != (int)ondisk->vol_rootvol) {
146                         errx(1, "setup_volume: %s: root volume disagreement: "
147                                 "%d vs %d",
148                                 vol->name, RootVolNo, ondisk->vol_rootvol);
149                 }
150
151                 if (bcmp(&Hammer_FSType, &ondisk->vol_fstype, sizeof(Hammer_FSType)) != 0) {
152                         errx(1, "setup_volume: %s: Header does not indicate "
153                                 "that this is a hammer volume", vol->name);
154                 }
155                 if (TAILQ_EMPTY(&VolList)) {
156                         Hammer_FSId = vol->ondisk->vol_fsid;
157                 } else if (bcmp(&Hammer_FSId, &ondisk->vol_fsid, sizeof(Hammer_FSId)) != 0) {
158                         errx(1, "setup_volume: %s: FSId does match other "
159                                 "volumes!", vol->name);
160                 }
161         }
162         vol->vol_no = vol_no;
163         if (vol->using_supercl) {
164                 vol->clu_alist.config = &Vol_super_alist_config;
165                 vol->clu_alist.meta = ondisk->vol_almeta.super;
166                 vol->clu_alist.info = vol;
167         } else {
168                 vol->clu_alist.config = &Vol_normal_alist_config;
169                 vol->clu_alist.meta = ondisk->vol_almeta.normal;
170         }
171         vol->buf_alist.config = &Buf_alist_config;
172         vol->buf_alist.meta = ondisk->head.buf_almeta;
173
174         if (isnew) {
175                 hammer_alist_init(&vol->clu_alist, 0, 0, HAMMER_ASTATE_ALLOC);
176                 initbuffer(&vol->buf_alist, &ondisk->head, HAMMER_FSBUF_VOLUME);
177                 vol->cache.modified = 1;
178         }
179
180         /*
181          * Link the volume structure in
182          */
183         TAILQ_FOREACH(scan, &VolList, entry) {
184                 if (scan->vol_no == vol_no) {
185                         errx(1, "setup_volume %s: Duplicate volume number %d "
186                                 "against %s", filename, vol_no, scan->name);
187                 }
188         }
189         TAILQ_INSERT_TAIL(&VolList, vol, entry);
190         return(vol);
191 }
192
193 struct volume_info *
194 get_volume(int32_t vol_no)
195 {
196         struct volume_info *vol;
197
198         TAILQ_FOREACH(vol, &VolList, entry) {
199                 if (vol->vol_no == vol_no)
200                         break;
201         }
202         if (vol == NULL)
203                 errx(1, "get_volume: Volume %d does not exist!", vol_no);
204         ++vol->cache.refs;
205         /* not added to or removed from hammer cache */
206         return(vol);
207 }
208
209 void
210 rel_volume(struct volume_info *volume)
211 {
212         /* not added to or removed from hammer cache */
213         --volume->cache.refs;
214 }
215
/*
 * Look up the supercl_info for super-cluster scl_no within the given
 * volume, creating the in-memory structure on first reference.  The
 * on-disk image is read from the volume when isnew == 0, otherwise it
 * is zero'd and formatted fresh (and marked modified).  Returns the
 * supercl with one cache ref added; release with rel_supercl().
 */
struct supercl_info *
get_supercl(struct volume_info *vol, int32_t scl_no, hammer_alloc_state_t isnew)
{
	struct hammer_supercl_ondisk *ondisk;
	struct supercl_info *scl;
	int32_t scl_group;
	int64_t scl_group_size;
	int64_t clusterSize = vol->ondisk->vol_clsize;
	int n;

	assert(vol->using_supercl);

	/*
	 * Reuse an existing in-memory supercl if we have one.
	 */
	TAILQ_FOREACH(scl, &vol->supercl_list, entry) {
		if (scl->scl_no == scl_no)
			break;
	}
	if (scl == NULL) {
		/*
		 * Allocate the scl
		 */
		scl = malloc(sizeof(*scl));
		bzero(scl, sizeof(*scl));
		scl->scl_no = scl_no;
		scl->volume = vol;
		TAILQ_INSERT_TAIL(&vol->supercl_list, scl, entry);
		++vol->cache.refs;	/* supercl holds a ref on its volume */
		scl->cache.u.supercl = scl;
		hammer_cache_add(&scl->cache, ISSUPERCL);

		/*
		 * Calculate the super-cluster's offset in the volume.
		 *
		 * The arrangement is [scl * N][N * 32768 clusters], repeat.
		 * N is typically 16.
		 */
		scl_group = scl_no / HAMMER_VOL_SUPERCLUSTER_GROUP;
		scl_group_size = ((int64_t)HAMMER_BUFSIZE *
				  HAMMER_VOL_SUPERCLUSTER_GROUP) +
				  ((int64_t)HAMMER_VOL_SUPERCLUSTER_GROUP *
				  clusterSize * HAMMER_SCL_MAXCLUSTERS);
		scl->scl_offset = vol->ondisk->vol_clo_beg +
				  scl_group * scl_group_size +
				  (scl_no % HAMMER_VOL_SUPERCLUSTER_GROUP) *
				  HAMMER_BUFSIZE;
	}
	++scl->cache.refs;
	hammer_cache_flush();
	/*
	 * Instantiate the on-disk image on first access, reading it from
	 * disk unless the caller is about to format it as new.
	 */
	if ((ondisk = scl->ondisk) == NULL) {
		scl->ondisk = ondisk = malloc(HAMMER_BUFSIZE);
		scl->clu_alist.config = &Supercl_alist_config;
		scl->clu_alist.meta = ondisk->scl_meta;
		scl->buf_alist.config = &Buf_alist_config;
		scl->buf_alist.meta = ondisk->head.buf_almeta;
		if (isnew == 0) {
			n = pread(vol->fd, ondisk, HAMMER_BUFSIZE,
				  scl->scl_offset);
			if (n != HAMMER_BUFSIZE) {
				err(1, "get_supercl: %s:%d Read failed "
				    "at offset %lld",
				    vol->name, scl_no, scl->scl_offset);
			}
		}
	}
	if (isnew) {
		/* format a fresh super-cluster image */
		bzero(ondisk, HAMMER_BUFSIZE);
		hammer_alist_init(&scl->clu_alist, 0, 0, isnew);
		initbuffer(&scl->buf_alist, &ondisk->head,
			   HAMMER_FSBUF_SUPERCL);
		scl->cache.modified = 1;
	}
	return(scl);
}
288
289 void
290 rel_supercl(struct supercl_info *supercl)
291 {
292         struct volume_info *volume;
293
294         assert(supercl->cache.refs > 0);
295         if (--supercl->cache.refs == 0) {
296                 if (supercl->cache.delete) {
297                         volume = supercl->volume;
298                         if (supercl->cache.modified)
299                                 flush_supercl(supercl);
300                         TAILQ_REMOVE(&volume->supercl_list, supercl, entry);
301                         hammer_cache_del(&supercl->cache);
302                         free(supercl->ondisk);
303                         free(supercl);
304                         rel_volume(volume);
305                 }
306         }
307 }
308
/*
 * Look up the cluster_info for clu_no within the given volume, creating
 * the in-memory structure on first reference.  The on-disk cluster
 * header is read from the volume when isnew == 0, otherwise zero'd and
 * formatted fresh.  Returns the cluster with one cache ref added;
 * release with rel_cluster().
 */
struct cluster_info *
get_cluster(struct volume_info *vol, int32_t clu_no, hammer_alloc_state_t isnew)
{
	struct hammer_cluster_ondisk *ondisk;
	struct cluster_info *cl;
	int32_t scl_group;
	int64_t scl_group_size;
	int64_t clusterSize = vol->ondisk->vol_clsize;
	int n;

	TAILQ_FOREACH(cl, &vol->cluster_list, entry) {
		if (cl->clu_no == clu_no)
			break;
	}
	if (cl == NULL) {
		/*
		 * Allocate the cluster
		 */
		cl = malloc(sizeof(*cl));
		bzero(cl, sizeof(*cl));
		TAILQ_INIT(&cl->buffer_list);
		cl->clu_no = clu_no;
		cl->volume = vol;
		TAILQ_INSERT_TAIL(&vol->cluster_list, cl, entry);
		++vol->cache.refs;	/* cluster holds a ref on its volume */
		cl->cache.u.cluster = cl;
		hammer_cache_add(&cl->cache, ISCLUSTER);
		if (vol->using_supercl) {
			/*
			 * NOTE(review): get_supercl() already returns with a
			 * ref added; the extra ref here is the one dropped by
			 * rel_cluster(), leaving a net +1 on the supercl —
			 * confirm that is intended.
			 */
			cl->supercl = get_supercl(vol, clu_no / HAMMER_SCL_MAXCLUSTERS, 0);
			++cl->supercl->cache.refs;
		}

		/*
		 * Calculate the cluster's offset in the volume
		 *
		 * The arrangement is [scl * N][N * 32768 clusters], repeat.
		 * N is typically 16.
		 *
		 * Note that the cluster offset calculation is slightly
		 * different from the supercluster offset calculation due
		 * to the way the grouping works.
		 */
		if (vol->using_supercl) {
			scl_group = clu_no / HAMMER_VOL_SUPERCLUSTER_GROUP /
				    HAMMER_SCL_MAXCLUSTERS;
			scl_group_size = 
				((int64_t)HAMMER_BUFSIZE *
				HAMMER_VOL_SUPERCLUSTER_GROUP) +
				((int64_t)HAMMER_VOL_SUPERCLUSTER_GROUP *
				clusterSize * HAMMER_SCL_MAXCLUSTERS);
			scl_group_size += HAMMER_VOL_SUPERCLUSTER_GROUP *
					  HAMMER_BUFSIZE;
			cl->clu_offset =
				vol->ondisk->vol_clo_beg +
				scl_group * scl_group_size +
				(HAMMER_BUFSIZE * HAMMER_VOL_SUPERCLUSTER_GROUP) +
				 ((int64_t)clu_no % ((int64_t)HAMMER_SCL_MAXCLUSTERS * HAMMER_VOL_SUPERCLUSTER_GROUP)) *
				 clusterSize;
		} else {
			/* no superclusters: clusters are packed linearly */
			cl->clu_offset = vol->ondisk->vol_clo_beg +
					 (int64_t)clu_no * clusterSize;
		}
	}
	++cl->cache.refs;
	hammer_cache_flush();
	/*
	 * Instantiate the on-disk image on first access and wire up the
	 * four cluster a-lists (master plus btree/record/mdata slaves).
	 */
	if ((ondisk = cl->ondisk) == NULL) {
		cl->ondisk = ondisk = malloc(HAMMER_BUFSIZE);
		cl->alist_master.config = &Clu_master_alist_config;
		cl->alist_master.meta = ondisk->clu_master_meta;
		cl->alist_btree.config = &Clu_slave_alist_config;
		cl->alist_btree.meta = ondisk->clu_btree_meta;
		cl->alist_btree.info = cl;
		cl->alist_record.config = &Clu_slave_alist_config;
		cl->alist_record.meta = ondisk->clu_record_meta;
		cl->alist_record.info = cl;
		cl->alist_mdata.config = &Clu_slave_alist_config;
		cl->alist_mdata.meta = ondisk->clu_mdata_meta;
		cl->alist_mdata.info = cl;
		if (isnew == 0) {
			n = pread(vol->fd, ondisk, HAMMER_BUFSIZE,
				  cl->clu_offset);
			if (n != HAMMER_BUFSIZE) {
				err(1, "get_cluster: %s:%d Read failed "
				    "at offset %lld",
				    vol->name, clu_no, cl->clu_offset);
			}
		}
	}
	if (isnew) {
		/* format a fresh cluster image */
		bzero(ondisk, HAMMER_BUFSIZE);
		hammer_alist_init(&cl->alist_master, 0, 0, isnew);
		hammer_alist_init(&cl->alist_btree, 0, 0, HAMMER_ASTATE_ALLOC);
		hammer_alist_init(&cl->alist_record, 0, 0, HAMMER_ASTATE_ALLOC);
		hammer_alist_init(&cl->alist_mdata, 0, 0, HAMMER_ASTATE_ALLOC);
		cl->cache.modified = 1;
	}
	return(cl);
}
407
408 void
409 rel_cluster(struct cluster_info *cluster)
410 {
411         struct volume_info *volume;
412         struct supercl_info *supercl;
413
414         assert(cluster->cache.refs > 0);
415         if (--cluster->cache.refs == 0) {
416                 if (cluster->cache.delete) {
417                         volume = cluster->volume;
418                         supercl = cluster->supercl;
419                         if (cluster->cache.modified)
420                                 flush_cluster(cluster);
421                         TAILQ_REMOVE(&volume->cluster_list, cluster, entry);
422                         hammer_cache_del(&cluster->cache);
423                         free(cluster->ondisk);
424                         free(cluster);
425                         rel_volume(volume);
426                         if (supercl)
427                                 rel_supercl(supercl);
428                 }
429         }
430 }
431
/*
 * Acquire the specified buffer.
 *
 * We are formatting a new buffer if buf_type != 0
 *
 * Returns the buffer with one cache ref added; release with
 * rel_buffer().
 */
struct buffer_info *
get_buffer(struct cluster_info *cl, int32_t buf_no, int64_t buf_type)
{
	hammer_fsbuf_ondisk_t ondisk;
	struct buffer_info *buf;
	int n;

	/*
	 * Find the buffer.  Note that buffer 0 corresponds to the cluster
	 * header and should never be requested.
	 */
	assert(buf_no != 0);
	TAILQ_FOREACH(buf, &cl->buffer_list, entry) {
		if (buf->buf_no == buf_no)
			break;
	}
	if (buf == NULL) {
		/* first reference: build the in-memory tracking structure */
		buf = malloc(sizeof(*buf));
		bzero(buf, sizeof(*buf));
		buf->buf_no = buf_no;
		buf->buf_offset = cl->clu_offset + buf_no * HAMMER_BUFSIZE;
		buf->cluster = cl;
		buf->volume = cl->volume;
		TAILQ_INSERT_TAIL(&cl->buffer_list, buf, entry);
		++cl->cache.refs;	/* buffer holds a ref on its cluster */
		buf->cache.u.buffer = buf;
		hammer_cache_add(&buf->cache, ISBUFFER);
	}
	++buf->cache.refs;
	hammer_cache_flush();
	/*
	 * Instantiate the on-disk image on first access, reading it from
	 * disk unless we are formatting a new buffer.
	 */
	if ((ondisk = buf->ondisk) == NULL) {
		buf->ondisk = ondisk = malloc(HAMMER_BUFSIZE);
		buf->alist.config = &Buf_alist_config;
		buf->alist.meta = ondisk->head.buf_almeta;
		if (buf_type == 0) {
			n = pread(cl->volume->fd, ondisk, HAMMER_BUFSIZE,
				  buf->buf_offset);
			if (n != HAMMER_BUFSIZE) {
				err(1, "get_buffer: %s:%d:%d Read failed at "
				       "offset %lld",
				    cl->volume->name, buf->cluster->clu_no,
				    buf_no, buf->buf_offset);
			}
		}
	}
	if (buf_type) {
		/* format the buffer with the requested type */
		bzero(ondisk, HAMMER_BUFSIZE);
		initbuffer(&buf->alist, &ondisk->head, buf_type);
		buf->cache.modified = 1;
	}
	return(buf);
}
489
490 void
491 rel_buffer(struct buffer_info *buffer)
492 {
493         struct cluster_info *cluster;
494
495         assert(buffer->cache.refs > 0);
496         if (--buffer->cache.refs == 0) {
497                 if (buffer->cache.delete) {
498                         cluster = buffer->cluster;
499                         if (buffer->cache.modified)
500                                 flush_buffer(buffer);
501                         TAILQ_REMOVE(&cluster->buffer_list, buffer, entry);
502                         hammer_cache_del(&buffer->cache);
503                         free(buffer->ondisk);
504                         free(buffer);
505                         rel_cluster(cluster);
506                 }
507         }
508 }
509
/*
 * Retrieve a pointer to a B-Tree node given a cluster offset.  The underlying
 * *bufp is released (rel_buffer) if non-NULL and a referenced buffer is
 * loaded into it.
 */
hammer_node_ondisk_t
get_node(struct cluster_info *cl, int32_t offset, struct buffer_info **bufp)
{
	struct buffer_info *buf;

	/* swap out any previously-held buffer reference */
	if (*bufp)
		rel_buffer(*bufp);
	*bufp = buf = get_buffer(cl, offset / HAMMER_BUFSIZE, 0);
	/* the target buffer must already be formatted as a B-Tree buffer */
	if (buf->ondisk->head.buf_type != HAMMER_FSBUF_BTREE) {
		errx(1, "get_node %d:%d:%d - not a B-Tree node buffer!",
		     cl->volume->vol_no, cl->clu_no, offset);
	}
	/* return the node's address within the buffer image */
	return((void *)((char *)buf->ondisk + (offset & HAMMER_BUFMASK)));
}
528
/*
 * Allocate HAMMER elements - btree nodes, data storage, and record elements
 */

/*
 * Allocate one B-Tree node from the cluster's btree a-list, starting the
 * search at the cluster's saved index hint.  If the cluster is out of
 * nodes a new B-Tree buffer is formatted and the idx-buffer statistics
 * are bumped.  The cluster-relative byte offset of the node is returned
 * via *offp.
 */
void *
alloc_btree_element(struct cluster_info *cluster, int32_t *offp)
{
	struct buffer_info *buf;
	hammer_alist_t live;
	int32_t elm_no;
	void *item;

	live = &cluster->alist_btree;
	/* try forward from the hint first, then wrap to the beginning */
	elm_no = hammer_alist_alloc_fwd(live, 1, cluster->ondisk->idx_index);
	if (elm_no == HAMMER_ALIST_BLOCK_NONE)
		elm_no = hammer_alist_alloc_fwd(live, 1, 0);
	if (elm_no == HAMMER_ALIST_BLOCK_NONE) {
		/* no free node anywhere: format a brand new B-Tree buffer */
		alloc_new_buffer(cluster, live,
				 HAMMER_FSBUF_BTREE, HAMMER_BTREE_NODES);
		++cluster->ondisk->stat_idx_bufs;
		++cluster->volume->ondisk->vol_stat_idx_bufs;
		++cluster->volume->ondisk->vol0_stat_idx_bufs;
		elm_no = hammer_alist_alloc(live, 1);
		assert(elm_no != HAMMER_ALIST_BLOCK_NONE);
	}
	cluster->ondisk->idx_index = elm_no;	/* save hint for next alloc */
	buf = get_buffer(cluster, elm_no / HAMMER_FSBUF_MAXBLKS, 0);
	assert(buf->ondisk->head.buf_type != 0);
	item = &buf->ondisk->btree.nodes[elm_no & HAMMER_FSBUF_BLKMASK];
	*offp = buf->buf_no * HAMMER_BUFSIZE +
		((char *)item - (char *)buf->ondisk);
	return(item);
}
561
562 void *
563 alloc_data_element(struct cluster_info *cluster, int32_t bytes, int32_t *offp)
564 {
565         struct buffer_info *buf;
566         hammer_alist_t live;
567         int32_t elm_no;
568         int32_t nblks = (bytes + HAMMER_DATA_BLKMASK) & ~HAMMER_DATA_BLKMASK;
569         void *item;
570
571         /*
572          * Try to allocate a btree-node.  If elm_no is HAMMER_ALIST_BLOCK_NONE
573          * and buf is non-NULL we have to initialize a new buffer's a-list.
574          */
575         live = &cluster->alist_mdata;
576         elm_no = hammer_alist_alloc_fwd(live, nblks, cluster->ondisk->idx_data);
577         if (elm_no == HAMMER_ALIST_BLOCK_NONE)
578                 elm_no = hammer_alist_alloc_fwd(live, 1, 0);
579         if (elm_no == HAMMER_ALIST_BLOCK_NONE) {
580                 alloc_new_buffer(cluster, live,
581                                  HAMMER_FSBUF_DATA, HAMMER_DATA_NODES);
582                 ++cluster->ondisk->stat_data_bufs;
583                 ++cluster->volume->ondisk->vol_stat_data_bufs;
584                 ++cluster->volume->ondisk->vol0_stat_data_bufs;
585                 elm_no = hammer_alist_alloc(live, nblks);
586                 assert(elm_no != HAMMER_ALIST_BLOCK_NONE);
587         }
588         cluster->ondisk->idx_index = elm_no;
589         buf = get_buffer(cluster, elm_no / HAMMER_FSBUF_MAXBLKS, 0);
590         assert(buf->ondisk->head.buf_type != 0);
591         item = &buf->ondisk->data.data[elm_no & HAMMER_FSBUF_BLKMASK];
592         *offp = buf->buf_no * HAMMER_BUFSIZE +
593                 ((char *)item - (char *)buf->ondisk);
594         return(item);
595 }
596
/*
 * Allocate one record element from the cluster's record a-list.  Records
 * are allocated in the reverse direction, starting from the saved record
 * hint and falling back to HAMMER_ALIST_BLOCK_MAX, matching the reverse
 * buffer allocation alloc_new_buffer() performs for record buffers.  If
 * the cluster is out of record space a new record buffer is formatted
 * and the record buffer statistics are bumped.  The cluster-relative
 * byte offset of the record is returned via *offp.
 */
void *
alloc_record_element(struct cluster_info *cluster, int32_t *offp)
{
	struct buffer_info *buf;
	hammer_alist_t live;
	int32_t elm_no;
	void *item;

	live = &cluster->alist_record;
	/* search backwards from the hint, then from the very top */
	elm_no = hammer_alist_alloc_rev(live, 1, cluster->ondisk->idx_record);
	if (elm_no == HAMMER_ALIST_BLOCK_NONE)
		elm_no = hammer_alist_alloc_rev(live, 1,HAMMER_ALIST_BLOCK_MAX);
	if (elm_no == HAMMER_ALIST_BLOCK_NONE) {
		/* no free record anywhere: format a brand new record buffer */
		alloc_new_buffer(cluster, live,
				 HAMMER_FSBUF_RECORDS, HAMMER_RECORD_NODES);
		++cluster->ondisk->stat_rec_bufs;
		++cluster->volume->ondisk->vol_stat_rec_bufs;
		++cluster->volume->ondisk->vol0_stat_rec_bufs;
		elm_no = hammer_alist_alloc_rev(live, 1,HAMMER_ALIST_BLOCK_MAX);
		assert(elm_no != HAMMER_ALIST_BLOCK_NONE);
	}
	cluster->ondisk->idx_record = elm_no;	/* save hint for next alloc */
	buf = get_buffer(cluster, elm_no / HAMMER_FSBUF_MAXBLKS, 0);
	assert(buf->ondisk->head.buf_type != 0);
	item = &buf->ondisk->record.recs[elm_no & HAMMER_FSBUF_BLKMASK];
	*offp = buf->buf_no * HAMMER_BUFSIZE +
		((char *)item - (char *)buf->ondisk);
	return(item);
}
626
/*
 * Format a brand new buffer in the cluster for use by the given live
 * a-list (btree, record, or data).  Record buffers are carved from the
 * end of the cluster (reverse allocation), everything else from the
 * front.  The new buffer's element range is freed into 'live' to make
 * it allocatable, and record buffers are additionally flagged in the
 * cluster's record buffer bitmap.
 */
static void
alloc_new_buffer(struct cluster_info *cluster, hammer_alist_t live,
		 u_int64_t type, int32_t nelements)
{
	int32_t buf_no;
	struct buffer_info *buf;

	if (type == HAMMER_FSBUF_RECORDS) {
		/* record buffers grow from the end of the cluster backwards */
		buf_no = hammer_alist_alloc_rev(&cluster->alist_master, 1,
						HAMMER_ALIST_BLOCK_MAX);
	} else {
		buf_no = hammer_alist_alloc_fwd(&cluster->alist_master, 1, 
						0);
	}
	assert(buf_no != HAMMER_ALIST_BLOCK_NONE);
	/* format the buffer and expose its elements through 'live' */
	buf = get_buffer(cluster, buf_no, type);
	hammer_alist_free(live, buf_no * HAMMER_FSBUF_MAXBLKS, nelements);
	if (type == HAMMER_FSBUF_RECORDS) {
		cluster->ondisk->clu_record_buf_bitmap[buf_no >> 5] |=
			1 << (buf_no & 31);
	}
/*	rel_buffer(buffer);XXX modified bit for multiple gets/rels */
}
650
/*
 * Flush various tracking structures to disk
 */
658 void
659 flush_all_volumes(void)
660 {
661         struct volume_info *vol;
662
663         TAILQ_FOREACH(vol, &VolList, entry)
664                 flush_volume(vol);
665 }
666
667 void
668 flush_volume(struct volume_info *vol)
669 {
670         struct supercl_info *supercl;
671         struct cluster_info *cl;
672
673         TAILQ_FOREACH(supercl, &vol->supercl_list, entry)
674                 flush_supercl(supercl);
675         TAILQ_FOREACH(cl, &vol->cluster_list, entry)
676                 flush_cluster(cl);
677         writehammerbuf(vol, vol->ondisk, 0);
678         vol->cache.modified = 0;
679 }
680
681 void
682 flush_supercl(struct supercl_info *supercl)
683 {
684         int64_t supercl_offset;
685
686         supercl_offset = supercl->scl_offset;
687         writehammerbuf(supercl->volume, supercl->ondisk, supercl_offset);
688         supercl->cache.modified = 0;
689 }
690
691 void
692 flush_cluster(struct cluster_info *cl)
693 {
694         struct buffer_info *buf;
695         int64_t cluster_offset;
696
697         TAILQ_FOREACH(buf, &cl->buffer_list, entry)
698                 flush_buffer(buf);
699         cluster_offset = cl->clu_offset;
700         writehammerbuf(cl->volume, cl->ondisk, cluster_offset);
701         cl->cache.modified = 0;
702 }
703
704 void
705 flush_buffer(struct buffer_info *buf)
706 {
707         writehammerbuf(buf->volume, buf->ondisk, buf->buf_offset);
708         buf->cache.modified = 0;
709 }
710
/*
 * Generic buffer initialization
 */

/*
 * Stamp a freshly zero'd buffer header with its type and initialize the
 * buffer's block a-list in the HAMMER_ASTATE_ALLOC state.  The a-list's
 * meta array lives inside the header (head->buf_almeta), wired up by
 * the caller before this is invoked.
 */
static void
initbuffer(hammer_alist_t live, hammer_fsbuf_head_t head, u_int64_t type)
{
	head->buf_type = type;
	hammer_alist_init(live, 0, 0, HAMMER_ASTATE_ALLOC);
}
720
#if 0
/*
 * Core I/O operations
 */

/*
 * Read one HAMMER_BUFSIZE block from the volume at the given byte
 * offset; exits on error or short read.  (Compiled out — currently
 * unused.)
 */
static void
readhammerbuf(struct volume_info *vol, void *data, int64_t offset)
{
	ssize_t n;

	n = pread(vol->fd, data, HAMMER_BUFSIZE, offset);
	if (n != HAMMER_BUFSIZE)
		err(1, "Read volume %d (%s)", vol->vol_no, vol->name);
}

#endif
736
737 static void
738 writehammerbuf(struct volume_info *vol, const void *data, int64_t offset)
739 {
740         ssize_t n;
741
742         n = pwrite(vol->fd, data, HAMMER_BUFSIZE, offset);
743         if (n != HAMMER_BUFSIZE)
744                 err(1, "Write volume %d (%s)", vol->vol_no, vol->name);
745 }
746
/*
 * Print a formatted fatal-error message to stderr, append a newline,
 * and terminate the program with exit status 1.
 */
void
panic(const char *ctl, ...)
{
	va_list ap;

	va_start(ap, ctl);
	vfprintf(stderr, ctl, ap);
	va_end(ap);
	fprintf(stderr, "\n");
	exit(1);
}
758