sbin/hammer: Make some readonly pointer args const
[dragonfly.git] / sbin / hammer / ondisk.c
1 /*
2  * Copyright (c) 2007 The DragonFly Project.  All rights reserved.
3  *
4  * This code is derived from software contributed to The DragonFly Project
5  * by Matthew Dillon <dillon@backplane.com>
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions
9  * are met:
10  *
11  * 1. Redistributions of source code must retain the above copyright
12  *    notice, this list of conditions and the following disclaimer.
13  * 2. Redistributions in binary form must reproduce the above copyright
14  *    notice, this list of conditions and the following disclaimer in
15  *    the documentation and/or other materials provided with the
16  *    distribution.
17  * 3. Neither the name of The DragonFly Project nor the names of its
18  *    contributors may be used to endorse or promote products derived
19  *    from this software without specific, prior written permission.
20  *
21  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
22  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
23  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
24  * FOR A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE
25  * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
26  * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING,
27  * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
28  * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
29  * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
30  * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
31  * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
32  * SUCH DAMAGE.
33  */
34
35 #include <sys/diskslice.h>
36 #include <sys/diskmbr.h>
37
38 #include "hammer_util.h"
39
40 static void check_volume(volume_info_t volume);
41 static void get_buffer_readahead(buffer_info_t base);
42 static __inline int readhammervol(volume_info_t volume);
43 static __inline int readhammerbuf(buffer_info_t buffer);
44 static __inline int writehammervol(volume_info_t volume);
45 static __inline int writehammerbuf(buffer_info_t buffer);
46
47 uuid_t Hammer_FSType;
48 uuid_t Hammer_FSId;
49 int UseReadBehind = -4;
50 int UseReadAhead = 4;
51 int DebugOpt;
52 uint32_t HammerVersion = -1;
53
54 TAILQ_HEAD(volume_list, volume_info);
55 static struct volume_list VolList = TAILQ_HEAD_INITIALIZER(VolList);
56 static int valid_hammer_volumes;
57
58 static __inline
59 int
60 buffer_hash(hammer_off_t zone2_offset)
61 {
62         int hi;
63
64         hi = (int)(zone2_offset / HAMMER_BUFSIZE) & HAMMER_BUFLISTMASK;
65         return(hi);
66 }
67
68 static buffer_info_t
69 find_buffer(hammer_off_t zone2_offset)
70 {
71         volume_info_t volume;
72         buffer_info_t buffer;
73         int hi;
74
75         volume = get_volume(HAMMER_VOL_DECODE(zone2_offset));
76         assert(volume);
77
78         hi = buffer_hash(zone2_offset);
79         TAILQ_FOREACH(buffer, &volume->buffer_lists[hi], entry)
80                 if (buffer->zone2_offset == zone2_offset)
81                         return(buffer);
82         return(NULL);
83 }
84
85 static
86 volume_info_t
87 __alloc_volume(const char *volname, int oflags)
88 {
89         volume_info_t volume;
90         int i;
91
92         volume = calloc(1, sizeof(*volume));
93         volume->vol_no = -1;
94         volume->rdonly = (oflags == O_RDONLY);
95         volume->name = strdup(volname);
96         volume->fd = open(volume->name, oflags);
97         if (volume->fd < 0) {
98                 err(1, "alloc_volume: Failed to open %s", volume->name);
99                 /* not reached */
100         }
101         check_volume(volume);
102
103         volume->ondisk = calloc(1, HAMMER_BUFSIZE);
104
105         for (i = 0; i < HAMMER_BUFLISTS; ++i)
106                 TAILQ_INIT(&volume->buffer_lists[i]);
107
108         return(volume);
109 }
110
111 static void
112 __add_volume(const volume_info_t volume)
113 {
114         volume_info_t scan;
115         struct stat st1, st2;
116
117         if (fstat(volume->fd, &st1) != 0) {
118                 errx(1, "add_volume: %s: Failed to stat", volume->name);
119                 /* not reached */
120         }
121
122         TAILQ_FOREACH(scan, &VolList, entry) {
123                 if (scan->vol_no == volume->vol_no) {
124                         errx(1, "add_volume: %s: Duplicate volume number %d "
125                                 "against %s",
126                                 volume->name, volume->vol_no, scan->name);
127                         /* not reached */
128                 }
129                 if (fstat(scan->fd, &st2) != 0) {
130                         errx(1, "add_volume: %s: Failed to stat %s",
131                                 volume->name, scan->name);
132                         /* not reached */
133                 }
134                 if ((st1.st_ino == st2.st_ino) && (st1.st_dev == st2.st_dev)) {
135                         errx(1, "add_volume: %s: Specified more than once",
136                                 volume->name);
137                         /* not reached */
138                 }
139         }
140
141         TAILQ_INSERT_TAIL(&VolList, volume, entry);
142 }
143
144 static void
145 __verify_volume(const volume_info_t volume)
146 {
147         hammer_volume_ondisk_t ondisk = volume->ondisk;
148
149         if (ondisk->vol_signature != HAMMER_FSBUF_VOLUME) {
150                 errx(1, "verify_volume: Invalid volume signature %016jx",
151                         ondisk->vol_signature);
152                 /* not reached */
153         }
154         if (ondisk->vol_rootvol != HAMMER_ROOT_VOLNO) {
155                 errx(1, "verify_volume: Invalid root volume# %d",
156                         ondisk->vol_rootvol);
157                 /* not reached */
158         }
159         if (bcmp(&Hammer_FSType, &ondisk->vol_fstype, sizeof(Hammer_FSType))) {
160                 errx(1, "verify_volume: %s: Header does not indicate "
161                         "that this is a HAMMER volume", volume->name);
162                 /* not reached */
163         }
164         if (bcmp(&Hammer_FSId, &ondisk->vol_fsid, sizeof(Hammer_FSId))) {
165                 errx(1, "verify_volume: %s: FSId does not match other volumes!",
166                         volume->name);
167                 /* not reached */
168         }
169 }
170
171 /*
172  * Initialize a volume structure and ondisk vol_no field.
173  */
174 volume_info_t
175 init_volume(const char *filename, int oflags, int32_t vol_no)
176 {
177         volume_info_t volume;
178
179         volume = __alloc_volume(filename, oflags);
180         volume->vol_no = volume->ondisk->vol_no = vol_no;
181
182         __add_volume(volume);
183
184         return(volume);
185 }
186
187 /*
188  * Initialize a volume structure and read ondisk volume header.
189  */
190 volume_info_t
191 load_volume(const char *filename, int oflags, int verify)
192 {
193         volume_info_t volume;
194         int n;
195
196         volume = __alloc_volume(filename, oflags);
197
198         n = readhammervol(volume);
199         if (n == -1) {
200                 err(1, "load_volume: %s: Read failed at offset 0",
201                     volume->name);
202                 /* not reached */
203         }
204         volume->vol_no = volume->ondisk->vol_no;
205         HammerVersion = volume->ondisk->vol_version;
206
207         if (valid_hammer_volumes++ == 0)
208                 Hammer_FSId = volume->ondisk->vol_fsid;
209         if (verify)
210                 __verify_volume(volume);
211
212         __add_volume(volume);
213
214         return(volume);
215 }
216
217 /*
218  * Check basic volume characteristics.
219  */
220 static void
221 check_volume(volume_info_t volume)
222 {
223         struct partinfo pinfo;
224         struct stat st;
225
226         /*
227          * Allow the formatting of block devices or regular files
228          */
229         if (ioctl(volume->fd, DIOCGPART, &pinfo) < 0) {
230                 if (fstat(volume->fd, &st) < 0) {
231                         err(1, "Unable to stat %s", volume->name);
232                         /* not reached */
233                 }
234                 if (S_ISREG(st.st_mode)) {
235                         volume->size = st.st_size;
236                         volume->type = "REGFILE";
237                 } else {
238                         errx(1, "Unsupported file type for %s", volume->name);
239                         /* not reached */
240                 }
241         } else {
242                 /*
243                  * When formatting a block device as a HAMMER volume the
244                  * sector size must be compatible.  HAMMER uses 16384 byte
245                  * filesystem buffers.
246                  */
247                 if (pinfo.reserved_blocks) {
248                         errx(1, "HAMMER cannot be placed in a partition "
249                                 "which overlaps the disklabel or MBR");
250                         /* not reached */
251                 }
252                 if (pinfo.media_blksize > HAMMER_BUFSIZE ||
253                     HAMMER_BUFSIZE % pinfo.media_blksize) {
254                         errx(1, "A media sector size of %d is not supported",
255                              pinfo.media_blksize);
256                         /* not reached */
257                 }
258
259                 volume->size = pinfo.media_size;
260                 volume->device_offset = pinfo.media_offset;
261                 volume->type = "DEVICE";
262         }
263 }
264
265 int
266 is_regfile(const volume_info_t volume)
267 {
268         return(strcmp(volume->type, "REGFILE") ? 0 : 1);
269 }
270
271 void
272 assert_volume_offset(const volume_info_t volume)
273 {
274         assert(hammer_is_zone_raw_buffer(volume->vol_free_off));
275         assert(hammer_is_zone_raw_buffer(volume->vol_free_end));
276         if (volume->vol_free_off >= volume->vol_free_end) {
277                 errx(1, "Ran out of room, filesystem too small");
278                 /* not reached */
279         }
280 }
281
282 volume_info_t
283 get_volume(int32_t vol_no)
284 {
285         volume_info_t volume;
286
287         TAILQ_FOREACH(volume, &VolList, entry) {
288                 if (volume->vol_no == vol_no)
289                         break;
290         }
291
292         return(volume);
293 }
294
295 volume_info_t
296 get_root_volume(void)
297 {
298         return(get_volume(HAMMER_ROOT_VOLNO));
299 }
300
301 static hammer_off_t
302 __blockmap_xlate_to_zone2(hammer_off_t buf_offset)
303 {
304         hammer_off_t zone2_offset;
305         int error = 0;
306
307         if (hammer_is_zone_raw_buffer(buf_offset))
308                 zone2_offset = buf_offset;
309         else
310                 zone2_offset = blockmap_lookup(buf_offset, &error);
311
312         if (error)
313                 return(HAMMER_OFF_BAD);
314         assert(hammer_is_zone_raw_buffer(zone2_offset));
315
316         return(zone2_offset);
317 }
318
319 static buffer_info_t
320 __alloc_buffer(hammer_off_t zone2_offset, int isnew)
321 {
322         volume_info_t volume;
323         buffer_info_t buffer;
324         int hi;
325
326         volume = get_volume(HAMMER_VOL_DECODE(zone2_offset));
327         assert(volume != NULL);
328
329         buffer = calloc(1, sizeof(*buffer));
330         buffer->zone2_offset = zone2_offset;
331         buffer->raw_offset = hammer_xlate_to_phys(volume->ondisk, zone2_offset);
332         buffer->volume = volume;
333         buffer->ondisk = calloc(1, HAMMER_BUFSIZE);
334
335         if (isnew <= 0) {
336                 if (readhammerbuf(buffer) == -1) {
337                         err(1, "Failed to read %s:%016jx at %016jx",
338                             volume->name,
339                             (intmax_t)buffer->zone2_offset,
340                             (intmax_t)buffer->raw_offset);
341                         /* not reached */
342                 }
343         }
344
345         hi = buffer_hash(zone2_offset);
346         TAILQ_INSERT_TAIL(&volume->buffer_lists[hi], buffer, entry);
347         hammer_cache_add(&buffer->cache);
348
349         return(buffer);
350 }
351
352 /*
353  * Acquire the 16KB buffer for specified zone offset.
354  */
355 static buffer_info_t
356 get_buffer(hammer_off_t buf_offset, int isnew)
357 {
358         buffer_info_t buffer;
359         hammer_off_t zone2_offset;
360         int dora = 0;
361
362         zone2_offset = __blockmap_xlate_to_zone2(buf_offset);
363         if (zone2_offset == HAMMER_OFF_BAD)
364                 return(NULL);
365
366         zone2_offset &= ~HAMMER_BUFMASK64;
367         buffer = find_buffer(zone2_offset);
368
369         if (buffer == NULL) {
370                 buffer = __alloc_buffer(zone2_offset, isnew);
371                 dora = (isnew == 0);
372         } else {
373                 assert(isnew != -1);
374                 hammer_cache_used(&buffer->cache);
375         }
376         assert(buffer->ondisk != NULL);
377
378         ++buffer->cache.refs;
379         hammer_cache_flush();
380
381         if (isnew > 0) {
382                 assert(buffer->cache.modified == 0);
383                 bzero(buffer->ondisk, HAMMER_BUFSIZE);
384                 buffer->cache.modified = 1;
385         }
386         if (dora)
387                 get_buffer_readahead(buffer);
388         return(buffer);
389 }
390
391 static void
392 get_buffer_readahead(const buffer_info_t base)
393 {
394         buffer_info_t buffer;
395         volume_info_t volume;
396         hammer_off_t zone2_offset;
397         int64_t raw_offset;
398         int ri = UseReadBehind;
399         int re = UseReadAhead;
400
401         raw_offset = base->raw_offset + ri * HAMMER_BUFSIZE;
402         volume = base->volume;
403
404         while (ri < re) {
405                 if (raw_offset >= volume->ondisk->vol_buf_end)
406                         break;
407                 if (raw_offset < volume->ondisk->vol_buf_beg || ri == 0) {
408                         ++ri;
409                         raw_offset += HAMMER_BUFSIZE;
410                         continue;
411                 }
412                 zone2_offset = HAMMER_ENCODE_RAW_BUFFER(volume->vol_no,
413                         raw_offset - volume->ondisk->vol_buf_beg);
414                 buffer = find_buffer(zone2_offset);
415                 if (buffer == NULL) {
416                         /* call with -1 to prevent another readahead */
417                         buffer = get_buffer(zone2_offset, -1);
418                         rel_buffer(buffer);
419                 }
420                 ++ri;
421                 raw_offset += HAMMER_BUFSIZE;
422         }
423 }
424
425 void
426 rel_buffer(buffer_info_t buffer)
427 {
428         volume_info_t volume;
429         int hi;
430
431         if (buffer == NULL)
432                 return;
433         assert(buffer->cache.refs > 0);
434         if (--buffer->cache.refs == 0) {
435                 if (buffer->cache.delete) {
436                         hi = buffer_hash(buffer->zone2_offset);
437                         volume = buffer->volume;
438                         if (buffer->cache.modified)
439                                 flush_buffer(buffer);
440                         TAILQ_REMOVE(&volume->buffer_lists[hi], buffer, entry);
441                         hammer_cache_del(&buffer->cache);
442                         free(buffer->ondisk);
443                         free(buffer);
444                 }
445         }
446 }
447
448 /*
449  * Retrieve a pointer to a buffer data given a buffer offset.  The underlying
450  * bufferp is freed if isnew or the offset is out of range of the cached data.
451  * If bufferp is freed a referenced buffer is loaded into it.
452  */
453 void *
454 get_buffer_data(hammer_off_t buf_offset, buffer_info_t *bufferp, int isnew)
455 {
456         hammer_off_t xor;
457
458         if (*bufferp != NULL) {
459                 /* XXX xor is always non zero for indirect zones */
460                 xor = HAMMER_OFF_LONG_ENCODE(buf_offset) ^
461                       HAMMER_OFF_LONG_ENCODE((*bufferp)->zone2_offset);
462                 if (isnew > 0 || (xor & ~HAMMER_BUFMASK64)) {
463                         rel_buffer(*bufferp);
464                         *bufferp = NULL;
465                 }
466         }
467
468         if (*bufferp == NULL) {
469                 *bufferp = get_buffer(buf_offset, isnew);
470                 if (*bufferp == NULL)
471                         return(NULL);
472         }
473
474         return(((char *)(*bufferp)->ondisk) +
475                 ((int32_t)buf_offset & HAMMER_BUFMASK));
476 }
477
478 /*
479  * Allocate HAMMER elements - B-Tree nodes
480  */
481 hammer_node_ondisk_t
482 alloc_btree_node(hammer_off_t *offp, buffer_info_t *data_bufferp)
483 {
484         hammer_node_ondisk_t node;
485
486         node = alloc_blockmap(HAMMER_ZONE_BTREE_INDEX, sizeof(*node),
487                               offp, data_bufferp);
488         bzero(node, sizeof(*node));
489         return(node);
490 }
491
492 /*
493  * Allocate HAMMER elements - meta data (inode, direntry, PFS, etc)
494  */
495 void *
496 alloc_meta_element(hammer_off_t *offp, int32_t data_len,
497                    buffer_info_t *data_bufferp)
498 {
499         void *data;
500
501         data = alloc_blockmap(HAMMER_ZONE_META_INDEX, data_len,
502                               offp, data_bufferp);
503         bzero(data, data_len);
504         return(data);
505 }
506
507 /*
508  * Format a new blockmap.  This is mostly a degenerate case because
509  * all allocations are now actually done from the freemap.
510  */
511 void
512 format_blockmap(volume_info_t root_vol, int zone, hammer_off_t offset)
513 {
514         hammer_blockmap_t blockmap;
515         hammer_off_t zone_base;
516
517         /* Only root volume needs formatting */
518         assert(root_vol->vol_no == HAMMER_ROOT_VOLNO);
519
520         assert(hammer_is_index_record(zone));
521
522         blockmap = &root_vol->ondisk->vol0_blockmap[zone];
523         zone_base = HAMMER_ZONE_ENCODE(zone, offset);
524
525         bzero(blockmap, sizeof(*blockmap));
526         blockmap->phys_offset = 0;
527         blockmap->first_offset = zone_base;
528         blockmap->next_offset = zone_base;
529         blockmap->alloc_offset = HAMMER_ENCODE(zone, 255, -1);
530         hammer_crc_set_blockmap(HammerVersion, blockmap);
531 }
532
533 /*
534  * Format a new freemap.  Set all layer1 entries to UNAVAIL.  The initialize
535  * code will load each volume's freemap.
536  */
537 void
538 format_freemap(volume_info_t root_vol)
539 {
540         buffer_info_t buffer = NULL;
541         hammer_off_t layer1_offset;
542         hammer_blockmap_t blockmap;
543         hammer_blockmap_layer1_t layer1;
544         int i, isnew;
545
546         /* Only root volume needs formatting */
547         assert(root_vol->vol_no == HAMMER_ROOT_VOLNO);
548
549         layer1_offset = bootstrap_bigblock(root_vol);
550         for (i = 0; i < HAMMER_BIGBLOCK_SIZE; i += sizeof(*layer1)) {
551                 isnew = ((i % HAMMER_BUFSIZE) == 0);
552                 layer1 = get_buffer_data(layer1_offset + i, &buffer, isnew);
553                 bzero(layer1, sizeof(*layer1));
554                 layer1->phys_offset = HAMMER_BLOCKMAP_UNAVAIL;
555                 layer1->blocks_free = 0;
556                 hammer_crc_set_layer1(HammerVersion, layer1);
557         }
558         assert(i == HAMMER_BIGBLOCK_SIZE);
559         rel_buffer(buffer);
560
561         blockmap = &root_vol->ondisk->vol0_blockmap[HAMMER_ZONE_FREEMAP_INDEX];
562         bzero(blockmap, sizeof(*blockmap));
563         blockmap->phys_offset = layer1_offset;
564         blockmap->first_offset = 0;
565         blockmap->next_offset = HAMMER_ENCODE_RAW_BUFFER(0, 0);
566         blockmap->alloc_offset = HAMMER_ENCODE_RAW_BUFFER(255, -1);
567         hammer_crc_set_blockmap(HammerVersion, blockmap);
568 }
569
570 /*
571  * Load the volume's remaining free space into the freemap.
572  *
573  * Returns the number of big-blocks available.
574  */
575 int64_t
576 initialize_freemap(volume_info_t volume)
577 {
578         volume_info_t root_vol;
579         buffer_info_t buffer1 = NULL;
580         buffer_info_t buffer2 = NULL;
581         hammer_blockmap_layer1_t layer1;
582         hammer_blockmap_layer2_t layer2;
583         hammer_off_t layer1_offset;
584         hammer_off_t layer2_offset;
585         hammer_off_t phys_offset;
586         hammer_off_t block_offset;
587         hammer_off_t aligned_vol_free_end;
588         hammer_blockmap_t freemap;
589         int64_t count = 0;
590         int64_t layer1_count = 0;
591
592         root_vol = get_root_volume();
593
594         assert_volume_offset(volume);
595         aligned_vol_free_end = HAMMER_BLOCKMAP_LAYER2_DOALIGN(volume->vol_free_end);
596
597         printf("initialize freemap volume %d\n", volume->vol_no);
598
599         /*
600          * Initialize the freemap.  First preallocate the big-blocks required
601          * to implement layer2.   This preallocation is a bootstrap allocation
602          * using blocks from the target volume.
603          */
604         freemap = &root_vol->ondisk->vol0_blockmap[HAMMER_ZONE_FREEMAP_INDEX];
605
606         for (phys_offset = HAMMER_ENCODE_RAW_BUFFER(volume->vol_no, 0);
607              phys_offset < aligned_vol_free_end;
608              phys_offset += HAMMER_BLOCKMAP_LAYER2) {
609                 layer1_offset = freemap->phys_offset +
610                                 HAMMER_BLOCKMAP_LAYER1_OFFSET(phys_offset);
611                 layer1 = get_buffer_data(layer1_offset, &buffer1, 0);
612                 if (layer1->phys_offset == HAMMER_BLOCKMAP_UNAVAIL) {
613                         layer1->phys_offset = bootstrap_bigblock(volume);
614                         layer1->blocks_free = 0;
615                         buffer1->cache.modified = 1;
616                         hammer_crc_set_layer1(HammerVersion, layer1);
617                 }
618         }
619
620         /*
621          * Now fill everything in.
622          */
623         for (phys_offset = HAMMER_ENCODE_RAW_BUFFER(volume->vol_no, 0);
624              phys_offset < aligned_vol_free_end;
625              phys_offset += HAMMER_BLOCKMAP_LAYER2) {
626                 layer1_count = 0;
627                 layer1_offset = freemap->phys_offset +
628                                 HAMMER_BLOCKMAP_LAYER1_OFFSET(phys_offset);
629                 layer1 = get_buffer_data(layer1_offset, &buffer1, 0);
630                 assert(layer1->phys_offset != HAMMER_BLOCKMAP_UNAVAIL);
631
632                 for (block_offset = 0;
633                      block_offset < HAMMER_BLOCKMAP_LAYER2;
634                      block_offset += HAMMER_BIGBLOCK_SIZE) {
635                         layer2_offset = layer1->phys_offset +
636                                         HAMMER_BLOCKMAP_LAYER2_OFFSET(block_offset);
637                         layer2 = get_buffer_data(layer2_offset, &buffer2, 0);
638                         bzero(layer2, sizeof(*layer2));
639
640                         if (phys_offset + block_offset < volume->vol_free_off) {
641                                 /*
642                                  * Big-blocks already allocated as part
643                                  * of the freemap bootstrap.
644                                  */
645                                 layer2->zone = HAMMER_ZONE_FREEMAP_INDEX;
646                                 layer2->append_off = HAMMER_BIGBLOCK_SIZE;
647                                 layer2->bytes_free = 0;
648                         } else if (phys_offset + block_offset < volume->vol_free_end) {
649                                 layer2->zone = 0;
650                                 layer2->append_off = 0;
651                                 layer2->bytes_free = HAMMER_BIGBLOCK_SIZE;
652                                 ++count;
653                                 ++layer1_count;
654                         } else {
655                                 layer2->zone = HAMMER_ZONE_UNAVAIL_INDEX;
656                                 layer2->append_off = HAMMER_BIGBLOCK_SIZE;
657                                 layer2->bytes_free = 0;
658                         }
659                         hammer_crc_set_layer2(HammerVersion, layer2);
660                         buffer2->cache.modified = 1;
661                 }
662
663                 layer1->blocks_free += layer1_count;
664                 hammer_crc_set_layer1(HammerVersion, layer1);
665                 buffer1->cache.modified = 1;
666         }
667
668         rel_buffer(buffer1);
669         rel_buffer(buffer2);
670         return(count);
671 }
672
673 /*
674  * Returns the number of big-blocks available for filesystem data and undos
675  * without formatting.
676  */
677 int64_t
678 count_freemap(const volume_info_t volume)
679 {
680         hammer_off_t phys_offset;
681         hammer_off_t vol_free_off;
682         hammer_off_t aligned_vol_free_end;
683         int64_t count = 0;
684
685         vol_free_off = HAMMER_ENCODE_RAW_BUFFER(volume->vol_no, 0);
686
687         assert_volume_offset(volume);
688         aligned_vol_free_end = HAMMER_BLOCKMAP_LAYER2_DOALIGN(volume->vol_free_end);
689
690         if (volume->vol_no == HAMMER_ROOT_VOLNO)
691                 vol_free_off += HAMMER_BIGBLOCK_SIZE;
692
693         for (phys_offset = HAMMER_ENCODE_RAW_BUFFER(volume->vol_no, 0);
694              phys_offset < aligned_vol_free_end;
695              phys_offset += HAMMER_BLOCKMAP_LAYER2) {
696                 vol_free_off += HAMMER_BIGBLOCK_SIZE;
697         }
698
699         for (phys_offset = HAMMER_ENCODE_RAW_BUFFER(volume->vol_no, 0);
700              phys_offset < aligned_vol_free_end;
701              phys_offset += HAMMER_BIGBLOCK_SIZE) {
702                 if (phys_offset < vol_free_off)
703                         ;
704                 else if (phys_offset < volume->vol_free_end)
705                         ++count;
706         }
707
708         return(count);
709 }
710
711 /*
712  * Format the undomap for the root volume.
713  */
714 void
715 format_undomap(volume_info_t root_vol, int64_t *undo_buffer_size)
716 {
717         hammer_off_t undo_limit;
718         hammer_blockmap_t blockmap;
719         hammer_volume_ondisk_t ondisk;
720         buffer_info_t buffer = NULL;
721         hammer_off_t scan;
722         int n;
723         int limit_index;
724         uint32_t seqno;
725
726         /* Only root volume needs formatting */
727         assert(root_vol->vol_no == HAMMER_ROOT_VOLNO);
728         ondisk = root_vol->ondisk;
729
730         /*
731          * Size the undo buffer in multiples of HAMMER_BIGBLOCK_SIZE,
732          * up to HAMMER_MAX_UNDO_BIGBLOCKS big-blocks.
733          * Size to approximately 0.1% of the disk.
734          *
735          * The minimum UNDO fifo size is 512MB, or approximately 1% of
736          * the recommended 50G disk.
737          *
738          * Changing this minimum is rather dangerous as complex filesystem
739          * operations can cause the UNDO FIFO to fill up otherwise.
740          */
741         undo_limit = *undo_buffer_size;
742         if (undo_limit == 0) {
743                 undo_limit = HAMMER_VOL_BUF_SIZE(ondisk) / 1000;
744                 if (undo_limit < HAMMER_BIGBLOCK_SIZE * HAMMER_MIN_UNDO_BIGBLOCKS)
745                         undo_limit = HAMMER_BIGBLOCK_SIZE * HAMMER_MIN_UNDO_BIGBLOCKS;
746         }
747         undo_limit = HAMMER_BIGBLOCK_DOALIGN(undo_limit);
748         if (undo_limit < HAMMER_BIGBLOCK_SIZE)
749                 undo_limit = HAMMER_BIGBLOCK_SIZE;
750         if (undo_limit > HAMMER_BIGBLOCK_SIZE * HAMMER_MAX_UNDO_BIGBLOCKS)
751                 undo_limit = HAMMER_BIGBLOCK_SIZE * HAMMER_MAX_UNDO_BIGBLOCKS;
752         *undo_buffer_size = undo_limit;
753
754         blockmap = &ondisk->vol0_blockmap[HAMMER_ZONE_UNDO_INDEX];
755         bzero(blockmap, sizeof(*blockmap));
756         blockmap->phys_offset = HAMMER_BLOCKMAP_UNAVAIL;
757         blockmap->first_offset = HAMMER_ENCODE_UNDO(0);
758         blockmap->next_offset = blockmap->first_offset;
759         blockmap->alloc_offset = HAMMER_ENCODE_UNDO(undo_limit);
760         hammer_crc_set_blockmap(HammerVersion, blockmap);
761
762         limit_index = undo_limit / HAMMER_BIGBLOCK_SIZE;
763         assert(limit_index <= HAMMER_MAX_UNDO_BIGBLOCKS);
764
765         for (n = 0; n < limit_index; ++n)
766                 ondisk->vol0_undo_array[n] = alloc_undo_bigblock(root_vol);
767         while (n < HAMMER_MAX_UNDO_BIGBLOCKS)
768                 ondisk->vol0_undo_array[n++] = HAMMER_BLOCKMAP_UNAVAIL;
769
770         /*
771          * Pre-initialize the UNDO blocks (HAMMER version 4+)
772          */
773         printf("initializing the undo map (%jd MB)\n",
774                 (intmax_t)HAMMER_OFF_LONG_ENCODE(blockmap->alloc_offset) /
775                 (1024 * 1024));
776
777         scan = blockmap->first_offset;
778         seqno = 0;
779
780         while (scan < blockmap->alloc_offset) {
781                 hammer_fifo_head_t head;
782                 hammer_fifo_tail_t tail;
783                 int isnew;
784                 int bytes = HAMMER_UNDO_ALIGN;
785
786                 isnew = ((scan & HAMMER_BUFMASK64) == 0);
787                 head = get_buffer_data(scan, &buffer, isnew);
788                 buffer->cache.modified = 1;
789                 tail = (void *)((char *)head + bytes - sizeof(*tail));
790
791                 bzero(head, bytes);
792                 head->hdr_signature = HAMMER_HEAD_SIGNATURE;
793                 head->hdr_type = HAMMER_HEAD_TYPE_DUMMY;
794                 head->hdr_size = bytes;
795                 head->hdr_seq = seqno++;
796
797                 tail->tail_signature = HAMMER_TAIL_SIGNATURE;
798                 tail->tail_type = HAMMER_HEAD_TYPE_DUMMY;
799                 tail->tail_size = bytes;
800
801                 hammer_crc_set_fifo_head(HammerVersion, head, bytes);
802
803                 scan += bytes;
804         }
805         rel_buffer(buffer);
806 }
807
/*
 * Printable label for each zone index (0-15); indices without a label
 * map to the empty string.
 */
const char *zone_labels[] = {
	[0]  = "",
	[1]  = "raw_volume",
	[2]  = "raw_buffer",
	[3]  = "undo",
	[4]  = "freemap",
	[5]  = "",
	[6]  = "",
	[7]  = "",
	[8]  = "btree",
	[9]  = "meta",
	[10] = "large_data",
	[11] = "small_data",
	[12] = "",
	[13] = "",
	[14] = "",
	[15] = "unavail",
};
826
827 void
828 print_blockmap(const volume_info_t volume)
829 {
830         hammer_blockmap_t blockmap;
831         hammer_volume_ondisk_t ondisk;
832         int64_t size, used;
833         int i;
834 #define INDENT ""
835
836         ondisk = volume->ondisk;
837         printf(INDENT"vol_label\t%s\n", ondisk->vol_label);
838         printf(INDENT"vol_count\t%d\n", ondisk->vol_count);
839         printf(INDENT"vol_bot_beg\t%s\n", sizetostr(ondisk->vol_bot_beg));
840         printf(INDENT"vol_mem_beg\t%s\n", sizetostr(ondisk->vol_mem_beg));
841         printf(INDENT"vol_buf_beg\t%s\n", sizetostr(ondisk->vol_buf_beg));
842         printf(INDENT"vol_buf_end\t%s\n", sizetostr(ondisk->vol_buf_end));
843         printf(INDENT"vol0_next_tid\t%016jx\n",
844                (uintmax_t)ondisk->vol0_next_tid);
845
846         blockmap = &ondisk->vol0_blockmap[HAMMER_ZONE_UNDO_INDEX];
847         size = HAMMER_OFF_LONG_ENCODE(blockmap->alloc_offset);
848         if (blockmap->first_offset <= blockmap->next_offset)
849                 used = blockmap->next_offset - blockmap->first_offset;
850         else
851                 used = blockmap->alloc_offset - blockmap->first_offset +
852                         HAMMER_OFF_LONG_ENCODE(blockmap->next_offset);
853         printf(INDENT"undo_size\t%s\n", sizetostr(size));
854         printf(INDENT"undo_used\t%s\n", sizetostr(used));
855
856         printf(INDENT"zone #             "
857                "phys             first            next             alloc\n");
858         for (i = 0; i < HAMMER_MAX_ZONES; i++) {
859                 blockmap = &ondisk->vol0_blockmap[i];
860                 printf(INDENT"zone %-2d %-10s %016jx %016jx %016jx %016jx\n",
861                         i, zone_labels[i],
862                         (uintmax_t)blockmap->phys_offset,
863                         (uintmax_t)blockmap->first_offset,
864                         (uintmax_t)blockmap->next_offset,
865                         (uintmax_t)blockmap->alloc_offset);
866         }
867 }
868
869 /*
870  * Flush various tracking structures to disk
871  */
872 void
873 flush_all_volumes(void)
874 {
875         volume_info_t volume;
876
877         TAILQ_FOREACH(volume, &VolList, entry)
878                 flush_volume(volume);
879 }
880
881 void
882 flush_volume(volume_info_t volume)
883 {
884         buffer_info_t buffer;
885         int i;
886
887         for (i = 0; i < HAMMER_BUFLISTS; ++i) {
888                 TAILQ_FOREACH(buffer, &volume->buffer_lists[i], entry)
889                         flush_buffer(buffer);
890         }
891         if (writehammervol(volume) == -1) {
892                 err(1, "Write volume %d (%s)", volume->vol_no, volume->name);
893                 /* not reached */
894         }
895 }
896
897 void
898 flush_buffer(buffer_info_t buffer)
899 {
900         volume_info_t volume;
901
902         volume = buffer->volume;
903         if (writehammerbuf(buffer) == -1) {
904                 err(1, "Write volume %d (%s)", volume->vol_no, volume->name);
905                 /* not reached */
906         }
907         buffer->cache.modified = 0;
908 }
909
910 /*
911  * Core I/O operations
912  */
913 static int
914 __read(volume_info_t volume, void *data, int64_t offset, int size)
915 {
916         ssize_t n;
917
918         n = pread(volume->fd, data, size, offset);
919         if (n != size)
920                 return(-1);
921         return(0);
922 }
923
924 static __inline int
925 readhammervol(volume_info_t volume)
926 {
927         return(__read(volume, volume->ondisk, 0, HAMMER_BUFSIZE));
928 }
929
930 static __inline int
931 readhammerbuf(buffer_info_t buffer)
932 {
933         return(__read(buffer->volume, buffer->ondisk, buffer->raw_offset,
934                 HAMMER_BUFSIZE));
935 }
936
937 static int
938 __write(volume_info_t volume, const void *data, int64_t offset, int size)
939 {
940         ssize_t n;
941
942         if (volume->rdonly)
943                 return(0);
944
945         n = pwrite(volume->fd, data, size, offset);
946         if (n != size)
947                 return(-1);
948         return(0);
949 }
950
951 static __inline int
952 writehammervol(volume_info_t volume)
953 {
954         return(__write(volume, volume->ondisk, 0, HAMMER_BUFSIZE));
955 }
956
957 static __inline int
958 writehammerbuf(buffer_info_t buffer)
959 {
960         return(__write(buffer->volume, buffer->ondisk, buffer->raw_offset,
961                 HAMMER_BUFSIZE));
962 }
963
964 int64_t init_boot_area_size(int64_t value, off_t avg_vol_size)
965 {
966         if (value == 0) {
967                 value = HAMMER_BOOT_NOMBYTES;
968                 while (value > avg_vol_size / HAMMER_MAX_VOLUMES)
969                         value >>= 1;
970         }
971
972         if (value < HAMMER_BOOT_MINBYTES)
973                 value = HAMMER_BOOT_MINBYTES;
974         else if (value > HAMMER_BOOT_MAXBYTES)
975                 value = HAMMER_BOOT_MAXBYTES;
976
977         return(value);
978 }
979
980 int64_t init_memory_log_size(int64_t value, off_t avg_vol_size)
981 {
982         if (value == 0) {
983                 value = HAMMER_MEM_NOMBYTES;
984                 while (value > avg_vol_size / HAMMER_MAX_VOLUMES)
985                         value >>= 1;
986         }
987
988         if (value < HAMMER_MEM_MINBYTES)
989                 value = HAMMER_MEM_MINBYTES;
990         else if (value > HAMMER_MEM_MAXBYTES)
991                 value = HAMMER_MEM_MAXBYTES;
992
993         return(value);
994 }