sbin/hammer: Test volume version in __verify_volume()
[dragonfly.git] / sbin / hammer / ondisk.c
1 /*
2  * Copyright (c) 2007 The DragonFly Project.  All rights reserved.
3  *
4  * This code is derived from software contributed to The DragonFly Project
5  * by Matthew Dillon <dillon@backplane.com>
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions
9  * are met:
10  *
11  * 1. Redistributions of source code must retain the above copyright
12  *    notice, this list of conditions and the following disclaimer.
13  * 2. Redistributions in binary form must reproduce the above copyright
14  *    notice, this list of conditions and the following disclaimer in
15  *    the documentation and/or other materials provided with the
16  *    distribution.
17  * 3. Neither the name of The DragonFly Project nor the names of its
18  *    contributors may be used to endorse or promote products derived
19  *    from this software without specific, prior written permission.
20  *
21  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
22  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
23  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
24  * FOR A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE
25  * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
26  * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING,
27  * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
28  * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
29  * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
30  * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
31  * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
32  * SUCH DAMAGE.
33  */
34
35 #include <sys/diskslice.h>
36 #include <sys/diskmbr.h>
37
38 #include "hammer_util.h"
39
40 static void check_volume(volume_info_t volume);
41 static void get_buffer_readahead(buffer_info_t base);
42 static __inline int readhammervol(volume_info_t volume);
43 static __inline int readhammerbuf(buffer_info_t buffer);
44 static __inline int writehammervol(volume_info_t volume);
45 static __inline int writehammerbuf(buffer_info_t buffer);
46
47 uuid_t Hammer_FSType;
48 uuid_t Hammer_FSId;
49 int UseReadBehind = -4;
50 int UseReadAhead = 4;
51 int DebugOpt;
52 uint32_t HammerVersion = -1;
53
54 TAILQ_HEAD(volume_list, volume_info);
55 static struct volume_list VolList = TAILQ_HEAD_INITIALIZER(VolList);
56 static int valid_hammer_volumes;
57
58 static __inline
59 int
60 buffer_hash(hammer_off_t zone2_offset)
61 {
62         int hi;
63
64         hi = (int)(zone2_offset / HAMMER_BUFSIZE) & HAMMER_BUFLISTMASK;
65         return(hi);
66 }
67
68 static buffer_info_t
69 find_buffer(hammer_off_t zone2_offset)
70 {
71         volume_info_t volume;
72         buffer_info_t buffer;
73         int hi;
74
75         volume = get_volume(HAMMER_VOL_DECODE(zone2_offset));
76         assert(volume);
77
78         hi = buffer_hash(zone2_offset);
79         TAILQ_FOREACH(buffer, &volume->buffer_lists[hi], entry)
80                 if (buffer->zone2_offset == zone2_offset)
81                         return(buffer);
82         return(NULL);
83 }
84
85 static
86 volume_info_t
87 __alloc_volume(const char *volname, int oflags)
88 {
89         volume_info_t volume;
90         int i;
91
92         volume = calloc(1, sizeof(*volume));
93         volume->vol_no = -1;
94         volume->rdonly = (oflags == O_RDONLY);
95         volume->name = strdup(volname);
96         volume->fd = open(volume->name, oflags);
97         if (volume->fd < 0) {
98                 err(1, "alloc_volume: Failed to open %s", volume->name);
99                 /* not reached */
100         }
101         check_volume(volume);
102
103         volume->ondisk = calloc(1, HAMMER_BUFSIZE);
104
105         for (i = 0; i < HAMMER_BUFLISTS; ++i)
106                 TAILQ_INIT(&volume->buffer_lists[i]);
107
108         return(volume);
109 }
110
111 static void
112 __add_volume(const volume_info_t volume)
113 {
114         volume_info_t scan;
115         struct stat st1, st2;
116
117         if (fstat(volume->fd, &st1) != 0) {
118                 errx(1, "add_volume: %s: Failed to stat", volume->name);
119                 /* not reached */
120         }
121
122         TAILQ_FOREACH(scan, &VolList, entry) {
123                 if (scan->vol_no == volume->vol_no) {
124                         errx(1, "add_volume: %s: Duplicate volume number %d "
125                                 "against %s",
126                                 volume->name, volume->vol_no, scan->name);
127                         /* not reached */
128                 }
129                 if (fstat(scan->fd, &st2) != 0) {
130                         errx(1, "add_volume: %s: Failed to stat %s",
131                                 volume->name, scan->name);
132                         /* not reached */
133                 }
134                 if ((st1.st_ino == st2.st_ino) && (st1.st_dev == st2.st_dev)) {
135                         errx(1, "add_volume: %s: Specified more than once",
136                                 volume->name);
137                         /* not reached */
138                 }
139         }
140
141         TAILQ_INSERT_TAIL(&VolList, volume, entry);
142 }
143
144 static void
145 __verify_volume(const volume_info_t volume)
146 {
147         hammer_volume_ondisk_t ondisk = volume->ondisk;
148
149         if (ondisk->vol_signature != HAMMER_FSBUF_VOLUME) {
150                 errx(1, "verify_volume: Invalid volume signature %016jx",
151                         ondisk->vol_signature);
152                 /* not reached */
153         }
154         if (ondisk->vol_rootvol != HAMMER_ROOT_VOLNO) {
155                 errx(1, "verify_volume: Invalid root volume# %d",
156                         ondisk->vol_rootvol);
157                 /* not reached */
158         }
159         if (bcmp(&Hammer_FSType, &ondisk->vol_fstype, sizeof(Hammer_FSType))) {
160                 errx(1, "verify_volume: %s: Header does not indicate "
161                         "that this is a HAMMER volume", volume->name);
162                 /* not reached */
163         }
164         if (bcmp(&Hammer_FSId, &ondisk->vol_fsid, sizeof(Hammer_FSId))) {
165                 errx(1, "verify_volume: %s: FSId does not match other volumes!",
166                         volume->name);
167                 /* not reached */
168         }
169         if (ondisk->vol_version < HAMMER_VOL_VERSION_MIN ||
170             ondisk->vol_version >= HAMMER_VOL_VERSION_WIP) {
171                 errx(1, "verify_volume: %s: Invalid volume version %u",
172                         volume->name, ondisk->vol_version);
173                 /* not reached */
174         }
175 }
176
177 /*
178  * Initialize a volume structure and ondisk vol_no field.
179  */
180 volume_info_t
181 init_volume(const char *filename, int oflags, int32_t vol_no)
182 {
183         volume_info_t volume;
184
185         volume = __alloc_volume(filename, oflags);
186         volume->vol_no = volume->ondisk->vol_no = vol_no;
187
188         __add_volume(volume);
189
190         return(volume);
191 }
192
193 /*
194  * Initialize a volume structure and read ondisk volume header.
195  */
196 volume_info_t
197 load_volume(const char *filename, int oflags, int verify_volume)
198 {
199         volume_info_t volume;
200         int n;
201
202         volume = __alloc_volume(filename, oflags);
203
204         n = readhammervol(volume);
205         if (n == -1) {
206                 err(1, "load_volume: %s: Read failed at offset 0",
207                     volume->name);
208                 /* not reached */
209         }
210         volume->vol_no = volume->ondisk->vol_no;
211         HammerVersion = volume->ondisk->vol_version;
212
213         if (valid_hammer_volumes++ == 0)
214                 Hammer_FSId = volume->ondisk->vol_fsid;
215         if (verify_volume)
216                 __verify_volume(volume);
217
218         __add_volume(volume);
219
220         return(volume);
221 }
222
223 /*
224  * Check basic volume characteristics.
225  */
226 static void
227 check_volume(volume_info_t volume)
228 {
229         struct partinfo pinfo;
230         struct stat st;
231
232         /*
233          * Allow the formatting of block devices or regular files
234          */
235         if (ioctl(volume->fd, DIOCGPART, &pinfo) < 0) {
236                 if (fstat(volume->fd, &st) < 0) {
237                         err(1, "Unable to stat %s", volume->name);
238                         /* not reached */
239                 }
240                 if (S_ISREG(st.st_mode)) {
241                         volume->size = st.st_size;
242                         volume->type = "REGFILE";
243                 } else {
244                         errx(1, "Unsupported file type for %s", volume->name);
245                         /* not reached */
246                 }
247         } else {
248                 /*
249                  * When formatting a block device as a HAMMER volume the
250                  * sector size must be compatible.  HAMMER uses 16384 byte
251                  * filesystem buffers.
252                  */
253                 if (pinfo.reserved_blocks) {
254                         errx(1, "HAMMER cannot be placed in a partition "
255                                 "which overlaps the disklabel or MBR");
256                         /* not reached */
257                 }
258                 if (pinfo.media_blksize > HAMMER_BUFSIZE ||
259                     HAMMER_BUFSIZE % pinfo.media_blksize) {
260                         errx(1, "A media sector size of %d is not supported",
261                              pinfo.media_blksize);
262                         /* not reached */
263                 }
264
265                 volume->size = pinfo.media_size;
266                 volume->device_offset = pinfo.media_offset;
267                 volume->type = "DEVICE";
268         }
269 }
270
271 int
272 is_regfile(const volume_info_t volume)
273 {
274         return(strcmp(volume->type, "REGFILE") ? 0 : 1);
275 }
276
277 void
278 assert_volume_offset(const volume_info_t volume)
279 {
280         assert(hammer_is_zone_raw_buffer(volume->vol_free_off));
281         assert(hammer_is_zone_raw_buffer(volume->vol_free_end));
282         if (volume->vol_free_off >= volume->vol_free_end) {
283                 errx(1, "Ran out of room, filesystem too small");
284                 /* not reached */
285         }
286 }
287
288 volume_info_t
289 get_volume(int32_t vol_no)
290 {
291         volume_info_t volume;
292
293         TAILQ_FOREACH(volume, &VolList, entry) {
294                 if (volume->vol_no == vol_no)
295                         break;
296         }
297
298         return(volume);
299 }
300
301 volume_info_t
302 get_root_volume(void)
303 {
304         return(get_volume(HAMMER_ROOT_VOLNO));
305 }
306
307 static hammer_off_t
308 __blockmap_xlate_to_zone2(hammer_off_t buf_offset)
309 {
310         hammer_off_t zone2_offset;
311         int error = 0;
312
313         if (hammer_is_zone_raw_buffer(buf_offset))
314                 zone2_offset = buf_offset;
315         else
316                 zone2_offset = blockmap_lookup(buf_offset, &error);
317
318         if (error)
319                 return(HAMMER_OFF_BAD);
320         assert(hammer_is_zone_raw_buffer(zone2_offset));
321
322         return(zone2_offset);
323 }
324
325 static buffer_info_t
326 __alloc_buffer(hammer_off_t zone2_offset, int isnew)
327 {
328         volume_info_t volume;
329         buffer_info_t buffer;
330         int hi;
331
332         volume = get_volume(HAMMER_VOL_DECODE(zone2_offset));
333         assert(volume != NULL);
334
335         buffer = calloc(1, sizeof(*buffer));
336         buffer->zone2_offset = zone2_offset;
337         buffer->raw_offset = hammer_xlate_to_phys(volume->ondisk, zone2_offset);
338         buffer->volume = volume;
339         buffer->ondisk = calloc(1, HAMMER_BUFSIZE);
340
341         if (isnew <= 0) {
342                 if (readhammerbuf(buffer) == -1) {
343                         err(1, "Failed to read %s:%016jx at %016jx",
344                             volume->name,
345                             (intmax_t)buffer->zone2_offset,
346                             (intmax_t)buffer->raw_offset);
347                         /* not reached */
348                 }
349         }
350
351         hi = buffer_hash(zone2_offset);
352         TAILQ_INSERT_TAIL(&volume->buffer_lists[hi], buffer, entry);
353         hammer_cache_add(&buffer->cache);
354
355         return(buffer);
356 }
357
358 /*
359  * Acquire the 16KB buffer for specified zone offset.
360  */
361 static buffer_info_t
362 get_buffer(hammer_off_t buf_offset, int isnew)
363 {
364         buffer_info_t buffer;
365         hammer_off_t zone2_offset;
366         int dora = 0;
367
368         zone2_offset = __blockmap_xlate_to_zone2(buf_offset);
369         if (zone2_offset == HAMMER_OFF_BAD)
370                 return(NULL);
371
372         zone2_offset &= ~HAMMER_BUFMASK64;
373         buffer = find_buffer(zone2_offset);
374
375         if (buffer == NULL) {
376                 buffer = __alloc_buffer(zone2_offset, isnew);
377                 dora = (isnew == 0);
378         } else {
379                 assert(isnew != -1);
380                 hammer_cache_used(&buffer->cache);
381         }
382         assert(buffer->ondisk != NULL);
383
384         ++buffer->cache.refs;
385         hammer_cache_flush();
386
387         if (isnew > 0) {
388                 assert(buffer->cache.modified == 0);
389                 bzero(buffer->ondisk, HAMMER_BUFSIZE);
390                 buffer->cache.modified = 1;
391         }
392         if (dora)
393                 get_buffer_readahead(buffer);
394         return(buffer);
395 }
396
397 static void
398 get_buffer_readahead(const buffer_info_t base)
399 {
400         buffer_info_t buffer;
401         volume_info_t volume;
402         hammer_off_t zone2_offset;
403         int64_t raw_offset;
404         int ri = UseReadBehind;
405         int re = UseReadAhead;
406
407         raw_offset = base->raw_offset + ri * HAMMER_BUFSIZE;
408         volume = base->volume;
409
410         while (ri < re) {
411                 if (raw_offset >= volume->ondisk->vol_buf_end)
412                         break;
413                 if (raw_offset < volume->ondisk->vol_buf_beg || ri == 0) {
414                         ++ri;
415                         raw_offset += HAMMER_BUFSIZE;
416                         continue;
417                 }
418                 zone2_offset = HAMMER_ENCODE_RAW_BUFFER(volume->vol_no,
419                         raw_offset - volume->ondisk->vol_buf_beg);
420                 buffer = find_buffer(zone2_offset);
421                 if (buffer == NULL) {
422                         /* call with -1 to prevent another readahead */
423                         buffer = get_buffer(zone2_offset, -1);
424                         rel_buffer(buffer);
425                 }
426                 ++ri;
427                 raw_offset += HAMMER_BUFSIZE;
428         }
429 }
430
431 void
432 rel_buffer(buffer_info_t buffer)
433 {
434         volume_info_t volume;
435         int hi;
436
437         if (buffer == NULL)
438                 return;
439         assert(buffer->cache.refs > 0);
440         if (--buffer->cache.refs == 0) {
441                 if (buffer->cache.delete) {
442                         hi = buffer_hash(buffer->zone2_offset);
443                         volume = buffer->volume;
444                         if (buffer->cache.modified)
445                                 flush_buffer(buffer);
446                         TAILQ_REMOVE(&volume->buffer_lists[hi], buffer, entry);
447                         hammer_cache_del(&buffer->cache);
448                         free(buffer->ondisk);
449                         free(buffer);
450                 }
451         }
452 }
453
454 /*
455  * Retrieve a pointer to a buffer data given a buffer offset.  The underlying
456  * bufferp is freed if isnew or the offset is out of range of the cached data.
457  * If bufferp is freed a referenced buffer is loaded into it.
458  */
459 void *
460 get_buffer_data(hammer_off_t buf_offset, buffer_info_t *bufferp, int isnew)
461 {
462         hammer_off_t xor;
463
464         if (*bufferp != NULL) {
465                 /* XXX xor is always non zero for indirect zones */
466                 xor = HAMMER_OFF_LONG_ENCODE(buf_offset) ^
467                       HAMMER_OFF_LONG_ENCODE((*bufferp)->zone2_offset);
468                 if (isnew > 0 || (xor & ~HAMMER_BUFMASK64)) {
469                         rel_buffer(*bufferp);
470                         *bufferp = NULL;
471                 }
472         }
473
474         if (*bufferp == NULL) {
475                 *bufferp = get_buffer(buf_offset, isnew);
476                 if (*bufferp == NULL)
477                         return(NULL);
478         }
479
480         return(((char *)(*bufferp)->ondisk) +
481                 ((int32_t)buf_offset & HAMMER_BUFMASK));
482 }
483
484 /*
485  * Allocate HAMMER elements - B-Tree nodes
486  */
487 hammer_node_ondisk_t
488 alloc_btree_node(hammer_off_t *offp, buffer_info_t *data_bufferp)
489 {
490         hammer_node_ondisk_t node;
491
492         node = alloc_blockmap(HAMMER_ZONE_BTREE_INDEX, sizeof(*node),
493                               offp, data_bufferp);
494         bzero(node, sizeof(*node));
495         return(node);
496 }
497
498 /*
499  * Allocate HAMMER elements - meta data (inode, direntry, PFS, etc)
500  */
501 void *
502 alloc_meta_element(hammer_off_t *offp, int32_t data_len,
503                    buffer_info_t *data_bufferp)
504 {
505         void *data;
506
507         data = alloc_blockmap(HAMMER_ZONE_META_INDEX, data_len,
508                               offp, data_bufferp);
509         bzero(data, data_len);
510         return(data);
511 }
512
513 /*
514  * Format a new blockmap.  This is mostly a degenerate case because
515  * all allocations are now actually done from the freemap.
516  */
517 void
518 format_blockmap(volume_info_t root_vol, int zone, hammer_off_t offset)
519 {
520         hammer_blockmap_t blockmap;
521         hammer_off_t zone_base;
522
523         /* Only root volume needs formatting */
524         assert(root_vol->vol_no == HAMMER_ROOT_VOLNO);
525
526         assert(hammer_is_index_record(zone));
527
528         blockmap = &root_vol->ondisk->vol0_blockmap[zone];
529         zone_base = HAMMER_ZONE_ENCODE(zone, offset);
530
531         bzero(blockmap, sizeof(*blockmap));
532         blockmap->phys_offset = 0;
533         blockmap->first_offset = zone_base;
534         blockmap->next_offset = zone_base;
535         blockmap->alloc_offset = HAMMER_ENCODE(zone, 255, -1);
536         hammer_crc_set_blockmap(HammerVersion, blockmap);
537 }
538
539 /*
540  * Format a new freemap.  Set all layer1 entries to UNAVAIL.  The initialize
541  * code will load each volume's freemap.
542  */
543 void
544 format_freemap(volume_info_t root_vol)
545 {
546         buffer_info_t buffer = NULL;
547         hammer_off_t layer1_offset;
548         hammer_blockmap_t blockmap;
549         hammer_blockmap_layer1_t layer1;
550         int i, isnew;
551
552         /* Only root volume needs formatting */
553         assert(root_vol->vol_no == HAMMER_ROOT_VOLNO);
554
555         layer1_offset = bootstrap_bigblock(root_vol);
556         for (i = 0; i < HAMMER_BIGBLOCK_SIZE; i += sizeof(*layer1)) {
557                 isnew = ((i % HAMMER_BUFSIZE) == 0);
558                 layer1 = get_buffer_data(layer1_offset + i, &buffer, isnew);
559                 bzero(layer1, sizeof(*layer1));
560                 layer1->phys_offset = HAMMER_BLOCKMAP_UNAVAIL;
561                 layer1->blocks_free = 0;
562                 hammer_crc_set_layer1(HammerVersion, layer1);
563         }
564         assert(i == HAMMER_BIGBLOCK_SIZE);
565         rel_buffer(buffer);
566
567         blockmap = &root_vol->ondisk->vol0_blockmap[HAMMER_ZONE_FREEMAP_INDEX];
568         bzero(blockmap, sizeof(*blockmap));
569         blockmap->phys_offset = layer1_offset;
570         blockmap->first_offset = 0;
571         blockmap->next_offset = HAMMER_ENCODE_RAW_BUFFER(0, 0);
572         blockmap->alloc_offset = HAMMER_ENCODE_RAW_BUFFER(255, -1);
573         hammer_crc_set_blockmap(HammerVersion, blockmap);
574 }
575
576 /*
577  * Load the volume's remaining free space into the freemap.
578  *
579  * Returns the number of big-blocks available.
580  */
581 int64_t
582 initialize_freemap(volume_info_t volume)
583 {
584         volume_info_t root_vol;
585         buffer_info_t buffer1 = NULL;
586         buffer_info_t buffer2 = NULL;
587         hammer_blockmap_layer1_t layer1;
588         hammer_blockmap_layer2_t layer2;
589         hammer_off_t layer1_offset;
590         hammer_off_t layer2_offset;
591         hammer_off_t phys_offset;
592         hammer_off_t block_offset;
593         hammer_off_t aligned_vol_free_end;
594         hammer_blockmap_t freemap;
595         int64_t count = 0;
596         int64_t layer1_count = 0;
597
598         root_vol = get_root_volume();
599
600         assert_volume_offset(volume);
601         aligned_vol_free_end = HAMMER_BLOCKMAP_LAYER2_DOALIGN(volume->vol_free_end);
602
603         printf("initialize freemap volume %d\n", volume->vol_no);
604
605         /*
606          * Initialize the freemap.  First preallocate the big-blocks required
607          * to implement layer2.   This preallocation is a bootstrap allocation
608          * using blocks from the target volume.
609          */
610         freemap = &root_vol->ondisk->vol0_blockmap[HAMMER_ZONE_FREEMAP_INDEX];
611
612         for (phys_offset = HAMMER_ENCODE_RAW_BUFFER(volume->vol_no, 0);
613              phys_offset < aligned_vol_free_end;
614              phys_offset += HAMMER_BLOCKMAP_LAYER2) {
615                 layer1_offset = freemap->phys_offset +
616                                 HAMMER_BLOCKMAP_LAYER1_OFFSET(phys_offset);
617                 layer1 = get_buffer_data(layer1_offset, &buffer1, 0);
618                 if (layer1->phys_offset == HAMMER_BLOCKMAP_UNAVAIL) {
619                         layer1->phys_offset = bootstrap_bigblock(volume);
620                         layer1->blocks_free = 0;
621                         buffer1->cache.modified = 1;
622                         hammer_crc_set_layer1(HammerVersion, layer1);
623                 }
624         }
625
626         /*
627          * Now fill everything in.
628          */
629         for (phys_offset = HAMMER_ENCODE_RAW_BUFFER(volume->vol_no, 0);
630              phys_offset < aligned_vol_free_end;
631              phys_offset += HAMMER_BLOCKMAP_LAYER2) {
632                 layer1_count = 0;
633                 layer1_offset = freemap->phys_offset +
634                                 HAMMER_BLOCKMAP_LAYER1_OFFSET(phys_offset);
635                 layer1 = get_buffer_data(layer1_offset, &buffer1, 0);
636                 assert(layer1->phys_offset != HAMMER_BLOCKMAP_UNAVAIL);
637
638                 for (block_offset = 0;
639                      block_offset < HAMMER_BLOCKMAP_LAYER2;
640                      block_offset += HAMMER_BIGBLOCK_SIZE) {
641                         layer2_offset = layer1->phys_offset +
642                                         HAMMER_BLOCKMAP_LAYER2_OFFSET(block_offset);
643                         layer2 = get_buffer_data(layer2_offset, &buffer2, 0);
644                         bzero(layer2, sizeof(*layer2));
645
646                         if (phys_offset + block_offset < volume->vol_free_off) {
647                                 /*
648                                  * Big-blocks already allocated as part
649                                  * of the freemap bootstrap.
650                                  */
651                                 layer2->zone = HAMMER_ZONE_FREEMAP_INDEX;
652                                 layer2->append_off = HAMMER_BIGBLOCK_SIZE;
653                                 layer2->bytes_free = 0;
654                         } else if (phys_offset + block_offset < volume->vol_free_end) {
655                                 layer2->zone = 0;
656                                 layer2->append_off = 0;
657                                 layer2->bytes_free = HAMMER_BIGBLOCK_SIZE;
658                                 ++count;
659                                 ++layer1_count;
660                         } else {
661                                 layer2->zone = HAMMER_ZONE_UNAVAIL_INDEX;
662                                 layer2->append_off = HAMMER_BIGBLOCK_SIZE;
663                                 layer2->bytes_free = 0;
664                         }
665                         hammer_crc_set_layer2(HammerVersion, layer2);
666                         buffer2->cache.modified = 1;
667                 }
668
669                 layer1->blocks_free += layer1_count;
670                 hammer_crc_set_layer1(HammerVersion, layer1);
671                 buffer1->cache.modified = 1;
672         }
673
674         rel_buffer(buffer1);
675         rel_buffer(buffer2);
676         return(count);
677 }
678
679 /*
680  * Returns the number of big-blocks available for filesystem data and undos
681  * without formatting.
682  */
683 int64_t
684 count_freemap(const volume_info_t volume)
685 {
686         hammer_off_t phys_offset;
687         hammer_off_t vol_free_off;
688         hammer_off_t aligned_vol_free_end;
689         int64_t count = 0;
690
691         vol_free_off = HAMMER_ENCODE_RAW_BUFFER(volume->vol_no, 0);
692
693         assert_volume_offset(volume);
694         aligned_vol_free_end = HAMMER_BLOCKMAP_LAYER2_DOALIGN(volume->vol_free_end);
695
696         if (volume->vol_no == HAMMER_ROOT_VOLNO)
697                 vol_free_off += HAMMER_BIGBLOCK_SIZE;
698
699         for (phys_offset = HAMMER_ENCODE_RAW_BUFFER(volume->vol_no, 0);
700              phys_offset < aligned_vol_free_end;
701              phys_offset += HAMMER_BLOCKMAP_LAYER2) {
702                 vol_free_off += HAMMER_BIGBLOCK_SIZE;
703         }
704
705         for (phys_offset = HAMMER_ENCODE_RAW_BUFFER(volume->vol_no, 0);
706              phys_offset < aligned_vol_free_end;
707              phys_offset += HAMMER_BIGBLOCK_SIZE) {
708                 if (phys_offset < vol_free_off)
709                         ;
710                 else if (phys_offset < volume->vol_free_end)
711                         ++count;
712         }
713
714         return(count);
715 }
716
717 /*
718  * Format the undomap for the root volume.
719  */
720 void
721 format_undomap(volume_info_t root_vol, int64_t *undo_buffer_size)
722 {
723         hammer_off_t undo_limit;
724         hammer_blockmap_t blockmap;
725         hammer_volume_ondisk_t ondisk;
726         buffer_info_t buffer = NULL;
727         hammer_off_t scan;
728         int n;
729         int limit_index;
730         uint32_t seqno;
731
732         /* Only root volume needs formatting */
733         assert(root_vol->vol_no == HAMMER_ROOT_VOLNO);
734         ondisk = root_vol->ondisk;
735
736         /*
737          * Size the undo buffer in multiples of HAMMER_BIGBLOCK_SIZE,
738          * up to HAMMER_MAX_UNDO_BIGBLOCKS big-blocks.
739          * Size to approximately 0.1% of the disk.
740          *
741          * The minimum UNDO fifo size is 512MB, or approximately 1% of
742          * the recommended 50G disk.
743          *
744          * Changing this minimum is rather dangerous as complex filesystem
745          * operations can cause the UNDO FIFO to fill up otherwise.
746          */
747         undo_limit = *undo_buffer_size;
748         if (undo_limit == 0) {
749                 undo_limit = HAMMER_VOL_BUF_SIZE(ondisk) / 1000;
750                 if (undo_limit < HAMMER_BIGBLOCK_SIZE * HAMMER_MIN_UNDO_BIGBLOCKS)
751                         undo_limit = HAMMER_BIGBLOCK_SIZE * HAMMER_MIN_UNDO_BIGBLOCKS;
752         }
753         undo_limit = HAMMER_BIGBLOCK_DOALIGN(undo_limit);
754         if (undo_limit < HAMMER_BIGBLOCK_SIZE)
755                 undo_limit = HAMMER_BIGBLOCK_SIZE;
756         if (undo_limit > HAMMER_BIGBLOCK_SIZE * HAMMER_MAX_UNDO_BIGBLOCKS)
757                 undo_limit = HAMMER_BIGBLOCK_SIZE * HAMMER_MAX_UNDO_BIGBLOCKS;
758         *undo_buffer_size = undo_limit;
759
760         blockmap = &ondisk->vol0_blockmap[HAMMER_ZONE_UNDO_INDEX];
761         bzero(blockmap, sizeof(*blockmap));
762         blockmap->phys_offset = HAMMER_BLOCKMAP_UNAVAIL;
763         blockmap->first_offset = HAMMER_ENCODE_UNDO(0);
764         blockmap->next_offset = blockmap->first_offset;
765         blockmap->alloc_offset = HAMMER_ENCODE_UNDO(undo_limit);
766         hammer_crc_set_blockmap(HammerVersion, blockmap);
767
768         limit_index = undo_limit / HAMMER_BIGBLOCK_SIZE;
769         assert(limit_index <= HAMMER_MAX_UNDO_BIGBLOCKS);
770
771         for (n = 0; n < limit_index; ++n)
772                 ondisk->vol0_undo_array[n] = alloc_undo_bigblock(root_vol);
773         while (n < HAMMER_MAX_UNDO_BIGBLOCKS)
774                 ondisk->vol0_undo_array[n++] = HAMMER_BLOCKMAP_UNAVAIL;
775
776         /*
777          * Pre-initialize the UNDO blocks (HAMMER version 4+)
778          */
779         printf("initializing the undo map (%jd MB)\n",
780                 (intmax_t)HAMMER_OFF_LONG_ENCODE(blockmap->alloc_offset) /
781                 (1024 * 1024));
782
783         scan = blockmap->first_offset;
784         seqno = 0;
785
786         while (scan < blockmap->alloc_offset) {
787                 hammer_fifo_head_t head;
788                 hammer_fifo_tail_t tail;
789                 int isnew;
790                 int bytes = HAMMER_UNDO_ALIGN;
791
792                 isnew = ((scan & HAMMER_BUFMASK64) == 0);
793                 head = get_buffer_data(scan, &buffer, isnew);
794                 buffer->cache.modified = 1;
795                 tail = (void *)((char *)head + bytes - sizeof(*tail));
796
797                 bzero(head, bytes);
798                 head->hdr_signature = HAMMER_HEAD_SIGNATURE;
799                 head->hdr_type = HAMMER_HEAD_TYPE_DUMMY;
800                 head->hdr_size = bytes;
801                 head->hdr_seq = seqno++;
802
803                 tail->tail_signature = HAMMER_TAIL_SIGNATURE;
804                 tail->tail_type = HAMMER_HEAD_TYPE_DUMMY;
805                 tail->tail_size = bytes;
806
807                 hammer_crc_set_fifo_head(HammerVersion, head, bytes);
808
809                 scan += bytes;
810         }
811         rel_buffer(buffer);
812 }
813
/*
 * Printable label for each zone index.  Zones without a label map to
 * an empty string so the table can be indexed directly (see
 * print_blockmap()).
 */
const char *zone_labels[] = {
	/*  0 */ "",
	/*  1 */ "raw_volume",
	/*  2 */ "raw_buffer",
	/*  3 */ "undo",
	/*  4 */ "freemap",
	/*  5 */ "",
	/*  6 */ "",
	/*  7 */ "",
	/*  8 */ "btree",
	/*  9 */ "meta",
	/* 10 */ "large_data",
	/* 11 */ "small_data",
	/* 12 */ "",
	/* 13 */ "",
	/* 14 */ "",
	/* 15 */ "unavail",
};
832
833 void
834 print_blockmap(const volume_info_t volume)
835 {
836         hammer_blockmap_t blockmap;
837         hammer_volume_ondisk_t ondisk;
838         int64_t size, used;
839         int i;
840 #define INDENT ""
841
842         ondisk = volume->ondisk;
843         printf(INDENT"vol_label\t%s\n", ondisk->vol_label);
844         printf(INDENT"vol_count\t%d\n", ondisk->vol_count);
845         printf(INDENT"vol_bot_beg\t%s\n", sizetostr(ondisk->vol_bot_beg));
846         printf(INDENT"vol_mem_beg\t%s\n", sizetostr(ondisk->vol_mem_beg));
847         printf(INDENT"vol_buf_beg\t%s\n", sizetostr(ondisk->vol_buf_beg));
848         printf(INDENT"vol_buf_end\t%s\n", sizetostr(ondisk->vol_buf_end));
849         printf(INDENT"vol0_next_tid\t%016jx\n",
850                (uintmax_t)ondisk->vol0_next_tid);
851
852         blockmap = &ondisk->vol0_blockmap[HAMMER_ZONE_UNDO_INDEX];
853         size = HAMMER_OFF_LONG_ENCODE(blockmap->alloc_offset);
854         if (blockmap->first_offset <= blockmap->next_offset)
855                 used = blockmap->next_offset - blockmap->first_offset;
856         else
857                 used = blockmap->alloc_offset - blockmap->first_offset +
858                         HAMMER_OFF_LONG_ENCODE(blockmap->next_offset);
859         printf(INDENT"undo_size\t%s\n", sizetostr(size));
860         printf(INDENT"undo_used\t%s\n", sizetostr(used));
861
862         printf(INDENT"zone #             "
863                "phys             first            next             alloc\n");
864         for (i = 0; i < HAMMER_MAX_ZONES; i++) {
865                 blockmap = &ondisk->vol0_blockmap[i];
866                 printf(INDENT"zone %-2d %-10s %016jx %016jx %016jx %016jx\n",
867                         i, zone_labels[i],
868                         (uintmax_t)blockmap->phys_offset,
869                         (uintmax_t)blockmap->first_offset,
870                         (uintmax_t)blockmap->next_offset,
871                         (uintmax_t)blockmap->alloc_offset);
872         }
873 }
874
875 /*
876  * Flush various tracking structures to disk
877  */
878 void
879 flush_all_volumes(void)
880 {
881         volume_info_t volume;
882
883         TAILQ_FOREACH(volume, &VolList, entry)
884                 flush_volume(volume);
885 }
886
887 void
888 flush_volume(volume_info_t volume)
889 {
890         buffer_info_t buffer;
891         int i;
892
893         for (i = 0; i < HAMMER_BUFLISTS; ++i) {
894                 TAILQ_FOREACH(buffer, &volume->buffer_lists[i], entry)
895                         flush_buffer(buffer);
896         }
897         if (writehammervol(volume) == -1) {
898                 err(1, "Write volume %d (%s)", volume->vol_no, volume->name);
899                 /* not reached */
900         }
901 }
902
903 void
904 flush_buffer(buffer_info_t buffer)
905 {
906         volume_info_t volume;
907
908         volume = buffer->volume;
909         if (writehammerbuf(buffer) == -1) {
910                 err(1, "Write volume %d (%s)", volume->vol_no, volume->name);
911                 /* not reached */
912         }
913         buffer->cache.modified = 0;
914 }
915
916 /*
917  * Core I/O operations
918  */
919 static int
920 __read(volume_info_t volume, void *data, int64_t offset, int size)
921 {
922         ssize_t n;
923
924         n = pread(volume->fd, data, size, offset);
925         if (n != size)
926                 return(-1);
927         return(0);
928 }
929
930 static __inline int
931 readhammervol(volume_info_t volume)
932 {
933         return(__read(volume, volume->ondisk, 0, HAMMER_BUFSIZE));
934 }
935
936 static __inline int
937 readhammerbuf(buffer_info_t buffer)
938 {
939         return(__read(buffer->volume, buffer->ondisk, buffer->raw_offset,
940                 HAMMER_BUFSIZE));
941 }
942
943 static int
944 __write(volume_info_t volume, const void *data, int64_t offset, int size)
945 {
946         ssize_t n;
947
948         if (volume->rdonly)
949                 return(0);
950
951         n = pwrite(volume->fd, data, size, offset);
952         if (n != size)
953                 return(-1);
954         return(0);
955 }
956
957 static __inline int
958 writehammervol(volume_info_t volume)
959 {
960         return(__write(volume, volume->ondisk, 0, HAMMER_BUFSIZE));
961 }
962
963 static __inline int
964 writehammerbuf(buffer_info_t buffer)
965 {
966         return(__write(buffer->volume, buffer->ondisk, buffer->raw_offset,
967                 HAMMER_BUFSIZE));
968 }
969
970 int64_t init_boot_area_size(int64_t value, off_t avg_vol_size)
971 {
972         if (value == 0) {
973                 value = HAMMER_BOOT_NOMBYTES;
974                 while (value > avg_vol_size / HAMMER_MAX_VOLUMES)
975                         value >>= 1;
976         }
977
978         if (value < HAMMER_BOOT_MINBYTES)
979                 value = HAMMER_BOOT_MINBYTES;
980         else if (value > HAMMER_BOOT_MAXBYTES)
981                 value = HAMMER_BOOT_MAXBYTES;
982
983         return(value);
984 }
985
986 int64_t init_memory_log_size(int64_t value, off_t avg_vol_size)
987 {
988         if (value == 0) {
989                 value = HAMMER_MEM_NOMBYTES;
990                 while (value > avg_vol_size / HAMMER_MAX_VOLUMES)
991                         value >>= 1;
992         }
993
994         if (value < HAMMER_MEM_MINBYTES)
995                 value = HAMMER_MEM_MINBYTES;
996         else if (value > HAMMER_MEM_MAXBYTES)
997                 value = HAMMER_MEM_MAXBYTES;
998
999         return(value);
1000 }