kernel: Remove some old 4.3BSD era ioctls.
[dragonfly.git] / sbin / hammer / ondisk.c
1 /*
2  * Copyright (c) 2007 The DragonFly Project.  All rights reserved.
3  *
4  * This code is derived from software contributed to The DragonFly Project
5  * by Matthew Dillon <dillon@backplane.com>
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions
9  * are met:
10  *
11  * 1. Redistributions of source code must retain the above copyright
12  *    notice, this list of conditions and the following disclaimer.
13  * 2. Redistributions in binary form must reproduce the above copyright
14  *    notice, this list of conditions and the following disclaimer in
15  *    the documentation and/or other materials provided with the
16  *    distribution.
17  * 3. Neither the name of The DragonFly Project nor the names of its
18  *    contributors may be used to endorse or promote products derived
19  *    from this software without specific, prior written permission.
20  *
21  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
22  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
23  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
24  * FOR A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE
25  * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
26  * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING,
27  * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
28  * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
29  * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
30  * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
31  * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
32  * SUCH DAMAGE.
33  */
34
35 #include "hammer_util.h"
36
37 #include <sys/diskslice.h>
38 #include <sys/diskmbr.h>
39
/*
 * Forward declarations of file-local helpers.  All of them are static,
 * so their definitions live in this translation unit.
 */
static void check_volume(volume_info_t volume);
static void get_buffer_readahead(buffer_info_t base);
static __inline int readhammervol(volume_info_t volume);
static __inline int readhammerbuf(buffer_info_t buffer);
static __inline int writehammervol(volume_info_t volume);
static __inline int writehammerbuf(buffer_info_t buffer);

/* Filesystem type UUID that __verify_volume() expects in vol_fstype */
hammer_uuid_t Hammer_FSType;
/* Filesystem id; load_volume() takes it from the first volume loaded */
hammer_uuid_t Hammer_FSId;
/*
 * Window scanned by get_buffer_readahead(), in units of HAMMER_BUFSIZE
 * relative to the buffer just read: [UseReadBehind, UseReadAhead).
 */
int UseReadBehind = -4;
int UseReadAhead = 4;
int DebugOpt;
/*
 * Volume version handed to the hammer_crc_set_*() helpers.  load_volume()
 * sets it from the root volume header; -1 is presumably "not yet known".
 */
uint32_t HammerVersion = -1;

/* List of all volumes registered through __add_volume() */
TAILQ_HEAD(volume_list, volume_info);
static struct volume_list VolList = TAILQ_HEAD_INITIALIZER(VolList);
/* Number of load_volume() calls so far (incremented once per call) */
static int valid_hammer_volumes;
57
58 static __inline
59 int
60 buffer_hash(hammer_off_t zone2_offset)
61 {
62         int hi;
63
64         hi = (int)(zone2_offset / HAMMER_BUFSIZE) & HAMMER_BUFLISTMASK;
65         return(hi);
66 }
67
68 static
69 buffer_info_t
70 find_buffer(hammer_off_t zone2_offset)
71 {
72         volume_info_t volume;
73         buffer_info_t buffer;
74         int hi;
75
76         volume = get_volume(HAMMER_VOL_DECODE(zone2_offset));
77         assert(volume);
78
79         hi = buffer_hash(zone2_offset);
80         TAILQ_FOREACH(buffer, &volume->buffer_lists[hi], entry) {
81                 if (buffer->zone2_offset == zone2_offset)
82                         return(buffer);
83         }
84         return(NULL);
85 }
86
87 static
88 volume_info_t
89 __alloc_volume(const char *volname, int oflags)
90 {
91         volume_info_t volume;
92         int i;
93
94         volume = calloc(1, sizeof(*volume));
95         volume->vol_no = -1;
96         volume->rdonly = (oflags == O_RDONLY);
97         volume->name = strdup(volname);
98         volume->fd = open(volume->name, oflags);
99         if (volume->fd < 0) {
100                 err(1, "alloc_volume: Failed to open %s", volume->name);
101                 /* not reached */
102         }
103         check_volume(volume);
104
105         volume->ondisk = calloc(1, HAMMER_BUFSIZE);
106
107         for (i = 0; i < HAMMER_BUFLISTS; ++i)
108                 TAILQ_INIT(&volume->buffer_lists[i]);
109
110         return(volume);
111 }
112
113 static
114 void
115 __add_volume(const volume_info_t volume)
116 {
117         volume_info_t scan;
118         struct stat st1, st2;
119
120         if (fstat(volume->fd, &st1) != 0) {
121                 errx(1, "add_volume: %s: Failed to stat", volume->name);
122                 /* not reached */
123         }
124
125         TAILQ_FOREACH(scan, &VolList, entry) {
126                 if (scan->vol_no == volume->vol_no) {
127                         errx(1, "add_volume: %s: Duplicate volume number %d "
128                                 "against %s",
129                                 volume->name, volume->vol_no, scan->name);
130                         /* not reached */
131                 }
132                 if (fstat(scan->fd, &st2) != 0) {
133                         errx(1, "add_volume: %s: Failed to stat %s",
134                                 volume->name, scan->name);
135                         /* not reached */
136                 }
137                 if ((st1.st_ino == st2.st_ino) && (st1.st_dev == st2.st_dev)) {
138                         errx(1, "add_volume: %s: Specified more than once",
139                                 volume->name);
140                         /* not reached */
141                 }
142         }
143
144         TAILQ_INSERT_TAIL(&VolList, volume, entry);
145 }
146
147 static
148 void
149 __verify_volume(const volume_info_t volume)
150 {
151         hammer_volume_ondisk_t ondisk = volume->ondisk;
152         char *fstype;
153
154         if (ondisk->vol_signature != HAMMER_FSBUF_VOLUME) {
155                 errx(1, "verify_volume: Invalid volume signature %016jx",
156                         ondisk->vol_signature);
157                 /* not reached */
158         }
159         if (ondisk->vol_rootvol != HAMMER_ROOT_VOLNO) {
160                 errx(1, "verify_volume: Invalid root volume# %d",
161                         ondisk->vol_rootvol);
162                 /* not reached */
163         }
164         hammer_uuid_to_string(&ondisk->vol_fstype, &fstype);
165         if (hammer_uuid_compare(&Hammer_FSType, &ondisk->vol_fstype)) {
166                 errx(1, "verify_volume: %s: fstype %s does not indicate "
167                         "this is a HAMMER volume", volume->name, fstype);
168                 /* not reached */
169         }
170         free(fstype);
171         if (hammer_uuid_compare(&Hammer_FSId, &ondisk->vol_fsid)) {
172                 errx(1, "verify_volume: %s: fsid does not match other volumes!",
173                         volume->name);
174                 /* not reached */
175         }
176         if (ondisk->vol_version < HAMMER_VOL_VERSION_MIN ||
177             ondisk->vol_version >= HAMMER_VOL_VERSION_WIP) {
178                 errx(1, "verify_volume: %s: Invalid volume version %u",
179                         volume->name, ondisk->vol_version);
180                 /* not reached */
181         }
182 }
183
184 /*
185  * Initialize a volume structure and ondisk vol_no field.
186  */
187 volume_info_t
188 init_volume(const char *filename, int oflags, int32_t vol_no)
189 {
190         volume_info_t volume;
191
192         volume = __alloc_volume(filename, oflags);
193         volume->vol_no = volume->ondisk->vol_no = vol_no;
194
195         __add_volume(volume);
196
197         return(volume);
198 }
199
200 /*
201  * Initialize a volume structure and read ondisk volume header.
202  */
203 volume_info_t
204 load_volume(const char *filename, int oflags, int verify_volume)
205 {
206         volume_info_t volume;
207         int n;
208
209         volume = __alloc_volume(filename, oflags);
210
211         n = readhammervol(volume);
212         if (n == -1) {
213                 err(1, "load_volume: %s: Read failed at offset 0",
214                     volume->name);
215                 /* not reached */
216         }
217         volume->vol_no = volume->ondisk->vol_no;
218         if (volume->vol_no == HAMMER_ROOT_VOLNO)
219                 HammerVersion = volume->ondisk->vol_version;
220
221         if (valid_hammer_volumes++ == 0)
222                 Hammer_FSId = volume->ondisk->vol_fsid;
223         if (verify_volume)
224                 __verify_volume(volume);
225
226         __add_volume(volume);
227
228         return(volume);
229 }
230
231 /*
232  * Check basic volume characteristics.
233  */
234 static
235 void
236 check_volume(volume_info_t volume)
237 {
238         struct partinfo pinfo;
239         struct stat st;
240
241         /*
242          * Allow the formatting of block devices or regular files
243          */
244         if (ioctl(volume->fd, DIOCGPART, &pinfo) < 0) {
245                 if (fstat(volume->fd, &st) < 0) {
246                         err(1, "Unable to stat %s", volume->name);
247                         /* not reached */
248                 }
249                 if (S_ISREG(st.st_mode)) {
250                         volume->size = st.st_size;
251                         volume->type = "REGFILE";
252                 } else {
253                         errx(1, "Unsupported file type for %s", volume->name);
254                         /* not reached */
255                 }
256         } else {
257                 /*
258                  * When formatting a block device as a HAMMER volume the
259                  * sector size must be compatible.  HAMMER uses 16384 byte
260                  * filesystem buffers.
261                  */
262                 if (pinfo.reserved_blocks) {
263                         errx(1, "HAMMER cannot be placed in a partition "
264                                 "which overlaps the disklabel or MBR");
265                         /* not reached */
266                 }
267                 if (pinfo.media_blksize > HAMMER_BUFSIZE ||
268                     HAMMER_BUFSIZE % pinfo.media_blksize) {
269                         errx(1, "A media sector size of %d is not supported",
270                              pinfo.media_blksize);
271                         /* not reached */
272                 }
273
274                 volume->size = pinfo.media_size;
275                 volume->device_offset = pinfo.media_offset;
276                 volume->type = "DEVICE";
277         }
278 }
279
280 int
281 is_regfile(const volume_info_t volume)
282 {
283         return(strcmp(volume->type, "REGFILE") ? 0 : 1);
284 }
285
286 void
287 assert_volume_offset(const volume_info_t volume)
288 {
289         assert(hammer_is_zone_raw_buffer(volume->vol_free_off));
290         assert(hammer_is_zone_raw_buffer(volume->vol_free_end));
291         if (volume->vol_free_off >= volume->vol_free_end) {
292                 errx(1, "Ran out of room, filesystem too small");
293                 /* not reached */
294         }
295 }
296
297 volume_info_t
298 get_volume(int32_t vol_no)
299 {
300         volume_info_t volume;
301
302         TAILQ_FOREACH(volume, &VolList, entry) {
303                 if (volume->vol_no == vol_no)
304                         break;
305         }
306
307         return(volume);
308 }
309
310 volume_info_t
311 get_root_volume(void)
312 {
313         return(get_volume(HAMMER_ROOT_VOLNO));
314 }
315
316 static
317 hammer_off_t
318 __blockmap_xlate_to_zone2(hammer_off_t buf_offset)
319 {
320         hammer_off_t zone2_offset;
321         int error = 0;
322
323         if (hammer_is_zone_raw_buffer(buf_offset))
324                 zone2_offset = buf_offset;
325         else
326                 zone2_offset = blockmap_lookup(buf_offset, &error);
327
328         if (error)
329                 return(HAMMER_OFF_BAD);
330         assert(hammer_is_zone_raw_buffer(zone2_offset));
331
332         return(zone2_offset);
333 }
334
335 static
336 buffer_info_t
337 __alloc_buffer(hammer_off_t zone2_offset, int isnew)
338 {
339         volume_info_t volume;
340         buffer_info_t buffer;
341         int hi;
342
343         volume = get_volume(HAMMER_VOL_DECODE(zone2_offset));
344         assert(volume != NULL);
345
346         buffer = calloc(1, sizeof(*buffer));
347         buffer->zone2_offset = zone2_offset;
348         buffer->raw_offset = hammer_xlate_to_phys(volume->ondisk, zone2_offset);
349         buffer->volume = volume;
350         buffer->ondisk = calloc(1, HAMMER_BUFSIZE);
351
352         if (isnew <= 0) {
353                 if (readhammerbuf(buffer) == -1) {
354                         err(1, "Failed to read %s:%016jx at %016jx",
355                             volume->name,
356                             (intmax_t)buffer->zone2_offset,
357                             (intmax_t)buffer->raw_offset);
358                         /* not reached */
359                 }
360         }
361
362         hi = buffer_hash(zone2_offset);
363         TAILQ_INSERT_TAIL(&volume->buffer_lists[hi], buffer, entry);
364         hammer_cache_add(&buffer->cache);
365
366         return(buffer);
367 }
368
369 /*
370  * Acquire the 16KB buffer for specified zone offset.
371  */
372 static
373 buffer_info_t
374 get_buffer(hammer_off_t buf_offset, int isnew)
375 {
376         buffer_info_t buffer;
377         hammer_off_t zone2_offset;
378         int dora = 0;
379
380         zone2_offset = __blockmap_xlate_to_zone2(buf_offset);
381         if (zone2_offset == HAMMER_OFF_BAD)
382                 return(NULL);
383
384         zone2_offset &= ~HAMMER_BUFMASK64;
385         buffer = find_buffer(zone2_offset);
386
387         if (buffer == NULL) {
388                 buffer = __alloc_buffer(zone2_offset, isnew);
389                 dora = (isnew == 0);
390         } else {
391                 assert(isnew != -1);
392                 hammer_cache_used(&buffer->cache);
393         }
394         assert(buffer->ondisk != NULL);
395
396         ++buffer->cache.refs;
397         hammer_cache_flush();
398
399         if (isnew > 0) {
400                 assert(buffer->cache.modified == 0);
401                 bzero(buffer->ondisk, HAMMER_BUFSIZE);
402                 buffer->cache.modified = 1;
403         }
404         if (dora)
405                 get_buffer_readahead(buffer);
406         return(buffer);
407 }
408
409 static
410 void
411 get_buffer_readahead(const buffer_info_t base)
412 {
413         buffer_info_t buffer;
414         volume_info_t volume;
415         hammer_off_t zone2_offset;
416         int64_t raw_offset;
417         int ri = UseReadBehind;
418         int re = UseReadAhead;
419
420         raw_offset = base->raw_offset + ri * HAMMER_BUFSIZE;
421         volume = base->volume;
422
423         while (ri < re) {
424                 if (raw_offset >= volume->ondisk->vol_buf_end)
425                         break;
426                 if (raw_offset < volume->ondisk->vol_buf_beg || ri == 0) {
427                         ++ri;
428                         raw_offset += HAMMER_BUFSIZE;
429                         continue;
430                 }
431                 zone2_offset = HAMMER_ENCODE_RAW_BUFFER(volume->vol_no,
432                         raw_offset - volume->ondisk->vol_buf_beg);
433                 buffer = find_buffer(zone2_offset);
434                 if (buffer == NULL) {
435                         /* call with -1 to prevent another readahead */
436                         buffer = get_buffer(zone2_offset, -1);
437                         rel_buffer(buffer);
438                 }
439                 ++ri;
440                 raw_offset += HAMMER_BUFSIZE;
441         }
442 }
443
444 void
445 rel_buffer(buffer_info_t buffer)
446 {
447         volume_info_t volume;
448         int hi;
449
450         if (buffer == NULL)
451                 return;
452         assert(buffer->cache.refs > 0);
453         if (--buffer->cache.refs == 0) {
454                 if (buffer->cache.delete) {
455                         hi = buffer_hash(buffer->zone2_offset);
456                         volume = buffer->volume;
457                         if (buffer->cache.modified)
458                                 flush_buffer(buffer);
459                         TAILQ_REMOVE(&volume->buffer_lists[hi], buffer, entry);
460                         hammer_cache_del(&buffer->cache);
461                         free(buffer->ondisk);
462                         free(buffer);
463                 }
464         }
465 }
466
467 /*
468  * Retrieve a pointer to a buffer data given a zone-X buffer offset.
469  * The underlying bufferp is freed if isnew or the corresponding zone-2
470  * offset is out of range of the cached data.  If bufferp is freed,
471  * a referenced buffer is loaded into it.
472  */
473 void *
474 get_buffer_data(hammer_off_t buf_offset, buffer_info_t *bufferp, int isnew)
475 {
476         hammer_off_t xor = 0;
477         hammer_volume_ondisk_t ondisk;
478
479         if (*bufferp != NULL) {
480                 if (hammer_is_zone_undo(buf_offset)) {
481                         ondisk = (*bufferp)->volume->ondisk;
482                         xor = hammer_xlate_to_undo(ondisk, buf_offset) ^
483                                 (*bufferp)->zone2_offset;
484                 } else if (hammer_is_zone_direct_xlated(buf_offset)) {
485                         xor = HAMMER_OFF_LONG_ENCODE(buf_offset) ^
486                               HAMMER_OFF_LONG_ENCODE((*bufferp)->zone2_offset);
487                 } else {
488                         assert(0);
489                 }
490                 if (isnew > 0 || (xor & ~HAMMER_BUFMASK64)) {
491                         rel_buffer(*bufferp);
492                         *bufferp = NULL;
493                 } else {
494                         hammer_cache_used(&(*bufferp)->cache);
495                 }
496         }
497
498         if (*bufferp == NULL) {
499                 *bufferp = get_buffer(buf_offset, isnew);
500                 if (*bufferp == NULL)
501                         return(NULL);
502         }
503
504         return((char *)(*bufferp)->ondisk +
505                 ((int32_t)buf_offset & HAMMER_BUFMASK));
506 }
507
508 /*
509  * Allocate HAMMER elements - B-Tree nodes
510  */
511 hammer_node_ondisk_t
512 alloc_btree_node(hammer_off_t *offp, buffer_info_t *data_bufferp)
513 {
514         hammer_node_ondisk_t node;
515
516         node = alloc_blockmap(HAMMER_ZONE_BTREE_INDEX, sizeof(*node),
517                               offp, data_bufferp);
518         bzero(node, sizeof(*node));
519         return(node);
520 }
521
522 /*
523  * Allocate HAMMER elements - meta data (inode, direntry, PFS, etc)
524  */
525 void *
526 alloc_meta_element(hammer_off_t *offp, int32_t data_len,
527                    buffer_info_t *data_bufferp)
528 {
529         void *data;
530
531         data = alloc_blockmap(HAMMER_ZONE_META_INDEX, data_len,
532                               offp, data_bufferp);
533         bzero(data, data_len);
534         return(data);
535 }
536
537 /*
538  * Format a new blockmap.  This is mostly a degenerate case because
539  * all allocations are now actually done from the freemap.
540  */
541 void
542 format_blockmap(volume_info_t root_vol, int zone, hammer_off_t offset)
543 {
544         hammer_blockmap_t blockmap;
545         hammer_off_t zone_base;
546
547         /* Only root volume needs formatting */
548         assert(root_vol->vol_no == HAMMER_ROOT_VOLNO);
549
550         assert(hammer_is_index_record(zone));
551
552         blockmap = &root_vol->ondisk->vol0_blockmap[zone];
553         zone_base = HAMMER_ZONE_ENCODE(zone, offset);
554
555         bzero(blockmap, sizeof(*blockmap));
556         blockmap->phys_offset = 0;
557         blockmap->first_offset = zone_base;
558         blockmap->next_offset = zone_base;
559         blockmap->alloc_offset = HAMMER_ENCODE(zone, 255, -1);
560         hammer_crc_set_blockmap(HammerVersion, blockmap);
561 }
562
563 /*
564  * Format a new freemap.  Set all layer1 entries to UNAVAIL.  The initialize
565  * code will load each volume's freemap.
566  */
567 void
568 format_freemap(volume_info_t root_vol)
569 {
570         buffer_info_t buffer = NULL;
571         hammer_off_t layer1_offset;
572         hammer_blockmap_t blockmap;
573         hammer_blockmap_layer1_t layer1;
574         int i, isnew;
575
576         /* Only root volume needs formatting */
577         assert(root_vol->vol_no == HAMMER_ROOT_VOLNO);
578
579         layer1_offset = bootstrap_bigblock(root_vol);
580         for (i = 0; i < HAMMER_BIGBLOCK_SIZE; i += sizeof(*layer1)) {
581                 isnew = ((i % HAMMER_BUFSIZE) == 0);
582                 layer1 = get_buffer_data(layer1_offset + i, &buffer, isnew);
583                 bzero(layer1, sizeof(*layer1));
584                 layer1->phys_offset = HAMMER_BLOCKMAP_UNAVAIL;
585                 layer1->blocks_free = 0;
586                 hammer_crc_set_layer1(HammerVersion, layer1);
587         }
588         assert(i == HAMMER_BIGBLOCK_SIZE);
589         rel_buffer(buffer);
590
591         blockmap = &root_vol->ondisk->vol0_blockmap[HAMMER_ZONE_FREEMAP_INDEX];
592         bzero(blockmap, sizeof(*blockmap));
593         blockmap->phys_offset = layer1_offset;
594         blockmap->first_offset = 0;
595         blockmap->next_offset = HAMMER_ENCODE_RAW_BUFFER(0, 0);
596         blockmap->alloc_offset = HAMMER_ENCODE_RAW_BUFFER(255, -1);
597         hammer_crc_set_blockmap(HammerVersion, blockmap);
598 }
599
600 /*
601  * Load the volume's remaining free space into the freemap.
602  *
603  * Returns the number of big-blocks available.
604  */
605 int64_t
606 initialize_freemap(volume_info_t volume)
607 {
608         volume_info_t root_vol;
609         buffer_info_t buffer1 = NULL;
610         buffer_info_t buffer2 = NULL;
611         hammer_blockmap_layer1_t layer1;
612         hammer_blockmap_layer2_t layer2;
613         hammer_off_t layer1_offset;
614         hammer_off_t layer2_offset;
615         hammer_off_t phys_offset;
616         hammer_off_t block_offset;
617         hammer_off_t aligned_vol_free_end;
618         hammer_blockmap_t freemap;
619         int64_t count = 0;
620         int64_t layer1_count = 0;
621
622         root_vol = get_root_volume();
623
624         assert_volume_offset(volume);
625         aligned_vol_free_end = HAMMER_BLOCKMAP_LAYER2_DOALIGN(volume->vol_free_end);
626
627         printf("initialize freemap volume %d\n", volume->vol_no);
628
629         /*
630          * Initialize the freemap.  First preallocate the big-blocks required
631          * to implement layer2.   This preallocation is a bootstrap allocation
632          * using blocks from the target volume.
633          */
634         freemap = &root_vol->ondisk->vol0_blockmap[HAMMER_ZONE_FREEMAP_INDEX];
635
636         for (phys_offset = HAMMER_ENCODE_RAW_BUFFER(volume->vol_no, 0);
637              phys_offset < aligned_vol_free_end;
638              phys_offset += HAMMER_BLOCKMAP_LAYER2) {
639                 layer1_offset = freemap->phys_offset +
640                                 HAMMER_BLOCKMAP_LAYER1_OFFSET(phys_offset);
641                 layer1 = get_buffer_data(layer1_offset, &buffer1, 0);
642                 if (layer1->phys_offset == HAMMER_BLOCKMAP_UNAVAIL) {
643                         layer1->phys_offset = bootstrap_bigblock(volume);
644                         layer1->blocks_free = 0;
645                         buffer1->cache.modified = 1;
646                         hammer_crc_set_layer1(HammerVersion, layer1);
647                 }
648         }
649
650         /*
651          * Now fill everything in.
652          */
653         for (phys_offset = HAMMER_ENCODE_RAW_BUFFER(volume->vol_no, 0);
654              phys_offset < aligned_vol_free_end;
655              phys_offset += HAMMER_BLOCKMAP_LAYER2) {
656                 layer1_count = 0;
657                 layer1_offset = freemap->phys_offset +
658                                 HAMMER_BLOCKMAP_LAYER1_OFFSET(phys_offset);
659                 layer1 = get_buffer_data(layer1_offset, &buffer1, 0);
660                 assert(layer1->phys_offset != HAMMER_BLOCKMAP_UNAVAIL);
661
662                 for (block_offset = 0;
663                      block_offset < HAMMER_BLOCKMAP_LAYER2;
664                      block_offset += HAMMER_BIGBLOCK_SIZE) {
665                         layer2_offset = layer1->phys_offset +
666                                         HAMMER_BLOCKMAP_LAYER2_OFFSET(block_offset);
667                         layer2 = get_buffer_data(layer2_offset, &buffer2, 0);
668                         bzero(layer2, sizeof(*layer2));
669
670                         if (phys_offset + block_offset < volume->vol_free_off) {
671                                 /*
672                                  * Big-blocks already allocated as part
673                                  * of the freemap bootstrap.
674                                  */
675                                 layer2->zone = HAMMER_ZONE_FREEMAP_INDEX;
676                                 layer2->append_off = HAMMER_BIGBLOCK_SIZE;
677                                 layer2->bytes_free = 0;
678                         } else if (phys_offset + block_offset < volume->vol_free_end) {
679                                 layer2->zone = 0;
680                                 layer2->append_off = 0;
681                                 layer2->bytes_free = HAMMER_BIGBLOCK_SIZE;
682                                 ++count;
683                                 ++layer1_count;
684                         } else {
685                                 layer2->zone = HAMMER_ZONE_UNAVAIL_INDEX;
686                                 layer2->append_off = HAMMER_BIGBLOCK_SIZE;
687                                 layer2->bytes_free = 0;
688                         }
689                         hammer_crc_set_layer2(HammerVersion, layer2);
690                         buffer2->cache.modified = 1;
691                 }
692
693                 layer1->blocks_free += layer1_count;
694                 hammer_crc_set_layer1(HammerVersion, layer1);
695                 buffer1->cache.modified = 1;
696         }
697
698         rel_buffer(buffer1);
699         rel_buffer(buffer2);
700         return(count);
701 }
702
703 /*
704  * Returns the number of big-blocks available for filesystem data and undos
705  * without formatting.
706  */
707 int64_t
708 count_freemap(const volume_info_t volume)
709 {
710         hammer_off_t phys_offset;
711         hammer_off_t vol_free_off;
712         hammer_off_t aligned_vol_free_end;
713         int64_t count = 0;
714
715         vol_free_off = HAMMER_ENCODE_RAW_BUFFER(volume->vol_no, 0);
716
717         assert_volume_offset(volume);
718         aligned_vol_free_end = HAMMER_BLOCKMAP_LAYER2_DOALIGN(volume->vol_free_end);
719
720         if (volume->vol_no == HAMMER_ROOT_VOLNO)
721                 vol_free_off += HAMMER_BIGBLOCK_SIZE;
722
723         for (phys_offset = HAMMER_ENCODE_RAW_BUFFER(volume->vol_no, 0);
724              phys_offset < aligned_vol_free_end;
725              phys_offset += HAMMER_BLOCKMAP_LAYER2) {
726                 vol_free_off += HAMMER_BIGBLOCK_SIZE;
727         }
728
729         for (phys_offset = HAMMER_ENCODE_RAW_BUFFER(volume->vol_no, 0);
730              phys_offset < aligned_vol_free_end;
731              phys_offset += HAMMER_BIGBLOCK_SIZE) {
732                 if (phys_offset < vol_free_off)
733                         ;
734                 else if (phys_offset < volume->vol_free_end)
735                         ++count;
736         }
737
738         return(count);
739 }
740
741 /*
742  * Format the undomap for the root volume.
743  */
744 void
745 format_undomap(volume_info_t root_vol, int64_t *undo_buffer_size)
746 {
747         hammer_off_t undo_limit;
748         hammer_blockmap_t blockmap;
749         hammer_volume_ondisk_t ondisk;
750         buffer_info_t buffer = NULL;
751         hammer_off_t scan;
752         int n;
753         int limit_index;
754         uint32_t seqno;
755
756         /* Only root volume needs formatting */
757         assert(root_vol->vol_no == HAMMER_ROOT_VOLNO);
758         ondisk = root_vol->ondisk;
759
760         /*
761          * Size the undo buffer in multiples of HAMMER_BIGBLOCK_SIZE,
762          * up to HAMMER_MAX_UNDO_BIGBLOCKS big-blocks.
763          * Size to approximately 0.1% of the disk.
764          *
765          * The minimum UNDO fifo size is 512MB, or approximately 1% of
766          * the recommended 50G disk.
767          *
768          * Changing this minimum is rather dangerous as complex filesystem
769          * operations can cause the UNDO FIFO to fill up otherwise.
770          */
771         undo_limit = *undo_buffer_size;
772         if (undo_limit == 0) {
773                 undo_limit = HAMMER_VOL_BUF_SIZE(ondisk) / 1000;
774                 if (undo_limit < HAMMER_BIGBLOCK_SIZE * HAMMER_MIN_UNDO_BIGBLOCKS)
775                         undo_limit = HAMMER_BIGBLOCK_SIZE * HAMMER_MIN_UNDO_BIGBLOCKS;
776         }
777         undo_limit = HAMMER_BIGBLOCK_DOALIGN(undo_limit);
778         if (undo_limit < HAMMER_BIGBLOCK_SIZE)
779                 undo_limit = HAMMER_BIGBLOCK_SIZE;
780         if (undo_limit > HAMMER_BIGBLOCK_SIZE * HAMMER_MAX_UNDO_BIGBLOCKS)
781                 undo_limit = HAMMER_BIGBLOCK_SIZE * HAMMER_MAX_UNDO_BIGBLOCKS;
782         *undo_buffer_size = undo_limit;
783
784         blockmap = &ondisk->vol0_blockmap[HAMMER_ZONE_UNDO_INDEX];
785         bzero(blockmap, sizeof(*blockmap));
786         blockmap->phys_offset = HAMMER_BLOCKMAP_UNAVAIL;
787         blockmap->first_offset = HAMMER_ENCODE_UNDO(0);
788         blockmap->next_offset = blockmap->first_offset;
789         blockmap->alloc_offset = HAMMER_ENCODE_UNDO(undo_limit);
790         hammer_crc_set_blockmap(HammerVersion, blockmap);
791
792         limit_index = undo_limit / HAMMER_BIGBLOCK_SIZE;
793         assert(limit_index <= HAMMER_MAX_UNDO_BIGBLOCKS);
794
795         for (n = 0; n < limit_index; ++n)
796                 ondisk->vol0_undo_array[n] = alloc_undo_bigblock(root_vol);
797         while (n < HAMMER_MAX_UNDO_BIGBLOCKS)
798                 ondisk->vol0_undo_array[n++] = HAMMER_BLOCKMAP_UNAVAIL;
799
800         /*
801          * Pre-initialize the UNDO blocks (HAMMER version 4+)
802          */
803         printf("initializing the undo map (%jd MB)\n",
804                 (intmax_t)HAMMER_OFF_LONG_ENCODE(blockmap->alloc_offset) /
805                 (1024 * 1024));
806
807         scan = blockmap->first_offset;
808         seqno = 0;
809
810         while (scan < blockmap->alloc_offset) {
811                 hammer_fifo_head_t head;
812                 hammer_fifo_tail_t tail;
813                 int bytes = HAMMER_UNDO_ALIGN;
814                 int isnew = ((scan & HAMMER_BUFMASK64) == 0);
815
816                 head = get_buffer_data(scan, &buffer, isnew);
817                 buffer->cache.modified = 1;
818                 tail = (void *)((char *)head + bytes - sizeof(*tail));
819
820                 bzero(head, bytes);
821                 head->hdr_signature = HAMMER_HEAD_SIGNATURE;
822                 head->hdr_type = HAMMER_HEAD_TYPE_DUMMY;
823                 head->hdr_size = bytes;
824                 head->hdr_seq = seqno++;
825
826                 tail->tail_signature = HAMMER_TAIL_SIGNATURE;
827                 tail->tail_type = HAMMER_HEAD_TYPE_DUMMY;
828                 tail->tail_size = bytes;
829
830                 hammer_crc_set_fifo_head(HammerVersion, head, bytes);
831
832                 scan += bytes;
833         }
834         rel_buffer(buffer);
835 }
836
/*
 * Printable zone names, indexed by zone number (16 entries, 0-15).
 * Zone numbers without a name map to the empty string.
 */
const char *zone_labels[] = {
        "",             /* 0 */
        "raw_volume",   /* 1 */
        "raw_buffer",   /* 2 */
        "undo",         /* 3 */
        "freemap",      /* 4 */
        "",             /* 5 */
        "",             /* 6 */
        "",             /* 7 */
        "btree",        /* 8 */
        "meta",         /* 9 */
        "large_data",   /* 10 */
        "small_data",   /* 11 */
        "",             /* 12 */
        "",             /* 13 */
        "",             /* 14 */
        "unavail",      /* 15 */
};
855
856 void
857 print_blockmap(const volume_info_t volume)
858 {
859         hammer_blockmap_t blockmap;
860         hammer_volume_ondisk_t ondisk = volume->ondisk;
861         int64_t size, used;
862         int i;
863         char *fstype, *fsid;
864 #define INDENT ""
865
866         printf(INDENT"vol_label\t%s\n", ondisk->vol_label);
867         printf(INDENT"vol_count\t%d\n", ondisk->vol_count);
868
869         hammer_uuid_to_string(&ondisk->vol_fstype, &fstype);
870         hammer_uuid_to_string(&ondisk->vol_fsid, &fsid);
871         printf(INDENT"vol_fstype\t%s", fstype);
872         if (strcmp(fstype, "61dc63ac-6e38-11dc-8513-01301bb8a9f5") == 0)
873                 printf(" \"%s\"\n", HAMMER_FSTYPE_STRING);
874         else
875                 printf("\n"); /* invalid UUID */
876         printf(INDENT"vol_fsid\t%s\n", fsid);
877         free(fstype);
878         free(fsid);
879
880         printf(INDENT"vol_bot_beg\t%s\n", sizetostr(ondisk->vol_bot_beg));
881         printf(INDENT"vol_mem_beg\t%s\n", sizetostr(ondisk->vol_mem_beg));
882         printf(INDENT"vol_buf_beg\t%s\n", sizetostr(ondisk->vol_buf_beg));
883         printf(INDENT"vol_buf_end\t%s\n", sizetostr(ondisk->vol_buf_end));
884         printf(INDENT"vol0_next_tid\t%016jx\n",
885                (uintmax_t)ondisk->vol0_next_tid);
886
887         blockmap = &ondisk->vol0_blockmap[HAMMER_ZONE_UNDO_INDEX];
888         size = HAMMER_OFF_LONG_ENCODE(blockmap->alloc_offset);
889         if (blockmap->first_offset <= blockmap->next_offset)
890                 used = blockmap->next_offset - blockmap->first_offset;
891         else
892                 used = blockmap->alloc_offset - blockmap->first_offset +
893                         HAMMER_OFF_LONG_ENCODE(blockmap->next_offset);
894         printf(INDENT"undo_size\t%s\n", sizetostr(size));
895         printf(INDENT"undo_used\t%s\n", sizetostr(used));
896
897         printf(INDENT"zone #             "
898                "phys             first            next             alloc\n");
899         for (i = 0; i < HAMMER_MAX_ZONES; i++) {
900                 blockmap = &ondisk->vol0_blockmap[i];
901                 printf(INDENT"zone %-2d %-10s %016jx %016jx %016jx %016jx\n",
902                         i, zone_labels[i],
903                         (uintmax_t)blockmap->phys_offset,
904                         (uintmax_t)blockmap->first_offset,
905                         (uintmax_t)blockmap->next_offset,
906                         (uintmax_t)blockmap->alloc_offset);
907         }
908 }
909
910 /*
911  * Flush various tracking structures to disk
912  */
913 void
914 flush_all_volumes(void)
915 {
916         volume_info_t volume;
917
918         TAILQ_FOREACH(volume, &VolList, entry)
919                 flush_volume(volume);
920 }
921
922 void
923 flush_volume(volume_info_t volume)
924 {
925         buffer_info_t buffer;
926         int i;
927
928         for (i = 0; i < HAMMER_BUFLISTS; ++i) {
929                 TAILQ_FOREACH(buffer, &volume->buffer_lists[i], entry)
930                         flush_buffer(buffer);
931         }
932         if (writehammervol(volume) == -1) {
933                 err(1, "Write volume %d (%s)", volume->vol_no, volume->name);
934                 /* not reached */
935         }
936 }
937
938 void
939 flush_buffer(buffer_info_t buffer)
940 {
941         volume_info_t volume;
942
943         volume = buffer->volume;
944         if (writehammerbuf(buffer) == -1) {
945                 err(1, "Write volume %d (%s)", volume->vol_no, volume->name);
946                 /* not reached */
947         }
948         buffer->cache.modified = 0;
949 }
950
951 /*
952  * Core I/O operations
953  */
954 static
955 int
956 __read(volume_info_t volume, void *data, int64_t offset, int size)
957 {
958         ssize_t n;
959
960         n = pread(volume->fd, data, size, offset);
961         if (n != size)
962                 return(-1);
963         return(0);
964 }
965
966 static __inline
967 int
968 readhammervol(volume_info_t volume)
969 {
970         return(__read(volume, volume->ondisk, 0, HAMMER_BUFSIZE));
971 }
972
973 static __inline
974 int
975 readhammerbuf(buffer_info_t buffer)
976 {
977         return(__read(buffer->volume, buffer->ondisk, buffer->raw_offset,
978                 HAMMER_BUFSIZE));
979 }
980
981 static
982 int
983 __write(volume_info_t volume, const void *data, int64_t offset, int size)
984 {
985         ssize_t n;
986
987         if (volume->rdonly)
988                 return(0);
989
990         n = pwrite(volume->fd, data, size, offset);
991         if (n != size)
992                 return(-1);
993         return(0);
994 }
995
996 static __inline
997 int
998 writehammervol(volume_info_t volume)
999 {
1000         return(__write(volume, volume->ondisk, 0, HAMMER_BUFSIZE));
1001 }
1002
1003 static __inline
1004 int
1005 writehammerbuf(buffer_info_t buffer)
1006 {
1007         return(__write(buffer->volume, buffer->ondisk, buffer->raw_offset,
1008                 HAMMER_BUFSIZE));
1009 }
1010
1011 int64_t
1012 init_boot_area_size(int64_t value, off_t avg_vol_size)
1013 {
1014         if (value == 0) {
1015                 value = HAMMER_BOOT_NOMBYTES;
1016                 while (value > avg_vol_size / HAMMER_MAX_VOLUMES)
1017                         value >>= 1;
1018         }
1019
1020         if (value < HAMMER_BOOT_MINBYTES)
1021                 value = HAMMER_BOOT_MINBYTES;
1022         else if (value > HAMMER_BOOT_MAXBYTES)
1023                 value = HAMMER_BOOT_MAXBYTES;
1024
1025         return(value);
1026 }
1027
1028 int64_t
1029 init_memory_log_size(int64_t value, off_t avg_vol_size)
1030 {
1031         if (value == 0) {
1032                 value = HAMMER_MEM_NOMBYTES;
1033                 while (value > avg_vol_size / HAMMER_MAX_VOLUMES)
1034                         value >>= 1;
1035         }
1036
1037         if (value < HAMMER_MEM_MINBYTES)
1038                 value = HAMMER_MEM_MINBYTES;
1039         else if (value > HAMMER_MEM_MAXBYTES)
1040                 value = HAMMER_MEM_MAXBYTES;
1041
1042         return(value);
1043 }