/*
 * Copyright (c) 2007 The DragonFly Project.  All rights reserved.
 *
 * This code is derived from software contributed to The DragonFly Project
 * by Matthew Dillon <dillon@backplane.com>
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in
 *    the documentation and/or other materials provided with the
 *    distribution.
 * 3. Neither the name of The DragonFly Project nor the names of its
 *    contributors may be used to endorse or promote products derived
 *    from this software without specific, prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE
 * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING,
 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 * $DragonFly: src/sbin/hammer/ondisk.c,v 1.12 2008/02/20 00:55:48 dillon Exp $
 */
#include <sys/types.h>

#include <assert.h>
#include <err.h>
#include <fcntl.h>
#include <stdarg.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <strings.h>
#include <unistd.h>

#include "hammer_util.h"
48 static void *alloc_blockmap(int zone, int bytes, hammer_off_t *result_offp,
49 struct buffer_info **bufferp);
50 static hammer_off_t alloc_bigblock(struct volume_info *volume,
53 static void init_fifo_head(hammer_fifo_head_t head, u_int16_t hdr_type);
54 static hammer_off_t hammer_alloc_fifo(int32_t base_bytes, int32_t ext_bytes,
55 struct buffer_info **bufp, u_int16_t hdr_type);
56 static void readhammerbuf(struct volume_info *vol, void *data,
59 static void writehammerbuf(struct volume_info *vol, const void *data,
67 int UsingSuperClusters;
70 struct volume_list VolList = TAILQ_HEAD_INITIALIZER(VolList);
73 * Lookup the requested information structure and related on-disk buffer.
74 * Missing structures are created.
77 setup_volume(int32_t vol_no, const char *filename, int isnew, int oflags)
79 struct volume_info *vol;
80 struct volume_info *scan;
81 struct hammer_volume_ondisk *ondisk;
85 * Allocate the volume structure
87 vol = malloc(sizeof(*vol));
88 bzero(vol, sizeof(*vol));
89 TAILQ_INIT(&vol->buffer_list);
90 vol->name = strdup(filename);
91 vol->fd = open(filename, oflags);
95 err(1, "setup_volume: %s: Open failed", filename);
99 * Read or initialize the volume header
101 vol->ondisk = ondisk = malloc(HAMMER_BUFSIZE);
103 bzero(ondisk, HAMMER_BUFSIZE);
105 n = pread(vol->fd, ondisk, HAMMER_BUFSIZE, 0);
106 if (n != HAMMER_BUFSIZE) {
107 err(1, "setup_volume: %s: Read failed at offset 0",
110 vol_no = ondisk->vol_no;
112 RootVolNo = ondisk->vol_rootvol;
113 } else if (RootVolNo != (int)ondisk->vol_rootvol) {
114 errx(1, "setup_volume: %s: root volume disagreement: "
116 vol->name, RootVolNo, ondisk->vol_rootvol);
119 if (bcmp(&Hammer_FSType, &ondisk->vol_fstype, sizeof(Hammer_FSType)) != 0) {
120 errx(1, "setup_volume: %s: Header does not indicate "
121 "that this is a hammer volume", vol->name);
123 if (TAILQ_EMPTY(&VolList)) {
124 Hammer_FSId = vol->ondisk->vol_fsid;
125 } else if (bcmp(&Hammer_FSId, &ondisk->vol_fsid, sizeof(Hammer_FSId)) != 0) {
126 errx(1, "setup_volume: %s: FSId does match other "
127 "volumes!", vol->name);
130 vol->vol_no = vol_no;
133 /*init_fifo_head(&ondisk->head, HAMMER_HEAD_TYPE_VOL);*/
134 vol->cache.modified = 1;
138 * Link the volume structure in
140 TAILQ_FOREACH(scan, &VolList, entry) {
141 if (scan->vol_no == vol_no) {
142 errx(1, "setup_volume %s: Duplicate volume number %d "
143 "against %s", filename, vol_no, scan->name);
146 TAILQ_INSERT_TAIL(&VolList, vol, entry);
151 get_volume(int32_t vol_no)
153 struct volume_info *vol;
155 TAILQ_FOREACH(vol, &VolList, entry) {
156 if (vol->vol_no == vol_no)
160 errx(1, "get_volume: Volume %d does not exist!", vol_no);
162 /* not added to or removed from hammer cache */
167 rel_volume(struct volume_info *volume)
169 /* not added to or removed from hammer cache */
170 --volume->cache.refs;
174 * Acquire the specified buffer.
177 get_buffer(hammer_off_t buf_offset, int isnew)
180 struct buffer_info *buf;
181 struct volume_info *volume;
185 assert((buf_offset & HAMMER_OFF_ZONE_MASK) == HAMMER_ZONE_RAW_BUFFER);
187 vol_no = HAMMER_VOL_DECODE(buf_offset);
188 volume = get_volume(vol_no);
189 buf_offset &= ~HAMMER_BUFMASK64;
191 TAILQ_FOREACH(buf, &volume->buffer_list, entry) {
192 if (buf->buf_offset == buf_offset)
196 buf = malloc(sizeof(*buf));
197 bzero(buf, sizeof(*buf));
198 buf->buf_offset = buf_offset;
199 buf->buf_disk_offset = volume->ondisk->vol_buf_beg +
200 (buf_offset & HAMMER_OFF_SHORT_MASK);
201 buf->volume = volume;
202 TAILQ_INSERT_TAIL(&volume->buffer_list, buf, entry);
203 ++volume->cache.refs;
204 buf->cache.u.buffer = buf;
205 hammer_cache_add(&buf->cache, ISBUFFER);
208 hammer_cache_flush();
209 if ((ondisk = buf->ondisk) == NULL) {
210 buf->ondisk = ondisk = malloc(HAMMER_BUFSIZE);
212 n = pread(volume->fd, ondisk, HAMMER_BUFSIZE,
213 buf->buf_disk_offset);
214 if (n != HAMMER_BUFSIZE) {
215 err(1, "get_buffer: %s:%016llx Read failed at "
217 volume->name, buf->buf_offset,
218 buf->buf_disk_offset);
223 bzero(ondisk, HAMMER_BUFSIZE);
224 buf->cache.modified = 1;
230 rel_buffer(struct buffer_info *buffer)
232 struct volume_info *volume;
234 assert(buffer->cache.refs > 0);
235 if (--buffer->cache.refs == 0) {
236 if (buffer->cache.delete) {
237 volume = buffer->volume;
238 if (buffer->cache.modified)
239 flush_buffer(buffer);
240 TAILQ_REMOVE(&volume->buffer_list, buffer, entry);
241 hammer_cache_del(&buffer->cache);
242 free(buffer->ondisk);
250 get_buffer_data(hammer_off_t buf_offset, struct buffer_info **bufferp,
253 struct buffer_info *buffer;
255 if ((buffer = *bufferp) != NULL) {
257 ((buffer->buf_offset ^ buf_offset) & ~HAMMER_BUFMASK64)) {
259 buffer = *bufferp = NULL;
263 buffer = *bufferp = get_buffer(buf_offset, isnew);
264 return((char *)buffer->ondisk + ((int32_t)buf_offset & HAMMER_BUFMASK));
268 * Retrieve a pointer to a B-Tree node given a cluster offset. The underlying
269 * bufp is freed if non-NULL and a referenced buffer is loaded into it.
272 get_node(hammer_off_t node_offset, struct buffer_info **bufp)
274 struct buffer_info *buf;
278 *bufp = buf = get_buffer(node_offset, 0);
279 return((void *)((char *)buf->ondisk +
280 (int32_t)(node_offset & HAMMER_BUFMASK)));
284 * Allocate HAMMER elements - btree nodes, data storage, and record elements
286 * NOTE: hammer_alloc_fifo() initializes the fifo header for the returned
287 * item and zero's out the remainder, so don't bzero() it.
290 alloc_btree_element(hammer_off_t *offp)
292 struct buffer_info *buffer = NULL;
293 hammer_node_ondisk_t node;
295 node = alloc_blockmap(HAMMER_ZONE_BTREE_INDEX, sizeof(*node),
297 bzero(node, sizeof(*node));
298 /* XXX buffer not released, pointer remains valid */
302 hammer_record_ondisk_t
303 alloc_record_element(hammer_off_t *offp, int32_t data_len, void **datap)
305 struct buffer_info *record_buffer = NULL;
306 struct buffer_info *data_buffer = NULL;
307 hammer_record_ondisk_t rec;
309 rec = alloc_blockmap(HAMMER_ZONE_RECORD_INDEX, sizeof(*rec),
310 offp, &record_buffer);
311 bzero(rec, sizeof(*rec));
313 if (data_len >= HAMMER_BUFSIZE) {
314 assert(data_len <= HAMMER_BUFSIZE); /* just one buffer */
315 *datap = alloc_blockmap(HAMMER_ZONE_LARGE_DATA_INDEX, data_len,
316 &rec->base.data_off, &data_buffer);
317 rec->base.data_len = data_len;
318 bzero(*datap, data_len);
319 } else if (data_len) {
320 *datap = alloc_blockmap(HAMMER_ZONE_SMALL_DATA_INDEX, data_len,
321 &rec->base.data_off, &data_buffer);
322 rec->base.data_len = data_len;
323 bzero(*datap, data_len);
327 /* XXX buf not released, ptr remains valid */
332 * Format a new freemap. Set all layer1 entries to UNAVAIL. The initialize
333 * code will load each volume's freemap.
336 format_freemap(struct volume_info *root_vol, hammer_blockmap_t blockmap)
338 struct buffer_info *buffer = NULL;
339 hammer_off_t layer1_offset;
340 struct hammer_blockmap_layer1 *layer1;
343 layer1_offset = alloc_bigblock(root_vol, 0);
344 for (i = 0; i < (int)HAMMER_BLOCKMAP_RADIX1; ++i) {
345 isnew = ((i % HAMMER_BLOCKMAP_RADIX1_PERBUFFER) == 0);
346 layer1 = get_buffer_data(layer1_offset + i * sizeof(*layer1),
348 bzero(layer1, sizeof(*layer1));
349 layer1->phys_offset = HAMMER_BLOCKMAP_UNAVAIL;
350 layer1->layer1_crc = crc32(layer1, sizeof(*layer1));
354 blockmap = &root_vol->ondisk->vol0_blockmap[HAMMER_ZONE_FREEMAP_INDEX];
355 blockmap->phys_offset = layer1_offset;
356 blockmap->alloc_offset = HAMMER_ENCODE_RAW_BUFFER(255, -1);
357 blockmap->next_offset = HAMMER_ENCODE_RAW_BUFFER(0, 0);
358 blockmap->reserved01 = 0;
359 blockmap->entry_crc = crc32(blockmap, sizeof(*blockmap));
360 root_vol->cache.modified = 1;
364 * Load the volume's remaining free space into the freemap. If this is
365 * the root volume, initialize the freemap owner for the layer1 bigblock.
367 * Returns the number of bigblocks available.
370 initialize_freemap(struct volume_info *vol)
372 struct volume_info *root_vol;
373 struct buffer_info *buffer1 = NULL;
374 struct buffer_info *buffer2 = NULL;
375 struct hammer_blockmap_layer1 *layer1;
376 struct hammer_blockmap_layer2 *layer2;
377 hammer_off_t layer1_base;
378 hammer_off_t layer1_offset;
379 hammer_off_t layer2_offset;
380 hammer_off_t phys_offset;
381 hammer_off_t aligned_vol_free_end;
384 root_vol = get_volume(RootVolNo);
385 aligned_vol_free_end = (vol->vol_free_end + HAMMER_BLOCKMAP_LAYER2_MASK)
386 & ~HAMMER_BLOCKMAP_LAYER2_MASK;
388 printf("initialize freemap volume %d\n", vol->vol_no);
391 * Initialize the freemap. Loop through all buffers. Fix-up the
392 * ones which have already been allocated (should only be self
393 * bootstrap large-blocks).
395 layer1_base = root_vol->ondisk->vol0_blockmap[
396 HAMMER_ZONE_FREEMAP_INDEX].phys_offset;
397 for (phys_offset = HAMMER_ENCODE_RAW_BUFFER(vol->vol_no, 0);
398 phys_offset < aligned_vol_free_end;
399 phys_offset += HAMMER_LARGEBLOCK_SIZE) {
400 layer1_offset = layer1_base +
401 HAMMER_BLOCKMAP_LAYER1_OFFSET(phys_offset);
402 layer1 = get_buffer_data(layer1_offset, &buffer1, 0);
404 if (layer1->phys_offset == HAMMER_BLOCKMAP_UNAVAIL) {
405 layer1->phys_offset = alloc_bigblock(root_vol, 0);
406 layer1->blocks_free = 0;
407 buffer1->cache.modified = 1;
409 layer2_offset = layer1->phys_offset +
410 HAMMER_BLOCKMAP_LAYER2_OFFSET(phys_offset);
412 layer2 = get_buffer_data(layer2_offset, &buffer2, 0);
413 if (phys_offset < vol->vol_free_off) {
415 * Fixups XXX - bigblocks already allocated as part
416 * of the freemap bootstrap.
418 layer2->u.owner = HAMMER_ENCODE_FREEMAP(0, 0); /* XXX */
419 } else if (phys_offset < vol->vol_free_end) {
420 ++layer1->blocks_free;
421 buffer1->cache.modified = 1;
422 layer2->u.owner = HAMMER_BLOCKMAP_FREE;
425 layer2->u.owner = HAMMER_BLOCKMAP_UNAVAIL;
427 layer2->entry_crc = crc32(layer2, sizeof(*layer2));
428 buffer2->cache.modified = 1;
433 if (layer1_offset - layer1_base != HAMMER_BLOCKMAP_LAYER1_OFFSET(phys_offset + HAMMER_LARGEBLOCK_SIZE)) {
434 layer1->layer1_crc = crc32(layer1, sizeof(*layer1));
435 buffer1->cache.modified = 1;
440 rel_volume(root_vol);
445 * Allocate big-blocks using our poor-man's volume->vol_free_off and
446 * update the freemap if owner != 0.
449 alloc_bigblock(struct volume_info *volume, hammer_off_t owner)
451 struct buffer_info *buffer = NULL;
452 struct volume_info *root_vol;
453 hammer_off_t result_offset;
454 hammer_off_t layer_offset;
455 struct hammer_blockmap_layer1 *layer1;
456 struct hammer_blockmap_layer2 *layer2;
459 if (volume == NULL) {
460 volume = get_volume(RootVolNo);
465 result_offset = volume->vol_free_off;
466 if (result_offset >= volume->vol_free_end)
467 panic("alloc_bigblock: Ran out of room, filesystem too small");
468 volume->vol_free_off += HAMMER_LARGEBLOCK_SIZE;
474 root_vol = get_volume(RootVolNo);
475 layer_offset = root_vol->ondisk->vol0_blockmap[
476 HAMMER_ZONE_FREEMAP_INDEX].phys_offset;
477 layer_offset += HAMMER_BLOCKMAP_LAYER1_OFFSET(result_offset);
478 layer1 = get_buffer_data(layer_offset, &buffer, 0);
479 assert(layer1->phys_offset != HAMMER_BLOCKMAP_UNAVAIL);
480 --layer1->blocks_free;
481 layer1->layer1_crc = crc32(layer1, sizeof(*layer1));
482 buffer->cache.modified = 1;
483 layer_offset = layer1->phys_offset +
484 HAMMER_BLOCKMAP_LAYER2_OFFSET(result_offset);
485 layer2 = get_buffer_data(layer_offset, &buffer, 0);
486 assert(layer2->u.owner == HAMMER_BLOCKMAP_FREE);
487 layer2->u.owner = owner;
488 layer2->entry_crc = crc32(layer2, sizeof(*layer2));
489 buffer->cache.modified = 1;
492 rel_volume(root_vol);
497 return(result_offset);
502 * Format a new blockmap. Set the owner to the base of the blockmap
503 * (meaning either the blockmap layer1 bigblock, layer2 bigblock, or
507 format_blockmap(hammer_blockmap_t blockmap, hammer_off_t zone_off)
509 blockmap->phys_offset = alloc_bigblock(NULL, zone_off);
510 blockmap->alloc_offset = zone_off;
511 blockmap->next_offset = zone_off;
512 blockmap->entry_crc = crc32(blockmap, sizeof(*blockmap));
517 alloc_blockmap(int zone, int bytes, hammer_off_t *result_offp,
518 struct buffer_info **bufferp)
520 struct buffer_info *buffer;
521 struct volume_info *volume;
522 hammer_blockmap_t rootmap;
523 struct hammer_blockmap_layer1 *layer1;
524 struct hammer_blockmap_layer2 *layer2;
525 hammer_off_t layer1_offset;
526 hammer_off_t layer2_offset;
527 hammer_off_t bigblock_offset;
530 volume = get_volume(RootVolNo);
532 rootmap = &volume->ondisk->vol0_blockmap[zone];
535 * Alignment and buffer-boundary issues
537 bytes = (bytes + 7) & ~7;
538 if ((rootmap->phys_offset ^ (rootmap->phys_offset + bytes - 1)) &
540 volume->cache.modified = 1;
541 rootmap->phys_offset = (rootmap->phys_offset + bytes) &
548 layer1_offset = rootmap->phys_offset +
549 HAMMER_BLOCKMAP_LAYER1_OFFSET(rootmap->alloc_offset);
551 layer1 = get_buffer_data(layer1_offset, bufferp, 0);
553 if ((rootmap->alloc_offset & HAMMER_BLOCKMAP_LAYER2_MASK) == 0) {
554 buffer->cache.modified = 1;
555 bzero(layer1, sizeof(*layer1));
556 layer1->blocks_free = HAMMER_BLOCKMAP_RADIX2;
557 layer1->phys_offset = alloc_bigblock(NULL,
558 rootmap->alloc_offset);
564 layer2_offset = layer1->phys_offset +
565 HAMMER_BLOCKMAP_LAYER2_OFFSET(rootmap->alloc_offset);
567 layer2 = get_buffer_data(layer2_offset, bufferp, 0);
570 if ((rootmap->alloc_offset & HAMMER_LARGEBLOCK_MASK64) == 0) {
571 buffer->cache.modified = 1;
572 bzero(layer2, sizeof(*layer2));
573 layer2->u.phys_offset = alloc_bigblock(NULL,
574 rootmap->alloc_offset);
575 layer2->bytes_free = HAMMER_LARGEBLOCK_SIZE;
578 buffer->cache.modified = 1;
579 volume->cache.modified = 1;
580 layer2->bytes_free -= bytes;
581 *result_offp = rootmap->alloc_offset;
582 rootmap->alloc_offset += bytes;
583 rootmap->next_offset = rootmap->alloc_offset;
585 bigblock_offset = layer2->u.phys_offset +
586 (*result_offp & HAMMER_LARGEBLOCK_MASK);
587 ptr = get_buffer_data(bigblock_offset, bufferp, 0);
589 buffer->cache.modified = 1;
597 * Reserve space from the FIFO. Make sure that bytes does not cross a
600 * Zero out base_bytes and initialize the fifo head and tail. The
601 * data area is not zerod.
605 hammer_alloc_fifo(int32_t base_bytes, int32_t ext_bytes,
606 struct buffer_info **bufp, u_int16_t hdr_type)
608 struct buffer_info *buf;
609 struct volume_info *volume;
610 hammer_fifo_head_t head;
611 hammer_fifo_tail_t tail;
613 int32_t aligned_bytes;
615 aligned_bytes = (base_bytes + ext_bytes + HAMMER_TAIL_ONDISK_SIZE +
616 HAMMER_HEAD_ALIGN_MASK) & ~HAMMER_HEAD_ALIGN_MASK;
618 volume = get_volume(RootVolNo);
619 off = volume->ondisk->vol0_fifo_end;
622 * For now don't deal with transitions across buffer boundaries,
623 * only newfs_hammer uses this function.
625 assert((off & ~HAMMER_BUFMASK64) ==
626 ((off + aligned_bytes) & ~HAMMER_BUFMASK));
628 *bufp = buf = get_buffer(off, 0);
630 buf->cache.modified = 1;
631 volume->cache.modified = 1;
633 head = (void *)((char *)buf->ondisk + ((int32_t)off & HAMMER_BUFMASK));
634 bzero(head, base_bytes);
636 head->hdr_signature = HAMMER_HEAD_SIGNATURE;
637 head->hdr_type = hdr_type;
638 head->hdr_size = aligned_bytes;
639 head->hdr_seq = volume->ondisk->vol0_next_seq++;
641 tail = (void*)((char *)head + aligned_bytes - HAMMER_TAIL_ONDISK_SIZE);
642 tail->tail_signature = HAMMER_TAIL_SIGNATURE;
643 tail->tail_type = hdr_type;
644 tail->tail_size = aligned_bytes;
646 volume->ondisk->vol0_fifo_end += aligned_bytes;
647 volume->cache.modified = 1;
657 * Flush various tracking structures to disk
661 * Flush various tracking structures to disk
664 flush_all_volumes(void)
666 struct volume_info *vol;
668 TAILQ_FOREACH(vol, &VolList, entry)
673 flush_volume(struct volume_info *volume)
675 struct buffer_info *buffer;
677 TAILQ_FOREACH(buffer, &volume->buffer_list, entry)
678 flush_buffer(buffer);
679 writehammerbuf(volume, volume->ondisk, 0);
680 volume->cache.modified = 0;
684 flush_buffer(struct buffer_info *buffer)
686 writehammerbuf(buffer->volume, buffer->ondisk, buffer->buf_disk_offset);
687 buffer->cache.modified = 0;
/*
 * Stamp a FIFO header with HAMMER_HEAD_SIGNATURE and the caller's hdr_type.
 *
 * NOTE(review): only these two stores are visible here; confirm whether the
 * remaining header fields are initialized by statements not shown.
 */
692 * Generic buffer initialization
695 init_fifo_head(hammer_fifo_head_t head, u_int16_t hdr_type)
697 head->hdr_signature = HAMMER_HEAD_SIGNATURE;
698 head->hdr_type = hdr_type;
708 * Core I/O operations
711 readhammerbuf(struct volume_info *vol, void *data, int64_t offset)
715 n = pread(vol->fd, data, HAMMER_BUFSIZE, offset);
716 if (n != HAMMER_BUFSIZE)
717 err(1, "Read volume %d (%s)", vol->vol_no, vol->name);
723 writehammerbuf(struct volume_info *vol, const void *data, int64_t offset)
727 n = pwrite(vol->fd, data, HAMMER_BUFSIZE, offset);
728 if (n != HAMMER_BUFSIZE)
729 err(1, "Write volume %d (%s)", vol->vol_no, vol->name);
/*
 * Print a printf-style message followed by a newline on stderr and
 * terminate the program.
 *
 * NOTE(review): callers such as alloc_bigblock() rely on this not
 * returning; the exit status of 1 is inferred, confirm upstream.
 */
void
panic(const char *ctl, ...)
{
	va_list va;

	va_start(va, ctl);
	vfprintf(stderr, ctl, va);
	va_end(va);
	fprintf(stderr, "\n");
	exit(1);
}