2 * Copyright (c) 2009 The DragonFly Project. All rights reserved.
4 * This code is derived from software contributed to The DragonFly Project
5 * by Matthew Dillon <dillon@backplane.com> and
6 * Michael Neumann <mneumann@ntecs.de>
8 * Redistribution and use in source and binary forms, with or without
9 * modification, are permitted provided that the following conditions
12 * 1. Redistributions of source code must retain the above copyright
13 * notice, this list of conditions and the following disclaimer.
14 * 2. Redistributions in binary form must reproduce the above copyright
15 * notice, this list of conditions and the following disclaimer in
16 * the documentation and/or other materials provided with the
18 * 3. Neither the name of The DragonFly Project nor the names of its
19 * contributors may be used to endorse or promote products derived
20 * from this software without specific, prior written permission.
22 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
23 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
24 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
25 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
26 * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
27 * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING,
28 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
29 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
30 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
31 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
32 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
38 #include <sys/fcntl.h>
39 #include <sys/nlookup.h>
/*
 * Forward declarations for helpers that are defined later in this file
 * (device open/close, volume header formatting, freemap format/free).
 */
43 hammer_setup_device(struct vnode **devvpp, const char *dev_path, int ronly);
46 hammer_close_device(struct vnode **devvpp, int ronly);
49 hammer_format_volume_header(struct hammer_mount *hmp, struct vnode *devvp,
50 const char *vol_name, int vol_no, int vol_count,
51 int64_t vol_size, int64_t boot_area_size, int64_t mem_area_size);
54 hammer_format_freemap(hammer_transaction_t trans, hammer_volume_t volume);
57 hammer_free_freemap(hammer_transaction_t trans, hammer_volume_t volume);
/*
 * hammer_ioc_volume_add: add the device named by ioc->device_name to the
 * mounted HAMMER filesystem as a new volume.
 *
 * Steps visible here:
 *  - refuse when the mount is read-only or when no volume slot is left;
 *  - search hmp->rb_vols_root for a volume number that is not in use;
 *  - open the device, write a fresh volume header to it, close it again,
 *    then install it with hammer_install_volume();
 *  - with the sync lock held shared and blkmap_lock held exclusive, store
 *    the new volume count in every volume header, format the new volume's
 *    freemap and add the bigblocks it reports to the root volume's
 *    vol0_stat_bigblocks and vol0_stat_freebigblocks (the latter is also
 *    mirrored into hmp->copy_stat_freebigblocks).
 *
 * NOTE(review): several lines of this function (declarations, return
 * statements, braces) are not present in the text under review, so the
 * exact return values are not documented here.
 */
61 hammer_ioc_volume_add(hammer_transaction_t trans, hammer_inode_t ip,
62 struct hammer_ioc_volume *ioc)
64 struct hammer_mount *hmp = trans->hmp;
65 struct mount *mp = hmp->mp;
66 hammer_volume_t volume;
69 if (mp->mnt_flag & MNT_RDONLY) {
70 kprintf("Cannot add volume to read-only HAMMER filesystem\n");
/*
 * Reject only when every volume slot is already taken.  With
 * nvolumes == HAMMER_MAX_VOLUMES - 1 there is still one unused volume
 * number, which the search below is able to find, so the limit is
 * compared against nvolumes itself rather than nvolumes + 1.
 */
74 if (hmp->nvolumes >= HAMMER_MAX_VOLUMES) {
75 kprintf("Max number of HAMMER volumes exceeded\n");
80 * Find an unused volume number.
83 while (free_vol_no < HAMMER_MAX_VOLUMES &&
84 RB_LOOKUP(hammer_vol_rb_tree, &hmp->rb_vols_root, free_vol_no)) {
87 if (free_vol_no >= HAMMER_MAX_VOLUMES) {
88 kprintf("Max number of HAMMER volumes exceeded\n");
/* Open the device read-write (ronly == 0) so the header can be written. */
92 struct vnode *devvp = NULL;
93 error = hammer_setup_device(&devvp, ioc->device_name, 0);
/* The new volume inherits the root volume's vol_name. */
97 error = hammer_format_volume_header(
100 hmp->rootvol->ondisk->vol_name,
106 hammer_close_device(&devvp, 0);
110 error = hammer_install_volume(hmp, ioc->device_name, NULL);
/* Lock order used here: sync lock (shared) first, then blkmap_lock. */
114 hammer_sync_lock_sh(trans);
115 hammer_lock_ex(&hmp->blkmap_lock);
120 * Set each volumes new value of the vol_count field.
122 for (int vol_no = 0; vol_no < HAMMER_MAX_VOLUMES; ++vol_no) {
123 volume = hammer_get_volume(hmp, vol_no, &error);
124 if (volume == NULL && error == ENOENT) {
126 * Skip unused volume numbers
/* Any failure other than ENOENT is treated as fatal by this assertion. */
131 KKASSERT(volume != NULL && error == 0);
132 hammer_modify_volume_field(trans, volume, vol_count);
133 volume->ondisk->vol_count = hmp->nvolumes;
134 hammer_modify_volume_done(volume);
137 * Only changes to the header of the root volume
138 * are automatically flushed to disk. For all
139 * other volumes that we modify we do it here.
141 if (volume != trans->rootvol && volume->io.modified) {
142 hammer_crc_set_volume(volume->ondisk);
143 hammer_io_flush(&volume->io, 0);
146 hammer_rel_volume(volume, 0);
/* Re-acquire the freshly installed volume to build its freemap. */
149 volume = hammer_get_volume(hmp, free_vol_no, &error);
150 KKASSERT(volume != NULL && error == 0);
152 uint64_t total_free_bigblocks =
153 hammer_format_freemap(trans, volume);
156 * Increase the total number of bigblocks
158 hammer_modify_volume_field(trans, trans->rootvol,
159 vol0_stat_bigblocks);
160 trans->rootvol->ondisk->vol0_stat_bigblocks += total_free_bigblocks;
161 hammer_modify_volume_done(trans->rootvol);
164 * Increase the number of free bigblocks
165 * (including the copy in hmp)
167 hammer_modify_volume_field(trans, trans->rootvol,
168 vol0_stat_freebigblocks);
169 trans->rootvol->ondisk->vol0_stat_freebigblocks += total_free_bigblocks;
170 hmp->copy_stat_freebigblocks =
171 trans->rootvol->ondisk->vol0_stat_freebigblocks;
172 hammer_modify_volume_done(trans->rootvol);
174 hammer_rel_volume(volume, 0);
/* Locks are dropped in the reverse order of acquisition. */
176 hammer_unlock(&hmp->blkmap_lock);
177 hammer_sync_unlock(trans);
181 kprintf("An error occurred: %d\n", error);
/*
 * hammer_ioc_volume_del: remove the volume whose vol_name equals
 * ioc->device_name from the mounted HAMMER filesystem.
 *
 * Steps visible here:
 *  - refuse when the mount is read-only;
 *  - scan volume numbers 0 .. HAMMER_MAX_VOLUMES-1 for a matching vol_name;
 *  - refuse to remove the root volume;
 *  - record the volume number in hmp->volume_to_remove and run
 *    hammer_ioc_reblock() over the whole key range with free_level 0;
 *  - call hammer_flusher_sync() while hammer_flusher_haswork() is true;
 *  - with the sync lock held shared and blkmap_lock held exclusive, free
 *    the volume's freemap entries, unload its buffers and the volume, and
 *    store the new volume count in every remaining volume header.
 *
 * NOTE(review): several lines of this function (declarations, return
 * statements, loop counters, braces) are not present in the text under
 * review; return values and the retry limits of the flush loop are
 * therefore not documented here.
 */
190 hammer_ioc_volume_del(hammer_transaction_t trans, hammer_inode_t ip,
191 struct hammer_ioc_volume *ioc)
193 struct hammer_mount *hmp = trans->hmp;
194 struct mount *mp = hmp->mp;
195 hammer_volume_t volume;
198 if (mp->mnt_flag & MNT_RDONLY) {
199 kprintf("Cannot del volume from read-only HAMMER filesystem\n");
207 * find volume by volname
209 for (int vol_no = 0; vol_no < HAMMER_MAX_VOLUMES; ++vol_no) {
210 volume = hammer_get_volume(hmp, vol_no, &error);
211 if (volume == NULL && error == ENOENT) {
213 * Skip unused volume numbers
218 KKASSERT(volume != NULL && error == 0);
219 if (strcmp(volume->vol_name, ioc->device_name) == 0) {
/* Non-matching volumes are released again before the next iteration. */
222 hammer_rel_volume(volume, 0);
226 if (volume == NULL) {
227 kprintf("Couldn't find volume\n");
231 if (volume == trans->rootvol) {
232 kprintf("Cannot remove root-volume\n");
233 hammer_rel_volume(volume, 0);
/*
 * Publish the number of the volume being removed; free_callback()
 * asserts that the layer1 entries it visits belong to this volume.
 */
241 hmp->volume_to_remove = volume->vol_no;
/* Reblock request covering the minimum..maximum localization/obj_id range. */
243 struct hammer_ioc_reblock reblock;
244 bzero(&reblock, sizeof(reblock));
246 reblock.key_beg.localization = HAMMER_MIN_LOCALIZATION;
247 reblock.key_beg.obj_id = HAMMER_MIN_OBJID;
248 reblock.key_end.localization = HAMMER_MAX_LOCALIZATION;
249 reblock.key_end.obj_id = HAMMER_MAX_OBJID;
250 reblock.head.flags = HAMMER_IOC_DO_FLAGS;
251 reblock.free_level = 0;
253 error = hammer_ioc_reblock(trans, ip, &reblock);
/* The reblock reports an interruption through a flag in its header. */
255 if (reblock.head.flags & HAMMER_IOC_HEAD_INTR) {
260 if (error == EINTR) {
261 kprintf("reblock was interrupted\n");
263 kprintf("reblock failed: %d\n", error);
/* Failure path: clear the removal marker and drop the volume reference. */
265 hmp->volume_to_remove = -1;
266 hammer_rel_volume(volume, 0);
/* Keep syncing for as long as the flusher still reports pending work. */
274 while (hammer_flusher_haswork(hmp)) {
275 hammer_flusher_sync(hmp);
279 kprintf("HAMMER: flushing.");
/* Sleep for hz ticks between attempts. */
282 tsleep(&count, 0, "hmrufl", hz);
285 kprintf("giving up");
/* Lock order used here: sync lock (shared) first, then blkmap_lock. */
291 hammer_sync_lock_sh(trans);
292 hammer_lock_ex(&hmp->blkmap_lock);
294 error = hammer_free_freemap(trans, volume);
296 kprintf("Failed to free volume\n");
297 hmp->volume_to_remove = -1;
298 hammer_rel_volume(volume, 0);
299 hammer_unlock(&hmp->blkmap_lock);
300 hammer_sync_unlock(trans);
304 hmp->volume_to_remove = -1;
306 hammer_rel_volume(volume, 0);
/*
 * Run hammer_unload_buffer over hmp->rb_bufs_root with the volume as
 * argument, then unload the volume itself.
 */
311 RB_SCAN(hammer_buf_rb_tree, &hmp->rb_bufs_root, NULL,
312 hammer_unload_buffer, volume);
314 error = hammer_unload_volume(volume, NULL);
316 kprintf("Failed to unload volume\n");
317 hammer_unlock(&hmp->blkmap_lock);
318 hammer_sync_unlock(trans);
326 * Set each volume's new value of the vol_count field.
328 for (int vol_no = 0; vol_no < HAMMER_MAX_VOLUMES; ++vol_no) {
329 volume = hammer_get_volume(hmp, vol_no, &error);
330 if (volume == NULL && error == ENOENT) {
332 * Skip unused volume numbers
/* Any failure other than ENOENT is treated as fatal by this assertion. */
338 KKASSERT(volume != NULL && error == 0);
339 hammer_modify_volume_field(trans, volume, vol_count);
340 volume->ondisk->vol_count = hmp->nvolumes;
341 hammer_modify_volume_done(volume);
344 * Only changes to the header of the root volume
345 * are automatically flushed to disk. For all
346 * other volumes that we modify we do it here.
348 if (volume != trans->rootvol && volume->io.modified) {
349 hammer_crc_set_volume(volume->ondisk);
350 hammer_io_flush(&volume->io, 0);
353 hammer_rel_volume(volume, 0);
/* Locks are dropped in the reverse order of acquisition. */
356 hammer_unlock(&hmp->blkmap_lock);
357 hammer_sync_unlock(trans);
/*
 * hammer_iterate_l1l2_entries: walk the freemap entries that cover one
 * volume and pass each of them to `callback`.
 *
 * The volume's address space is visited in steps of HAMMER_BLOCKMAP_LAYER2.
 * Within a step, the layer2 entry of every HAMMER_LARGEBLOCK_SIZE block is
 * read with hammer_bread() and handed over as
 *	callback(trans, &buffer, NULL, layer2, 0, zone, ...)
 * and the layer1 entry for the step is read and handed over as
 *	callback(trans, &buffer, layer1, NULL, phys_off, 0, ...)
 * so a callback receives exactly one of layer1/layer2 as non-NULL.
 *
 * `zone` is HAMMER_ZONE_FREEMAP_INDEX for the first block of a step,
 * HAMMER_ZONE_UNAVAIL_INDEX for blocks beyond the usable end of the volume;
 * the value used for the remaining (usable) blocks is set on a line that is
 * not present in the text under review.
 *
 * NOTE(review): the error checks after hammer_bread()/callback() and the
 * return statement are likewise not visible here.
 */
364 * Iterate over all usable L1 entries of the volume and
365 * the corresponding L2 entries.
368 hammer_iterate_l1l2_entries(hammer_transaction_t trans, hammer_volume_t volume,
369 int (*callback)(hammer_transaction_t, hammer_buffer_t *,
370 struct hammer_blockmap_layer1*, struct hammer_blockmap_layer2 *,
371 hammer_off_t, int, void *),
374 struct hammer_mount *hmp = trans->hmp;
375 hammer_blockmap_t freemap = &hmp->blockmap[HAMMER_ZONE_FREEMAP_INDEX];
/* One buffer reference is reused for every hammer_bread() call below. */
376 hammer_buffer_t buffer = NULL;
379 hammer_off_t phys_off;
380 hammer_off_t block_off;
381 hammer_off_t layer1_off;
382 hammer_off_t layer2_off;
383 hammer_off_t aligned_buf_end_off;
384 struct hammer_blockmap_layer1 *layer1;
385 struct hammer_blockmap_layer2 *layer2;
388 * Calculate the usable size of the volume, which
389 * must be aligned at a bigblock (8 MB) boundary.
/*
 * (vol_buf_end - vol_buf_beg) is rounded down by masking with
 * ~HAMMER_LARGEBLOCK_MASK64 and encoded as a raw-buffer offset of this
 * volume, giving the exclusive end of the usable range.
 */
391 aligned_buf_end_off = (HAMMER_ENCODE_RAW_BUFFER(volume->ondisk->vol_no,
392 (volume->ondisk->vol_buf_end - volume->ondisk->vol_buf_beg)
393 & ~HAMMER_LARGEBLOCK_MASK64));
396 * Iterate the volume's address space in chunks of 4 TB, where each
397 * chunk consists of at least one physically available 8 MB bigblock.
399 * For each chunk we need one L1 entry and one L2 bigblock.
400 * We use the first bigblock of each chunk as L2 block.
402 for (phys_off = HAMMER_ENCODE_RAW_BUFFER(volume->ondisk->vol_no, 0);
403 phys_off < aligned_buf_end_off;
404 phys_off += HAMMER_BLOCKMAP_LAYER2) {
/* Inner walk: one layer2 entry per bigblock of the current chunk. */
406 block_off < HAMMER_BLOCKMAP_LAYER2;
407 block_off += HAMMER_LARGEBLOCK_SIZE) {
/* The layer2 entries are addressed relative to the chunk start. */
408 layer2_off = phys_off +
409 HAMMER_BLOCKMAP_LAYER2_OFFSET(block_off);
410 layer2 = hammer_bread(hmp, layer2_off, &error,
416 if (block_off == 0) {
418 * The first entry represents the L2 bigblock
421 zone = HAMMER_ZONE_FREEMAP_INDEX;
422 } else if (phys_off + block_off < aligned_buf_end_off) {
429 * Bigblock outside of physically available
432 zone = HAMMER_ZONE_UNAVAIL_INDEX;
435 error = callback(trans, &buffer, NULL, layer2, 0, zone,
/* The layer1 entry lives in the freemap, indexed by the chunk offset. */
441 layer1_off = freemap->phys_offset +
442 HAMMER_BLOCKMAP_LAYER1_OFFSET(phys_off);
443 layer1 = hammer_bread(hmp, layer1_off, &error, &buffer);
447 error = callback(trans, &buffer, layer1, NULL, phys_off, 0,
/* Drop the buffer reference that the hammer_bread() calls left behind. */
455 hammer_rel_buffer(buffer, 0);
/*
 * Counters shared between hammer_format_freemap() and format_callback()
 * while a new volume's freemap is being formatted.
 */
462 struct format_bigblock_stat {
/* Sum of free_bigblocks over all layer1 entries processed so far. */
463 uint64_t total_free_bigblocks;
/*
 * Free bigblocks counted since the last layer1 entry; format_callback()
 * stores it in layer1->blocks_free and then resets it to 0.
 */
464 uint64_t free_bigblocks;
/*
 * format_callback: callback for hammer_iterate_l1l2_entries() that
 * initializes the freemap entries of a volume that is being added.
 *
 * `data` points to a struct format_bigblock_stat.
 *
 * layer1 entry: asserted to be HAMMER_BLOCKMAP_UNAVAIL on entry.  It is
 * cleared, pointed at phys_off, given the number of free bigblocks counted
 * since the previous layer1 entry, and its CRC is recomputed.  That count
 * is then added to stat->total_free_bigblocks and reset.
 *
 * layer2 entry: cleared and tagged with layer2_zone.  The FREEMAP zone (the
 * L2 bigblock itself) and the UNAVAIL zone are marked completely used
 * (append_off = HAMMER_LARGEBLOCK_SIZE, bytes_free = 0); the remaining
 * visible case is marked completely free and counted in
 * stat->free_bigblocks.  The entry CRC is recomputed afterwards.
 *
 * NOTE(review): some lines of this function (remaining parameters, case
 * labels, braces, return statement) are not present in the text under
 * review.
 */
468 format_callback(hammer_transaction_t trans, hammer_buffer_t *bufferp,
469 struct hammer_blockmap_layer1 *layer1,
470 struct hammer_blockmap_layer2 *layer2,
471 hammer_off_t phys_off,
475 struct format_bigblock_stat *stat = (struct format_bigblock_stat*)data;
478 KKASSERT(layer1->phys_offset == HAMMER_BLOCKMAP_UNAVAIL);
480 hammer_modify_buffer(trans, *bufferp, layer1, sizeof(*layer1));
/*
 * Clear the whole entry.  sizeof(layer1) is only the size of the
 * pointer, which would leave the tail of the structure with stale
 * contents before the CRC below is computed over it.
 */
481 bzero(layer1, sizeof(*layer1));
482 layer1->phys_offset = phys_off;
483 layer1->blocks_free = stat->free_bigblocks;
484 layer1->layer1_crc = crc32(layer1, HAMMER_LAYER1_CRCSIZE);
485 hammer_modify_buffer_done(*bufferp);
487 stat->total_free_bigblocks += stat->free_bigblocks;
488 stat->free_bigblocks = 0; /* reset */
490 hammer_modify_buffer(trans, *bufferp, layer2, sizeof(*layer2));
491 bzero(layer2, sizeof(*layer2));
493 layer2->zone = layer2_zone;
495 switch (layer2->zone) {
496 case HAMMER_ZONE_FREEMAP_INDEX:
498 * The first entry represents the L2 bigblock itself.
500 layer2->append_off = HAMMER_LARGEBLOCK_SIZE;
501 layer2->bytes_free = 0;
/* Completely free bigblock: nothing appended, all bytes available. */
508 layer2->append_off = 0;
509 layer2->bytes_free = HAMMER_LARGEBLOCK_SIZE;
510 ++stat->free_bigblocks;
513 case HAMMER_ZONE_UNAVAIL_INDEX:
515 * Bigblock outside of physically available space
517 layer2->append_off = HAMMER_LARGEBLOCK_SIZE;
518 layer2->bytes_free = 0;
524 layer2->entry_crc = crc32(layer2, HAMMER_LAYER2_CRCSIZE);
525 hammer_modify_buffer_done(*bufferp);
/*
 * hammer_format_freemap: initialize the freemap entries of `volume` by
 * running format_callback over them, and return the number of free
 * bigblocks that were accounted for (stat.total_free_bigblocks).
 *
 * A failure of the iteration is not reported to the caller; it trips the
 * KKASSERT below instead.
 */
534 hammer_format_freemap(hammer_transaction_t trans, hammer_volume_t volume)
538 struct format_bigblock_stat stat;
/* Both counters start at zero; format_callback() accumulates into them. */
539 stat.total_free_bigblocks = 0;
540 stat.free_bigblocks = 0;
542 error = hammer_iterate_l1l2_entries(trans, volume, format_callback,
544 KKASSERT(error == 0);
546 return stat.total_free_bigblocks;
/*
 * free_callback: callback for hammer_iterate_l1l2_entries() that is used
 * while a volume is being removed.
 *
 * A non-NULL `data` selects test-only mode (see `testonly`).
 *
 * layer1 entry: asserted to reference the volume recorded in
 * hmp->volume_to_remove; the entry is cleared, set to
 * HAMMER_BLOCKMAP_UNAVAIL and its CRC is recomputed.
 *
 * layer2 entry: dispatches on layer2->zone.  The FREEMAP and UNAVAIL zones
 * share one case, and an entry with append_off == 0 and
 * bytes_free == HAMMER_LARGEBLOCK_SIZE (a completely free bigblock) is
 * tested for separately.
 *
 * NOTE(review): the statements that act on `testonly` and the values
 * returned from each case are on lines that are not present in the text
 * under review; presumably the layer1 modification is skipped in test-only
 * mode - confirm against the full file.
 */
550 free_callback(hammer_transaction_t trans, hammer_buffer_t *bufferp,
551 struct hammer_blockmap_layer1 *layer1,
552 struct hammer_blockmap_layer2 *layer2,
553 hammer_off_t phys_off,
558 * No modifications to ondisk structures
560 int testonly = (data != NULL);
563 KKASSERT((int)HAMMER_VOL_DECODE(layer1->phys_offset) ==
564 trans->hmp->volume_to_remove);
572 hammer_modify_buffer(trans, *bufferp, layer1, sizeof(*layer1));
/*
 * Clear the whole entry.  sizeof(layer1) is only the size of the
 * pointer, which would leave the tail of the structure (for example a
 * stale free-block count) in place before the CRC below is computed.
 */
573 bzero(layer1, sizeof(*layer1));
574 layer1->phys_offset = HAMMER_BLOCKMAP_UNAVAIL;
575 layer1->layer1_crc = crc32(layer1, HAMMER_LAYER1_CRCSIZE);
576 hammer_modify_buffer_done(*bufferp);
580 switch (layer2->zone) {
581 case HAMMER_ZONE_FREEMAP_INDEX:
582 case HAMMER_ZONE_UNAVAIL_INDEX:
/* A bigblock with nothing appended and every byte free is unused. */
585 if (layer2->append_off == 0 &&
586 layer2->bytes_free == HAMMER_LARGEBLOCK_SIZE) {
/*
 * hammer_free_freemap: run free_callback over the freemap entries of
 * `volume` in two passes.
 *
 * The second pass passes NULL as callback data, which free_callback treats
 * as "not test-only".  NOTE(review): the data argument of the first pass is
 * on a line that is not present in the text under review; presumably it is
 * non-NULL so that the first pass only tests whether the volume can be
 * freed - confirm against the full file.
 */
602 hammer_free_freemap(hammer_transaction_t trans, hammer_volume_t volume)
605 error = hammer_iterate_l1l2_entries(trans, volume, free_callback,
610 error = hammer_iterate_l1l2_entries(trans, volume, free_callback, NULL);
614 /************************************************************************
616 ************************************************************************
/*
 * hammer_setup_device: obtain and open the device vnode for dev_path.
 *
 * devvpp   - in/out; when *devvpp is NULL the path is looked up and the
 *            resulting vnode is stored here, otherwise the lookup is not
 *            performed.
 * dev_path - kernel-space path of the device (UIO_SYSSPACE).
 * ronly    - non-zero opens the device FREAD, zero opens it FREAD|FWRITE.
 *
 * After the lookup the vnode is checked with vn_isdisk(), vfs_mountedon()
 * and vcount(); it is then locked exclusively, vinvalbuf(V_SAVE) is run on
 * it and it is opened with VOP_OPEN().
 *
 * NOTE(review): the cleanup performed when `error && *devvpp` holds, the
 * unlock and the return statement are on lines that are not present in the
 * text under review.
 */
620 hammer_setup_device(struct vnode **devvpp, const char *dev_path, int ronly)
623 struct nlookupdata nd;
626 * Get the device vnode
628 if (*devvpp == NULL) {
/* Each lookup step is chained on the result of the previous one. */
629 error = nlookup_init(&nd, dev_path, UIO_SYSSPACE, NLC_FOLLOW);
631 error = nlookup(&nd);
633 error = cache_vref(&nd.nl_nch, nd.nl_cred, devvpp);
/* vn_isdisk() also reports its own error code through &error. */
640 if (vn_isdisk(*devvpp, &error)) {
641 error = vfs_mountedon(*devvpp);
644 if (error == 0 && vcount(*devvpp) > 0)
647 vn_lock(*devvpp, LK_EXCLUSIVE | LK_RETRY);
648 error = vinvalbuf(*devvpp, V_SAVE, 0, 0);
650 error = VOP_OPEN(*devvpp,
651 (ronly ? FREAD : FREAD|FWRITE),
656 if (error && *devvpp) {
/*
 * hammer_close_device: close a device vnode that was opened with the same
 * `ronly` setting (FREAD when ronly is non-zero, FREAD|FWRITE otherwise)
 * and run vinvalbuf() on it; V_SAVE is passed only for a writable open.
 *
 * NOTE(review): the lines between and after the two calls are not present
 * in the text under review, so what else is done with *devvpp is not
 * documented here.
 */
664 hammer_close_device(struct vnode **devvpp, int ronly)
666 VOP_CLOSE(*devvpp, (ronly ? FREAD : FREAD|FWRITE));
668 vinvalbuf(*devvpp, ronly ? 0 : V_SAVE, 0, 0);
675 hammer_format_volume_header(struct hammer_mount *hmp, struct vnode *devvp,
676 const char *vol_name, int vol_no, int vol_count,
677 int64_t vol_size, int64_t boot_area_size, int64_t mem_area_size)
679 struct buf *bp = NULL;
680 struct hammer_volume_ondisk *ondisk;
684 * Extract the volume number from the volume header and do various
687 KKASSERT(HAMMER_BUFSIZE >= sizeof(struct hammer_volume_ondisk));
688 error = bread(devvp, 0LL, HAMMER_BUFSIZE, &bp);
689 if (error || bp->b_bcount < sizeof(struct hammer_volume_ondisk))
692 ondisk = (struct hammer_volume_ondisk*) bp->b_data;
695 * Note that we do NOT allow to use a device that contains
696 * a valid HAMMER signature. It has to be cleaned up with dd
699 if (ondisk->vol_signature == HAMMER_FSBUF_VOLUME) {
700 kprintf("hammer_volume_add: Formatting of valid HAMMER volume "
701 "%s denied. Erase with dd!\n", vol_name);
706 bzero(ondisk, sizeof(struct hammer_volume_ondisk));
707 ksnprintf(ondisk->vol_name, sizeof(ondisk->vol_name), "%s", vol_name);
708 ondisk->vol_fstype = hmp->rootvol->ondisk->vol_fstype;
709 ondisk->vol_signature = HAMMER_FSBUF_VOLUME;
710 ondisk->vol_fsid = hmp->fsid;
711 ondisk->vol_rootvol = hmp->rootvol->vol_no;
712 ondisk->vol_no = vol_no;
713 ondisk->vol_count = vol_count;
714 ondisk->vol_version = hmp->version;
717 * Reserve space for (future) header junk, setup our poor-man's
718 * bigblock allocator.
720 int64_t vol_alloc = HAMMER_BUFSIZE * 16;
722 ondisk->vol_bot_beg = vol_alloc;
723 vol_alloc += boot_area_size;
724 ondisk->vol_mem_beg = vol_alloc;
725 vol_alloc += mem_area_size;
728 * The remaining area is the zone 2 buffer allocation area. These
731 ondisk->vol_buf_beg = vol_alloc;
732 ondisk->vol_buf_end = vol_size & ~(int64_t)HAMMER_BUFMASK;
734 if (ondisk->vol_buf_end < ondisk->vol_buf_beg) {
735 kprintf("volume %d %s is too small to hold the volume header",
736 ondisk->vol_no, ondisk->vol_name);
741 ondisk->vol_nblocks = (ondisk->vol_buf_end - ondisk->vol_buf_beg) /
743 ondisk->vol_blocksize = HAMMER_BUFSIZE;
746 * Write volume header to disk