2 * Copyright (c) 2009 The DragonFly Project. All rights reserved.
4 * This code is derived from software contributed to The DragonFly Project
5 * by Matthew Dillon <dillon@backplane.com> and
6 * Michael Neumann <mneumann@ntecs.de>
8 * Redistribution and use in source and binary forms, with or without
9 * modification, are permitted provided that the following conditions
12 * 1. Redistributions of source code must retain the above copyright
13 * notice, this list of conditions and the following disclaimer.
14 * 2. Redistributions in binary form must reproduce the above copyright
15 * notice, this list of conditions and the following disclaimer in
16 * the documentation and/or other materials provided with the
18 * 3. Neither the name of The DragonFly Project nor the names of its
19 * contributors may be used to endorse or promote products derived
20 * from this software without specific, prior written permission.
22 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
23 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
24 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
25 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
26 * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
27 * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING,
28 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
29 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
30 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
31 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
32 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
38 #include <sys/fcntl.h>
39 #include <sys/nlookup.h>
/*
 * Forward declarations for the local helpers used by the volume-add and
 * volume-del ioctl paths below.
 *
 * NOTE(review): the embedded original line numbers skip here (43, 46, 49,
 * ...), so the "static int" return-type lines of these prototypes are
 * missing from this extract.
 */
/* Look up, lock and open the device vnode backing a volume. */
hammer_setup_device(struct vnode **devvpp, const char *dev_path, int ronly);
/* Close and flush a device vnode previously opened by hammer_setup_device(). */
hammer_close_device(struct vnode **devvpp, int ronly);
/* Write a fresh HAMMER volume header onto a newly added device. */
hammer_format_volume_header(struct hammer_mount *hmp, struct vnode *devvp,
const char *vol_name, int vol_no, int vol_count,
int64_t vol_size, int64_t boot_area_size, int64_t mem_area_size);
/* Invalidate the on-disk volume header of a removed device. */
hammer_clear_volume_header(struct vnode *devvp);
/* Format the freemap of a new volume; returns count of free bigblocks. */
hammer_format_freemap(hammer_transaction_t trans, hammer_volume_t volume);
/* Release all freemap entries belonging to a volume being removed. */
hammer_free_freemap(hammer_transaction_t trans, hammer_volume_t volume);
/*
 * hammer_ioc_volume_add() - ioctl backend that adds a new device as an
 * additional volume of a mounted HAMMER filesystem.
 *
 * Rough sequence (as visible in this extract): reject read-only mounts and
 * a full volume table, take hmp->volume_lock, pick a free volume number,
 * format the new device's header, install the volume, update vol_count in
 * every volume header, format the new volume's freemap and credit the
 * bigblock counters in the root volume.
 *
 * NOTE(review): the embedded original line numbers skip throughout this
 * function (e.g. 73 -> 77, 106 -> 109), so the error returns, closing
 * braces and several statements are missing from this view; do not treat
 * the block as complete as shown.
 */
hammer_ioc_volume_add(hammer_transaction_t trans, hammer_inode_t ip,
struct hammer_ioc_volume *ioc)
struct hammer_mount *hmp = trans->hmp;
struct mount *mp = hmp->mp;
hammer_volume_t volume;
/* Adding a volume mutates on-disk metadata; refuse on read-only mounts. */
if (mp->mnt_flag & MNT_RDONLY) {
kprintf("Cannot add volume to read-only HAMMER filesystem\n");
if (hmp->nvolumes + 1 >= HAMMER_MAX_VOLUMES) {
kprintf("Max number of HAMMER volumes exceeded\n");
/* Only one volume add/del may run at a time; try-lock, do not block. */
if (hammer_lock_ex_try(&hmp->volume_lock) != 0) {
kprintf("Another volume operation is in progress!\n");
* Find an unused volume number.
while (free_vol_no < HAMMER_MAX_VOLUMES &&
RB_LOOKUP(hammer_vol_rb_tree, &hmp->rb_vols_root, free_vol_no)) {
if (free_vol_no >= HAMMER_MAX_VOLUMES) {
kprintf("Max number of HAMMER volumes exceeded\n");
hammer_unlock(&hmp->volume_lock);
/* Open the new device and stamp a volume header onto it. */
struct vnode *devvp = NULL;
error = hammer_setup_device(&devvp, ioc->device_name, 0);
error = hammer_format_volume_header(
hmp->rootvol->ondisk->vol_name,
hammer_close_device(&devvp, 0);
/* Bring the freshly formatted device in as a live volume. */
error = hammer_install_volume(hmp, ioc->device_name, NULL);
hammer_sync_lock_sh(trans);
hammer_lock_ex(&hmp->blkmap_lock);
* Set each volumes new value of the vol_count field.
for (int vol_no = 0; vol_no < HAMMER_MAX_VOLUMES; ++vol_no) {
volume = hammer_get_volume(hmp, vol_no, &error);
if (volume == NULL && error == ENOENT) {
* Skip unused volume numbers
KKASSERT(volume != NULL && error == 0);
hammer_modify_volume_field(trans, volume, vol_count);
volume->ondisk->vol_count = hmp->nvolumes;
hammer_modify_volume_done(volume);
* Only changes to the header of the root volume
* are automatically flushed to disk. For all
* other volumes that we modify we do it here.
if (volume != trans->rootvol && volume->io.modified) {
hammer_crc_set_volume(volume->ondisk);
hammer_io_flush(&volume->io, 0);
hammer_rel_volume(volume, 0);
/* Format the new volume's freemap; returns its free-bigblock count. */
volume = hammer_get_volume(hmp, free_vol_no, &error);
KKASSERT(volume != NULL && error == 0);
uint64_t total_free_bigblocks =
hammer_format_freemap(trans, volume);
* Increase the total number of bigblocks
hammer_modify_volume_field(trans, trans->rootvol,
vol0_stat_bigblocks);
trans->rootvol->ondisk->vol0_stat_bigblocks += total_free_bigblocks;
hammer_modify_volume_done(trans->rootvol);
* Increase the number of free bigblocks
* (including the copy in hmp)
hammer_modify_volume_field(trans, trans->rootvol,
vol0_stat_freebigblocks);
trans->rootvol->ondisk->vol0_stat_freebigblocks += total_free_bigblocks;
hmp->copy_stat_freebigblocks =
trans->rootvol->ondisk->vol0_stat_freebigblocks;
hammer_modify_volume_done(trans->rootvol);
hammer_rel_volume(volume, 0);
hammer_unlock(&hmp->blkmap_lock);
hammer_sync_unlock(trans);
KKASSERT(error == 0);
hammer_unlock(&hmp->volume_lock);
/* Error path (label missing from this extract): report and fall through. */
kprintf("An error occurred: %d\n", error);
/*
 * hammer_ioc_volume_del() - ioctl backend that removes a volume from a
 * mounted HAMMER filesystem.
 *
 * Rough sequence (as visible in this extract): locate the volume by name,
 * refuse to remove the root volume, reblock to migrate all data off the
 * victim volume, drain the flusher, free the victim's freemap entries,
 * unload the volume, renumber vol_count in the remaining volume headers
 * and finally erase the removed device's on-disk header.
 *
 * NOTE(review): the embedded original line numbers skip throughout this
 * function (e.g. 210 -> 214, 249 -> 258), so error returns, closing
 * braces and several statements are missing from this view; do not treat
 * the block as complete as shown.
 */
hammer_ioc_volume_del(hammer_transaction_t trans, hammer_inode_t ip,
struct hammer_ioc_volume *ioc)
struct hammer_mount *hmp = trans->hmp;
struct mount *mp = hmp->mp;
hammer_volume_t volume;
if (mp->mnt_flag & MNT_RDONLY) {
kprintf("Cannot del volume from read-only HAMMER filesystem\n");
/* Serialize against concurrent volume add/del operations. */
if (hammer_lock_ex_try(&hmp->volume_lock) != 0) {
kprintf("Another volume operation is in progress!\n");
* find volume by volname
for (int vol_no = 0; vol_no < HAMMER_MAX_VOLUMES; ++vol_no) {
volume = hammer_get_volume(hmp, vol_no, &error);
if (volume == NULL && error == ENOENT) {
* Skip unused volume numbers
KKASSERT(volume != NULL && error == 0);
if (strcmp(volume->vol_name, ioc->device_name) == 0) {
hammer_rel_volume(volume, 0);
if (volume == NULL) {
kprintf("Couldn't find volume\n");
if (volume == trans->rootvol) {
kprintf("Cannot remove root-volume\n");
hammer_rel_volume(volume, 0);
/* Mark the victim so the reblocker steers new allocations away. */
hmp->volume_to_remove = volume->vol_no;
/* Reblock the whole B-Tree key range to migrate data off the volume. */
struct hammer_ioc_reblock reblock;
bzero(&reblock, sizeof(reblock));
reblock.key_beg.localization = HAMMER_MIN_LOCALIZATION;
reblock.key_beg.obj_id = HAMMER_MIN_OBJID;
reblock.key_end.localization = HAMMER_MAX_LOCALIZATION;
reblock.key_end.obj_id = HAMMER_MAX_OBJID;
reblock.head.flags = HAMMER_IOC_DO_FLAGS;
reblock.free_level = 0;
error = hammer_ioc_reblock(trans, ip, &reblock);
if (reblock.head.flags & HAMMER_IOC_HEAD_INTR) {
if (error == EINTR) {
kprintf("reblock was interrupted\n");
kprintf("reblock failed: %d\n", error);
/* Reblock failed/interrupted: undo the removal mark and bail. */
hmp->volume_to_remove = -1;
hammer_rel_volume(volume, 0);
/* Drain the flusher so no dirty data still targets the victim volume. */
while (hammer_flusher_haswork(hmp)) {
hammer_flusher_sync(hmp);
kprintf("HAMMER: flushing.");
tsleep(&count, 0, "hmrufl", hz);
kprintf("giving up");
hammer_sync_lock_sh(trans);
hammer_lock_ex(&hmp->blkmap_lock);
/* Fails with the volume still referenced if any bigblock is in use. */
error = hammer_free_freemap(trans, volume);
kprintf("Failed to free volume. Volume not empty!\n");
hmp->volume_to_remove = -1;
hammer_rel_volume(volume, 0);
hammer_unlock(&hmp->blkmap_lock);
hammer_sync_unlock(trans);
hmp->volume_to_remove = -1;
hammer_rel_volume(volume, 0);
/* Throw away any cached buffers that belong to the victim volume. */
RB_SCAN(hammer_buf_rb_tree, &hmp->rb_bufs_root, NULL,
hammer_unload_buffer, volume);
error = hammer_unload_volume(volume, NULL);
kprintf("Failed to unload volume\n");
hammer_unlock(&hmp->blkmap_lock);
hammer_sync_unlock(trans);
* Set each volume's new value of the vol_count field.
for (int vol_no = 0; vol_no < HAMMER_MAX_VOLUMES; ++vol_no) {
volume = hammer_get_volume(hmp, vol_no, &error);
if (volume == NULL && error == ENOENT) {
* Skip unused volume numbers
KKASSERT(volume != NULL && error == 0);
hammer_modify_volume_field(trans, volume, vol_count);
volume->ondisk->vol_count = hmp->nvolumes;
hammer_modify_volume_done(volume);
* Only changes to the header of the root volume
* are automatically flushed to disk. For all
* other volumes that we modify we do it here.
if (volume != trans->rootvol && volume->io.modified) {
hammer_crc_set_volume(volume->ondisk);
hammer_io_flush(&volume->io, 0);
hammer_rel_volume(volume, 0);
hammer_unlock(&hmp->blkmap_lock);
hammer_sync_unlock(trans);
* Erase the volume header of the removed device.
* This is to not accidentally mount the volume again.
struct vnode *devvp = NULL;
error = hammer_setup_device(&devvp, ioc->device_name, 0);
kprintf("Failed to open device: %s\n", ioc->device_name);
error = hammer_clear_volume_header(devvp);
kprintf("Failed to clear volume header of device: %s\n",
hammer_close_device(&devvp, 0);
KKASSERT(error == 0);
hammer_unlock(&hmp->volume_lock);
/*
 * Iterate over all usable L1 entries of the volume and
 * the corresponding L2 entries.
 *
 * For every freemap layer2 entry in the volume's address space, and for
 * every covering layer1 entry, invoke 'callback' with either the layer1
 * pointer or the layer2 pointer set (the other NULL) plus the physical
 * and block offsets and the opaque 'data' cookie.
 *
 * NOTE(review): embedded original line numbers skip here (444 -> 446),
 * so the inner for-loop initializer ("for (block_off = 0;") and the
 * error-checking/return lines are missing from this extract.
 */
hammer_iterate_l1l2_entries(hammer_transaction_t trans, hammer_volume_t volume,
int (*callback)(hammer_transaction_t, hammer_volume_t, hammer_buffer_t*,
struct hammer_blockmap_layer1*, struct hammer_blockmap_layer2*,
hammer_off_t, hammer_off_t, void*),
struct hammer_mount *hmp = trans->hmp;
hammer_blockmap_t freemap = &hmp->blockmap[HAMMER_ZONE_FREEMAP_INDEX];
hammer_buffer_t buffer = NULL;
hammer_off_t phys_off;
hammer_off_t block_off;
hammer_off_t layer1_off;
hammer_off_t layer2_off;
hammer_off_t aligned_buf_end_off;
struct hammer_blockmap_layer1 *layer1;
struct hammer_blockmap_layer2 *layer2;
* Calculate the usable size of the volume, which
* must be aligned at a bigblock (8 MB) boundary.
aligned_buf_end_off = (HAMMER_ENCODE_RAW_BUFFER(volume->ondisk->vol_no,
(volume->ondisk->vol_buf_end - volume->ondisk->vol_buf_beg)
& ~HAMMER_LARGEBLOCK_MASK64));
* Iterate the volume's address space in chunks of 4 TB, where each
* chunk consists of at least one physically available 8 MB bigblock.
* For each chunk we need one L1 entry and one L2 bigblock.
* We use the first bigblock of each chunk as L2 block.
for (phys_off = HAMMER_ENCODE_RAW_BUFFER(volume->ondisk->vol_no, 0);
phys_off < aligned_buf_end_off;
phys_off += HAMMER_BLOCKMAP_LAYER2) {
block_off < HAMMER_BLOCKMAP_LAYER2;
block_off += HAMMER_LARGEBLOCK_SIZE) {
layer2_off = phys_off +
HAMMER_BLOCKMAP_LAYER2_OFFSET(block_off);
/* hammer_bread() also tracks the buffer reference in 'buffer'. */
layer2 = hammer_bread(hmp, layer2_off, &error, &buffer);
error = callback(trans, volume, &buffer, NULL,
layer2, phys_off, block_off, data);
/* One layer1 entry covers the whole 4 TB chunk just walked. */
layer1_off = freemap->phys_offset +
HAMMER_BLOCKMAP_LAYER1_OFFSET(phys_off);
layer1 = hammer_bread(hmp, layer1_off, &error, &buffer);
error = callback(trans, volume, &buffer, layer1, NULL,
/* Release the last buffer reference held by the bread loop. */
hammer_rel_buffer(buffer, 0);
/*
 * Accumulator threaded through format_callback() while formatting a new
 * volume's freemap.
 *
 * NOTE(review): the closing "};" (original line 484) is missing from
 * this extract.
 */
struct format_bigblock_stat {
/* Running total of free bigblocks across the whole volume. */
uint64_t total_free_bigblocks;
/* Free bigblocks counted since the last layer1 entry was written. */
uint64_t free_bigblocks;
487 format_callback(hammer_transaction_t trans, hammer_volume_t volume,
488 hammer_buffer_t *bufferp,
489 struct hammer_blockmap_layer1 *layer1,
490 struct hammer_blockmap_layer2 *layer2,
491 hammer_off_t phys_off,
492 hammer_off_t block_off,
495 struct format_bigblock_stat *stat = (struct format_bigblock_stat*)data;
498 * Calculate the usable size of the volume, which must be aligned
499 * at a bigblock (8 MB) boundary.
501 hammer_off_t aligned_buf_end_off;
502 aligned_buf_end_off = (HAMMER_ENCODE_RAW_BUFFER(volume->ondisk->vol_no,
503 (volume->ondisk->vol_buf_end - volume->ondisk->vol_buf_beg)
504 & ~HAMMER_LARGEBLOCK_MASK64));
507 KKASSERT(layer1->phys_offset == HAMMER_BLOCKMAP_UNAVAIL);
509 hammer_modify_buffer(trans, *bufferp, layer1, sizeof(*layer1));
510 bzero(layer1, sizeof(layer1));
511 layer1->phys_offset = phys_off;
512 layer1->blocks_free = stat->free_bigblocks;
513 layer1->layer1_crc = crc32(layer1, HAMMER_LAYER1_CRCSIZE);
514 hammer_modify_buffer_done(*bufferp);
516 stat->total_free_bigblocks += stat->free_bigblocks;
517 stat->free_bigblocks = 0; /* reset */
519 hammer_modify_buffer(trans, *bufferp, layer2, sizeof(*layer2));
520 bzero(layer2, sizeof(*layer2));
522 if (block_off == 0) {
524 * The first entry represents the L2 bigblock itself.
526 layer2->zone = HAMMER_ZONE_FREEMAP_INDEX;
527 layer2->append_off = HAMMER_LARGEBLOCK_SIZE;
528 layer2->bytes_free = 0;
529 } else if (phys_off + block_off < aligned_buf_end_off) {
534 layer2->append_off = 0;
535 layer2->bytes_free = HAMMER_LARGEBLOCK_SIZE;
536 ++stat->free_bigblocks;
539 * Bigblock outside of physically available
542 layer2->zone = HAMMER_ZONE_UNAVAIL_INDEX;
543 layer2->append_off = HAMMER_LARGEBLOCK_SIZE;
544 layer2->bytes_free = 0;
547 layer2->entry_crc = crc32(layer2, HAMMER_LAYER2_CRCSIZE);
548 hammer_modify_buffer_done(*bufferp);
/*
 * hammer_format_freemap() - format the freemap of a newly added volume.
 *
 * Drives format_callback() over every layer1/layer2 freemap entry of the
 * volume and returns the total number of free bigblocks created, which
 * the caller credits to the root volume's bigblock statistics.
 *
 * NOTE(review): embedded original line numbers skip here (565 -> 567),
 * so the continuation of the hammer_iterate_l1l2_entries() call (the
 * "(void*)&stat);" argument line) is missing from this extract.
 */
hammer_format_freemap(hammer_transaction_t trans, hammer_volume_t volume)
struct format_bigblock_stat stat;
stat.total_free_bigblocks = 0;
stat.free_bigblocks = 0;
error = hammer_iterate_l1l2_entries(trans, volume, format_callback,
KKASSERT(error == 0);
return stat.total_free_bigblocks;
573 free_callback(hammer_transaction_t trans, hammer_volume_t volume __unused,
574 hammer_buffer_t *bufferp,
575 struct hammer_blockmap_layer1 *layer1,
576 struct hammer_blockmap_layer2 *layer2,
577 hammer_off_t phys_off,
578 hammer_off_t block_off __unused,
582 * No modifications to ondisk structures
584 int testonly = (data != NULL);
587 if (layer1->phys_offset == HAMMER_BLOCKMAP_UNAVAIL) {
589 * This layer1 entry is already free.
594 KKASSERT((int)HAMMER_VOL_DECODE(layer1->phys_offset) ==
595 trans->hmp->volume_to_remove);
603 hammer_modify_buffer(trans, *bufferp, layer1, sizeof(*layer1));
604 bzero(layer1, sizeof(layer1));
605 layer1->phys_offset = HAMMER_BLOCKMAP_UNAVAIL;
606 layer1->layer1_crc = crc32(layer1, HAMMER_LAYER1_CRCSIZE);
607 hammer_modify_buffer_done(*bufferp);
611 if (layer2->zone == HAMMER_ZONE_FREEMAP_INDEX ||
612 layer2->zone == HAMMER_ZONE_UNAVAIL_INDEX)
615 if (layer2->append_off == 0 &&
616 layer2->bytes_free == HAMMER_LARGEBLOCK_SIZE)
619 * We found a layer2 entry that is not empty!
/*
 * hammer_free_freemap() - release the freemap entries of a volume that is
 * being removed.
 *
 * Two passes over the freemap via free_callback(): a first test-only pass
 * (non-NULL data cookie, per the embedded line numbers the argument line
 * is missing from this extract) that fails if any bigblock is still in
 * use, then the real pass (NULL data) that frees the layer1 entries.
 *
 * NOTE(review): embedded original line numbers skip here (633 -> 638),
 * so the test-pass argument line and the intermediate error check are
 * missing from this view.
 */
hammer_free_freemap(hammer_transaction_t trans, hammer_volume_t volume)
error = hammer_iterate_l1l2_entries(trans, volume, free_callback,
error = hammer_iterate_l1l2_entries(trans, volume, free_callback, NULL);
642 /************************************************************************
644 ************************************************************************
/*
 * hammer_setup_device() - resolve 'dev_path' to a device vnode, verify it
 * is an unmounted, unshared disk, and open it (read-only when 'ronly').
 * On success *devvpp holds the opened vnode; on failure it is released.
 *
 * NOTE(review): embedded original line numbers skip throughout (657 ->
 * 659, 661 -> 668, ...), so the per-step error checks, nlookup_done()
 * cleanup and the return are missing from this extract.
 */
hammer_setup_device(struct vnode **devvpp, const char *dev_path, int ronly)
struct nlookupdata nd;
* Get the device vnode
if (*devvpp == NULL) {
error = nlookup_init(&nd, dev_path, UIO_SYSSPACE, NLC_FOLLOW);
error = nlookup(&nd);
error = cache_vref(&nd.nl_nch, nd.nl_cred, devvpp);
/* Refuse devices that are mounted or opened by anyone else. */
if (vn_isdisk(*devvpp, &error)) {
error = vfs_mountedon(*devvpp);
if (error == 0 && vcount(*devvpp) > 0)
/* Lock, flush stale buffers, then open for read (and write if !ronly). */
vn_lock(*devvpp, LK_EXCLUSIVE | LK_RETRY);
error = vinvalbuf(*devvpp, V_SAVE, 0, 0);
error = VOP_OPEN(*devvpp,
(ronly ? FREAD : FREAD|FWRITE),
if (error && *devvpp) {
/*
 * hammer_close_device() - close a device vnode opened by
 * hammer_setup_device(), invalidating its buffers (saving dirty buffers
 * only when the device was opened for writing).
 *
 * NOTE(review): the vrele()/NULLing of *devvpp and the closing brace
 * (embedded original lines after 696) are missing from this extract.
 */
hammer_close_device(struct vnode **devvpp, int ronly)
VOP_CLOSE(*devvpp, (ronly ? FREAD : FREAD|FWRITE));
vinvalbuf(*devvpp, ronly ? 0 : V_SAVE, 0, 0);
/*
 * hammer_format_volume_header() - write a fresh HAMMER volume header onto
 * the device 'devvp' so it can join the mounted filesystem 'hmp' as
 * volume number 'vol_no' of 'vol_count' volumes.
 *
 * Refuses to overwrite a device that already carries a valid HAMMER
 * signature, copies identifying fields (fsid, fstype, version, root
 * volume number) from the mount, and lays out the boot/memory/buffer
 * areas within 'vol_size'.
 *
 * NOTE(review): embedded original line numbers skip throughout (717 ->
 * 720, 729 -> 734, ...), so error returns, brelse()/bwrite() calls and
 * the closing brace are missing from this extract.
 */
hammer_format_volume_header(struct hammer_mount *hmp, struct vnode *devvp,
const char *vol_name, int vol_no, int vol_count,
int64_t vol_size, int64_t boot_area_size, int64_t mem_area_size)
struct buf *bp = NULL;
struct hammer_volume_ondisk *ondisk;
* Extract the volume number from the volume header and do various
KKASSERT(HAMMER_BUFSIZE >= sizeof(struct hammer_volume_ondisk));
error = bread(devvp, 0LL, HAMMER_BUFSIZE, &bp);
if (error || bp->b_bcount < sizeof(struct hammer_volume_ondisk))
ondisk = (struct hammer_volume_ondisk*) bp->b_data;
* Note that we do NOT allow to use a device that contains
* a valid HAMMER signature. It has to be cleaned up with dd
if (ondisk->vol_signature == HAMMER_FSBUF_VOLUME) {
kprintf("hammer_volume_add: Formatting of valid HAMMER volume "
"%s denied. Erase with dd!\n", vol_name);
/* Start from a zeroed header and fill in the identifying fields. */
bzero(ondisk, sizeof(struct hammer_volume_ondisk));
ksnprintf(ondisk->vol_name, sizeof(ondisk->vol_name), "%s", vol_name);
ondisk->vol_fstype = hmp->rootvol->ondisk->vol_fstype;
ondisk->vol_signature = HAMMER_FSBUF_VOLUME;
ondisk->vol_fsid = hmp->fsid;
ondisk->vol_rootvol = hmp->rootvol->vol_no;
ondisk->vol_no = vol_no;
ondisk->vol_count = vol_count;
ondisk->vol_version = hmp->version;
* Reserve space for (future) header junk, setup our poor-man's
* bigblock allocator.
int64_t vol_alloc = HAMMER_BUFSIZE * 16;
ondisk->vol_bot_beg = vol_alloc;
vol_alloc += boot_area_size;
ondisk->vol_mem_beg = vol_alloc;
vol_alloc += mem_area_size;
* The remaining area is the zone 2 buffer allocation area. These
ondisk->vol_buf_beg = vol_alloc;
ondisk->vol_buf_end = vol_size & ~(int64_t)HAMMER_BUFMASK;
if (ondisk->vol_buf_end < ondisk->vol_buf_beg) {
kprintf("volume %d %s is too small to hold the volume header",
ondisk->vol_no, ondisk->vol_name);
ondisk->vol_nblocks = (ondisk->vol_buf_end - ondisk->vol_buf_beg) /
ondisk->vol_blocksize = HAMMER_BUFSIZE;
* Write volume header to disk
786 * Invalidates the volume header. Used by volume-del.
789 hammer_clear_volume_header(struct vnode *devvp)
791 struct buf *bp = NULL;
792 struct hammer_volume_ondisk *ondisk;
795 KKASSERT(HAMMER_BUFSIZE >= sizeof(struct hammer_volume_ondisk));
796 error = bread(devvp, 0LL, HAMMER_BUFSIZE, &bp);
797 if (error || bp->b_bcount < sizeof(struct hammer_volume_ondisk))
800 ondisk = (struct hammer_volume_ondisk*) bp->b_data;
801 bzero(ondisk, sizeof(struct hammer_volume_ondisk));