2 * Copyright (c) 2009 The DragonFly Project. All rights reserved.
4 * This code is derived from software contributed to The DragonFly Project
5 * by Matthew Dillon <dillon@backplane.com> and
6 * Michael Neumann <mneumann@ntecs.de>
8 * Redistribution and use in source and binary forms, with or without
9 * modification, are permitted provided that the following conditions
12 * 1. Redistributions of source code must retain the above copyright
13 * notice, this list of conditions and the following disclaimer.
14 * 2. Redistributions in binary form must reproduce the above copyright
15 * notice, this list of conditions and the following disclaimer in
16 * the documentation and/or other materials provided with the
18 * 3. Neither the name of The DragonFly Project nor the names of its
19 * contributors may be used to endorse or promote products derived
20 * from this software without specific, prior written permission.
22 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
23 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
24 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
25 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
26 * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
27 * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING,
28 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
29 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
30 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
31 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
32 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
38 #include <sys/fcntl.h>
39 #include <sys/nlookup.h>
/*
 * Forward declarations of the device helpers defined further down in this
 * file: opening and closing the backing device vnode, and writing or
 * wiping the on-disk volume header.
 */
43 hammer_setup_device(struct vnode **devvpp, const char *dev_path, int ronly);
46 hammer_close_device(struct vnode **devvpp, int ronly);
49 hammer_format_volume_header(struct hammer_mount *hmp, struct vnode *devvp,
50 const char *vol_name, int vol_no, int vol_count,
51 int64_t vol_size, int64_t boot_area_size, int64_t mem_area_size);
54 hammer_clear_volume_header(struct vnode *devvp);
/*
 * Bigblock totals gathered while formatting or freeing a volume's freemap.
 * The volume add/del ioctls add these to, or subtract them from, the root
 * volume's vol0_stat_bigblocks / vol0_stat_freebigblocks counters.
 *
 * NOTE(review): format_callback() also reads and resets a `counter` member
 * that is not among the fields visible here - confirm against the full
 * definition.
 */
56 struct bigblock_stat {
57 uint64_t total_bigblocks;	/* bigblocks counted for the volume */
58 uint64_t total_free_bigblocks;	/* of those, the ones counted as free */
/*
 * Forward declarations of the freemap helpers. Both take the caller's
 * struct bigblock_stat and reset its totals before walking the volume.
 */
63 hammer_format_freemap(hammer_transaction_t trans, hammer_volume_t volume,
64 struct bigblock_stat *stat);
67 hammer_free_freemap(hammer_transaction_t trans, hammer_volume_t volume,
68 struct bigblock_stat *stat);
/*
 * Volume-add ioctl: attach the device named by ioc->device_name to the
 * mounted HAMMER filesystem as an additional volume.
 *
 * trans - transaction; supplies the mount (trans->hmp) and the root
 *         volume (trans->rootvol) whose statistics are updated
 * ip    - not referenced in the visible body
 * ioc   - request; device_name selects the device to add
 *
 * Sequence visible below:
 *  1. Reject read-only mounts and a full volume table, then try-lock
 *     hmp->volume_lock so only one volume operation runs at a time.
 *  2. Scan for a volume number that has no entry in hmp->rb_vols_root.
 *  3. Open the device, write a new volume header that carries the root
 *     volume's on-disk vol_name, close the device, install the volume.
 *  4. With the sync lock (shared) and blkmap_lock (exclusive) held, store
 *     hmp->nvolumes into vol_count of every existing volume header,
 *     format the new volume's freemap and add its bigblock totals to the
 *     root volume statistics and to hmp->copy_stat_freebigblocks.
 */
71 hammer_ioc_volume_add(hammer_transaction_t trans, hammer_inode_t ip,
72 struct hammer_ioc_volume *ioc)
74 struct hammer_mount *hmp = trans->hmp;
75 struct mount *mp = hmp->mp;
76 hammer_volume_t volume;
79 if (mp->mnt_flag & MNT_RDONLY) {
80 kprintf("Cannot add volume to read-only HAMMER filesystem\n");
/*
 * NOTE(review): with "+ 1 >=" the add is refused once nvolumes reaches
 * HAMMER_MAX_VOLUMES - 1, although the scan below accepts any free
 * number below HAMMER_MAX_VOLUMES - confirm the intended limit.
 */
84 if (hmp->nvolumes + 1 >= HAMMER_MAX_VOLUMES) {
85 kprintf("Max number of HAMMER volumes exceeded\n");
/* Non-blocking: a concurrent volume operation makes this request fail. */
89 if (hammer_lock_ex_try(&hmp->volume_lock) != 0) {
90 kprintf("Another volume operation is in progress!\n");
95 * Find an unused volume number.
98 while (free_vol_no < HAMMER_MAX_VOLUMES &&
99 RB_LOOKUP(hammer_vol_rb_tree, &hmp->rb_vols_root, free_vol_no)) {
102 if (free_vol_no >= HAMMER_MAX_VOLUMES) {
103 kprintf("Max number of HAMMER volumes exceeded\n");
104 hammer_unlock(&hmp->volume_lock);
/* Open read-write (ronly == 0), format the header, then close again. */
108 struct vnode *devvp = NULL;
109 error = hammer_setup_device(&devvp, ioc->device_name, 0);
113 error = hammer_format_volume_header(
116 hmp->rootvol->ondisk->vol_name,
122 hammer_close_device(&devvp, 0);
126 error = hammer_install_volume(hmp, ioc->device_name, NULL);
130 hammer_sync_lock_sh(trans);
131 hammer_lock_ex(&hmp->blkmap_lock);
136 * Set each volumes new value of the vol_count field.
138 for (int vol_no = 0; vol_no < HAMMER_MAX_VOLUMES; ++vol_no) {
139 volume = hammer_get_volume(hmp, vol_no, &error);
140 if (volume == NULL && error == ENOENT) {
142 * Skip unused volume numbers
147 KKASSERT(volume != NULL && error == 0);
148 hammer_modify_volume_field(trans, volume, vol_count);
149 volume->ondisk->vol_count = hmp->nvolumes;
150 hammer_modify_volume_done(volume);
153 * Only changes to the header of the root volume
154 * are automatically flushed to disk. For all
155 * other volumes that we modify we do it here.
157 * No interlock is needed, volume buffers are not
158 * messed with by bioops.
160 if (volume != trans->rootvol && volume->io.modified) {
161 hammer_crc_set_volume(volume->ondisk);
162 hammer_io_flush(&volume->io, 0);
165 hammer_rel_volume(volume, 0);
/* Re-acquire the freshly installed volume to format its freemap. */
168 volume = hammer_get_volume(hmp, free_vol_no, &error);
169 KKASSERT(volume != NULL && error == 0);
171 struct bigblock_stat stat;
172 error = hammer_format_freemap(trans, volume, &stat);
/*
 * NOTE(review): a freemap formatting failure trips this assertion
 * instead of being returned to the caller.
 */
173 KKASSERT(error == 0);
176 * Increase the total number of bigblocks
178 hammer_modify_volume_field(trans, trans->rootvol,
179 vol0_stat_bigblocks);
180 trans->rootvol->ondisk->vol0_stat_bigblocks += stat.total_bigblocks;
181 hammer_modify_volume_done(trans->rootvol);
184 * Increase the number of free bigblocks
185 * (including the copy in hmp)
187 hammer_modify_volume_field(trans, trans->rootvol,
188 vol0_stat_freebigblocks);
189 trans->rootvol->ondisk->vol0_stat_freebigblocks += stat.total_free_bigblocks;
190 hmp->copy_stat_freebigblocks =
191 trans->rootvol->ondisk->vol0_stat_freebigblocks;
192 hammer_modify_volume_done(trans->rootvol);
194 hammer_rel_volume(volume, 0);
196 hammer_unlock(&hmp->blkmap_lock);
197 hammer_sync_unlock(trans);
199 KKASSERT(error == 0);
201 hammer_unlock(&hmp->volume_lock);
203 kprintf("An error occurred: %d\n", error);
/*
 * Volume-del ioctl: detach the volume whose vol_name equals
 * ioc->device_name from the mounted HAMMER filesystem.
 *
 * trans - transaction; supplies the mount and the root volume
 * ip    - passed through to hammer_ioc_reblock()
 * ioc   - request; device_name identifies the volume to remove
 *
 * Sequence visible below:
 *  1. Reject read-only mounts and try-lock hmp->volume_lock.
 *  2. Look the volume up by name; the root volume cannot be removed.
 *  3. Publish the volume number in hmp->volume_to_remove and run a
 *     reblock over the whole key range with free_level 0.
 *  4. Keep syncing the flusher while it reports pending work.
 *  5. With the sync lock (shared) and blkmap_lock (exclusive) held, free
 *     the volume's freemap entries, unload its buffers and the volume,
 *     store hmp->nvolumes into vol_count of the remaining volumes and
 *     subtract the bigblock totals from the root volume statistics.
 *  6. Reopen the device and clear its volume header.
 *
 * hmp->volume_to_remove is set back to -1 on the failure paths shown and
 * once the freemap has been freed.
 */
212 hammer_ioc_volume_del(hammer_transaction_t trans, hammer_inode_t ip,
213 struct hammer_ioc_volume *ioc)
215 struct hammer_mount *hmp = trans->hmp;
216 struct mount *mp = hmp->mp;
217 hammer_volume_t volume;
220 if (mp->mnt_flag & MNT_RDONLY) {
221 kprintf("Cannot del volume from read-only HAMMER filesystem\n");
/* Non-blocking: a concurrent volume operation makes this request fail. */
225 if (hammer_lock_ex_try(&hmp->volume_lock) != 0) {
226 kprintf("Another volume operation is in progress!\n");
233 * find volume by volname
235 for (int vol_no = 0; vol_no < HAMMER_MAX_VOLUMES; ++vol_no) {
236 volume = hammer_get_volume(hmp, vol_no, &error);
237 if (volume == NULL && error == ENOENT) {
239 * Skip unused volume numbers
244 KKASSERT(volume != NULL && error == 0);
245 if (strcmp(volume->vol_name, ioc->device_name) == 0) {
248 hammer_rel_volume(volume, 0);
/*
 * NOTE(review): this test relies on volume being NULL when no name
 * matched; the statement that clears it after hammer_rel_volume() is not
 * visible here - confirm.
 */
252 if (volume == NULL) {
253 kprintf("Couldn't find volume\n");
258 if (volume == trans->rootvol) {
259 kprintf("Cannot remove root-volume\n");
260 hammer_rel_volume(volume, 0);
/* free_callback() asserts layer1 entries against this volume number. */
269 hmp->volume_to_remove = volume->vol_no;
271 struct hammer_ioc_reblock reblock;
272 bzero(&reblock, sizeof(reblock));
274 reblock.key_beg.localization = HAMMER_MIN_LOCALIZATION;
275 reblock.key_beg.obj_id = HAMMER_MIN_OBJID;
276 reblock.key_end.localization = HAMMER_MAX_LOCALIZATION;
277 reblock.key_end.obj_id = HAMMER_MAX_OBJID;
278 reblock.head.flags = HAMMER_IOC_DO_FLAGS;
279 reblock.free_level = 0;
281 error = hammer_ioc_reblock(trans, ip, &reblock);
283 if (reblock.head.flags & HAMMER_IOC_HEAD_INTR) {
288 if (error == EINTR) {
289 kprintf("reblock was interrupted\n");
291 kprintf("reblock failed: %d\n", error);
293 hmp->volume_to_remove = -1;
294 hammer_rel_volume(volume, 0);
/*
 * Sync the flusher while it still reports work, sleeping with a timeout
 * of hz between passes. The "giving up" message suggests the wait is
 * bounded; the bound itself is on lines not visible here.
 */
302 while (hammer_flusher_haswork(hmp)) {
303 hammer_flusher_sync(hmp);
307 kprintf("HAMMER: flushing.");
310 tsleep(&count, 0, "hmrufl", hz);
313 kprintf("giving up");
319 hammer_sync_lock_sh(trans);
320 hammer_lock_ex(&hmp->blkmap_lock);
323 * We use stat later to update rootvol's bigblock stats
325 struct bigblock_stat stat;
326 error = hammer_free_freemap(trans, volume, &stat);
328 kprintf("Failed to free volume. Volume not empty!\n");
329 hmp->volume_to_remove = -1;
330 hammer_rel_volume(volume, 0);
331 hammer_unlock(&hmp->blkmap_lock);
332 hammer_sync_unlock(trans);
336 hmp->volume_to_remove = -1;
338 hammer_rel_volume(volume, 0);
/* Drop every cached buffer of the volume, then the volume itself. */
343 RB_SCAN(hammer_buf_rb_tree, &hmp->rb_bufs_root, NULL,
344 hammer_unload_buffer, volume);
346 error = hammer_unload_volume(volume, NULL);
348 kprintf("Failed to unload volume\n");
349 hammer_unlock(&hmp->blkmap_lock);
350 hammer_sync_unlock(trans);
358 * Set each volume's new value of the vol_count field.
360 for (int vol_no = 0; vol_no < HAMMER_MAX_VOLUMES; ++vol_no) {
361 volume = hammer_get_volume(hmp, vol_no, &error);
362 if (volume == NULL && error == ENOENT) {
364 * Skip unused volume numbers
370 KKASSERT(volume != NULL && error == 0);
371 hammer_modify_volume_field(trans, volume, vol_count);
372 volume->ondisk->vol_count = hmp->nvolumes;
373 hammer_modify_volume_done(volume);
376 * Only changes to the header of the root volume
377 * are automatically flushed to disk. For all
378 * other volumes that we modify we do it here.
380 * No interlock is needed, volume buffers are not
381 * messed with by bioops.
383 if (volume != trans->rootvol && volume->io.modified) {
384 hammer_crc_set_volume(volume->ondisk);
385 hammer_io_flush(&volume->io, 0);
388 hammer_rel_volume(volume, 0);
392 * Update the total number of bigblocks
394 hammer_modify_volume_field(trans, trans->rootvol,
395 vol0_stat_bigblocks);
396 trans->rootvol->ondisk->vol0_stat_bigblocks -= stat.total_bigblocks;
397 hammer_modify_volume_done(trans->rootvol);
400 * Update the number of free bigblocks
401 * (including the copy in hmp)
403 hammer_modify_volume_field(trans, trans->rootvol,
404 vol0_stat_freebigblocks);
405 trans->rootvol->ondisk->vol0_stat_freebigblocks -= stat.total_free_bigblocks;
406 hmp->copy_stat_freebigblocks =
407 trans->rootvol->ondisk->vol0_stat_freebigblocks;
408 hammer_modify_volume_done(trans->rootvol);
411 hammer_unlock(&hmp->blkmap_lock);
412 hammer_sync_unlock(trans);
415 * Erase the volume header of the removed device.
417 * This is to not accidentally mount the volume again.
419 struct vnode *devvp = NULL;
420 error = hammer_setup_device(&devvp, ioc->device_name, 0);
422 kprintf("Failed to open device: %s\n", ioc->device_name);
426 error = hammer_clear_volume_header(devvp);
428 kprintf("Failed to clear volume header of device: %s\n",
432 hammer_close_device(&devvp, 0);
434 KKASSERT(error == 0);
436 hammer_unlock(&hmp->volume_lock);
/*
 * Walk the freemap entries that describe one volume and hand each of them
 * to callback.
 *
 * The outer loop advances phys_off through the volume's raw-buffer address
 * range in steps of HAMMER_BLOCKMAP_LAYER2, stopping at the end of the
 * buffer area rounded down to a bigblock boundary. For every step:
 *  - the inner loop advances block_off in HAMMER_LARGEBLOCK_SIZE steps,
 *    reads the layer2 entry at
 *    phys_off + HAMMER_BLOCKMAP_LAYER2_OFFSET(block_off) and invokes
 *    callback with layer1 == NULL;
 *  - afterwards the layer1 entry at
 *    freemap->phys_offset + HAMMER_BLOCKMAP_LAYER1_OFFSET(phys_off) is
 *    read and callback is invoked with layer2 == NULL.
 *
 * callback receives the address of the buffer backing the entry, and the
 * opaque data pointer is passed through on the layer2 call. The last
 * buffer is released before returning.
 */
442 * Iterate over all usable L1 entries of the volume and
443 * the corresponding L2 entries.
446 hammer_iterate_l1l2_entries(hammer_transaction_t trans, hammer_volume_t volume,
447 int (*callback)(hammer_transaction_t, hammer_volume_t, hammer_buffer_t*,
448 struct hammer_blockmap_layer1*, struct hammer_blockmap_layer2*,
449 hammer_off_t, hammer_off_t, void*),
452 struct hammer_mount *hmp = trans->hmp;
453 hammer_blockmap_t freemap = &hmp->blockmap[HAMMER_ZONE_FREEMAP_INDEX];
454 hammer_buffer_t buffer = NULL;
457 hammer_off_t phys_off;
458 hammer_off_t block_off;
459 hammer_off_t layer1_off;
460 hammer_off_t layer2_off;
461 hammer_off_t aligned_buf_end_off;
462 struct hammer_blockmap_layer1 *layer1;
463 struct hammer_blockmap_layer2 *layer2;
466 * Calculate the usable size of the volume, which
467 * must be aligned at a bigblock (8 MB) boundary.
469 aligned_buf_end_off = (HAMMER_ENCODE_RAW_BUFFER(volume->ondisk->vol_no,
470 (volume->ondisk->vol_buf_end - volume->ondisk->vol_buf_beg)
471 & ~HAMMER_LARGEBLOCK_MASK64));
474 * Iterate the volume's address space in chunks of 4 TB, where each
475 * chunk consists of at least one physically available 8 MB bigblock.
477 * For each chunk we need one L1 entry and one L2 bigblock.
478 * We use the first bigblock of each chunk as L2 block.
480 for (phys_off = HAMMER_ENCODE_RAW_BUFFER(volume->ondisk->vol_no, 0);
481 phys_off < aligned_buf_end_off;
482 phys_off += HAMMER_BLOCKMAP_LAYER2) {
484 block_off < HAMMER_BLOCKMAP_LAYER2;
485 block_off += HAMMER_LARGEBLOCK_SIZE) {
486 layer2_off = phys_off +
487 HAMMER_BLOCKMAP_LAYER2_OFFSET(block_off);
488 layer2 = hammer_bread(hmp, layer2_off, &error, &buffer);
/* layer2 pass: layer1 argument is NULL. */
492 error = callback(trans, volume, &buffer, NULL,
493 layer2, phys_off, block_off, data);
498 layer1_off = freemap->phys_offset +
499 HAMMER_BLOCKMAP_LAYER1_OFFSET(phys_off);
500 layer1 = hammer_bread(hmp, layer1_off, &error, &buffer);
/* layer1 pass: layer2 argument is NULL. */
504 error = callback(trans, volume, &buffer, layer1, NULL,
512 hammer_rel_buffer(buffer, 0);
/*
 * hammer_iterate_l1l2_entries() callback used while formatting the freemap
 * of a newly added volume.
 *
 * The iterator passes either a layer2 entry (layer1 == NULL) or a layer1
 * entry (layer2 == NULL):
 *  - layer2: the entry is zeroed and classified as the L2 bigblock itself
 *    (block_off == 0), as an available bigblock (below the
 *    bigblock-aligned end of the volume's buffer area) or as unavailable
 *    space, then its CRC is recomputed.
 *  - layer1: the entry, which must currently be HAMMER_BLOCKMAP_UNAVAIL,
 *    is pointed at phys_off and given stat->counter free blocks; the
 *    counter is then added to total_free_bigblocks and reset.
 *
 * data is the caller's struct bigblock_stat accumulator.
 */
521 format_callback(hammer_transaction_t trans, hammer_volume_t volume,
522 hammer_buffer_t *bufferp,
523 struct hammer_blockmap_layer1 *layer1,
524 struct hammer_blockmap_layer2 *layer2,
525 hammer_off_t phys_off,
526 hammer_off_t block_off,
529 struct bigblock_stat *stat = (struct bigblock_stat*)data;
532 * Calculate the usable size of the volume, which must be aligned
533 * at a bigblock (8 MB) boundary.
535 hammer_off_t aligned_buf_end_off;
536 aligned_buf_end_off = (HAMMER_ENCODE_RAW_BUFFER(volume->ondisk->vol_no,
537 (volume->ondisk->vol_buf_end - volume->ondisk->vol_buf_beg)
538 & ~HAMMER_LARGEBLOCK_MASK64));
541 KKASSERT(layer1->phys_offset == HAMMER_BLOCKMAP_UNAVAIL);
543 hammer_modify_buffer(trans, *bufferp, layer1, sizeof(*layer1));
/*
 * Clear the whole entry, i.e. the same sizeof(*layer1) range that was
 * handed to hammer_modify_buffer() above. sizeof(layer1) is the size of
 * the pointer and would leave most of the entry untouched.
 */
544 bzero(layer1, sizeof(*layer1));
545 layer1->phys_offset = phys_off;
546 layer1->blocks_free = stat->counter;
547 layer1->layer1_crc = crc32(layer1, HAMMER_LAYER1_CRCSIZE);
548 hammer_modify_buffer_done(*bufferp);
550 stat->total_free_bigblocks += stat->counter;
551 stat->counter = 0; /* reset */
553 hammer_modify_buffer(trans, *bufferp, layer2, sizeof(*layer2));
554 bzero(layer2, sizeof(*layer2));
556 if (block_off == 0) {
558 * The first entry represents the L2 bigblock itself.
560 layer2->zone = HAMMER_ZONE_FREEMAP_INDEX;
561 layer2->append_off = HAMMER_LARGEBLOCK_SIZE;
562 layer2->bytes_free = 0;
563 ++stat->total_bigblocks;
564 } else if (phys_off + block_off < aligned_buf_end_off) {
569 layer2->append_off = 0;
570 layer2->bytes_free = HAMMER_LARGEBLOCK_SIZE;
571 ++stat->total_bigblocks;
575 * Bigblock outside of physically available
578 layer2->zone = HAMMER_ZONE_UNAVAIL_INDEX;
579 layer2->append_off = HAMMER_LARGEBLOCK_SIZE;
580 layer2->bytes_free = 0;
583 layer2->entry_crc = crc32(layer2, HAMMER_LAYER2_CRCSIZE);
584 hammer_modify_buffer_done(*bufferp);
/*
 * Format the freemap entries of a newly added volume.
 *
 * Resets the totals in *stat, runs format_callback() over every L1/L2
 * entry of the volume and returns the iterator's result. On return *stat
 * holds the bigblock totals gathered by the callback.
 *
 * NOTE(review): format_callback() also relies on stat->counter; its
 * initialisation is not visible here - confirm.
 */
593 hammer_format_freemap(hammer_transaction_t trans, hammer_volume_t volume,
594 struct bigblock_stat *stat)
596 stat->total_bigblocks = 0;
597 stat->total_free_bigblocks = 0;
599 return hammer_iterate_l1l2_entries(trans, volume, format_callback, stat);
/*
 * hammer_iterate_l1l2_entries() callback used while removing a volume.
 *
 * data is either NULL or the caller's struct bigblock_stat; a NULL pointer
 * selects test-only mode (see testonly below).
 *
 *  - layer1: an entry that is already HAMMER_BLOCKMAP_UNAVAIL counts as
 *    free. Any other entry must decode to hmp->volume_to_remove
 *    (asserted) and is reset to HAMMER_BLOCKMAP_UNAVAIL with a fresh CRC.
 *  - layer2: entries in the UNAVAIL zone are ignored, the FREEMAP zone
 *    entry is counted as a bigblock, and a completely free bigblock
 *    (append_off == 0, bytes_free == HAMMER_LARGEBLOCK_SIZE) is counted
 *    as both total and free. Anything else is a non-empty entry.
 *
 * The value returned for each case, and the test-only guards around the
 * modifications, are on lines not visible in this view.
 */
603 free_callback(hammer_transaction_t trans, hammer_volume_t volume __unused,
604 hammer_buffer_t *bufferp,
605 struct hammer_blockmap_layer1 *layer1,
606 struct hammer_blockmap_layer2 *layer2,
607 hammer_off_t phys_off,
608 hammer_off_t block_off __unused,
611 struct bigblock_stat *stat = (struct bigblock_stat*)data;
614 * No modifications to ondisk structures
616 int testonly = (stat == NULL);
619 if (layer1->phys_offset == HAMMER_BLOCKMAP_UNAVAIL) {
621 * This layer1 entry is already free.
626 KKASSERT((int)HAMMER_VOL_DECODE(layer1->phys_offset) ==
627 trans->hmp->volume_to_remove);
635 hammer_modify_buffer(trans, *bufferp, layer1, sizeof(*layer1));
/*
 * Clear the whole entry, i.e. the same sizeof(*layer1) range that was
 * handed to hammer_modify_buffer() above. sizeof(layer1) is the size of
 * the pointer and would leave most of the entry untouched.
 */
636 bzero(layer1, sizeof(*layer1));
637 layer1->phys_offset = HAMMER_BLOCKMAP_UNAVAIL;
638 layer1->layer1_crc = crc32(layer1, HAMMER_LAYER1_CRCSIZE);
639 hammer_modify_buffer_done(*bufferp);
643 if (layer2->zone == HAMMER_ZONE_UNAVAIL_INDEX) {
647 if (layer2->zone == HAMMER_ZONE_FREEMAP_INDEX) {
649 ++stat->total_bigblocks;
654 if (layer2->append_off == 0 &&
655 layer2->bytes_free == HAMMER_LARGEBLOCK_SIZE) {
657 ++stat->total_bigblocks;
658 ++stat->total_free_bigblocks;
664 * We found a layer2 entry that is not empty!
/*
 * Release the freemap entries of a volume that is being removed.
 *
 * Resets the totals in *stat and walks the volume twice with
 * free_callback():
 *  1. with data == NULL, which free_callback() treats as a test-only pass;
 *  2. with stat, which gathers the bigblock totals the caller later
 *     subtracts from the root volume statistics.
 *
 * NOTE(review): presumably the second pass only runs when the first one
 * succeeded; the check between the two calls is not visible here.
 */
675 hammer_free_freemap(hammer_transaction_t trans, hammer_volume_t volume,
676 struct bigblock_stat *stat)
680 stat->total_bigblocks = 0;
681 stat->total_free_bigblocks = 0;
684 error = hammer_iterate_l1l2_entries(trans, volume, free_callback, NULL);
688 error = hammer_iterate_l1l2_entries(trans, volume, free_callback, stat);
692 /************************************************************************
694 ************************************************************************
/*
 * Obtain and open the device vnode for dev_path.
 *
 * devvpp   - in/out; when *devvpp is NULL the path is resolved with
 *            nlookup_init()/nlookup() and a referenced vnode is stored
 *            via cache_vref()
 * dev_path - kernel-space path of the device (UIO_SYSSPACE)
 * ronly    - non-zero opens with FREAD only, zero with FREAD|FWRITE
 *
 * Visible checks before the open: the vnode must be a disk (vn_isdisk),
 * vfs_mountedon() is consulted, and vcount() > 0 is tested when no error
 * occurred so far. The vnode is then locked exclusively, its buffers are
 * flushed/invalidated with vinvalbuf(V_SAVE) and VOP_OPEN() is issued.
 *
 * The cleanup taken when error is set and *devvpp is non-NULL is on lines
 * not visible in this view.
 */
698 hammer_setup_device(struct vnode **devvpp, const char *dev_path, int ronly)
701 struct nlookupdata nd;
704 * Get the device vnode
706 if (*devvpp == NULL) {
707 error = nlookup_init(&nd, dev_path, UIO_SYSSPACE, NLC_FOLLOW);
709 error = nlookup(&nd);
711 error = cache_vref(&nd.nl_nch, nd.nl_cred, devvpp);
718 if (vn_isdisk(*devvpp, &error)) {
719 error = vfs_mountedon(*devvpp);
722 if (error == 0 && vcount(*devvpp) > 0)
725 vn_lock(*devvpp, LK_EXCLUSIVE | LK_RETRY);
726 error = vinvalbuf(*devvpp, V_SAVE, 0, 0);
728 error = VOP_OPEN(*devvpp,
729 (ronly ? FREAD : FREAD|FWRITE),
734 if (error && *devvpp) {
/*
 * Close a device vnode opened by hammer_setup_device().
 *
 * ronly must match the value used for the open: it selects the VOP_CLOSE()
 * mode flags (FREAD vs. FREAD|FWRITE) and whether vinvalbuf() is asked to
 * save dirty buffers (V_SAVE) - only done for a writable open.
 */
742 hammer_close_device(struct vnode **devvpp, int ronly)
744 VOP_CLOSE(*devvpp, (ronly ? FREAD : FREAD|FWRITE));
746 vinvalbuf(*devvpp, ronly ? 0 : V_SAVE, 0, 0);
/*
 * Write a fresh HAMMER volume header to the start of devvp.
 *
 * hmp            - mount; supplies fsid, version and the root volume whose
 *                  vol_fstype and vol_no are copied into the header
 * devvp          - opened device vnode
 * vol_name       - name stored in the header (truncated by ksnprintf to
 *                  the size of ondisk->vol_name)
 * vol_no         - volume number for the new volume
 * vol_count      - total volume count to record
 * vol_size       - device size; the buffer area ends at vol_size rounded
 *                  down with ~HAMMER_BUFMASK
 * boot_area_size - size of the boot area placed after the reserved header
 * mem_area_size  - size of the memory area placed after the boot area
 *
 * The first HAMMER_BUFSIZE bytes are read with bread(). A device that
 * already carries the HAMMER_FSBUF_VOLUME signature is refused. Layout:
 * 16 * HAMMER_BUFSIZE reserved, then the boot area, then the memory area,
 * then the buffer area (vol_buf_beg .. vol_buf_end).
 */
753 hammer_format_volume_header(struct hammer_mount *hmp, struct vnode *devvp,
754 const char *vol_name, int vol_no, int vol_count,
755 int64_t vol_size, int64_t boot_area_size, int64_t mem_area_size)
757 struct buf *bp = NULL;
758 struct hammer_volume_ondisk *ondisk;
762 * Extract the volume number from the volume header and do various
765 KKASSERT(HAMMER_BUFSIZE >= sizeof(struct hammer_volume_ondisk));
766 error = bread(devvp, 0LL, HAMMER_BUFSIZE, &bp);
/*
 * NOTE(review): when bread() succeeds but returns a short buffer, error
 * is still 0 at this point - confirm the failure path taken here does not
 * report success to the caller.
 */
767 if (error || bp->b_bcount < sizeof(struct hammer_volume_ondisk))
770 ondisk = (struct hammer_volume_ondisk*) bp->b_data;
773 * Note that we do NOT allow to use a device that contains
774 * a valid HAMMER signature. It has to be cleaned up with dd
777 if (ondisk->vol_signature == HAMMER_FSBUF_VOLUME) {
778 kprintf("hammer_volume_add: Formatting of valid HAMMER volume "
779 "%s denied. Erase with dd!\n", vol_name);
784 bzero(ondisk, sizeof(struct hammer_volume_ondisk));
785 ksnprintf(ondisk->vol_name, sizeof(ondisk->vol_name), "%s", vol_name);
786 ondisk->vol_fstype = hmp->rootvol->ondisk->vol_fstype;
787 ondisk->vol_signature = HAMMER_FSBUF_VOLUME;
788 ondisk->vol_fsid = hmp->fsid;
789 ondisk->vol_rootvol = hmp->rootvol->vol_no;
790 ondisk->vol_no = vol_no;
791 ondisk->vol_count = vol_count;
792 ondisk->vol_version = hmp->version;
795 * Reserve space for (future) header junk, setup our poor-man's
796 * bigblock allocator.
798 int64_t vol_alloc = HAMMER_BUFSIZE * 16;
800 ondisk->vol_bot_beg = vol_alloc;
801 vol_alloc += boot_area_size;
802 ondisk->vol_mem_beg = vol_alloc;
803 vol_alloc += mem_area_size;
806 * The remaining area is the zone 2 buffer allocation area. These
809 ondisk->vol_buf_beg = vol_alloc;
810 ondisk->vol_buf_end = vol_size & ~(int64_t)HAMMER_BUFMASK;
812 if (ondisk->vol_buf_end < ondisk->vol_buf_beg) {
/* Terminate the message with a newline like the other error messages. */
813 kprintf("volume %d %s is too small to hold the volume header\n",
814 ondisk->vol_no, ondisk->vol_name);
819 ondisk->vol_nblocks = (ondisk->vol_buf_end - ondisk->vol_buf_beg) /
821 ondisk->vol_blocksize = HAMMER_BUFSIZE;
824 * Write volume header to disk
836 * Invalidates the volume header. Used by volume-del.
839 hammer_clear_volume_header(struct vnode *devvp)
841 struct buf *bp = NULL;
842 struct hammer_volume_ondisk *ondisk;
845 KKASSERT(HAMMER_BUFSIZE >= sizeof(struct hammer_volume_ondisk));
846 error = bread(devvp, 0LL, HAMMER_BUFSIZE, &bp);
847 if (error || bp->b_bcount < sizeof(struct hammer_volume_ondisk))
850 ondisk = (struct hammer_volume_ondisk*) bp->b_data;
851 bzero(ondisk, sizeof(struct hammer_volume_ondisk));