From b9107f58ed117184a899e7d65ffd8038f2f6ee52 Mon Sep 17 00:00:00 2001 From: Matthew Dillon Date: Mon, 16 Aug 2010 23:49:16 -0700 Subject: [PATCH] HAMMER Utility - Add catastrophic recovery feature * hammer -f recover * Add a catastrophic recovery feature. A HAMMER filesystem image is scanned (using the -f specification). Any buffer which looks like a B-Tree node is then sub-scanned for inode, directory, and data records and the filesystem is reconstructed in the specified target directory. * The files and directories are initially named after the object id and are renamed and moved as directory entries are found to resolve the fragmentary information. * File writes strip trailing 0's (data records are not limited to the file EOF), but will properly truncate the file if/when the related inode record is found. * Currently no attempt is made to restore owner, group, file modes, softlinks, or hardlinks (only one link will be restored). TODO: Currently a valid volume header is required, but the only thing we actually need from it is the vol_buf_beg field. This field could be guessed or passed in on the command line in a future update to the recovery code. 
--- sbin/hammer/Makefile | 2 +- sbin/hammer/blockmap.c | 73 ++++- sbin/hammer/cmd_recover.c | 552 ++++++++++++++++++++++++++++++++ sbin/hammer/cmd_show.c | 14 +- sbin/hammer/hammer.8 | 15 + sbin/hammer/hammer.c | 8 + sbin/hammer/hammer.h | 1 + sbin/hammer/hammer_util.h | 1 + sbin/hammer/ondisk.c | 19 +- sys/vfs/hammer/hammer.h | 1 + sys/vfs/hammer/hammer_cursor.c | 17 + sys/vfs/hammer/hammer_reblock.c | 7 +- 12 files changed, 690 insertions(+), 20 deletions(-) create mode 100644 sbin/hammer/cmd_recover.c diff --git a/sbin/hammer/Makefile b/sbin/hammer/Makefile index 098349a297..7ba6a4d4c3 100644 --- a/sbin/hammer/Makefile +++ b/sbin/hammer/Makefile @@ -8,7 +8,7 @@ SRCS= hammer.c ondisk.c blockmap.c cache.c misc.c cycle.c \ cmd_synctid.c cmd_stats.c \ cmd_pseudofs.c cmd_snapshot.c cmd_mirror.c cmd_status.c \ cmd_cleanup.c cmd_info.c cmd_version.c cmd_volume.c \ - cmd_config.c + cmd_config.c cmd_recover.c MAN= hammer.8 CFLAGS+= -I${.CURDIR}/../../sys -DALIST_NO_DEBUG diff --git a/sbin/hammer/blockmap.c b/sbin/hammer/blockmap.c index 8f3ffa24ae..d61bfa5359 100644 --- a/sbin/hammer/blockmap.c +++ b/sbin/hammer/blockmap.c @@ -42,7 +42,7 @@ blockmap_lookup(hammer_off_t zone_offset, struct hammer_blockmap_layer2 *save_layer2, int *errorp) { - struct volume_info *root_volume; + struct volume_info *root_volume = NULL; hammer_blockmap_t blockmap; hammer_blockmap_t freemap; struct hammer_blockmap_layer1 *layer1; @@ -53,14 +53,27 @@ blockmap_lookup(hammer_off_t zone_offset, hammer_off_t result_offset; int zone; int i; + int error = 0; zone = HAMMER_ZONE_DECODE(zone_offset); - if (errorp) - *errorp = 0; - assert(zone > HAMMER_ZONE_RAW_VOLUME_INDEX); - assert(zone < HAMMER_MAX_ZONES); - assert(RootVolNo >= 0); + if (AssertOnFailure) { + assert(zone > HAMMER_ZONE_RAW_VOLUME_INDEX); + assert(zone < HAMMER_MAX_ZONES); + assert(RootVolNo >= 0); + } else { + if (zone <= HAMMER_ZONE_RAW_VOLUME_INDEX) + error = EDOM; + if (zone >= HAMMER_MAX_ZONES) + error = EDOM; + if (RootVolNo < 0) 
+ error = EDOM; + if (error) { + result_offset = HAMMER_OFF_BAD; + goto done; + } + } + root_volume = get_volume(RootVolNo); blockmap = &root_volume->ondisk->vol0_blockmap[zone]; @@ -69,7 +82,15 @@ blockmap_lookup(hammer_off_t zone_offset, } else if (zone == HAMMER_ZONE_UNDO_INDEX) { i = (zone_offset & HAMMER_OFF_SHORT_MASK) / HAMMER_LARGEBLOCK_SIZE; - assert(zone_offset < blockmap->alloc_offset); + if (AssertOnFailure) { + assert(zone_offset < blockmap->alloc_offset); + } else { + if (zone_offset >= blockmap->alloc_offset) { + error = EDOM; + result_offset = HAMMER_OFF_BAD; + goto done; + } + } result_offset = root_volume->ondisk->vol0_undo_array[i] + (zone_offset & HAMMER_LARGEBLOCK_MASK64); } else { @@ -77,7 +98,18 @@ blockmap_lookup(hammer_off_t zone_offset, HAMMER_ZONE_RAW_BUFFER; } - assert(HAMMER_ZONE_DECODE(blockmap->alloc_offset) == zone); + /* + * The blockmap should match the requested zone (else the volume + * header is mashed). + */ + if (AssertOnFailure) { + assert(HAMMER_ZONE_DECODE(blockmap->alloc_offset) == zone); + } else { + if (HAMMER_ZONE_DECODE(blockmap->alloc_offset) != zone) { + error = EDOM; + goto done; + } + } /* * Validate that the big-block is assigned to the zone. 
Also @@ -91,7 +123,14 @@ blockmap_lookup(hammer_off_t zone_offset, layer1_offset = freemap->phys_offset + HAMMER_BLOCKMAP_LAYER1_OFFSET(result_offset); layer1 = get_buffer_data(layer1_offset, &buffer, 0); - assert(layer1->phys_offset != HAMMER_BLOCKMAP_UNAVAIL); + if (AssertOnFailure) { + assert(layer1->phys_offset != HAMMER_BLOCKMAP_UNAVAIL); + } else { + if (layer1->phys_offset == HAMMER_BLOCKMAP_UNAVAIL) { + error = EDOM; + goto done; + } + } if (save_layer1) *save_layer1 = *layer1; @@ -104,16 +143,22 @@ blockmap_lookup(hammer_off_t zone_offset, if (save_layer2) *save_layer2 = *layer2; - if (errorp) { - if (layer2->zone != zone) - *errorp = EDOM; - } else { + if (AssertOnFailure) { assert(layer2->zone == zone); + } else { + if (layer2->zone != zone) + error = EDOM; } +done: if (buffer) rel_buffer(buffer); - rel_volume(root_volume); + if (root_volume) + rel_volume(root_volume); + + if (errorp) + *errorp = error; + return(result_offset); } diff --git a/sbin/hammer/cmd_recover.c b/sbin/hammer/cmd_recover.c new file mode 100644 index 0000000000..43b6a45588 --- /dev/null +++ b/sbin/hammer/cmd_recover.c @@ -0,0 +1,552 @@ +/* + * Copyright (c) 2010 The DragonFly Project. All rights reserved. + * + * This code is derived from software contributed to The DragonFly Project + * by Matthew Dillon + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * 3. 
Neither the name of The DragonFly Project nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific, prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS + * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE + * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING, + * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED + * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, + * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT + * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. 
+ */ + +#include "hammer.h" + +struct recover_dict { + struct recover_dict *next; + struct recover_dict *parent; + int64_t obj_id; + uint8_t obj_type; + uint8_t flags; + uint16_t llid; + int64_t size; + char *name; +}; + +#define DICTF_MADEDIR 0x01 +#define DICTF_MADEFILE 0x02 +#define DICTF_PARENT 0x04 /* parent attached for real */ +#define DICTF_TRAVERSED 0x80 + +static void recover_top(char *ptr); +static void recover_elm(hammer_btree_leaf_elm_t leaf); +static struct recover_dict *get_dict(int64_t obj_id, uint16_t llid); +static char *recover_path(struct recover_dict *dict); +static void sanitize_string(char *str); + +static const char *TargetDir; +static int CachedFd = -1; +static char *CachedPath; + +void +hammer_cmd_recover(const char *target_dir) +{ + struct buffer_info *data_buffer; + struct volume_info *scan; + struct volume_info *volume; + hammer_off_t off; + hammer_off_t off_end; + char *ptr; + + AssertOnFailure = 0; + TargetDir = target_dir; + + printf("Running raw scan of HAMMER image, recovering to %s\n", + TargetDir); + mkdir(TargetDir, 0777); + + data_buffer = NULL; + TAILQ_FOREACH(scan, &VolList, entry) { + volume = get_volume(scan->vol_no); + + off = HAMMER_ZONE_RAW_BUFFER + 0; + off |= HAMMER_VOL_ENCODE(volume->vol_no); + off_end = off + (volume->ondisk->vol_buf_end - volume->ondisk->vol_buf_beg); + while (off < off_end) { + ptr = get_buffer_data(off, &data_buffer, 0); + if (ptr) { + recover_top(ptr); + off += HAMMER_BUFSIZE; + } + } + } + if (data_buffer) + rel_buffer(data_buffer); + + if (CachedPath) { + free(CachedPath); + close(CachedFd); + CachedPath = NULL; + CachedFd = -1; + } + + AssertOnFailure = 1; +} + +/* + * Top level recovery processor. Assume the data is a B-Tree node. + * If the CRC is good we attempt to process the node, building the + * object space and creating the dictionary as we go. 
+ */ +static void +recover_top(char *ptr) +{ + struct hammer_node_ondisk *node; + hammer_btree_elm_t elm; + int maxcount; + int i; + + for (node = (void *)ptr; (char *)node < ptr + HAMMER_BUFSIZE; ++node) { + if (crc32(&node->crc + 1, HAMMER_BTREE_CRCSIZE) == + node->crc && + node->type == HAMMER_BTREE_TYPE_LEAF) { + /* + * Scan elements + */ + maxcount = HAMMER_BTREE_LEAF_ELMS; + for (i = 0; i < node->count && i < maxcount; ++i) { + elm = &node->elms[i]; + if (elm->base.btype != 'R') + continue; + recover_elm(&elm->leaf); + } + } + } +} + +static void +recover_elm(hammer_btree_leaf_elm_t leaf) +{ + struct buffer_info *data_buffer = NULL; + struct recover_dict *dict; + struct recover_dict *dict2; + hammer_data_ondisk_t ondisk; + hammer_off_t data_offset; + struct stat st; + int chunk; + int len; + int zfill; + int64_t file_offset; + uint16_t llid; + size_t nlen; + int fd; + char *name; + char *path1; + char *path2; + + /* + * Ignore deleted records + */ + if (leaf->delete_ts) + return; + if ((data_offset = leaf->data_offset) != 0) + ondisk = get_buffer_data(data_offset, &data_buffer, 0); + else + ondisk = NULL; + if (ondisk == NULL) + goto done; + + len = leaf->data_len; + chunk = HAMMER_BUFSIZE - ((int)data_offset & HAMMER_BUFMASK); + if (chunk > len) + chunk = len; + + if (len < 0 || len > HAMMER_XBUFSIZE || len > chunk) + goto done; + + llid = leaf->base.localization >> 16; + + dict = get_dict(leaf->base.obj_id, llid); + + switch(leaf->base.rec_type) { + case HAMMER_RECTYPE_INODE: + /* + * We found an inode which also tells us where the file + * or directory is in the directory hierarchy. + */ + if (VerboseOpt) { + printf("file %016jx:%05d inode found\n", + (uintmax_t)leaf->base.obj_id, llid); + } + path1 = recover_path(dict); + + /* + * Attach the inode to its parent. 
This isn't strictly + * necessary because the information is also in the + * directory entries, but if we do not find the directory + * entry this ensures that the files will still be + * reasonably well organized in their proper directories. + */ + if ((dict->flags & DICTF_PARENT) == 0 && + dict->obj_id != 1 && ondisk->inode.parent_obj_id != 0) { + dict->flags |= DICTF_PARENT; + dict->parent = get_dict(ondisk->inode.parent_obj_id, + llid); + if (dict->parent && + (dict->parent->flags & DICTF_MADEDIR) == 0) { + dict->parent->flags |= DICTF_MADEDIR; + path2 = recover_path(dict->parent); + printf("mkdir %s\n", path2); + mkdir(path2, 0777); + free(path2); + path2 = NULL; + } + } + if (dict->obj_type == 0) + dict->obj_type = ondisk->inode.obj_type; + dict->size = ondisk->inode.size; + path2 = recover_path(dict); + + if (lstat(path1, &st) == 0) { + if (ondisk->inode.obj_type == HAMMER_OBJTYPE_REGFILE) { + truncate(path1, dict->size); + /* chmod(path1, 0666); */ + } + if (strcmp(path1, path2)) { + printf("Rename %s -> %s\n", path1, path2); + rename(path1, path2); + } + } else if (ondisk->inode.obj_type == HAMMER_OBJTYPE_REGFILE) { + printf("mkinode (file) %s\n", path2); + fd = open(path2, O_RDWR|O_CREAT, 0666); + if (fd > 0) + close(fd); + } else if (ondisk->inode.obj_type == HAMMER_OBJTYPE_DIRECTORY) { + printf("mkinode (dir) %s\n", path2); + mkdir(path2, 0777); + dict->flags |= DICTF_MADEDIR; + } + free(path1); + free(path2); + break; + case HAMMER_RECTYPE_DATA: + /* + * File record data + */ + if (leaf->base.obj_id == 0) + break; + if (VerboseOpt) { + printf("file %016jx:%05d data %016jx,%d\n", + (uintmax_t)leaf->base.obj_id, + llid, + (uintmax_t)leaf->base.key - len, + len); + } + + /* + * Update the dictionary entry + */ + if (dict->obj_type == 0) + dict->obj_type = HAMMER_OBJTYPE_REGFILE; + + /* + * If the parent directory has not been created we + * have to create it (typically a PFS%05d) + */ + if (dict->parent && + (dict->parent->flags & DICTF_MADEDIR) == 0) { + 
dict->parent->flags |= DICTF_MADEDIR; + path2 = recover_path(dict->parent); + printf("mkdir %s\n", path2); + mkdir(path2, 0777); + free(path2); + path2 = NULL; + } + + /* + * Create the file if necessary, report file creations + */ + path1 = recover_path(dict); + if (CachedPath && strcmp(CachedPath, path1) == 0) { + fd = CachedFd; + } else { + fd = open(path1, O_CREAT|O_RDWR, 0666); + } + if (fd < 0) { + printf("Unable to create %s: %s\n", + path1, strerror(errno)); + free(path1); + break; + } + if ((dict->flags & DICTF_MADEFILE) == 0) { + dict->flags |= DICTF_MADEFILE; + printf("mkfile %s\n", path1); + } + + /* + * And write the record. A HAMMER data block is aligned + * and may contain trailing zeros after the file EOF. The + * inode record is required to get the actual file size. + * + * However, when the inode record is not available + * we can do a sparse write and that will get it right + * most of the time even if the inode record is never + * found. + */ + file_offset = (int64_t)leaf->base.key - len; + lseek(fd, (off_t)file_offset, SEEK_SET); + while (len) { + if (dict->size == -1) { + for (zfill = chunk - 1; zfill >= 0; --zfill) { + if (((char *)ondisk)[zfill]) + break; + } + ++zfill; + } else { + zfill = chunk; + } + + if (zfill) + write(fd, ondisk, zfill); + if (zfill < chunk) + lseek(fd, chunk - zfill, SEEK_CUR); + + len -= chunk; + data_offset += chunk; + file_offset += chunk; + ondisk = get_buffer_data(data_offset, &data_buffer, 0); + if (ondisk == NULL) + break; + chunk = HAMMER_BUFSIZE - + ((int)data_offset & HAMMER_BUFMASK); + if (chunk > len) + chunk = len; + } + if (dict->size >= 0 && file_offset > dict->size) { + ftruncate(fd, dict->size); + /* fchmod(fd, 0666); */ + } + + if (fd == CachedFd) { + free(path1); + } else if (CachedPath) { + free(CachedPath); + close(CachedFd); + CachedPath = path1; + CachedFd = fd; + } else { + CachedPath = path1; + CachedFd = fd; + } + break; + case HAMMER_RECTYPE_DIRENTRY: + nlen = len - offsetof(struct 
hammer_entry_data, name[0]); + if ((int)nlen < 0) /* illegal length */ + break; + if (ondisk->entry.obj_id == 0 || ondisk->entry.obj_id == 1) + break; + name = malloc(nlen + 1); + bcopy(ondisk->entry.name, name, nlen); + name[nlen] = 0; + sanitize_string(name); + + /* + * We can't deal with hardlinks so if the object already + * has a name assigned to it we just keep using that name. + */ + dict2 = get_dict(ondisk->entry.obj_id, llid); + path1 = recover_path(dict2); + + if (dict2->name == NULL) + dict2->name = name; + else + free(name); + + /* + * Attach dict2 to its directory (dict), create the + * directory (dict) if necessary. We must ensure + * that the directory entry exists in order to be + * able to properly rename() the file without creating + * a namespace conflict. + */ + if ((dict2->flags & DICTF_PARENT) == 0) { + dict2->flags |= DICTF_PARENT; + dict2->parent = dict; + if ((dict->flags & DICTF_MADEDIR) == 0) { + dict->flags |= DICTF_MADEDIR; + path2 = recover_path(dict); + printf("mkdir %s\n", path2); + mkdir(path2, 0777); + free(path2); + path2 = NULL; + } + } + path2 = recover_path(dict2); + if (strcmp(path1, path2) != 0 && lstat(path1, &st) == 0) { + printf("Rename %s -> %s\n", path1, path2); + rename(path1, path2); + } + free(path1); + free(path2); + + printf("dir %016jx:%05d entry %016jx \"%s\"\n", + (uintmax_t)leaf->base.obj_id, + llid, + (uintmax_t)ondisk->entry.obj_id, + name); + break; + default: + /* + * Ignore any other record types + */ + break; + } +done: + if (data_buffer) + rel_buffer(data_buffer); +} + +#define RD_HSIZE 32768 +#define RD_HMASK (RD_HSIZE - 1) + +struct recover_dict *RDHash[RD_HSIZE]; + +static +struct recover_dict * +get_dict(int64_t obj_id, uint16_t llid) +{ + struct recover_dict *dict; + int i; + + if (obj_id == 0) + return(NULL); + + i = crc32(&obj_id, sizeof(obj_id)) & RD_HMASK; + for (dict = RDHash[i]; dict; dict = dict->next) { + if (dict->obj_id == obj_id && + dict->llid == llid) { + break; + } + } + if (dict == 
NULL) { + dict = malloc(sizeof(*dict)); + bzero(dict, sizeof(*dict)); + dict->obj_id = obj_id; + dict->llid = llid; + dict->next = RDHash[i]; + dict->size = -1; + RDHash[i] = dict; + + /* + * Always connect dangling dictionary entries to object 1 + * (the root of the PFS). + * + * DICTF_PARENT will not be set until we know what the + * real parent directory object is. + */ + if (dict->obj_id != 1) + dict->parent = get_dict(1, llid); + } + return(dict); +} + +struct path_info { + enum { PI_FIGURE, PI_LOAD } state; + uint16_t llid; + char *base; + char *next; + int len; +}; + +static void recover_path_helper(struct recover_dict *, struct path_info *); + +static +char * +recover_path(struct recover_dict *dict) +{ + struct path_info info; + + bzero(&info, sizeof(info)); + info.llid = dict->llid; + info.state = PI_FIGURE; + recover_path_helper(dict, &info); + info.base = malloc(info.len); + info.next = info.base; + info.state = PI_LOAD; + recover_path_helper(dict, &info); + + return(info.base); +} + +static +void +recover_path_helper(struct recover_dict *dict, struct path_info *info) +{ + /* + * Calculate path element length + */ + dict->flags |= DICTF_TRAVERSED; + + switch(info->state) { + case PI_FIGURE: + if (dict->obj_id == 1) + info->len += 8; + else if (dict->name) + info->len += strlen(dict->name); + else + info->len += 6 + 16; + ++info->len; + + if (dict->parent && + (dict->parent->flags & DICTF_TRAVERSED) == 0) { + recover_path_helper(dict->parent, info); + } else { + info->len += strlen(TargetDir) + 1; + } + break; + case PI_LOAD: + if (dict->parent && + (dict->parent->flags & DICTF_TRAVERSED) == 0) { + recover_path_helper(dict->parent, info); + } else { + strcpy(info->next, TargetDir); + info->next += strlen(info->next); + } + + *info->next++ = '/'; + if (dict->obj_id == 1) { + snprintf(info->next, 8+1, "PFS%05d", info->llid); + } else if (dict->name) { + strcpy(info->next, dict->name); + } else { + snprintf(info->next, 6+16+1, "obj_0x%016jx", + 
(uintmax_t)dict->obj_id); + } + info->next += strlen(info->next); + break; + } + dict->flags &= ~DICTF_TRAVERSED; +} + +static +void +sanitize_string(char *str) +{ + while (*str) { + if (!isprint(*str)) + *str = 'x'; + ++str; + } +} diff --git a/sbin/hammer/cmd_show.c b/sbin/hammer/cmd_show.c index 037cf29e07..59bdec3b8d 100644 --- a/sbin/hammer/cmd_show.c +++ b/sbin/hammer/cmd_show.c @@ -71,6 +71,8 @@ hammer_cmd_show(hammer_off_t node_offset, u_int32_t lo, int64_t obj_id, btree_search_t searchp; int zone; + AssertOnFailure = 0; + if (node_offset == (hammer_off_t)-1) { volume = get_volume(RootVolNo); node_offset = volume->ondisk->vol0_btree_root; @@ -104,6 +106,8 @@ hammer_cmd_show(hammer_off_t node_offset, u_int32_t lo, int64_t obj_id, left_bound, right_bound); print_btree_node(node_offset, searchp, depth, 1, HAMMER_MAX_TID, left_bound, right_bound); + + AssertOnFailure = 1; } static void @@ -123,6 +127,12 @@ print_btree_node(hammer_off_t node_offset, btree_search_t search, node = get_node(node_offset, &buffer); + if (node == NULL) { + printf("BI NODE %016jx (IO ERROR)\n", + (uintmax_t)node_offset); + return; + } + if (crc32(&node->crc + 1, HAMMER_BTREE_CRCSIZE) == node->crc) badc = ' '; else @@ -319,7 +329,9 @@ print_elm_flags(hammer_node_ondisk_t node, hammer_off_t node_offset, subnode = get_node(elm->internal.subtree_offset, &buffer); - if (subnode->parent != node_offset) + if (subnode == NULL) + flags |= FLAG_BADCHILDPARENT; + else if (subnode->parent != node_offset) flags |= FLAG_BADCHILDPARENT; rel_buffer(buffer); } diff --git a/sbin/hammer/hammer.8 b/sbin/hammer/hammer.8 index 30294148ec..e579eeb926 100644 --- a/sbin/hammer/hammer.8 +++ b/sbin/hammer/hammer.8 @@ -355,6 +355,21 @@ flag. .\" .It Ar blockmap .\" Dump the B-Tree, record, large-data, and small-data blockmaps, showing .\" physical block assignments and free space percentages. 
+.\" ==== recover ==== +.It Cm recover Ar targetdir +This is a low level command which operates on the filesystem image and +attempts to locate and recover files from a corrupted filesystem. The +entire image is scanned linearly looking for B-Tree nodes. Any node +found which passes its crc test is scanned for file, inode, and directory +fragments and the target directory is populated with the resulting data. +Files and directories in the target directory are initially named after +the object id and are renamed as fragmentary information is processed. +.Pp +This command keeps track of filename/objid translations and may eat a +considerable amount of memory while operating. +.Pp +This command is literally the last line of defense when it comes to +recovering data from a dead filesystem. .\" ==== namekey1 ==== .It Cm namekey1 Ar filename Generate a diff --git a/sbin/hammer/hammer.c b/sbin/hammer/hammer.c index db5117bbb8..13b548df0e 100644 --- a/sbin/hammer/hammer.c +++ b/sbin/hammer/hammer.c @@ -454,6 +454,13 @@ main(int ac, char **av) hammer_cmd_show_undo(); exit(0); } + if (strcmp(av[0], "recover") == 0) { + hammer_parsedevs(blkdevs); + if (ac <= 1) + errx(1, "hammer recover required target directory"); + hammer_cmd_recover(av[1]); + exit(0); + } if (strcmp(av[0], "blockmap") == 0) { hammer_parsedevs(blkdevs); hammer_cmd_blockmap(); @@ -584,6 +591,7 @@ usage(int exit_code) fprintf(stderr, "hammer -f blkdevs show-undo\n" + "hammer -f blkdevs recover \n" ); exit(exit_code); diff --git a/sbin/hammer/hammer.h b/sbin/hammer/hammer.h index 50936494a4..30edf2c574 100644 --- a/sbin/hammer/hammer.h +++ b/sbin/hammer/hammer.h @@ -82,6 +82,7 @@ void hammer_cmd_show(hammer_tid_t node_offset, u_int32_t lo, int64_t obj_id, int depth, hammer_base_elm_t left_bound, hammer_base_elm_t right_bound); void hammer_cmd_show_undo(void); +void hammer_cmd_recover(const char *target_dir); void hammer_cmd_checkmap(void); void hammer_cmd_prune(char **av, int ac); void 
hammer_cmd_softprune(char **av, int ac, int everything_opt); diff --git a/sbin/hammer/hammer_util.h b/sbin/hammer/hammer_util.h index 53a741bbe5..6a9ef27a56 100644 --- a/sbin/hammer/hammer_util.h +++ b/sbin/hammer/hammer_util.h @@ -111,6 +111,7 @@ extern int RootVolNo; extern struct volume_list VolList; extern int UseReadBehind; extern int UseReadAhead; +extern int AssertOnFailure; uint32_t crc32(const void *buf, size_t size); uint32_t crc32_ext(const void *buf, size_t size, uint32_t ocrc); diff --git a/sbin/hammer/ondisk.c b/sbin/hammer/ondisk.c index 2c744a32cc..614a6adeaa 100644 --- a/sbin/hammer/ondisk.c +++ b/sbin/hammer/ondisk.c @@ -72,6 +72,7 @@ int NumVolumes; int RootVolNo = -1; int UseReadBehind = -4; int UseReadAhead = 4; +int AssertOnFailure = 1; struct volume_list VolList = TAILQ_HEAD_INITIALIZER(VolList); static __inline @@ -205,7 +206,13 @@ get_buffer(hammer_off_t buf_offset, int isnew) if (zone > HAMMER_ZONE_RAW_BUFFER_INDEX) { buf_offset = blockmap_lookup(buf_offset, NULL, NULL, NULL); } - assert((buf_offset & HAMMER_OFF_ZONE_MASK) == HAMMER_ZONE_RAW_BUFFER); + if (buf_offset == HAMMER_OFF_BAD) + return(NULL); + + if (AssertOnFailure) { + assert((buf_offset & HAMMER_OFF_ZONE_MASK) == + HAMMER_ZONE_RAW_BUFFER); + } vol_no = HAMMER_VOL_DECODE(buf_offset); volume = get_volume(vol_no); buf_offset &= ~HAMMER_BUFMASK64; @@ -249,11 +256,13 @@ get_buffer(hammer_off_t buf_offset, int isnew) n = pread(volume->fd, ondisk, HAMMER_BUFSIZE, buf->raw_offset); if (n != HAMMER_BUFSIZE) { + if (AssertOnFailure) err(1, "get_buffer: %s:%016llx Read failed at " "offset %016llx", volume->name, (long long)buf->buf_offset, (long long)buf->raw_offset); + bzero(ondisk, HAMMER_BUFSIZE); } } } @@ -357,8 +366,12 @@ get_node(hammer_off_t node_offset, struct buffer_info **bufp) if (*bufp) rel_buffer(*bufp); *bufp = buf = get_buffer(node_offset, 0); - return((void *)((char *)buf->ondisk + - (int32_t)(node_offset & HAMMER_BUFMASK))); + if (buf) { + return((void *)((char 
*)buf->ondisk + + (int32_t)(node_offset & HAMMER_BUFMASK))); + } else { + return(NULL); + } } /* diff --git a/sys/vfs/hammer/hammer.h b/sys/vfs/hammer/hammer.h index e61a181d19..c719bdf327 100644 --- a/sys/vfs/hammer/hammer.h +++ b/sys/vfs/hammer/hammer.h @@ -1128,6 +1128,7 @@ void hammer_cursor_parent_changed(hammer_node_t node, hammer_node_t oparent, hammer_node_t nparent, int nindex); void hammer_cursor_inserted_element(hammer_node_t node, int index); void hammer_cursor_deleted_element(hammer_node_t node, int index); +void hammer_cursor_invalidate_cache(hammer_cursor_t cursor); int hammer_btree_lookup(hammer_cursor_t cursor); int hammer_btree_first(hammer_cursor_t cursor); diff --git a/sys/vfs/hammer/hammer_cursor.c b/sys/vfs/hammer/hammer_cursor.c index 452cddd865..f6de3fa442 100644 --- a/sys/vfs/hammer/hammer_cursor.c +++ b/sys/vfs/hammer/hammer_cursor.c @@ -929,3 +929,20 @@ hammer_cursor_inserted_element(hammer_node_t node, int index) } } +/* + * Invalidate the cached data buffer associated with a cursor. + * + * This needs to be done when the underlying block is being freed or + * the referenced buffer can prevent the related buffer cache buffer + * from being properly invalidated. + */ +void +hammer_cursor_invalidate_cache(hammer_cursor_t cursor) +{ + if (cursor->data_buffer) { + hammer_rel_buffer(cursor->data_buffer, 0); + cursor->data_buffer = NULL; + cursor->data = NULL; + } +} + diff --git a/sys/vfs/hammer/hammer_reblock.c b/sys/vfs/hammer/hammer_reblock.c index d11e5290de..dd51889e59 100644 --- a/sys/vfs/hammer/hammer_reblock.c +++ b/sys/vfs/hammer/hammer_reblock.c @@ -419,11 +419,16 @@ hammer_reblock_data(struct hammer_ioc_reblock *reblock, hammer_io_notmeta(data_buffer); /* - * Move the data + * Move the data. Note that we must invalidate any cached + * data buffer in the cursor before calling blockmap_free. 
+ * The blockmap_free may free up the entire large-block and + * will not be able to invalidate it if the cursor is holding + * a data buffer cached in that large block. */ hammer_modify_buffer(cursor->trans, data_buffer, NULL, 0); bcopy(cursor->data, ndata, elm->leaf.data_len); hammer_modify_buffer_done(data_buffer); + hammer_cursor_invalidate_cache(cursor); hammer_blockmap_free(cursor->trans, elm->leaf.data_offset, elm->leaf.data_len); -- 2.41.0