From 07ed04b554263ecb5e36bf8ce5f9cd7e99614e1a Mon Sep 17 00:00:00 2001 From: Matthew Dillon Date: Sun, 18 Apr 2010 22:36:14 -0700 Subject: [PATCH] HAMMER VFS - Fix probable corruption case when filesystem becomes nearly full * The reblocking code was incorrectly assuming the cursor would be pointing at a valid node element after an unlock/relock sequence, when it could actually be pointing at the EOF of a node. This case can occur when the filesystem is nearly full (possibly due to the reblocking operation itself), when the filesystem is also under load from unrelated operations. * This can result in the creation of a corrupted B-Tree leaf node or data record. * Corruption can be checked with hammer checkmap and hammer show (as of this rev): hammer -f device checkmap Should output no B-Tree node records or free space mismatches. You will still get the initial volume summary. hammer -f device show | egrep '^B' | egrep -v '^BM' Should output no records. * Currently the only recourse if corruption is found is to copy off the filesystem, newfs_hammer, and copy it back. Full history and snapshots can be retained by using 'hammer -B mirror-read' to copy off the filesystem and mirror-write to copy it back. However, please remember you must do this for each PFS individually. Make sure you have a viable backup before newfsing anything. 
Reported-by: Francois Tigeot , Jan Lentfer --- sys/vfs/hammer/hammer_reblock.c | 19 ++++++++++++++----- 1 file changed, 14 insertions(+), 5 deletions(-) diff --git a/sys/vfs/hammer/hammer_reblock.c b/sys/vfs/hammer/hammer_reblock.c index 76ea6a8a25..d11e5290de 100644 --- a/sys/vfs/hammer/hammer_reblock.c +++ b/sys/vfs/hammer/hammer_reblock.c @@ -130,6 +130,7 @@ retry: /* * Internal or Leaf node */ + KKASSERT(cursor.index < cursor.node->ondisk->count); elm = &cursor.node->ondisk->elms[cursor.index]; reblock->key_cur.obj_id = elm->base.obj_id; reblock->key_cur.localization = elm->base.localization; @@ -144,6 +145,10 @@ retry: * If there is insufficient free space it may be due to * reserved bigblocks, which flushing might fix. * + * We must force a retest in case the unlocked cursor is + * moved to the end of the leaf, or moved to an internal + * node. + * * WARNING: See warnings in hammer_unlock_cursor() function. */ if (hammer_checkspace(trans->hmp, slop)) { @@ -152,10 +157,11 @@ retry: break; } hammer_unlock_cursor(&cursor); + cursor.flags |= HAMMER_CURSOR_RETEST; hammer_flusher_wait(trans->hmp, seq); hammer_lock_cursor(&cursor); seq = hammer_flusher_async(trans->hmp, NULL); - continue; + goto skip; } /* @@ -198,11 +204,10 @@ retry: bwillwrite(HAMMER_XBUFSIZE); hammer_lock_cursor(&cursor); } - +skip: if (error == 0) { error = hammer_btree_iterate(&cursor); } - } if (error == ENOENT) error = 0; @@ -329,6 +334,7 @@ hammer_reblock_helper(struct hammer_ioc_reblock *reblock, if (error == 0) error = hammer_cursor_upgrade(cursor); if (error == 0) { + KKASSERT(cursor->index < ondisk->count); error = hammer_reblock_data(reblock, cursor, elm); } @@ -357,10 +363,13 @@ skip: bytes >= reblock->free_level) { error = hammer_cursor_upgrade(cursor); if (error == 0) { - if (cursor->parent) + if (cursor->parent) { + KKASSERT(cursor->parent_index < + cursor->parent->ondisk->count); elm = &cursor->parent->ondisk->elms[cursor->parent_index]; - else + } else { elm = NULL; + } 
switch(cursor->node->ondisk->type) { case HAMMER_BTREE_TYPE_LEAF: error = hammer_reblock_leaf_node( -- 2.15.1