HAMMER VFS - Fix probable corruption case when filesystem becomes nearly full v2.7.2
author Matthew Dillon <dillon@apollo.backplane.com>
Mon, 19 Apr 2010 05:36:14 +0000 (22:36 -0700)
committer Matthew Dillon <dillon@apollo.backplane.com>
Mon, 19 Apr 2010 05:36:14 +0000 (22:36 -0700)
* The reblocking code was incorrectly assuming the cursor would be pointing
  at a valid node element after an unlock/relock sequence, when it could
  actually be pointing at the EOF of a node.  This case can occur when
  the filesystem is nearly full (possibly due to the reblocking operation
  itself) and is also under load from unrelated operations.

* This can result in the creation of a corrupted B-Tree leaf node or
  data record.

* Corruption can be checked with hammer checkmap and hammer show
  (as of this rev):

  hammer -f device checkmap

Should output no B-Tree node records or free space mismatches.
You will still get the initial volume summary.

  hammer -f device show | egrep '^B' | egrep -v '^BM'

Should output no records.

* Currently the only recourse if corruption is found is to copy off the
  filesystem, newfs_hammer, and copy it back.

  Full history and snapshots can be retained by using 'hammer -B mirror-read'
  to copy off the filesystem and mirror-write to copy it back.  However,
  please remember you must do this for each PFS individually.  Make sure
  you have a viable backup before newfsing anything.

Reported-by: Francois Tigeot <ftigeot@wolfpond.org>, Jan Lentfer <Jan.Lentfer@web.de>
sys/vfs/hammer/hammer_reblock.c

index 76ea6a8..d11e529 100644 (file)
@@ -130,6 +130,7 @@ retry:
                /*
                 * Internal or Leaf node
                 */
+               KKASSERT(cursor.index < cursor.node->ondisk->count);
                elm = &cursor.node->ondisk->elms[cursor.index];
                reblock->key_cur.obj_id = elm->base.obj_id;
                reblock->key_cur.localization = elm->base.localization;
@@ -144,6 +145,10 @@ retry:
                 * If there is insufficient free space it may be due to
                 * reserved bigblocks, which flushing might fix.
                 *
+                * We must force a retest in case the unlocked cursor is
+                * moved to the end of the leaf, or moved to an internal
+                * node.
+                *
                 * WARNING: See warnings in hammer_unlock_cursor() function.
                 */
                if (hammer_checkspace(trans->hmp, slop)) {
@@ -152,10 +157,11 @@ retry:
                                break;
                        }
                        hammer_unlock_cursor(&cursor);
+                       cursor.flags |= HAMMER_CURSOR_RETEST;
                        hammer_flusher_wait(trans->hmp, seq);
                        hammer_lock_cursor(&cursor);
                        seq = hammer_flusher_async(trans->hmp, NULL);
-                       continue;
+                       goto skip;
                }
 
                /*
@@ -198,11 +204,10 @@ retry:
                        bwillwrite(HAMMER_XBUFSIZE);
                        hammer_lock_cursor(&cursor);
                }
-
+skip:
                if (error == 0) {
                        error = hammer_btree_iterate(&cursor);
                }
-
        }
        if (error == ENOENT)
                error = 0;
@@ -329,6 +334,7 @@ hammer_reblock_helper(struct hammer_ioc_reblock *reblock,
                        if (error == 0)
                                error = hammer_cursor_upgrade(cursor);
                        if (error == 0) {
+                               KKASSERT(cursor->index < ondisk->count);
                                error = hammer_reblock_data(reblock,
                                                            cursor, elm);
                        }
@@ -357,10 +363,13 @@ skip:
                    bytes >= reblock->free_level) {
                        error = hammer_cursor_upgrade(cursor);
                        if (error == 0) {
-                               if (cursor->parent)
+                               if (cursor->parent) {
+                                       KKASSERT(cursor->parent_index <
+                                                cursor->parent->ondisk->count);
                                        elm = &cursor->parent->ondisk->elms[cursor->parent_index];
-                               else
+                               } else {
                                        elm = NULL;
+                               }
                                switch(cursor->node->ondisk->type) {
                                case HAMMER_BTREE_TYPE_LEAF:
                                        error = hammer_reblock_leaf_node(