From 6dc17446feb14cbca471e4eecf2519d215f5054d Mon Sep 17 00:00:00 2001 From: Matthew Dillon Date: Sat, 13 Mar 2010 21:16:38 -0800 Subject: [PATCH] HAMMER VFS - Hack cursor iterator when unlocked cursor moved to parent * It is possible to reverse-index a cursor while it is unlocked due to a node deletion moving cursors on that node to the parent, and a subsequent insertion then inserting new elements between the cursor's current position and its expected iteration range. * Detect the case with a new flag (hack!) HAMMER_CURSOR_ITERATE_CHECK and just iterate past the elements outside the iteration range in this case. Reported-by: YONETANI Tomokazu --- sys/vfs/hammer/hammer_btree.c | 89 +++++++++++++++++++++++++++++----- sys/vfs/hammer/hammer_cursor.c | 10 +++- sys/vfs/hammer/hammer_cursor.h | 1 + 3 files changed, 87 insertions(+), 13 deletions(-) diff --git a/sys/vfs/hammer/hammer_btree.c b/sys/vfs/hammer/hammer_btree.c index 9cc7397a63..3eaa1b34d3 100644 --- a/sys/vfs/hammer/hammer_btree.c +++ b/sys/vfs/hammer/hammer_btree.c @@ -110,6 +110,12 @@ static void hammer_cursor_mirror_filter(hammer_cursor_t cursor); * to reinitiate the lookup and set key_beg to properly pick up where we * left off. * + * If HAMMER_CURSOR_ITERATE_CHECK is set it is possible that the cursor + * was reverse indexed due to being moved to a parent while unlocked, + * and something else might have inserted an element outside the iteration + * range. When this case occurs the iterator just keeps iterating until + * it gets back into the iteration range (instead of asserting). + * * NOTE! EDEADLK *CANNOT* be returned by this procedure. */ int @@ -237,25 +243,41 @@ hammer_btree_iterate(hammer_cursor_t cursor) error = ENOENT; break; } - KKASSERT(s <= 0); /* * Better not be zero */ KKASSERT(elm->internal.subtree_offset != 0); - /* - * If running the mirror filter see if we can skip - * one or more entire sub-trees. If we can we - * return the internal mode and the caller processes - * the skipped range (see mirror_read) - */ - if (cursor->flags & HAMMER_CURSOR_MIRROR_FILTERED) { - if (elm->internal.mirror_tid < - cursor->cmirror->mirror_tid) { - hammer_cursor_mirror_filter(cursor); - return(0); + if (s <= 0) { + /* + * If running the mirror filter see if we + * can skip one or more entire sub-trees. + * If we can we return the internal node + * and the caller processes the skipped + * range (see mirror_read). + */ + if (cursor->flags & + HAMMER_CURSOR_MIRROR_FILTERED) { + if (elm->internal.mirror_tid < + cursor->cmirror->mirror_tid) { + hammer_cursor_mirror_filter(cursor); + return(0); + } } + } else { + /* + * Normally it would be impossible for the + * cursor to have gotten back-indexed, + * but it can happen if a node is deleted + * and the cursor is moved to its parent + * internal node. ITERATE_CHECK will be set. + */ + KKASSERT(cursor->flags & + HAMMER_CURSOR_ITERATE_CHECK); + kprintf("hammer_btree_iterate: " + "DEBUG: Caught parent seek " + "in internal iteration\n"); } error = hammer_cursor_down(cursor); @@ -296,6 +318,28 @@ hammer_btree_iterate(hammer_cursor_t cursor) break; } + /* + * If ITERATE_CHECK is set an unlocked cursor may + * have been moved to a parent and the iterate can + * happen upon elements that are not in the requested + * range. + */ + if (cursor->flags & HAMMER_CURSOR_ITERATE_CHECK) { + s = hammer_btree_cmp(&cursor->key_beg, + &elm->base); + if (s > 0) { + kprintf("hammer_btree_iterate: " + "DEBUG: Caught parent seek " + "in leaf iteration\n"); + ++cursor->index; + continue; + } + } + cursor->flags &= ~HAMMER_CURSOR_ITERATE_CHECK; + + /* + * Return the element + */ switch(elm->leaf.base.btype) { case HAMMER_BTREE_TYPE_RECORD: if ((cursor->flags & HAMMER_CURSOR_ASOF) && @@ -471,6 +515,11 @@ hammer_btree_iterate_reverse(hammer_cursor_t cursor) error = ENOENT; break; } + + /* + * It shouldn't be possible to be seeked past key_end, + * even if the cursor got moved to a parent. + */ KKASSERT(r >= 0); /* @@ -509,6 +558,15 @@ hammer_btree_iterate_reverse(hammer_cursor_t cursor) break; } + /* + * It shouldn't be possible to be seeked past key_end, + * even if the cursor got moved to a parent. + */ + cursor->flags &= ~HAMMER_CURSOR_ITERATE_CHECK; + + /* + * Return the element + */ switch(elm->leaf.base.btype) { case HAMMER_BTREE_TYPE_RECORD: if ((cursor->flags & HAMMER_CURSOR_ASOF) && @@ -588,6 +646,7 @@ hammer_btree_lookup(hammer_cursor_t cursor) { int error; + cursor->flags &= ~HAMMER_CURSOR_ITERATE_CHECK; KKASSERT ((cursor->flags & HAMMER_CURSOR_INSERT) == 0 || cursor->trans->sync_lock_refs > 0); ++hammer_stats_btree_lookups; @@ -2439,6 +2498,12 @@ hammer_btree_mirror_propagate(hammer_cursor_t cursor, hammer_tid_t mirror_tid) error = hammer_cursor_up(cursor); if (error == 0) error = hammer_cursor_upgrade(cursor); + + /* + * We can ignore HAMMER_CURSOR_ITERATE_CHECK, the + * cursor will still be properly positioned for + * mirror propagation, just not for iterations. + */ while (error == EDEADLK) { hammer_recover_cursor(cursor); error = hammer_cursor_upgrade(cursor); diff --git a/sys/vfs/hammer/hammer_cursor.c b/sys/vfs/hammer/hammer_cursor.c index 614ab18371..5f4cb60429 100644 --- a/sys/vfs/hammer/hammer_cursor.c +++ b/sys/vfs/hammer/hammer_cursor.c @@ -711,7 +711,14 @@ hammer_cursor_replaced_node(hammer_node_t onode, hammer_node_t nnode) * We have removed from the parent and collapsed the parent. * * Cursors in deadlock recovery are seeked upward to the parent so the - * btree_remove() recursion works properly. + * btree_remove() recursion works properly even though we have marked + * the cursor as requiring a reseek. + * + * This is the only cursor function which sets HAMMER_CURSOR_ITERATE_CHECK, + * meaning the cursor is no longer definitively pointing at an element + * within its iteration (if the cursor is being used to iterate). The + * iteration code will take this into account instead of asserting if the + * cursor is outside the iteration range. */ void hammer_cursor_removed_node(hammer_node_t node, hammer_node_t parent, int index) @@ -730,6 +737,7 @@ hammer_cursor_removed_node(hammer_node_t node, hammer_node_t parent, int index) if (cursor->leaf == &ondisk->elms[cursor->index].leaf) cursor->leaf = NULL; cursor->flags |= HAMMER_CURSOR_TRACKED_RIPOUT; + cursor->flags |= HAMMER_CURSOR_ITERATE_CHECK; cursor->node = parent; cursor->index = index; hammer_ref_node(parent); diff --git a/sys/vfs/hammer/hammer_cursor.h b/sys/vfs/hammer/hammer_cursor.h index 20e97219a0..8888691b1f 100644 --- a/sys/vfs/hammer/hammer_cursor.h +++ b/sys/vfs/hammer/hammer_cursor.h @@ -140,6 +140,7 @@ typedef struct hammer_cursor *hammer_cursor_t; #define HAMMER_CURSOR_TRACKED 0x00040000 #define HAMMER_CURSOR_TRACKED_RIPOUT 0x00080000 #define HAMMER_CURSOR_LASTWASMEM 0x00100000 /* hammer_ip_next logic */ +#define HAMMER_CURSOR_ITERATE_CHECK 0x00200000 /* * Flags we can clear when reusing a cursor (we can clear all of them) -- 2.41.0