From: Matthew Dillon Date: Mon, 11 Apr 2011 17:07:37 +0000 (-0700) Subject: HAMMER VFS - Fix degenerate stall condition in flusher during unmount X-Git-Tag: v2.10.0~56^2~26 X-Git-Url: http://gitweb.dragonflybsd.org/dragonfly.git/commitdiff_plain/e86903d84f840af38d1b452a6a6c624702373751 HAMMER VFS - Fix degenerate stall condition in flusher during unmount * Fix a case where the flusher can stall during an unmount. * Rework the flusher sequence numbers to always allocate a sequence number when a flush is requested, remove the flusher.act field, and rejigger the code a bit. * This also cleans up an edge case when a full sync is inserted (when taking snapshots, filesystem sync, etc), by inserting several sequence numbers to completely flush the UNDO/REDO FIFO before moving on to the next active flush group. Reported-by: Sepherosa Ziehau , Francois Tigeot , numerous others. --- diff --git a/sys/vfs/hammer/hammer.h b/sys/vfs/hammer/hammer.h index 11f20b4..07ef83f 100644 --- a/sys/vfs/hammer/hammer.h +++ b/sys/vfs/hammer/hammer.h @@ -838,8 +838,7 @@ TAILQ_HEAD(hammer_flusher_info_list, hammer_flusher_info); struct hammer_flusher { int signal; /* flusher thread sequencer */ - int act; /* currently active flush group */ - int done; /* set to act when complete */ + int done; /* last completed flush group */ int next; /* next unallocated flg seqno */ int group_lock; /* lock sequencing of the next flush */ int exiting; /* request master exit */ diff --git a/sys/vfs/hammer/hammer_flusher.c b/sys/vfs/hammer/hammer_flusher.c index 998da92..6e5da43 100644 --- a/sys/vfs/hammer/hammer_flusher.c +++ b/sys/vfs/hammer/hammer_flusher.c @@ -44,7 +44,7 @@ static void hammer_flusher_master_thread(void *arg); static void hammer_flusher_slave_thread(void *arg); -static void hammer_flusher_flush(hammer_mount_t hmp); +static int hammer_flusher_flush(hammer_mount_t hmp, int *nomorep); static void hammer_flusher_flush_inode(hammer_inode_t ip, hammer_transaction_t trans); @@ -95,8 +95,9 @@ hammer_flusher_sync(hammer_mount_t hmp) * * Returns the sequence number of the last closed flush group, * which may be close_flg. When syncing to the end if there - * are no flush groups pending we still cycle the flusher, so - * we return the next seq number not yet allocated. + * are no flush groups pending we still cycle the flusher, and + * must allocate a sequence number to placemark the spot even + * though no flush group will ever be associated with it. */ int hammer_flusher_async(hammer_mount_t hmp, hammer_flush_group_t close_flg) @@ -125,7 +126,12 @@ hammer_flusher_async(hammer_mount_t hmp, hammer_flush_group_t close_flg) if (hmp->flusher.td) { if (hmp->flusher.signal++ == 0) wakeup(&hmp->flusher.signal); - seq = flg ? flg->seq : hmp->flusher.next; + if (flg) { + seq = flg->seq; + } else { + seq = hmp->flusher.next; + ++hmp->flusher.next; + } } else { seq = hmp->flusher.done; } @@ -162,19 +168,15 @@ hammer_flusher_async_one(hammer_mount_t hmp) } /* - * Wait for the flusher to get to the specified sequence number. - * Signal the flusher as often as necessary to keep it going. + * Wait for the flusher to finish flushing the specified sequence + * number. The flush is already running and will signal us on + * each completion. */ void hammer_flusher_wait(hammer_mount_t hmp, int seq) { - while ((int)(seq - hmp->flusher.done) > 0) { - if ((int)(seq - hmp->flusher.act) > 0) { - if (hmp->flusher.signal++ == 0) - wakeup(&hmp->flusher.signal); - } + while ((int)(seq - hmp->flusher.done) > 0) tsleep(&hmp->flusher.done, 0, "hmrfls", 0); - } } void @@ -193,7 +195,6 @@ hammer_flusher_create(hammer_mount_t hmp) int i; hmp->flusher.signal = 0; - hmp->flusher.act = 0; hmp->flusher.done = 0; hmp->flusher.next = 1; hammer_ref(&hmp->flusher.finalize_lock); @@ -247,8 +248,9 @@ hammer_flusher_destroy(hammer_mount_t hmp) static void hammer_flusher_master_thread(void *arg) { - hammer_flush_group_t flg; hammer_mount_t hmp; + int seq; + int nomore; hmp = arg; @@ -256,22 +258,22 @@ hammer_flusher_master_thread(void *arg) for (;;) { /* - * Flush all closed flgs. If no flg's are closed we still - * do at least one flush cycle as we may have to update - * the UNDO FIFO even if no inodes are queued. + * Flush all sequence numbers up to but not including .next, + * or until an open flush group is encountered. */ for (;;) { while (hmp->flusher.group_lock) - tsleep(&hmp->flusher.group_lock, 0, "hmrhld", 0); + tsleep(&hmp->flusher.group_lock, 0, "hmrhld",0); hammer_flusher_clean_loose_ios(hmp); - hammer_flusher_flush(hmp); - hmp->flusher.done = hmp->flusher.act; + + seq = hammer_flusher_flush(hmp, &nomore); + hmp->flusher.done = seq; wakeup(&hmp->flusher.done); - flg = TAILQ_FIRST(&hmp->flush_group_list); - if (flg == NULL || flg->closed == 0) - break; + if (hmp->flags & HAMMER_MOUNT_CRITICAL_ERROR) break; + if (nomore) + break; } /* @@ -281,13 +283,7 @@ hammer_flusher_master_thread(void *arg) break; while (hmp->flusher.signal == 0) tsleep(&hmp->flusher.signal, 0, "hmrwwa", 0); - - /* - * Flush for each count on signal but only allow one extra - * flush request to build up. - */ - if (--hmp->flusher.signal != 0) - hmp->flusher.signal = 1; + hmp->flusher.signal = 0; } /* @@ -300,10 +296,14 @@ hammer_flusher_master_thread(void *arg) } /* - * Flush all inodes in the current flush group. + * Flush the next sequence number until an open flush group is encountered + * or we reach (next). Not all sequence numbers will have flush groups + * associated with them. These require that the UNDO/REDO FIFO still be + * flushed since it can take at least one additional run to synchronize + * the FIFO, and more to also synchronize the reserve structures. */ -static void -hammer_flusher_flush(hammer_mount_t hmp) +static int +hammer_flusher_flush(hammer_mount_t hmp, int *nomorep) { hammer_flusher_info_t info; hammer_flush_group_t flg; @@ -312,29 +312,45 @@ hammer_flusher_flush(hammer_mount_t hmp) hammer_inode_t next_ip; int slave_index; int count; + int seq; /* - * Just in-case there's a flush race on mount + * Just in-case there's a flush race on mount. Seq number + * does not change. */ if (TAILQ_FIRST(&hmp->flusher.ready_list) == NULL) { - return; + *nomorep = 1; + return (hmp->flusher.done); } + *nomorep = 0; /* - * Set the actively flushing sequence number. If no flushable - * groups are present allocate a dummy sequence number for the - * operation. + * Flush the next sequence number. Sequence numbers can exist + * without an assigned flush group, indicating that just a FIFO flush + * should occur. */ + seq = hmp->flusher.done + 1; flg = TAILQ_FIRST(&hmp->flush_group_list); if (flg == NULL) { - hmp->flusher.act = hmp->flusher.next; - ++hmp->flusher.next; - } else if (flg->closed) { - KKASSERT(flg->running == 0); - flg->running = 1; - hmp->flusher.act = flg->seq; - if (hmp->fill_flush_group == flg) - hmp->fill_flush_group = TAILQ_NEXT(flg, flush_entry); + if (seq == hmp->flusher.next) { + *nomorep = 1; + return (hmp->flusher.done); + } + } else if (seq == flg->seq) { + if (flg->closed) { + KKASSERT(flg->running == 0); + flg->running = 1; + if (hmp->fill_flush_group == flg) { + hmp->fill_flush_group = + TAILQ_NEXT(flg, flush_entry); + } + } else { + *nomorep = 1; + return (hmp->flusher.done); + } + } else { + KKASSERT((int)(flg->seq - seq) > 0); + flg = NULL; } /* @@ -348,9 +364,7 @@ hammer_flusher_flush(hammer_mount_t hmp) ++count; if (hammer_debug_general & 0x0001) { kprintf("hammer_flush %d ttl=%d recs=%d\n", - hmp->flusher.act, - flg->total_count, - flg->refs); + flg->seq, flg->total_count, flg->refs); } if (hmp->flags & HAMMER_MOUNT_CRITICAL_ERROR) break; @@ -465,10 +479,11 @@ hammer_flusher_flush(hammer_mount_t hmp) * it can no longer be reused. */ while ((resv = TAILQ_FIRST(&hmp->delay_list)) != NULL) { - if (resv->flush_group != hmp->flusher.act) + if ((int)(resv->flush_group - seq) > 0) break; hammer_reserve_clrdelay(hmp, resv); } + return (seq); } diff --git a/sys/vfs/hammer/hammer_mirror.c b/sys/vfs/hammer/hammer_mirror.c index a00c477..e797f19 100644 --- a/sys/vfs/hammer/hammer_mirror.c +++ b/sys/vfs/hammer/hammer_mirror.c @@ -348,7 +348,7 @@ hammer_ioc_mirror_write(hammer_transaction_t trans, hammer_inode_t ip, int seq; localization = (u_int32_t)mirror->pfs_id << 16; - seq = trans->hmp->flusher.act; + seq = trans->hmp->flusher.done; /* * Validate the mirror structure and relocalize the tracking keys. diff --git a/sys/vfs/hammer/hammer_pfs.c b/sys/vfs/hammer/hammer_pfs.c index 6699a53..182da0b 100644 --- a/sys/vfs/hammer/hammer_pfs.c +++ b/sys/vfs/hammer/hammer_pfs.c @@ -364,7 +364,7 @@ hammer_pfs_rollback(hammer_transaction_t trans, key_cur.create_tid = 1; key_cur.rec_type = HAMMER_MIN_RECTYPE; - seq = trans->hmp->flusher.act; + seq = trans->hmp->flusher.done; retry: error = hammer_init_cursor(trans, &cursor, NULL, NULL); diff --git a/sys/vfs/hammer/hammer_prune.c b/sys/vfs/hammer/hammer_prune.c index aed0de9..66c6a63 100644 --- a/sys/vfs/hammer/hammer_prune.c +++ b/sys/vfs/hammer/hammer_prune.c @@ -94,7 +94,7 @@ hammer_ioc_prune(hammer_transaction_t trans, hammer_inode_t ip, goto failed; prune->elms = copy_elms; - seq = trans->hmp->flusher.act; + seq = trans->hmp->flusher.done; /* * Scan backwards. Retries typically occur if a deadlock is detected. diff --git a/sys/vfs/hammer/hammer_rebalance.c b/sys/vfs/hammer/hammer_rebalance.c index e232617..8fcc347 100644 --- a/sys/vfs/hammer/hammer_rebalance.c +++ b/sys/vfs/hammer/hammer_rebalance.c @@ -82,7 +82,7 @@ hammer_ioc_rebalance(hammer_transaction_t trans, hammer_inode_t ip, hammer_btree_lcache_init(trans->hmp, &lcache, 2); - seq = trans->hmp->flusher.act; + seq = trans->hmp->flusher.done; /* * Scan forwards. Retries typically occur if a deadlock is detected. diff --git a/sys/vfs/hammer/hammer_reblock.c b/sys/vfs/hammer/hammer_reblock.c index b42febe..327c020 100644 --- a/sys/vfs/hammer/hammer_reblock.c +++ b/sys/vfs/hammer/hammer_reblock.c @@ -89,7 +89,7 @@ hammer_ioc_reblock(hammer_transaction_t trans, hammer_inode_t ip, reblock->key_cur.localization += ip->obj_localization; checkspace_count = 0; - seq = trans->hmp->flusher.act; + seq = trans->hmp->flusher.done; retry: error = hammer_init_cursor(trans, &cursor, NULL, NULL); if (error) {