From: Matthew Dillon Date: Wed, 6 Aug 2008 15:41:56 +0000 (+0000) Subject: HAMMER 2.0:01 - MFC HAMMER 2.1:01 X-Git-Tag: v2.0.1~29 X-Git-Url: https://gitweb.dragonflybsd.org/~mneumann/dragonfly.git/commitdiff_plain/cc26124a73ba584773c5c339a4312d336f061e5f HAMMER 2.0:01 - MFC HAMMER 2.1:01 MFC buffer cache leak, invalidation races, b-tree deletion, nlinks, and rmdir fixes. --- diff --git a/sys/vfs/hammer/hammer_btree.c b/sys/vfs/hammer/hammer_btree.c index 98fe2544c0..1731612580 100644 --- a/sys/vfs/hammer/hammer_btree.c +++ b/sys/vfs/hammer/hammer_btree.c @@ -31,7 +31,7 @@ * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * - * $DragonFly: src/sys/vfs/hammer/hammer_btree.c,v 1.71.2.4 2008/08/02 21:24:27 dillon Exp $ + * $DragonFly: src/sys/vfs/hammer/hammer_btree.c,v 1.71.2.5 2008/08/06 15:41:56 dillon Exp $ */ /* @@ -2072,8 +2072,8 @@ hammer_btree_correct_lhb(hammer_cursor_t cursor, hammer_tid_t tid) * (cursor->node). Returns 0 on success, EDEADLK if we could not complete * the operation due to a deadlock, or some other error. * - * This routine is always called with an empty, locked leaf but may recurse - * into want-to-be-empty parents as part of its operation. + * This routine is initially called with an empty leaf and may be + * recursively called with single-element internal nodes. * * It should also be noted that when removing empty leaves we must be sure * to test and update mirror_tid because another thread may have deadlocked @@ -2118,6 +2118,13 @@ btree_remove(hammer_cursor_t cursor) * Attempt to remove the parent's reference to the child. If the * parent would become empty we have to recurse. If we fail we * leave the parent pointing to an empty leaf node. + * + * We have to recurse successfully before we can delete the internal + * node as it is illegal to have empty internal nodes. Even though + * the operation may be aborted we must still fixup any unlocked + * cursors as if we had deleted the element prior to recursing + * (by calling hammer_cursor_deleted_element()) so those cursors + * are properly forced up the chain by the recursion. */ if (parent->ondisk->count == 1) { /* @@ -2128,6 +2135,7 @@ btree_remove(hammer_cursor_t cursor) */ error = hammer_cursor_up_locked(cursor); if (error == 0) { + hammer_cursor_deleted_element(cursor->node, 0); error = btree_remove(cursor); if (error == 0) { hammer_modify_node_all(cursor->trans, node); diff --git a/sys/vfs/hammer/hammer_cursor.c b/sys/vfs/hammer/hammer_cursor.c index 180460772f..f035f9927f 100644 --- a/sys/vfs/hammer/hammer_cursor.c +++ b/sys/vfs/hammer/hammer_cursor.c @@ -31,7 +31,7 @@ * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * - * $DragonFly: src/sys/vfs/hammer/hammer_cursor.c,v 1.41.2.1 2008/08/02 21:24:27 dillon Exp $ + * $DragonFly: src/sys/vfs/hammer/hammer_cursor.c,v 1.41.2.2 2008/08/06 15:41:56 dillon Exp $ */ /* @@ -497,7 +497,7 @@ hammer_unlock_cursor(hammer_cursor_t cursor, int also_ip) /* * Release the cursor's locks and track B-Tree operations on node. * While being tracked our cursor can be modified by other threads - * and node may be replaced. + * and the node may be replaced. */ if (cursor->parent) { hammer_unlock(&cursor->parent->lock); @@ -561,9 +561,15 @@ hammer_lock_cursor(hammer_cursor_t cursor, int also_ip) TAILQ_REMOVE(&node->cursor_list, cursor, deadlk_entry); cursor->flags &= ~HAMMER_CURSOR_TRACKED; + /* + * If a ripout has occured iterations must re-test the (new) + * current element. Clearing ATEDISK prevents the element from + * being skipped and RETEST causes it to be re-tested. + */ if (cursor->flags & HAMMER_CURSOR_TRACKED_RIPOUT) { cursor->flags &= ~HAMMER_CURSOR_TRACKED_RIPOUT; cursor->flags &= ~HAMMER_CURSOR_ATEDISK; + cursor->flags |= HAMMER_CURSOR_RETEST; } error = hammer_load_cursor_parent(cursor, 0); return(error); diff --git a/sys/vfs/hammer/hammer_cursor.h b/sys/vfs/hammer/hammer_cursor.h index 4629401e07..3001051cb5 100644 --- a/sys/vfs/hammer/hammer_cursor.h +++ b/sys/vfs/hammer/hammer_cursor.h @@ -31,7 +31,7 @@ * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * - * $DragonFly: src/sys/vfs/hammer/hammer_cursor.h,v 1.25.2.1 2008/08/02 21:24:27 dillon Exp $ + * $DragonFly: src/sys/vfs/hammer/hammer_cursor.h,v 1.25.2.2 2008/08/06 15:41:56 dillon Exp $ */ struct hammer_cmirror; @@ -129,7 +129,7 @@ typedef struct hammer_cursor *hammer_cursor_t; #define HAMMER_CURSOR_ATEMEM 0x0200 #define HAMMER_CURSOR_DISKEOF 0x0400 #define HAMMER_CURSOR_MEMEOF 0x0800 -#define HAMMER_CURSOR_DELBTREE 0x1000 /* ip_delete from b-tree */ +#define HAMMER_CURSOR_RETEST 0x1000 /* retest current element */ #define HAMMER_CURSOR_MIRROR_FILTERED 0x2000 /* mirror_tid filter */ #define HAMMER_CURSOR_ASOF 0x4000 /* as-of lookup */ #define HAMMER_CURSOR_CREATE_CHECK 0x8000 /* as-of lookup */ diff --git a/sys/vfs/hammer/hammer_inode.c b/sys/vfs/hammer/hammer_inode.c index 69b2efcf52..fac61e8cef 100644 --- a/sys/vfs/hammer/hammer_inode.c +++ b/sys/vfs/hammer/hammer_inode.c @@ -31,7 +31,7 @@ * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * - * $DragonFly: src/sys/vfs/hammer/hammer_inode.c,v 1.103.2.3 2008/08/02 21:24:28 dillon Exp $ + * $DragonFly: src/sys/vfs/hammer/hammer_inode.c,v 1.103.2.4 2008/08/06 15:41:56 dillon Exp $ */ #include "hammer.h" @@ -1873,19 +1873,19 @@ hammer_setup_child_callback(hammer_record_t rec, void *data) /* * If the target IP is already flushing in our group - * we are golden, otherwise make sure the target - * reflushes. + * we could associate the record, but target_ip has + * already synced ino_data to sync_ino_data and we + * would also have to adjust nlinks. Plus there are + * ordering issues for adds and deletes. + * + * Reflush downward if this is an ADD, and upward if + * this is a DEL. */ if (target_ip->flush_state == HAMMER_FST_FLUSH) { - if (target_ip->flush_group == flg) { - rec->flush_state = HAMMER_FST_FLUSH; - rec->flush_group = flg; - ++flg->refs; - hammer_ref(&rec->lock); - r = 1; - } else { + if (rec->flush_state == HAMMER_MEM_RECORD_ADD) + ip->flags |= HAMMER_INODE_REFLUSH; + else target_ip->flags |= HAMMER_INODE_REFLUSH; - } break; } diff --git a/sys/vfs/hammer/hammer_io.c b/sys/vfs/hammer/hammer_io.c index 6ca02b9440..20127dbf57 100644 --- a/sys/vfs/hammer/hammer_io.c +++ b/sys/vfs/hammer/hammer_io.c @@ -31,7 +31,7 @@ * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * - * $DragonFly: src/sys/vfs/hammer/hammer_io.c,v 1.49.2.3 2008/08/02 21:24:28 dillon Exp $ + * $DragonFly: src/sys/vfs/hammer/hammer_io.c,v 1.49.2.4 2008/08/06 15:41:56 dillon Exp $ */ /* * IO Primitives and buffer cache management @@ -269,10 +269,14 @@ hammer_io_inval(hammer_volume_t volume, hammer_off_t zone2_offset) else bp = getblk(volume->devvp, phys_offset, HAMMER_BUFSIZE, 0, 0); if ((iou = (void *)LIST_FIRST(&bp->b_dep)) != NULL) { + hammer_ref(&iou->io.lock); hammer_io_clear_modify(&iou->io, 1); bundirty(bp); iou->io.reclaim = 1; - hammer_io_deallocate(bp); + iou->io.waitdep = 1; + KKASSERT(iou->io.lock.refs == 0); + hammer_rel_buffer(&iou->buffer, 0); + /*hammer_io_deallocate(bp);*/ } else { KKASSERT((bp->b_flags & B_LOCKED) == 0); bundirty(bp); @@ -332,7 +336,9 @@ hammer_io_release(struct hammer_io *io, int flush) } /* - * Wait for the IO to complete if asked to. + * Wait for the IO to complete if asked to. This occurs when + * the buffer must be disposed of definitively during an umount + * or buffer invalidation. */ if (io->waitdep && io->running) { hammer_io_wait(io); @@ -490,7 +496,9 @@ hammer_io_flush(struct hammer_io *io) * Do this before potentially blocking so any attempt to modify the * ondisk while we are blocked blocks waiting for us. */ + hammer_ref(&io->lock); hammer_io_clear_modify(io, 0); + hammer_unref(&io->lock); /* * Transfer ownership to the kernel and initiate I/O. @@ -650,6 +658,9 @@ hammer_modify_buffer_done(hammer_buffer_t buffer) * making bulk-modifications to the B-Tree. * * If inval is non-zero delayed adjustments are ignored. + * + * This routine may dereference related btree nodes and cause the + * buffer to be dereferenced. The caller must own a reference on io. */ void hammer_io_clear_modify(struct hammer_io *io, int inval) @@ -698,7 +709,8 @@ restart: goto restart; } } - + /* caller must still have ref on io */ + KKASSERT(io->lock.refs > 0); } /* @@ -951,9 +963,18 @@ hammer_io_checkwrite(struct buf *bp) /* * We can only clear the modified bit if the IO is not currently * undergoing modification. Otherwise we may miss changes. + * + * Only data and undo buffers can reach here. These buffers do + * not have terminal crc functions but we temporarily reference + * the IO anyway, just in case. */ - if (io->modify_refs == 0 && io->modified) + if (io->modify_refs == 0 && io->modified) { + hammer_ref(&io->lock); hammer_io_clear_modify(io, 0); + hammer_unref(&io->lock); + } else if (io->modified) { + KKASSERT(io->type == HAMMER_STRUCTURE_DATA_BUFFER); + } /* * The kernel is going to start the IO, set io->running. diff --git a/sys/vfs/hammer/hammer_object.c b/sys/vfs/hammer/hammer_object.c index 144bb29ff3..e4d306cfee 100644 --- a/sys/vfs/hammer/hammer_object.c +++ b/sys/vfs/hammer/hammer_object.c @@ -31,7 +31,7 @@ * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * - * $DragonFly: src/sys/vfs/hammer/hammer_object.c,v 1.90.2.3 2008/08/02 21:24:28 dillon Exp $ + * $DragonFly: src/sys/vfs/hammer/hammer_object.c,v 1.90.2.4 2008/08/06 15:41:56 dillon Exp $ */ #include "hammer.h" @@ -1084,7 +1084,7 @@ hammer_ip_sync_record_cursor(hammer_cursor_t cursor, hammer_record_t record) if (record->type == HAMMER_MEM_RECORD_DEL) { error = hammer_btree_lookup(cursor); if (error == 0) { - /* XXX iprec? */ + KKASSERT(cursor->iprec == NULL); error = hammer_ip_delete_record(cursor, record->ip, trans->tid); if (error == 0) { @@ -1312,7 +1312,7 @@ hammer_ip_first(hammer_cursor_t cursor) /* * Clean up fields and setup for merged scan */ - cursor->flags &= ~HAMMER_CURSOR_DELBTREE; + cursor->flags &= ~HAMMER_CURSOR_RETEST; cursor->flags |= HAMMER_CURSOR_ATEDISK | HAMMER_CURSOR_ATEMEM; cursor->flags |= HAMMER_CURSOR_DISKEOF | HAMMER_CURSOR_MEMEOF; if (cursor->iprec) { @@ -1393,17 +1393,17 @@ next_btree: * records we have to get the next one. * * If we deleted the last on-disk record we had scanned ATEDISK will - * be clear and DELBTREE will be set, forcing a call to iterate. The + * be clear and RETEST will be set, forcing a call to iterate. The * fact that ATEDISK is clear causes iterate to re-test the 'current' * element. If ATEDISK is set, iterate will skip the 'current' * element. * * Get the next on-disk record */ - if (cursor->flags & (HAMMER_CURSOR_ATEDISK|HAMMER_CURSOR_DELBTREE)) { + if (cursor->flags & (HAMMER_CURSOR_ATEDISK|HAMMER_CURSOR_RETEST)) { if ((cursor->flags & HAMMER_CURSOR_DISKEOF) == 0) { error = hammer_btree_iterate(cursor); - cursor->flags &= ~HAMMER_CURSOR_DELBTREE; + cursor->flags &= ~HAMMER_CURSOR_RETEST; if (error == 0) { cursor->flags &= ~HAMMER_CURSOR_ATEDISK; hammer_cache_node(&cursor->ip->cache[1], @@ -1750,8 +1750,8 @@ retry: * * This will also physically destroy the B-Tree entry and * data if the retention policy dictates. The function - * will set HAMMER_CURSOR_DELBTREE which hammer_ip_next() - * uses to perform a fixup. + * will set HAMMER_CURSOR_RETEST to cause hammer_ip_next() + * to retest the new 'current' element. */ if (truncating == 0 || hammer_cursor_ondisk(cursor)) { error = hammer_ip_delete_record(cursor, ip, trans->tid); @@ -1877,8 +1877,8 @@ retry: * Mark the record and B-Tree entry as deleted. This will * also physically delete the B-Tree entry, record, and * data if the retention policy dictates. The function - * will set HAMMER_CURSOR_DELBTREE which hammer_ip_next() - * uses to perform a fixup. + * will set HAMMER_CURSOR_RETEST to cause hammer_ip_next() + * to retest the new 'current' element. * * Directory entries (and delete-on-disk directory entries) * must be synced and cannot be deleted. @@ -2038,10 +2038,11 @@ hammer_delete_at_cursor(hammer_cursor_t cursor, int delete_flags, * Adjust for the iteration. We have deleted the current * element and want to clear ATEDISK so the iteration does * not skip the element after, which now becomes the current - * element. + * element. This element must be re-tested if doing an + * iteration, which is handled by the RETEST flag. */ if ((cursor->flags & HAMMER_CURSOR_DISKEOF) == 0) { - cursor->flags |= HAMMER_CURSOR_DELBTREE; + cursor->flags |= HAMMER_CURSOR_RETEST; cursor->flags &= ~HAMMER_CURSOR_ATEDISK; } @@ -2076,12 +2077,14 @@ hammer_delete_at_cursor(hammer_cursor_t cursor, int delete_flags, error = hammer_btree_delete(cursor); if (error == 0) { /* - * This forces a fixup for the iteration because - * the cursor is now either sitting at the 'next' - * element or sitting at the end of a leaf. + * The deletion moves the next element (if any) to + * the current element position. We must clear + * ATEDISK so this element is not skipped and we + * must set RETEST to force any iteration to re-test + * the element. */ if ((cursor->flags & HAMMER_CURSOR_DISKEOF) == 0) { - cursor->flags |= HAMMER_CURSOR_DELBTREE; + cursor->flags |= HAMMER_CURSOR_RETEST; cursor->flags &= ~HAMMER_CURSOR_ATEDISK; } } diff --git a/sys/vfs/hammer/hammer_ondisk.c b/sys/vfs/hammer/hammer_ondisk.c index de20a32401..021cddf52f 100644 --- a/sys/vfs/hammer/hammer_ondisk.c +++ b/sys/vfs/hammer/hammer_ondisk.c @@ -31,7 +31,7 @@ * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * - * $DragonFly: src/sys/vfs/hammer/hammer_ondisk.c,v 1.69.2.4 2008/08/02 21:24:28 dillon Exp $ + * $DragonFly: src/sys/vfs/hammer/hammer_ondisk.c,v 1.69.2.5 2008/08/06 15:41:56 dillon Exp $ */ /* * Manage HAMMER's on-disk structures. These routines are primarily @@ -653,8 +653,9 @@ found: /* * This is used by the direct-read code to deal with large-data buffers * created by the reblocker and mirror-write code. The direct-read code - * bypasses the HAMMER buffer subsystem and so any aliased dirty hammer - * buffers must be fully synced to disk before we can issue the direct-read. + * bypasses the HAMMER buffer subsystem and so any aliased dirty or write- + * running hammer buffers must be fully synced to disk before we can issue + * the direct-read. * * This code path is not considered critical as only the rebocker and * mirror-write code will create large-data buffers via the HAMMER buffer @@ -673,13 +674,16 @@ hammer_sync_buffers(hammer_mount_t hmp, hammer_off_t base_offset, int bytes) while (bytes > 0) { buffer = RB_LOOKUP(hammer_buf_rb_tree, &hmp->rb_bufs_root, base_offset); - if (buffer && buffer->io.modified) { + if (buffer && (buffer->io.modified || buffer->io.running)) { error = hammer_ref_buffer(buffer); - if (error == 0 && buffer->io.modified) { - hammer_io_write_interlock(&buffer->io); - hammer_io_flush(&buffer->io); - hammer_io_done_interlock(&buffer->io); + if (error == 0) { hammer_io_wait(&buffer->io); + if (buffer->io.modified) { + hammer_io_write_interlock(&buffer->io); + hammer_io_flush(&buffer->io); + hammer_io_done_interlock(&buffer->io); + hammer_io_wait(&buffer->io); + } hammer_rel_buffer(buffer, 0); } } @@ -718,6 +722,7 @@ hammer_del_buffers(hammer_mount_t hmp, hammer_off_t base_offset, KKASSERT(buffer->zone2_offset == zone2_offset); hammer_io_clear_modify(&buffer->io, 1); buffer->io.reclaim = 1; + buffer->io.waitdep = 1; KKASSERT(buffer->volume == volume); hammer_rel_buffer(buffer, 0); } diff --git a/sys/vfs/hammer/hammer_vnops.c b/sys/vfs/hammer/hammer_vnops.c index 9c2dfdaf6f..e98d5bb747 100644 --- a/sys/vfs/hammer/hammer_vnops.c +++ b/sys/vfs/hammer/hammer_vnops.c @@ -31,7 +31,7 @@ * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * - * $DragonFly: src/sys/vfs/hammer/hammer_vnops.c,v 1.91.2.3 2008/08/02 21:24:28 dillon Exp $ + * $DragonFly: src/sys/vfs/hammer/hammer_vnops.c,v 1.91.2.4 2008/08/06 15:41:56 dillon Exp $ */ #include @@ -1652,6 +1652,8 @@ hammer_vop_nrmdir(struct vop_nrmdir_args *ap) int error; dip = VTOI(ap->a_dvp); + if (dip->ino_data.obj_type != HAMMER_OBJTYPE_DIRECTORY) + return(ENOTDIR); if (hammer_nohistory(dip) == 0 && (error = hammer_checkspace(dip->hmp, HAMMER_CHKSPC_REMOVE)) != 0) {