HAMMER 53D/Many: Stabilization
authorMatthew Dillon <dillon@dragonflybsd.org>
Tue, 10 Jun 2008 00:40:31 +0000 (00:40 +0000)
committerMatthew Dillon <dillon@dragonflybsd.org>
Tue, 10 Jun 2008 00:40:31 +0000 (00:40 +0000)
* Fix an overwrite bug with direct write which could result in file
  corruption.

* Reserve just-freed big blocks for two flush cycles to prevent HAMMER from
  overwriting destroyed data so it does not become corrupt if the system
  crashes.  This is needed because the recovery code does not record UNDOs
  for data (nor do we want it to).

* More I/O subsystem work.  There may still be an elusive panic related
  to calls to regetblk().

sys/vfs/hammer/hammer.h
sys/vfs/hammer/hammer_blockmap.c
sys/vfs/hammer/hammer_flusher.c
sys/vfs/hammer/hammer_freemap.c
sys/vfs/hammer/hammer_io.c
sys/vfs/hammer/hammer_object.c
sys/vfs/hammer/hammer_ondisk.c
sys/vfs/hammer/hammer_prune.c
sys/vfs/hammer/hammer_vfsops.c
sys/vfs/hammer/hammer_vnops.c

index 3701feb..81e67ea 100644 (file)
@@ -31,7 +31,7 @@
  * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  * 
- * $DragonFly: src/sys/vfs/hammer/hammer.h,v 1.76 2008/06/09 04:19:10 dillon Exp $
+ * $DragonFly: src/sys/vfs/hammer/hammer.h,v 1.77 2008/06/10 00:40:31 dillon Exp $
  */
 /*
  * This header file contains structures used internally by the HAMMERFS
@@ -402,6 +402,7 @@ struct hammer_io {
        u_int           waitdep : 1;    /* flush waits for dependancies */
        u_int           recovered : 1;  /* has recovery ref */
        u_int           waitmod : 1;    /* waiting for modify_refs */
+       u_int           reclaim : 1;    /* reclaim requested */
 };
 
 typedef struct hammer_io *hammer_io_t;
@@ -436,6 +437,7 @@ struct hammer_buffer {
        struct hammer_volume *volume;
        hammer_off_t zoneX_offset;
        hammer_off_t zone2_offset;
+       struct hammer_reserve *resv;
        struct hammer_node_list clist;
 };
 
@@ -497,10 +499,13 @@ union hammer_io_structure {
 typedef union hammer_io_structure *hammer_io_structure_t;
 
 /*
- * Allocation holes are recorded for a short period of time in an attempt
- * to use up the space.
+ * Allocation holes are recorded when an allocation does not fit within a
+ * buffer.  Later allocations which might fit may then be satisfied from
+ * a recorded hole.  The resv reference prevents the big block from being
+ * allocated out of via the normal blockmap mechanism.
+ *
+ * This is strictly a heuristic.
  */
-
 #define HAMMER_MAX_HOLES       8
 
 struct hammer_hole;
@@ -521,10 +526,21 @@ struct hammer_hole {
 
 typedef struct hammer_hole *hammer_hole_t;
 
+/*
+ * The reserve structure prevents the blockmap from allocating
+ * out of a reserved bigblock.  Such reservations are used by
+ * the direct-write mechanism.
+ *
+ * The structure is also used to hold off on reallocations of
+ * big blocks from the freemap until flush dependancies have
+ * been dealt with.
+ */
 struct hammer_reserve {
        RB_ENTRY(hammer_reserve) rb_node;
-       hammer_off_t    zone_offset;
+       TAILQ_ENTRY(hammer_reserve) delay_entry;
+       int             flush_group;
        int             refs;
+       hammer_off_t    zone_offset;
 };
 
 typedef struct hammer_reserve *hammer_reserve_t;
@@ -532,7 +548,10 @@ typedef struct hammer_reserve *hammer_reserve_t;
 #include "hammer_cursor.h"
 
 /*
- * Undo history tracking
+ * The undo structure tracks recent undos to avoid laying down duplicate
+ * undos within a flush group, saving us a significant amount of overhead.
+ *
+ * This is strictly a heuristic.
  */
 #define HAMMER_MAX_UNDOS       256
 
@@ -608,6 +627,7 @@ struct hammer_mount {
        int                     undo_alloc;
        TAILQ_HEAD(, hammer_undo)  undo_lru_list;
        TAILQ_HEAD(, hammer_inode) flush_list;
+       TAILQ_HEAD(, hammer_reserve) delay_list;
        TAILQ_HEAD(, hammer_objid_cache) objid_cache_list;
 };
 
@@ -638,6 +658,7 @@ extern int hammer_debug_btree;
 extern int hammer_debug_tid;
 extern int hammer_debug_recover;
 extern int hammer_debug_recover_faults;
+extern int hammer_debug_write_release;
 extern int hammer_count_inodes;
 extern int hammer_count_reclaiming;
 extern int hammer_count_records;
@@ -876,8 +897,6 @@ int  hammer_ip_delete_range_all(hammer_cursor_t cursor, hammer_inode_t ip,
                        int *countp);
 int  hammer_ip_sync_data(hammer_cursor_t cursor, hammer_inode_t ip,
                        int64_t offset, void *data, int bytes);
-int  hammer_dowrite(hammer_cursor_t cursor, hammer_inode_t ip,
-                       off_t file_offset, void *data, int bytes);
 int  hammer_ip_sync_record(hammer_transaction_t trans, hammer_record_t rec);
 int  hammer_ip_sync_record_cursor(hammer_cursor_t cursor, hammer_record_t rec);
 
@@ -892,7 +911,6 @@ int hammer_io_new(struct vnode *devvp, struct hammer_io *io);
 void hammer_io_inval(hammer_volume_t volume, hammer_off_t zone2_offset);
 void hammer_io_release(struct hammer_io *io, int flush);
 void hammer_io_flush(struct hammer_io *io);
-void hammer_io_clear_modify(struct hammer_io *io);
 void hammer_io_waitdep(struct hammer_io *io);
 int hammer_io_direct_read(hammer_mount_t hmp, hammer_btree_leaf_elm_t leaf,
                          struct bio *bio);
@@ -900,6 +918,8 @@ int hammer_io_direct_write(hammer_mount_t hmp, hammer_btree_leaf_elm_t leaf,
                          struct bio *bio);
 void hammer_io_write_interlock(hammer_io_t io);
 void hammer_io_done_interlock(hammer_io_t io);
+void hammer_io_clear_modify(struct hammer_io *io);
+void hammer_io_clear_modlist(struct hammer_io *io);
 void hammer_modify_volume(hammer_transaction_t trans, hammer_volume_t volume,
                        void *base, int len);
 void hammer_modify_buffer(hammer_transaction_t trans, hammer_buffer_t buffer,
index a3b70fb..38e5d02 100644 (file)
@@ -31,7 +31,7 @@
  * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  * 
- * $DragonFly: src/sys/vfs/hammer/hammer_blockmap.c,v 1.16 2008/06/08 18:16:26 dillon Exp $
+ * $DragonFly: src/sys/vfs/hammer/hammer_blockmap.c,v 1.17 2008/06/10 00:40:31 dillon Exp $
  */
 
 /*
@@ -655,6 +655,7 @@ hammer_blockmap_free(hammer_transaction_t trans,
                KKASSERT(((bmap_off ^ (bmap_off + (bytes - 1))) & 
                          ~HAMMER_LARGEBLOCK_MASK64) == 0);
        } else {
+               bytes = -((-bytes + 15) & ~15);
                KKASSERT(bytes >= -HAMMER_BUFSIZE);
        }
        zone = HAMMER_ZONE_DECODE(bmap_off);
index 130dedc..845fb5b 100644 (file)
@@ -31,7 +31,7 @@
  * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  * 
- * $DragonFly: src/sys/vfs/hammer/hammer_flusher.c,v 1.20 2008/06/09 04:19:10 dillon Exp $
+ * $DragonFly: src/sys/vfs/hammer/hammer_flusher.c,v 1.21 2008/06/10 00:40:31 dillon Exp $
  */
 /*
  * HAMMER dependancy flusher thread
@@ -157,14 +157,18 @@ hammer_flusher_clean_loose_ios(hammer_mount_t hmp)
 }
 
 /*
- * Flush all inodes in the current flush group
+ * Flush all inodes in the current flush group.
  */
 static void
 hammer_flusher_flush(hammer_mount_t hmp)
 {
        struct hammer_transaction trans;
        hammer_inode_t ip;
+       hammer_reserve_t resv;
 
+       /*
+        * Flush the inodes
+        */
        hammer_start_transaction_fls(&trans, hmp);
        while ((ip = TAILQ_FIRST(&hmp->flush_list)) != NULL) {
                if (ip->flush_group != hmp->flusher_act)
@@ -174,6 +178,21 @@ hammer_flusher_flush(hammer_mount_t hmp)
        }
        hammer_flusher_finalize(&trans, 1);
        hmp->flusher_tid = trans.tid;
+
+       /*
+        * Clean up any freed big-blocks (typically zone-2). 
+        * resv->flush_group is typically set several flush groups ahead
+        * of the free to ensure that the freed block is not reused until
+        * it can no longer be reused.
+        */
+       while ((resv = TAILQ_FIRST(&hmp->delay_list)) != NULL) {
+               if (resv->flush_group != hmp->flusher_act)
+                       break;
+               TAILQ_REMOVE(&hmp->delay_list, resv, delay_entry);
+               hammer_blockmap_reserve_complete(hmp, resv);
+       }
+
+
        hammer_done_transaction(&trans);
 }
 
@@ -186,8 +205,10 @@ hammer_flusher_flush_inode(hammer_inode_t ip, hammer_transaction_t trans)
 {
        hammer_mount_t hmp = ip->hmp;
 
+       /*hammer_lock_ex(&ip->lock);*/
        ip->error = hammer_sync_inode(ip);
        hammer_flush_inode_done(ip);
+       /*hammer_unlock(&ip->lock);*/
 
        if (hammer_must_finalize_undo(hmp)) {
                kprintf("HAMMER: Warning: UNDO area too small!");
index bf89d4a..1bb77cc 100644 (file)
@@ -31,7 +31,7 @@
  * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  * 
- * $DragonFly: src/sys/vfs/hammer/hammer_freemap.c,v 1.14 2008/06/08 18:16:26 dillon Exp $
+ * $DragonFly: src/sys/vfs/hammer/hammer_freemap.c,v 1.15 2008/06/10 00:40:31 dillon Exp $
  */
 
 /*
@@ -46,6 +46,8 @@
 
 #include "hammer.h"
 
+static int hammer_freemap_reserved(hammer_mount_t hmp, hammer_off_t zone2_base);
+
 /*
  * Backend big-block allocation
  */
@@ -53,6 +55,7 @@ hammer_off_t
 hammer_freemap_alloc(hammer_transaction_t trans, hammer_off_t owner,
                     int *errorp)
 {
+       hammer_mount_t hmp;
        hammer_volume_ondisk_t ondisk;
        hammer_off_t layer1_offset;
        hammer_off_t layer2_offset;
@@ -65,26 +68,27 @@ hammer_freemap_alloc(hammer_transaction_t trans, hammer_off_t owner,
        int vol_no;
        int loops = 0;
 
+       hmp = trans->hmp;
        *errorp = 0;
        ondisk = trans->rootvol->ondisk;
 
-       hammer_lock_ex(&trans->hmp->free_lock);
+       hammer_lock_ex(&hmp->free_lock);
 
-       blockmap = &trans->hmp->blockmap[HAMMER_ZONE_FREEMAP_INDEX];
+       blockmap = &hmp->blockmap[HAMMER_ZONE_FREEMAP_INDEX];
        result_offset = blockmap->next_offset;
        vol_no = HAMMER_VOL_DECODE(result_offset);
        for (;;) { 
                layer1_offset = blockmap->phys_offset +
                                HAMMER_BLOCKMAP_LAYER1_OFFSET(result_offset);
 
-               layer1 = hammer_bread(trans->hmp, layer1_offset, errorp, &buffer1);
+               layer1 = hammer_bread(hmp, layer1_offset, errorp, &buffer1);
                if (layer1->phys_offset == HAMMER_BLOCKMAP_UNAVAIL) {
                        /*
                         * End-of-volume, try next volume.
                         */
 new_volume:
                        ++vol_no;
-                       if (vol_no >= trans->hmp->nvolumes)
+                       if (vol_no >= hmp->nvolumes)
                                vol_no = 0;
                        result_offset = HAMMER_ENCODE_RAW_BUFFER(vol_no, 0);
                        if (vol_no == 0 && ++loops == 2) {
@@ -95,9 +99,11 @@ new_volume:
                } else {
                        layer2_offset = layer1->phys_offset +
                                HAMMER_BLOCKMAP_LAYER2_OFFSET(result_offset);
-                       layer2 = hammer_bread(trans->hmp, layer2_offset, errorp,
+                       layer2 = hammer_bread(hmp, layer2_offset, errorp,
                                              &buffer2);
-                       if (layer2->u.owner == HAMMER_BLOCKMAP_FREE) {
+
+                       if (layer2->u.owner == HAMMER_BLOCKMAP_FREE &&
+                           !hammer_freemap_reserved(hmp, result_offset)) {
                                hammer_modify_buffer(trans, buffer2,
                                                     layer2, sizeof(*layer2));
                                layer2->u.owner = owner &
@@ -111,7 +117,7 @@ new_volume:
                                                     trans->rootvol,
                                                     vol0_stat_freebigblocks);
                                --ondisk->vol0_stat_freebigblocks;
-                               trans->hmp->copy_stat_freebigblocks =
+                               hmp->copy_stat_freebigblocks =
                                        ondisk->vol0_stat_freebigblocks;
                                hammer_modify_volume_done(trans->rootvol);
                                break;
@@ -136,7 +142,7 @@ new_volume:
        blockmap->next_offset = result_offset + HAMMER_LARGEBLOCK_SIZE;
        hammer_modify_volume_done(trans->rootvol);
 done:
-       hammer_unlock(&trans->hmp->free_lock);
+       hammer_unlock(&hmp->free_lock);
        if (buffer1)
                hammer_rel_buffer(buffer1, 0);
        if (buffer2)
@@ -151,6 +157,7 @@ void
 hammer_freemap_free(hammer_transaction_t trans, hammer_off_t phys_offset, 
                    hammer_off_t owner, int *errorp)
 {
+       hammer_mount_t hmp;
        hammer_volume_ondisk_t ondisk;
        hammer_off_t layer1_offset;
        hammer_off_t layer2_offset;
@@ -159,24 +166,39 @@ hammer_freemap_free(hammer_transaction_t trans, hammer_off_t phys_offset,
        hammer_buffer_t buffer2 = NULL;
        struct hammer_blockmap_layer1 *layer1;
        struct hammer_blockmap_layer2 *layer2;
+       hammer_reserve_t resv;
+
+       hmp = trans->hmp;
 
        KKASSERT((phys_offset & HAMMER_LARGEBLOCK_MASK64) == 0);
+       KKASSERT(hammer_freemap_reserved(hmp, phys_offset) == 0);
+
+       /*
+        * Create a reservation
+        */
+       resv = kmalloc(sizeof(*resv), M_HAMMER, M_WAITOK|M_ZERO);
+       resv->refs = 1;
+       resv->zone_offset = phys_offset;
+       resv->flush_group = hmp->flusher_next + 1;
+       RB_INSERT(hammer_res_rb_tree, &hmp->rb_resv_root, resv);
+       TAILQ_INSERT_TAIL(&hmp->delay_list, resv, delay_entry);
+       ++hammer_count_reservations;
+
+       hammer_lock_ex(&hmp->free_lock);
 
        *errorp = 0;
        ondisk = trans->rootvol->ondisk;
 
-       hammer_lock_ex(&trans->hmp->free_lock);
-
-       blockmap = &trans->hmp->blockmap[HAMMER_ZONE_FREEMAP_INDEX];
+       blockmap = &hmp->blockmap[HAMMER_ZONE_FREEMAP_INDEX];
        layer1_offset = blockmap->phys_offset +
                        HAMMER_BLOCKMAP_LAYER1_OFFSET(phys_offset);
-       layer1 = hammer_bread(trans->hmp, layer1_offset, errorp, &buffer1);
+       layer1 = hammer_bread(hmp, layer1_offset, errorp, &buffer1);
 
        KKASSERT(layer1->phys_offset != HAMMER_BLOCKMAP_UNAVAIL);
 
        layer2_offset = layer1->phys_offset +
                        HAMMER_BLOCKMAP_LAYER2_OFFSET(phys_offset);
-       layer2 = hammer_bread(trans->hmp, layer2_offset, errorp, &buffer2);
+       layer2 = hammer_bread(hmp, layer2_offset, errorp, &buffer2);
 
        KKASSERT(layer2->u.owner == (owner & ~HAMMER_LARGEBLOCK_MASK64));
        hammer_modify_buffer(trans, buffer1, layer1, sizeof(*layer1));
@@ -190,9 +212,9 @@ hammer_freemap_free(hammer_transaction_t trans, hammer_off_t phys_offset,
                                   vol0_stat_freebigblocks);
        ++ondisk->vol0_stat_freebigblocks;
        hammer_modify_volume_done(trans->rootvol);
-       trans->hmp->copy_stat_freebigblocks = ondisk->vol0_stat_freebigblocks;
+       hmp->copy_stat_freebigblocks = ondisk->vol0_stat_freebigblocks;
 
-       hammer_unlock(&trans->hmp->free_lock);
+       hammer_unlock(&hmp->free_lock);
 
        if (buffer1)
                hammer_rel_buffer(buffer1, 0);
@@ -200,6 +222,17 @@ hammer_freemap_free(hammer_transaction_t trans, hammer_off_t phys_offset,
                hammer_rel_buffer(buffer2, 0);
 }
 
+/*
+ * Check whether a free block has been reserved in zone-2.
+ */
+static int
+hammer_freemap_reserved(hammer_mount_t hmp, hammer_off_t zone2_base)
+{
+       if (RB_LOOKUP(hammer_res_rb_tree, &hmp->rb_resv_root, zone2_base))
+               return(1);
+       return(0);
+}
+
 /*
  * Check space availability
  */
index d43eae3..e037591 100644 (file)
@@ -31,7 +31,7 @@
  * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  * 
- * $DragonFly: src/sys/vfs/hammer/hammer_io.c,v 1.36 2008/06/09 04:19:10 dillon Exp $
+ * $DragonFly: src/sys/vfs/hammer/hammer_io.c,v 1.37 2008/06/10 00:40:31 dillon Exp $
  */
 /*
  * IO Primitives and buffer cache management
@@ -88,10 +88,8 @@ hammer_io_disassociate(hammer_io_structure_t iou, int elseit)
        /*
         * If the buffer was locked someone wanted to get rid of it.
         */
-       if (bp->b_flags & B_LOCKED) {
+       if (bp->b_flags & B_LOCKED)
                bp->b_flags &= ~B_LOCKED;
-               bp->b_flags |= B_RELBUF;
-       }
 
        /*
         * elseit is 0 when called from the kernel path, the caller is
@@ -100,10 +98,13 @@ hammer_io_disassociate(hammer_io_structure_t iou, int elseit)
        if (elseit) {
                KKASSERT(iou->io.released == 0);
                iou->io.released = 1;
+               if (iou->io.reclaim)
+                       bp->b_flags |= B_NOCACHE|B_RELBUF;
                bqrelse(bp);
        } else {
                KKASSERT(iou->io.released);
        }
+       iou->io.reclaim = 0;
 
        switch(iou->io.type) {
        case HAMMER_STRUCTURE_VOLUME:
@@ -224,6 +225,7 @@ hammer_io_new(struct vnode *devvp, struct hammer_io *io)
 void
 hammer_io_inval(hammer_volume_t volume, hammer_off_t zone2_offset)
 {
+       hammer_io_structure_t iou;
        hammer_off_t phys_offset;
        struct buf *bp;
 
@@ -231,10 +233,15 @@ hammer_io_inval(hammer_volume_t volume, hammer_off_t zone2_offset)
                      (zone2_offset & HAMMER_OFF_SHORT_MASK);
        if (findblk(volume->devvp, phys_offset)) {
                bp = getblk(volume->devvp, phys_offset, HAMMER_BUFSIZE, 0, 0);
-               if (LIST_FIRST(&bp->b_dep) != NULL) {
+               if ((iou = (void *)LIST_FIRST(&bp->b_dep)) != NULL) {
+                       hammer_io_clear_modify(&iou->io);
+                       bundirty(bp);
+                       iou->io.reclaim = 1;
                        hammer_io_deallocate(bp);
                } else {
-                       bp->b_flags |= B_RELBUF;
+                       KKASSERT((bp->b_flags & B_LOCKED) == 0);
+                       bundirty(bp);
+                       bp->b_flags |= B_NOCACHE|B_RELBUF;
                        brelse(bp);
                }
        }
@@ -293,7 +300,7 @@ hammer_io_release(struct hammer_io *io, int flush)
         * that our bioops can override kernel decisions with regards to
         * the buffer).
         */
-       if (flush && io->modified == 0 && io->running == 0) {
+       if ((flush || io->reclaim) && io->modified == 0 && io->running == 0) {
                /*
                 * Always disassociate the bp if an explicit flush
                 * was requested and the IO completed with no error
@@ -332,8 +339,12 @@ hammer_io_release(struct hammer_io *io, int flush)
                if (bp->b_flags & B_LOCKED) {
                        hammer_io_disassociate(iou, 1);
                } else {
-                       io->released = 1;
-                       bqrelse(bp);
+                       if (io->reclaim) {
+                               hammer_io_disassociate(iou, 1);
+                       } else {
+                               io->released = 1;
+                               bqrelse(bp);
+                       }
                }
        } else {
                /*
@@ -345,7 +356,7 @@ hammer_io_release(struct hammer_io *io, int flush)
                crit_enter();
                if (io->running == 0 && (bp->b_flags & B_LOCKED)) {
                        regetblk(bp);
-                       if (bp->b_flags & B_LOCKED) {
+                       if ((bp->b_flags & B_LOCKED) || io->reclaim) {
                                io->released = 0;
                                hammer_io_disassociate(iou, 1);
                        } else {
@@ -409,15 +420,7 @@ hammer_io_flush(struct hammer_io *io)
         * Do this before potentially blocking so any attempt to modify the
         * ondisk while we are blocked blocks waiting for us.
         */
-       KKASSERT(io->mod_list != NULL);
-       if (io->mod_list == &io->hmp->volu_list ||
-           io->mod_list == &io->hmp->meta_list) {
-               --io->hmp->locked_dirty_count;
-               --hammer_count_dirtybufs;
-       }
-       TAILQ_REMOVE(io->mod_list, io, mod_entry);
-       io->mod_list = NULL;
-       io->modified = 0;
+       hammer_io_clear_modify(io);
 
        /*
         * Transfer ownership to the kernel and initiate I/O.
@@ -589,34 +592,37 @@ hammer_modify_buffer_done(hammer_buffer_t buffer)
 }
 
 /*
- * Mark an entity as not being dirty any more -- this usually occurs when
- * the governing a-list has freed the entire entity.
- *
- * XXX
+ * Mark an entity as not being dirty any more.
  */
 void
 hammer_io_clear_modify(struct hammer_io *io)
 {
-#if 0
-       struct buf *bp;
-
-       io->modified = 0;
-       XXX mod_list/entry
-       if ((bp = io->bp) != NULL) {
-               if (io->released) {
-                       regetblk(bp);
-                       /* BUF_KERNPROC(io->bp); */
-               } else {
-                       io->released = 1;
-               }
-               if (io->modified == 0) {
-                       bundirty(bp);
-                       bqrelse(bp);
-               } else {
-                       bdwrite(bp);
+       if (io->modified) {
+               KKASSERT(io->mod_list != NULL);
+               if (io->mod_list == &io->hmp->volu_list ||
+                   io->mod_list == &io->hmp->meta_list) {
+                       --io->hmp->locked_dirty_count;
+                       --hammer_count_dirtybufs;
                }
+               TAILQ_REMOVE(io->mod_list, io, mod_entry);
+               io->mod_list = NULL;
+               io->modified = 0;
+       }
+}
+
+/*
+ * Clear the IO's modify list.  Even though the IO is no longer modified
+ * it may still be on the lose_list.  This routine is called just before
+ * the governing hammer_buffer is destroyed.
+ */
+void
+hammer_io_clear_modlist(struct hammer_io *io)
+{
+       if (io->mod_list) {
+               KKASSERT(io->mod_list == &io->hmp->lose_list);
+               TAILQ_REMOVE(io->mod_list, io, mod_entry);
+               io->mod_list = NULL;
        }
-#endif
 }
 
 /************************************************************************
@@ -776,17 +782,8 @@ hammer_io_checkwrite(struct buf *bp)
         * We can only clear the modified bit if the IO is not currently
         * undergoing modification.  Otherwise we may miss changes.
         */
-       if (io->modify_refs == 0 && io->modified) {
-               KKASSERT(io->mod_list != NULL);
-               if (io->mod_list == &io->hmp->volu_list ||
-                   io->mod_list == &io->hmp->meta_list) {
-                       --io->hmp->locked_dirty_count;
-                       --hammer_count_dirtybufs;
-               }
-               TAILQ_REMOVE(io->mod_list, io, mod_entry);
-               io->mod_list = NULL;
-               io->modified = 0;
-       }
+       if (io->modify_refs == 0 && io->modified)
+               hammer_io_clear_modify(io);
 
        /*
         * The kernel is going to start the IO, set io->running.
@@ -860,6 +857,8 @@ hammer_io_direct_read(hammer_mount_t hmp, hammer_btree_leaf_elm_t leaf,
                hammer_rel_volume(volume, 0);
        }
        if (error) {
+               kprintf("hammer_direct_read: failed @ %016llx\n",
+                       leaf->data_offset);
                bp = bio->bio_buf;
                bp->b_error = error;
                bp->b_flags |= B_ERROR;
@@ -914,6 +913,8 @@ hammer_io_direct_write(hammer_mount_t hmp, hammer_btree_leaf_elm_t leaf,
                        nbio = push_bio(bio);
                        nbio->bio_offset = volume->ondisk->vol_buf_beg +
                                           zone2_offset;
+                       if (hammer_debug_write_release & 1)
+                               nbio->bio_buf->b_flags |= B_RELBUF|B_NOCACHE;
                        vn_strategy(volume->devvp, nbio);
                }
                hammer_rel_volume(volume, 0);
@@ -926,12 +927,14 @@ hammer_io_direct_write(hammer_mount_t hmp, hammer_btree_leaf_elm_t leaf,
                        hammer_io_modify(&buffer->io, 1);
                        bcopy(bp->b_data, ptr, leaf->data_len);
                        hammer_io_modify_done(&buffer->io);
-                       hammer_rel_buffer(buffer, 0);
+                       hammer_rel_buffer(buffer, (hammer_debug_write_release & 2));
                        bp->b_resid = 0;
                        biodone(bio);
                }
        }
        if (error) {
+               kprintf("hammer_direct_write: failed @ %016llx\n",
+                       leaf->data_offset);
                bp = bio->bio_buf;
                bp->b_resid = 0;
                bp->b_error = EIO;
index cf2b472..7c8c417 100644 (file)
@@ -31,7 +31,7 @@
  * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  * 
- * $DragonFly: src/sys/vfs/hammer/hammer_object.c,v 1.63 2008/06/09 04:19:10 dillon Exp $
+ * $DragonFly: src/sys/vfs/hammer/hammer_object.c,v 1.64 2008/06/10 00:40:31 dillon Exp $
  */
 
 #include "hammer.h"
@@ -63,6 +63,11 @@ hammer_rec_rb_compare(hammer_record_t rec1, hammer_record_t rec2)
        if (rec1->leaf.base.key > rec2->leaf.base.key)
                return(1);
 
+#if 0
+       /*
+        * XXX create_tid is set during sync, memory records are always
+        * current.  Do not match against create_tid.
+        */
        if (rec1->leaf.base.create_tid == 0) {
                if (rec2->leaf.base.create_tid == 0)
                        return(0);
@@ -75,6 +80,7 @@ hammer_rec_rb_compare(hammer_record_t rec1, hammer_record_t rec2)
                return(-1);
        if (rec1->leaf.base.create_tid > rec2->leaf.base.create_tid)
                return(1);
+#endif
 
        /*
         * Never match against an item deleted by the front-end.
@@ -103,6 +109,11 @@ hammer_rec_cmp(hammer_base_elm_t elm, hammer_record_t rec)
         if (elm->key > rec->leaf.base.key)
                 return(2);
 
+#if 0
+       /*
+        * XXX create_tid is set during sync, memory records are always
+        * current.  Do not match against create_tid.
+        */
        if (elm->create_tid == 0) {
                if (rec->leaf.base.create_tid == 0)
                        return(0);
@@ -114,6 +125,12 @@ hammer_rec_cmp(hammer_base_elm_t elm, hammer_record_t rec)
                return(-1);
        if (elm->create_tid > rec->leaf.base.create_tid)
                return(1);
+#endif
+       /*
+        * Never match against an item deleted by the front-end.
+        */
+       if (rec->flags & HAMMER_RECF_DELETED_FE)
+               return(1);
         return(0);
 }
 
@@ -134,8 +151,10 @@ hammer_rec_overlap_compare(hammer_btree_leaf_elm_t leaf, hammer_record_t rec)
                return(3);
 
        if (leaf->base.rec_type == HAMMER_RECTYPE_DATA) {
+               /* leaf_end <= rec_beg */
                if (leaf->base.key <= rec->leaf.base.key - rec->leaf.data_len)
                        return(-2);
+               /* leaf_beg >= rec_end */
                if (leaf->base.key - leaf->data_len >= rec->leaf.base.key)
                        return(2);
        } else {
@@ -145,6 +164,7 @@ hammer_rec_overlap_compare(hammer_btree_leaf_elm_t leaf, hammer_record_t rec)
                        return(2);
        }
 
+#if 0
        if (leaf->base.create_tid == 0) {
                if (rec->leaf.base.create_tid == 0)
                        return(0);
@@ -156,6 +176,12 @@ hammer_rec_overlap_compare(hammer_btree_leaf_elm_t leaf, hammer_record_t rec)
                return(-1);
        if (leaf->base.create_tid > rec->leaf.base.create_tid)
                return(1);
+#endif
+       /*
+        * Never match against an item deleted by the front-end.
+        */
+       if (rec->flags & HAMMER_RECF_DELETED_FE)
+               return(1);
         return(0);
 }
 
@@ -164,9 +190,6 @@ hammer_rec_overlap_compare(hammer_btree_leaf_elm_t leaf, hammer_record_t rec)
  * is reversed so the comparison result has to be negated.  key_beg and
  * key_end are both range-inclusive.
  *
- * The creation timestamp can cause hammer_rec_cmp() to return -1 or +1.
- * These do not stop the scan.
- *
  * Localized deletions are not cached in-memory.
  */
 static
@@ -825,6 +848,7 @@ hammer_ip_add_bulk(hammer_inode_t ip, off_t file_offset, void *data, int bytes,
                                               &record->leaf.data_offset,
                                               errorp);
        if (record->resv == NULL) {
+               kprintf("hammer_ip_add_bulk: reservation failed\n");
                hammer_rel_mem_record(record);
                return(NULL);
        }
@@ -839,6 +863,7 @@ hammer_ip_add_bulk(hammer_inode_t ip, off_t file_offset, void *data, int bytes,
 
        hammer_ref(&record->lock);      /* mem_add eats a reference */
        *errorp = hammer_mem_add(record);
+       KKASSERT(*errorp == 0);
 
        return (record);
 }
@@ -1011,6 +1036,8 @@ done:
        return(error);
 }
 
+#if 0
+
 /*
  * Backend code which actually performs the write to the media.  This
  * routine is typically called from the flusher.  The bio will be disposed
@@ -1076,6 +1103,7 @@ hammer_dowrite(hammer_cursor_t cursor, hammer_inode_t ip,
        return(error);
 }
 
+#endif
 
 /*
  * Backend code.  Sync a record to the media.
@@ -1084,6 +1112,7 @@ int
 hammer_ip_sync_record_cursor(hammer_cursor_t cursor, hammer_record_t record)
 {
        hammer_transaction_t trans = cursor->trans;
+       int64_t file_offset;
        void *bdata;
        int error;
 
@@ -1108,11 +1137,12 @@ hammer_ip_sync_record_cursor(hammer_cursor_t cursor, hammer_record_t record)
         * It is ok for the lookup to return ENOENT.
         */
        if (record->type == HAMMER_MEM_RECORD_DATA) {
-               KKASSERT(((record->leaf.base.key - record->leaf.data_len) & HAMMER_BUFMASK) == 0);
+               file_offset = record->leaf.base.key - record->leaf.data_len;
+               KKASSERT((file_offset & HAMMER_BUFMASK) == 0);
                error = hammer_ip_delete_range(
                                cursor, record->ip,
-                               record->leaf.base.key - record->leaf.data_len,
-                               HAMMER_BUFSIZE - 1, 1);
+                               file_offset, file_offset + HAMMER_BUFSIZE - 1,
+                               1);
                if (error && error != ENOENT)
                        goto done;
        }
@@ -1547,9 +1577,10 @@ next_memory:
                        int64_t base1 = elm->leaf.base.key - elm->leaf.data_len;
                        int64_t base2 = cursor->iprec->leaf.base.key -
                                        cursor->iprec->leaf.data_len;
-                       if (base1 == base2)
+                       if (base1 == base2) {
+                               kprintf("G");
                                r = 0;
-                       kprintf("G");
+                       }
                }
 
                if (r < 0) {
@@ -1909,6 +1940,7 @@ hammer_ip_delete_record(hammer_cursor_t cursor, hammer_inode_t ip,
        int dodelete;
 
        KKASSERT(cursor->flags & HAMMER_CURSOR_BACKEND);
+       KKASSERT(tid != 0);
 
        /*
         * In-memory (unsynchronized) records can simply be freed.  This
index c8e5729..d9b4c80 100644 (file)
@@ -31,7 +31,7 @@
  * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  * 
- * $DragonFly: src/sys/vfs/hammer/hammer_ondisk.c,v 1.51 2008/06/08 18:16:26 dillon Exp $
+ * $DragonFly: src/sys/vfs/hammer/hammer_ondisk.c,v 1.52 2008/06/10 00:40:31 dillon Exp $
  */
 /*
  * Manage HAMMER's on-disk structures.  These routines are primarily
@@ -309,6 +309,7 @@ hammer_unload_volume(hammer_volume_t volume, void *data __unused)
         */
        volume->io.waitdep = 1;
        hammer_io_release(&volume->io, 1);
+       hammer_io_clear_modlist(&volume->io);
 
        /*
         * There should be no references on the volume, no clusters, and
@@ -622,8 +623,11 @@ found:
 
 /*
  * Destroy all buffers covering the specified zoneX offset range.  This
- * is called when the related blockmap layer2 entry is freed.  The buffers
- * must not be in use or modified.
+ * is called when the related blockmap layer2 entry is freed or when
+ * a direct write bypasses our buffer/buffer-cache subsystem.
+ *
+ * The buffers may be referenced by the caller itself.  Setting reclaim
+ * will cause the buffer to be destroyed when its ref count reaches zero.
  */
 void
 hammer_del_buffers(hammer_mount_t hmp, hammer_off_t base_offset,
@@ -642,13 +646,15 @@ hammer_del_buffers(hammer_mount_t hmp, hammer_off_t base_offset,
                buffer = RB_LOOKUP(hammer_buf_rb_tree, &hmp->rb_bufs_root,
                                   base_offset);
                if (buffer) {
-                       KKASSERT(buffer->io.lock.refs == 0);
-                       KKASSERT(buffer->io.modified == 0);
                        KKASSERT(buffer->zone2_offset == zone2_offset);
+                       hammer_io_clear_modify(&buffer->io);
+                       buffer->io.reclaim = 1;
                        KKASSERT(buffer->volume == volume);
-                       hammer_unload_buffer(buffer, NULL);
+                       if (buffer->io.lock.refs == 0)
+                               hammer_unload_buffer(buffer, NULL);
+               } else {
+                       hammer_io_inval(volume, zone2_offset);
                }
-               hammer_io_inval(volume, zone2_offset);
                base_offset += HAMMER_BUFSIZE;
                zone2_offset += HAMMER_BUFSIZE;
                bytes -= HAMMER_BUFSIZE;
@@ -775,6 +781,7 @@ hammer_rel_buffer(hammer_buffer_t buffer, int flush)
                                volume = buffer->volume;
                                buffer->volume = NULL; /* sanity */
                                hammer_rel_volume(volume, 0);
+                               hammer_io_clear_modlist(&buffer->io);
                                freeme = 1;
                        }
                }
@@ -784,7 +791,6 @@ hammer_rel_buffer(hammer_buffer_t buffer, int flush)
        hammer_unref(&buffer->io.lock);
        crit_exit();
        if (freeme) {
-               KKASSERT(buffer->io.mod_list == NULL);
                --hammer_count_buffers;
                kfree(buffer, M_HAMMER);
        }
index b44ab45..fb5ccf8 100644 (file)
@@ -31,7 +31,7 @@
  * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  * 
- * $DragonFly: src/sys/vfs/hammer/hammer_prune.c,v 1.5 2008/06/09 04:19:10 dillon Exp $
+ * $DragonFly: src/sys/vfs/hammer/hammer_prune.c,v 1.6 2008/06/10 00:40:31 dillon Exp $
  */
 
 #include "hammer.h"
@@ -122,8 +122,8 @@ retry:
         */
        cursor.flags |= HAMMER_CURSOR_PRUNING;
 
-       error = hammer_btree_last(&cursor);
        hammer_sync_lock_sh(trans);
+       error = hammer_btree_last(&cursor);
 
        while (error == 0) {
                /*
index 6d1d4ff..af48bbd 100644 (file)
@@ -31,7 +31,7 @@
  * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  * 
- * $DragonFly: src/sys/vfs/hammer/hammer_vfsops.c,v 1.41 2008/06/09 04:19:10 dillon Exp $
+ * $DragonFly: src/sys/vfs/hammer/hammer_vfsops.c,v 1.42 2008/06/10 00:40:31 dillon Exp $
  */
 
 #include <sys/param.h>
@@ -54,8 +54,9 @@ int hammer_debug_inode;
 int hammer_debug_locks;
 int hammer_debug_btree;
 int hammer_debug_tid;
-int hammer_debug_recover;      /* -1 will disable, +1 will force */
+int hammer_debug_recover;              /* -1 will disable, +1 will force */
 int hammer_debug_recover_faults;
+int hammer_debug_write_release;                /* if 1 release buffer on strategy */
 int hammer_count_inodes;
 int hammer_count_reclaiming;
 int hammer_count_records;
@@ -93,6 +94,8 @@ SYSCTL_INT(_vfs_hammer, OID_AUTO, debug_recover, CTLFLAG_RW,
           &hammer_debug_recover, 0, "");
 SYSCTL_INT(_vfs_hammer, OID_AUTO, debug_recover_faults, CTLFLAG_RW,
           &hammer_debug_recover_faults, 0, "");
+SYSCTL_INT(_vfs_hammer, OID_AUTO, debug_write_release, CTLFLAG_RW,
+          &hammer_debug_write_release, 0, "");
 
 SYSCTL_INT(_vfs_hammer, OID_AUTO, limit_dirtybufs, CTLFLAG_RW,
           &hammer_limit_dirtybufs, 0, "");
@@ -237,6 +240,7 @@ hammer_vfs_mount(struct mount *mp, char *mntpt, caddr_t data,
                hmp->free_lock.refs = 1;
 
                TAILQ_INIT(&hmp->flush_list);
+               TAILQ_INIT(&hmp->delay_list);
                TAILQ_INIT(&hmp->objid_cache_list);
                TAILQ_INIT(&hmp->undo_lru_list);
 
index 98713d1..01184e8 100644 (file)
@@ -31,7 +31,7 @@
  * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  * 
- * $DragonFly: src/sys/vfs/hammer/hammer_vnops.c,v 1.61 2008/06/09 04:19:10 dillon Exp $
+ * $DragonFly: src/sys/vfs/hammer/hammer_vnops.c,v 1.62 2008/06/10 00:40:31 dillon Exp $
  */
 
 #include <sys/param.h>
@@ -1984,7 +1984,7 @@ hammer_vop_strategy_read(struct vop_strategy_args *ap)
                 *
                 * WARNING: If we hit the else clause.
                 */
-               if (roff == 0 && n == bp->b_bufsize &&
+               if (roff == 0 && boff == 0 && n == bp->b_bufsize &&
                    (rec_offset & HAMMER_BUFMASK) == 0) {
                        error = hammer_io_direct_read(trans.hmp, cursor.leaf,
                                                      bio);