HAMMER 56A/Many: Performance tuning - MEDIA STRUCTURES CHANGED!
[dragonfly.git] / sys / vfs / hammer / hammer_undo.c
index c40a815..8041822 100644 (file)
@@ -31,7 +31,7 @@
  * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  * 
- * $DragonFly: src/sys/vfs/hammer/hammer_undo.c,v 1.9 2008/05/02 06:51:57 dillon Exp $
+ * $DragonFly: src/sys/vfs/hammer/hammer_undo.c,v 1.17 2008/06/17 04:02:38 dillon Exp $
  */
 
 /*
 
 #include "hammer.h"
 
+static int hammer_und_rb_compare(hammer_undo_t node1, hammer_undo_t node2);
+
+/* RB tree of cached undo ranges; RB_GENERATE2 enables keyed lookup by offset */
+RB_GENERATE2(hammer_und_rb_tree, hammer_undo, rb_node,
+             hammer_und_rb_compare, hammer_off_t, offset);
+
 /*
  * Convert a zone-3 undo offset into a zone-2 buffer offset.
  */
@@ -48,7 +53,6 @@ hammer_undo_lookup(hammer_mount_t hmp, hammer_off_t zone3_off, int *errorp)
 {
        hammer_volume_t root_volume;
        hammer_blockmap_t undomap;
-       struct hammer_blockmap_layer2 *layer2;
        hammer_off_t result_offset;
        int i;
 
@@ -61,8 +65,7 @@ hammer_undo_lookup(hammer_mount_t hmp, hammer_off_t zone3_off, int *errorp)
        KKASSERT (zone3_off < undomap->alloc_offset);
 
        i = (zone3_off & HAMMER_OFF_SHORT_MASK) / HAMMER_LARGEBLOCK_SIZE;
-       layer2 = &root_volume->ondisk->vol0_undo_array[i];
-       result_offset = layer2->u.phys_offset +
+       /* vol0_undo_array[] now stores zone-2 offsets directly (media change) */
+       result_offset = root_volume->ondisk->vol0_undo_array[i] +
                        (zone3_off & HAMMER_LARGEBLOCK_MASK64);
 
        hammer_rel_volume(root_volume, 0);
@@ -72,31 +75,41 @@ hammer_undo_lookup(hammer_mount_t hmp, hammer_off_t zone3_off, int *errorp)
 /*
  * Generate an UNDO record for the block of data at the specified zone1
  * or zone2 offset.
+ *
+ * The recovery code will execute UNDOs in reverse order, allowing overlaps.
+ * All the UNDOs are executed together so if we already laid one down we
+ * do not have to lay another one down for the same range.
  */
 int
 hammer_generate_undo(hammer_transaction_t trans, hammer_io_t io,
                     hammer_off_t zone_off, void *base, int len)
 {
+       hammer_mount_t hmp;
        hammer_volume_t root_volume;
-       hammer_volume_ondisk_t ondisk;
        hammer_blockmap_t undomap;
        hammer_buffer_t buffer = NULL;
-       struct hammer_blockmap_layer2 *layer2;
        hammer_fifo_undo_t undo;
        hammer_fifo_tail_t tail;
        hammer_off_t next_offset;
-       hammer_off_t result_offset;
-       int i;
        int error;
        int bytes;
 
+       hmp = trans->hmp;
+
+       /*
+        * Enter the offset into our undo history.  If there is an existing
+        * undo we do not have to generate a new one.
+        */
+       if (hammer_enter_undo_history(hmp, zone_off, len) == EALREADY)
+               return(0);
+
        root_volume = trans->rootvol;
-       ondisk = root_volume->ondisk;
-       undomap = &trans->hmp->blockmap[HAMMER_ZONE_UNDO_INDEX];
+       undomap = &hmp->blockmap[HAMMER_ZONE_UNDO_INDEX];
 
        /* no undo recursion */
        hammer_modify_volume(NULL, root_volume, NULL, 0);
 
+       /* serialize FIFO allocation; replaces the old race-and-retry loop */
+       hammer_lock_ex(&hmp->undo_lock);
 again:
        /*
         * Allocate space in the FIFO
@@ -104,7 +117,7 @@ again:
        bytes = ((len + HAMMER_HEAD_ALIGN_MASK) & ~HAMMER_HEAD_ALIGN_MASK) +
                sizeof(struct hammer_fifo_undo) +
                sizeof(struct hammer_fifo_tail);
-       if (hammer_undo_space(trans->hmp) < bytes + HAMMER_BUFSIZE*2)
+       if (hammer_undo_space(hmp) < bytes + HAMMER_BUFSIZE*2)
                panic("hammer: insufficient undo FIFO space!");
 
        next_offset = undomap->next_offset;
@@ -115,24 +128,21 @@ again:
        if (undomap->next_offset == undomap->alloc_offset) {
                next_offset = HAMMER_ZONE_ENCODE(HAMMER_ZONE_UNDO_INDEX, 0);
                undomap->next_offset = next_offset;
-               kprintf("undo zone's next_offset wrapped\n");
+               hkprintf("undo zone's next_offset wrapped\n");
        }
 
-       i = (next_offset & HAMMER_OFF_SHORT_MASK) / HAMMER_LARGEBLOCK_SIZE;
-       layer2 = &root_volume->ondisk->vol0_undo_array[i];
-       result_offset = layer2->u.phys_offset +
-                       (next_offset & HAMMER_LARGEBLOCK_MASK64);
-
-       undo = hammer_bread(trans->hmp, result_offset, &error, &buffer);
-
        /*
-        * We raced another thread, try again.
+        * This is a tail-chasing FIFO, when we hit the start of a new
+        * buffer we don't have to read it in.
         */
-       if (undomap->next_offset != next_offset)
-               goto again;
-
+       if ((next_offset & HAMMER_BUFMASK) == 0)
+               undo = hammer_bnew(hmp, next_offset, &error, &buffer);
+       else
+               undo = hammer_bread(hmp, next_offset, &error, &buffer);
        hammer_modify_buffer(NULL, buffer, NULL, 0);
 
+       /* with undo_lock held exclusively, next_offset cannot have moved */
+       KKASSERT(undomap->next_offset == next_offset);
+
        /*
         * The FIFO entry would cross a buffer boundary, PAD to the end
         * of the buffer and try again.  Due to our data alignment, the
@@ -140,7 +150,7 @@ again:
         * populate the first 8 bytes of hammer_fifo_head and the tail may
         * be at the same offset as the head.
         */
-       if ((result_offset ^ (result_offset + bytes)) & ~HAMMER_BUFMASK64) {
+       if ((next_offset ^ (next_offset + bytes)) & ~HAMMER_BUFMASK64) {
                bytes = HAMMER_BUFSIZE - ((int)next_offset & HAMMER_BUFMASK);
                tail = (void *)((char *)undo + bytes - sizeof(*tail));
                if ((void *)undo != (void *)tail) {
@@ -151,12 +161,13 @@ again:
                undo->head.hdr_signature = HAMMER_HEAD_SIGNATURE;
                undo->head.hdr_type = HAMMER_HEAD_TYPE_PAD;
                undo->head.hdr_size = bytes;
+               /* NO CRC */
                undomap->next_offset += bytes;
                hammer_modify_buffer_done(buffer);
                goto again;
        }
        if (hammer_debug_general & 0x0080)
-               kprintf("undo %016llx %d %d\n", result_offset, bytes, len);
+               kprintf("undo %016llx %d %d\n", next_offset, bytes, len);
 
        /*
         * We're good, create the entry.
@@ -175,38 +186,102 @@ again:
        tail->tail_type = HAMMER_HEAD_TYPE_UNDO;
        tail->tail_size = bytes;
 
+       KKASSERT(bytes >= sizeof(undo->head));
+       /* crc: head up to HAMMER_FIFO_HEAD_CRCOFF, xor'd with the payload crc */
+       undo->head.hdr_crc = crc32(undo, HAMMER_FIFO_HEAD_CRCOFF) ^
+                            crc32(&undo->head + 1, bytes - sizeof(undo->head));
        undomap->next_offset += bytes;
+
+       hammer_modify_buffer_done(buffer);
        hammer_modify_volume_done(root_volume);
 
+       hammer_unlock(&hmp->undo_lock);
+
        if (buffer)
                hammer_rel_buffer(buffer, 0);
        return(error);
 }
 
+/*
+ * UNDO HISTORY API
+ *
+ * It is not necessary to layout an undo record for the same address space
+ * multiple times.  Maintain a cache of recent undo's.
+ */
+
+/*
+ * Enter an undo into the history.  Return EALREADY if the request completely
+ * covers a previous request.
+ */
+int
+hammer_enter_undo_history(hammer_mount_t hmp, hammer_off_t offset, int bytes)
+{
+       hammer_undo_t node;
+       hammer_undo_t onode;
+
+       node = RB_LOOKUP(hammer_und_rb_tree, &hmp->rb_undo_root, offset);
+       if (node) {
+               /* cache hit: refresh the node's position at the MRU end */
+               TAILQ_REMOVE(&hmp->undo_lru_list, node, lru_entry);
+               TAILQ_INSERT_TAIL(&hmp->undo_lru_list, node, lru_entry);
+               if (bytes <= node->bytes)
+                       return(EALREADY);
+               /* request extends the cached range; record it, caller lays a new undo */
+               node->bytes = bytes;
+               return(0);
+       }
+       if (hmp->undo_alloc != HAMMER_MAX_UNDOS) {
+               /* static node pool not yet exhausted */
+               node = &hmp->undos[hmp->undo_alloc++];
+       } else {
+               /* pool full: recycle the least-recently-used entry */
+               node = TAILQ_FIRST(&hmp->undo_lru_list);
+               TAILQ_REMOVE(&hmp->undo_lru_list, node, lru_entry);
+               RB_REMOVE(hammer_und_rb_tree, &hmp->rb_undo_root, node);
+       }
+       node->offset = offset;
+       node->bytes = bytes;
+       TAILQ_INSERT_TAIL(&hmp->undo_lru_list, node, lru_entry);
+       onode = RB_INSERT(hammer_und_rb_tree, &hmp->rb_undo_root, node);
+       KKASSERT(onode == NULL);
+       return(0);
+}
+
+void
+hammer_clear_undo_history(hammer_mount_t hmp)
+{
+       /* reset RB tree, LRU list, and the static-pool allocation index */
+       RB_INIT(&hmp->rb_undo_root);
+       TAILQ_INIT(&hmp->undo_lru_list);
+       hmp->undo_alloc = 0;
+}
+
+/*
+ * Misc helper routines.  Return available space and total space.
+ */
 int64_t
-hammer_undo_space(hammer_mount_t hmp)
+hammer_undo_used(hammer_mount_t hmp)
 {
        hammer_blockmap_t rootmap;
-       int64_t bytes;
        int64_t max_bytes;
+       int64_t bytes;
 
        rootmap = &hmp->blockmap[HAMMER_ZONE_UNDO_INDEX];
 
        if (rootmap->first_offset <= rootmap->next_offset) {
-               bytes = (int)(rootmap->next_offset - rootmap->first_offset);
+               bytes = rootmap->next_offset - rootmap->first_offset;
        } else {
-               bytes = (int)(rootmap->alloc_offset - rootmap->first_offset +
-                             rootmap->next_offset);
+               /* FIFO wrapped: first-to-end plus start-of-zone-to-next */
+               bytes = rootmap->alloc_offset - rootmap->first_offset +
+                       (rootmap->next_offset & HAMMER_OFF_LONG_MASK);
        }
-       max_bytes = (int)(rootmap->alloc_offset & HAMMER_OFF_SHORT_MASK);
-       return(max_bytes - bytes);
+       /* 64-bit math throughout; the old (int) casts truncated large zones */
+       max_bytes = rootmap->alloc_offset & HAMMER_OFF_SHORT_MASK;
+       KKASSERT(bytes <= max_bytes);
+       return(bytes);
+}
+
+int64_t
+hammer_undo_space(hammer_mount_t hmp)
+{
+       hammer_blockmap_t rootmap;
+       int64_t max_bytes;
+
+       rootmap = &hmp->blockmap[HAMMER_ZONE_UNDO_INDEX];
+       /* available space = total UNDO zone size minus bytes in use */
+       max_bytes = rootmap->alloc_offset & HAMMER_OFF_SHORT_MASK;
+       return(max_bytes - hammer_undo_used(hmp));
 }
 
 int64_t
@@ -216,8 +291,18 @@ hammer_undo_max(hammer_mount_t hmp)
        int64_t max_bytes;
 
        rootmap = &hmp->blockmap[HAMMER_ZONE_UNDO_INDEX];
-       max_bytes = (int)(rootmap->alloc_offset & HAMMER_OFF_SHORT_MASK);
+       /* keep 64-bit math; the old (int) cast truncated large UNDO zones */
+       max_bytes = rootmap->alloc_offset & HAMMER_OFF_SHORT_MASK;
 
        return(max_bytes);
 }
 
+/*
+ * RB-tree comparator: order undo-history nodes by their offset.
+ */
+static int
+hammer_und_rb_compare(hammer_undo_t node1, hammer_undo_t node2)
+{
+        if (node1->offset < node2->offset)
+                return(-1);
+        if (node1->offset > node2->offset)
+                return(1);
+        return(0);
+}
+