HAMMER VFS - Fix I/O invalidation collision assertion
author     Matthew Dillon <dillon@apollo.backplane.com>
           Sun, 1 Feb 2009 06:53:38 +0000 (22:53 -0800)
committer  Matthew Dillon <dillon@apollo.backplane.com>
           Sun, 1 Feb 2009 06:53:38 +0000 (22:53 -0800)
Fix more issues with the hammer_reserve subsystem.

* Fix a broken base_offset calculation which had reversed the zone
  and offset fields in a HAMMER_ZONE_ENCODE() call.  The broken
  base_offset caused hammer_del_buffers() to scan the wrong offset
  range, leaving conflicting buffer cache buffers intact.

  The resulting conflict caused a KKASSERT(LIST_FIRST(&bp->b_dep) == NULL)
  to fail later on.  The sketch below illustrates the argument mix-up.
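
  A minimal user-space sketch of the mix-up (macro layouts assumed from
  hammer_disk.h; the offset value is hypothetical).  The zone index lives
  in the top 4 bits of a 64-bit HAMMER offset, so reversing the arguments
  to HAMMER_ZONE_ENCODE() shifts the offset into the zone bits and yields
  a garbage base_offset for hammer_del_buffers() to scan:

    #include <stdio.h>
    #include <stdint.h>

    typedef uint64_t hammer_off_t;

    #define HAMMER_OFF_ZONE_MASK  0xF000000000000000ULL
    #define HAMMER_ZONE_ENCODE(zone, off) \
            (((hammer_off_t)(zone) << 60) | (off))

    int
    main(void)
    {
            /* hypothetical raw (zone-2 aliased) reservation base */
            hammer_off_t zone_offset = 0x20000000a0c00000ULL;
            int zone = 10;
            hammer_off_t off = zone_offset & ~HAMMER_OFF_ZONE_MASK;

            /* fixed order: zone first, then the masked offset */
            printf("good %016jx\n",
                   (uintmax_t)HAMMER_ZONE_ENCODE(zone, off));
            /* reversed order: the offset lands in the zone bits */
            printf("bad  %016jx\n",
                   (uintmax_t)HAMMER_ZONE_ENCODE(off, zone));
            return (0);
    }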

* Hold a reference on the hammer_reserve structure across a potentially
  blocking operation in hammer_blockmap_alloc(), so the reservation
  cannot be ripped out from under us while we sleep (sketched below).
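
  A minimal sketch of the pattern with stand-in types (not the kernel
  code): bump the reference count before anything that can block, and
  drop it through the normal completion path afterwards:

    #include <assert.h>

    struct resv {
            int refs;
    };

    static void
    blocking_work(void)
    {
            /* stand-in for buffer I/O or allocation paths that may sleep */
    }

    static void
    reserve_complete(struct resv *resv)
    {
            assert(resv->refs > 0);
            --resv->refs;   /* at zero the structure may be reclaimed */
    }

    static void
    alloc_path(struct resv *resv)
    {
            ++resv->refs;           /* pin before blocking */
            blocking_work();        /* resv stays valid across the sleep */
            reserve_complete(resv); /* paired drop, as in the patch */
    }

    int
    main(void)
    {
            struct resv r = { .refs = 1 };

            alloc_path(&r);
            return (0);
    }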

* Record the proper zone in a hammer_reserve() structure created via
  hammer_blockmap_free().  Without this the I/O invalidation code
  cannot locate conflicting hammer_buffer structures (see below).
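
  Abbreviated sketch of the fields involved (see struct hammer_reserve in
  hammer.h for the real layout): the originating zone is not recoverable
  from the raw zone-2 base offset the reservation is keyed on, so it must
  be recorded at creation time for the disposal path to re-encode the
  zone-X range it has to invalidate:

    typedef unsigned long long hammer_off_t;

    struct hammer_reserve_sketch {
            hammer_off_t zone_offset; /* raw (zone-2) base offset, RB key */
            int          zone;        /* recorded so invalidation can
                                       * rebuild the zone-X alias */
    };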

* hammer_reserve_setdelay_offset() was not actually placing the
  hammer_reserve() structure in the delay queue, allowing it to
  be disposed of too early.  A sketch of the intended flow follows.
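
  A stand-alone sketch of the delay-queue idea using sys/queue.h (names
  hypothetical, not the HAMMER implementation): an entry that is created
  but never enqueued misses the flush cycle and can be freed too early:

    #include <sys/queue.h>
    #include <stdlib.h>

    struct dresv {
            TAILQ_ENTRY(dresv) entry;
            int refs;
    };
    TAILQ_HEAD(dresv_list, dresv);

    static void
    setdelay(struct dresv_list *list, struct dresv *resv)
    {
            ++resv->refs;   /* the delay queue owns a reference */
            TAILQ_INSERT_TAIL(list, resv, entry);
    }

    static struct dresv *
    setdelay_offset(struct dresv_list *list)
    {
            struct dresv *resv = calloc(1, sizeof(*resv));

            if (resv == NULL)
                    return (NULL);
            /* the bug: returning before this call left resv unqueued */
            setdelay(list, resv);
            return (resv);
    }

    int
    main(void)
    {
            struct dresv_list list = TAILQ_HEAD_INITIALIZER(list);

            setdelay_offset(&list);
            return (0);
    }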

sys/vfs/hammer/hammer_blockmap.c

index 216b0d8..d88d11f 100644
--- a/sys/vfs/hammer/hammer_blockmap.c
+++ b/sys/vfs/hammer/hammer_blockmap.c
@@ -41,7 +41,7 @@
 
 static int hammer_res_rb_compare(hammer_reserve_t res1, hammer_reserve_t res2);
 static void hammer_reserve_setdelay_offset(hammer_mount_t hmp,
-                                   hammer_off_t base_offset,
+                                   hammer_off_t base_offset, int zone,
                                    struct hammer_blockmap_layer2 *layer2);
 static void hammer_reserve_setdelay(hammer_mount_t hmp, hammer_reserve_t resv);
 
@@ -244,6 +244,7 @@ again:
                        next_offset += resv->append_off - offset;
                        goto again;
                }
+               ++resv->refs;
        }
 
        /*
@@ -285,10 +286,17 @@ again:
        hammer_modify_buffer_done(buffer2);
        KKASSERT(layer2->bytes_free >= 0);
 
+       /*
+        * We hold the blockmap lock and should be the only ones
+        * capable of modifying resv->append_off.  Track the allocation
+        * as appropriate.
+        */
+       KKASSERT(bytes != 0);
        if (resv) {
                KKASSERT(resv->append_off <= offset);
                resv->append_off = offset + bytes;
                resv->flags &= ~HAMMER_RESF_LAYER2FREE;
+               hammer_blockmap_reserve_complete(hmp, resv);
        }
 
        /*
@@ -596,9 +604,8 @@ hammer_blockmap_reserve_complete(hammer_mount_t hmp, hammer_reserve_t resv)
         */
        if (resv->refs == 1 && (resv->flags & HAMMER_RESF_LAYER2FREE)) {
                resv->append_off = HAMMER_LARGEBLOCK_SIZE;
-               resv->flags &= ~HAMMER_RESF_LAYER2FREE;
-               base_offset = resv->zone_offset & ~HAMMER_ZONE_RAW_BUFFER;
-               base_offset = HAMMER_ZONE_ENCODE(base_offset, resv->zone);
+               base_offset = resv->zone_offset & ~HAMMER_OFF_ZONE_MASK;
+               base_offset = HAMMER_ZONE_ENCODE(resv->zone, base_offset);
                error = hammer_del_buffers(hmp, base_offset,
                                           resv->zone_offset,
                                           HAMMER_LARGEBLOCK_SIZE,
@@ -619,26 +626,33 @@ hammer_blockmap_reserve_complete(hammer_mount_t hmp, hammer_reserve_t resv)
  * the related flushes have completely cycled, otherwise crash recovery
  * could resurrect a data block that was already reused and overwritten.
  *
- * Return 0 if the layer2 entry is still completely free after the
- * reservation has been allocated.
+ * The caller might reset the underlying layer2 entry's append_off to 0, so
+ * our covering append_off must be set to max to prevent any reallocation
+ * until after the flush delays complete, not to mention proper invalidation
+ * of any underlying cached blocks.
  */
 static void
 hammer_reserve_setdelay_offset(hammer_mount_t hmp, hammer_off_t base_offset,
-                       struct hammer_blockmap_layer2 *layer2)
+                       int zone, struct hammer_blockmap_layer2 *layer2)
 {
        hammer_reserve_t resv;
 
        /*
         * Allocate the reservation if necessary.
+        *
+        * NOTE: need lock in future around resv lookup/allocation and
+        * the setdelay call, currently refs is not bumped until the call.
         */
 again:
        resv = RB_LOOKUP(hammer_res_rb_tree, &hmp->rb_resv_root, base_offset);
        if (resv == NULL) {
                resv = kmalloc(sizeof(*resv), hmp->m_misc,
                               M_WAITOK | M_ZERO | M_USE_RESERVE);
+               resv->zone = zone;
                resv->zone_offset = base_offset;
                resv->refs = 0;
-               /* XXX inherent lock until refs bumped later on */
+               resv->append_off = HAMMER_LARGEBLOCK_SIZE;
+
                if (layer2->bytes_free == HAMMER_LARGEBLOCK_SIZE)
                        resv->flags |= HAMMER_RESF_LAYER2FREE;
                if (RB_INSERT(hammer_res_rb_tree, &hmp->rb_resv_root, resv)) {
@@ -646,7 +660,11 @@ again:
                        goto again;
                }
                ++hammer_count_reservations;
+       } else {
+               if (layer2->bytes_free == HAMMER_LARGEBLOCK_SIZE)
+                       resv->flags |= HAMMER_RESF_LAYER2FREE;
        }
+       hammer_reserve_setdelay(hmp, resv);
 }
 
 /*
@@ -788,7 +806,7 @@ hammer_blockmap_free(hammer_transaction_t trans,
        if (layer2->bytes_free == HAMMER_LARGEBLOCK_SIZE) {
                base_off = (zone_offset & (~HAMMER_LARGEBLOCK_MASK64 & ~HAMMER_OFF_ZONE_MASK)) | HAMMER_ZONE_RAW_BUFFER;
 
-               hammer_reserve_setdelay_offset(hmp, base_off, layer2);
+               hammer_reserve_setdelay_offset(hmp, base_off, zone, layer2);
                if (layer2->bytes_free == HAMMER_LARGEBLOCK_SIZE) {
                        layer2->zone = 0;
                        layer2->append_off = 0;
@@ -925,6 +943,7 @@ hammer_blockmap_finalize(hammer_transaction_t trans,
        if (layer2->zone != zone)
                kprintf("layer2 zone mismatch %d %d\n", layer2->zone, zone);
        KKASSERT(layer2->zone == zone);
+       KKASSERT(bytes != 0);
        layer2->bytes_free -= bytes;
        if (resv)
                resv->flags &= ~HAMMER_RESF_LAYER2FREE;