HAMMER - Implement experimental volume removal
diff --git a/sys/vfs/hammer/hammer_blockmap.c b/sys/vfs/hammer/hammer_blockmap.c
index a4656d9..bff567f 100644
--- a/sys/vfs/hammer/hammer_blockmap.c
+++ b/sys/vfs/hammer/hammer_blockmap.c
 #include "hammer.h"
 
 static int hammer_res_rb_compare(hammer_reserve_t res1, hammer_reserve_t res2);
-static void hammer_reserve_setdelay(hammer_mount_t hmp,
-                                   hammer_off_t base_offset,
+static void hammer_reserve_setdelay_offset(hammer_mount_t hmp,
+                                   hammer_off_t base_offset, int zone,
                                    struct hammer_blockmap_layer2 *layer2);
-
+static void hammer_reserve_setdelay(hammer_mount_t hmp, hammer_reserve_t resv);
 
 /*
  * Reserved big-blocks red-black tree support
@@ -65,8 +65,8 @@ hammer_res_rb_compare(hammer_reserve_t res1, hammer_reserve_t res2)
  * Allocate bytes from a zone
  */
 hammer_off_t
-hammer_blockmap_alloc(hammer_transaction_t trans, int zone,
-                     int bytes, int *errorp)
+hammer_blockmap_alloc(hammer_transaction_t trans, int zone, int bytes,
+                     hammer_off_t hint, int *errorp)
 {
        hammer_mount_t hmp;
        hammer_volume_t root_volume;
@@ -86,6 +86,7 @@ hammer_blockmap_alloc(hammer_transaction_t trans, int zone,
        hammer_off_t base_off;
        int loops = 0;
        int offset;             /* offset within big-block */
+       int use_hint;
 
        hmp = trans->hmp;
 
@@ -108,8 +109,26 @@ hammer_blockmap_alloc(hammer_transaction_t trans, int zone,
        freemap = &hmp->blockmap[HAMMER_ZONE_FREEMAP_INDEX];
        KKASSERT(HAMMER_ZONE_DECODE(blockmap->next_offset) == zone);
 
-       next_offset = blockmap->next_offset;
+       /*
+        * Use the hint if we have one.
+        */
+       if (hint && HAMMER_ZONE_DECODE(hint) == zone) {
+               next_offset = (hint + 15) & ~(hammer_off_t)15;
+               use_hint = 1;
+       } else {
+               next_offset = blockmap->next_offset;
+               use_hint = 0;
+       }
 again:
+
+       /*
+        * use_hint is turned off if we leave the hinted big-block.
+        */
+       if (use_hint && ((next_offset ^ hint) & ~HAMMER_HINTBLOCK_MASK64)) {
+               next_offset = blockmap->next_offset;
+               use_hint = 0;
+       }
+
        /*
         * Check for wrap
         */
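
The hint path above rounds the caller-supplied offset up to HAMMER's 16-byte
allocation granularity and falls back to blockmap->next_offset as soon as the
cursor leaves the hinted region. A minimal sketch of the same round-up and
region-compare arithmetic, with an assumed hint-block extent standing in for
HAMMER_HINTBLOCK_MASK64:

    #include <stdio.h>
    #include <stdint.h>

    #define ALIGN_MASK       ((uint64_t)15)            /* 16-byte granularity */
    #define HINTBLOCK_MASK64 (((uint64_t)1 << 22) - 1) /* assumed extent */

    int
    main(void)
    {
            uint64_t hint = 0x0000000100400123ULL;
            uint64_t next_offset;
            int use_hint;

            /* Round the hint up to a 16-byte boundary, as the diff does. */
            next_offset = (hint + 15) & ~ALIGN_MASK;

            /*
             * The hint stays live only while next_offset remains inside
             * the same hint-block; XOR exposes any differing high bits.
             */
            use_hint = ((next_offset ^ hint) & ~HINTBLOCK_MASK64) == 0;

            printf("next_offset=%016jx use_hint=%d\n",
                   (uintmax_t)next_offset, use_hint);
            return (0);
    }
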
@@ -145,6 +164,17 @@ again:
         */
        layer1_offset = freemap->phys_offset +
                        HAMMER_BLOCKMAP_LAYER1_OFFSET(next_offset);
+
+       /*
+        * Skip this big-block if it belongs to a volume that we are
+        * currently trying to remove from the file-system.
+        */
+       if ((int)HAMMER_VOL_DECODE(layer1_offset) == hmp->volume_to_remove) {
+               next_offset = (next_offset + HAMMER_BLOCKMAP_LAYER2) &
+                             ~HAMMER_BLOCKMAP_LAYER2_MASK;
+               goto again;
+       }
+
        layer1 = hammer_bread(hmp, layer1_offset, errorp, &buffer1);
        if (*errorp) {
                result_offset = 0;
@@ -155,7 +185,10 @@ again:
         * Check CRC.
         */
        if (layer1->layer1_crc != crc32(layer1, HAMMER_LAYER1_CRCSIZE)) {
-               Debugger("CRC FAILED: LAYER1");
+               hammer_lock_ex(&hmp->blkmap_lock);
+               if (layer1->layer1_crc != crc32(layer1, HAMMER_LAYER1_CRCSIZE))
+                       panic("CRC FAILED: LAYER1");
+               hammer_unlock(&hmp->blkmap_lock);
        }
 
        /*
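
Each Debugger() call in this file becomes the same re-check-under-lock idiom:
an unlocked CRC mismatch may merely be a racing writer holding blkmap_lock, so
the test is repeated with the lock held before panicking. A self-contained
sketch of the pattern, with pthreads and a toy checksum standing in for
hammer_lock_ex() and crc32():

    #include <assert.h>
    #include <stdint.h>
    #include <pthread.h>

    struct entry {
            uint32_t stored_crc;
            uint32_t payload;
    };

    /* Toy checksum standing in for crc32() over the covered fields. */
    static uint32_t
    entry_crc(const struct entry *ent)
    {
            return (ent->payload * 2654435761u);
    }

    /*
     * An unlocked mismatch may just be a racing writer, so re-test with
     * writers excluded; only a persistent mismatch is real corruption.
     */
    static void
    validate_entry(struct entry *ent, pthread_mutex_t *mtx)
    {
            if (entry_crc(ent) != ent->stored_crc) {
                    pthread_mutex_lock(mtx);
                    if (entry_crc(ent) != ent->stored_crc)
                            assert(0 && "CRC FAILED");
                    pthread_mutex_unlock(mtx);
            }
    }

    int
    main(void)
    {
            pthread_mutex_t mtx = PTHREAD_MUTEX_INITIALIZER;
            struct entry ent = { .payload = 42 };

            ent.stored_crc = entry_crc(&ent);       /* consistent entry */
            validate_entry(&ent, &mtx);
            return (0);
    }
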
@@ -182,10 +215,14 @@ again:
        }
 
        /*
-        * Check CRC.
+        * Check CRC.  This can race against another thread that holds
+        * the lock and is in the middle of modifying layer2.
         */
        if (layer2->entry_crc != crc32(layer2, HAMMER_LAYER2_CRCSIZE)) {
-               Debugger("CRC FAILED: LAYER2");
+               hammer_lock_ex(&hmp->blkmap_lock);
+               if (layer2->entry_crc != crc32(layer2, HAMMER_LAYER2_CRCSIZE))
+                       panic("CRC FAILED: LAYER2");
+               hammer_unlock(&hmp->blkmap_lock);
        }
 
        /*
@@ -200,6 +237,26 @@ again:
                goto again;
        }
 
+       /*
+        * If operating in the current non-hint blockmap block, do not
+        * allow it to get over-full.  Also drop any active hinting so
+        * blockmap->next_offset is updated at the end.
+        *
+        * We do this for B-Tree and meta-data allocations to provide
+        * localization for updates.
+        */
+       if ((zone == HAMMER_ZONE_BTREE_INDEX ||
+            zone == HAMMER_ZONE_META_INDEX) &&
+           offset >= HAMMER_LARGEBLOCK_OVERFILL &&
+           !((next_offset ^ blockmap->next_offset) & ~HAMMER_LARGEBLOCK_MASK64)
+       ) {
+               next_offset += (HAMMER_LARGEBLOCK_SIZE - offset);
+               use_hint = 0;
+               goto again;
+       }
+
        /*
         * We need the lock from this point on.  We have to re-check zone
         * ownership after acquiring the lock and also check for reservations.
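
The over-fill test keeps B-Tree and meta-data allocations localized by
abandoning the current big-block once it passes a fill threshold. A sketch of
the skip arithmetic, with illustrative constants in place of
HAMMER_LARGEBLOCK_SIZE and HAMMER_LARGEBLOCK_OVERFILL:

    #include <stdio.h>
    #include <stdint.h>

    #define LARGEBLOCK_SIZE ((uint64_t)8 * 1024 * 1024)     /* illustrative */
    #define OVERFILL_LIMIT  (LARGEBLOCK_SIZE * 3 / 4)       /* assumed */

    int
    main(void)
    {
            uint64_t next_offset = LARGEBLOCK_SIZE + LARGEBLOCK_SIZE * 7 / 8;
            int offset = (int)(next_offset & (LARGEBLOCK_SIZE - 1));

            if (offset >= (int)OVERFILL_LIMIT) {
                    /*
                     * Jump to the start of the following big-block so new
                     * B-Tree/meta-data allocations land together there.
                     */
                    next_offset += LARGEBLOCK_SIZE - offset;
            }
            printf("next_offset=%016jx\n", (uintmax_t)next_offset);
            return (0);
    }
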
@@ -237,6 +294,7 @@ again:
                        next_offset += resv->append_off - offset;
                        goto again;
                }
+               ++resv->refs;
        }
 
        /*
@@ -278,10 +336,17 @@ again:
        hammer_modify_buffer_done(buffer2);
        KKASSERT(layer2->bytes_free >= 0);
 
+       /*
+        * We hold the blockmap lock and should be the only ones
+        * capable of modifying resv->append_off.  Track the allocation
+        * as appropriate.
+        */
+       KKASSERT(bytes != 0);
        if (resv) {
                KKASSERT(resv->append_off <= offset);
                resv->append_off = offset + bytes;
                resv->flags &= ~HAMMER_RESF_LAYER2FREE;
+               hammer_blockmap_reserve_complete(hmp, resv);
        }
 
        /*
@@ -295,11 +360,15 @@ again:
        result_offset = next_offset;
 
        /*
-        * Process allocated result_offset
+        * If we weren't supplied with a hint or could not use the hint
+        * then we wound up using blockmap->next_offset as the hint and
+        * need to save it.
         */
-       hammer_modify_volume(NULL, root_volume, NULL, 0);
-       blockmap->next_offset = next_offset + bytes;
-       hammer_modify_volume_done(root_volume);
+       if (use_hint == 0) {
+               hammer_modify_volume(NULL, root_volume, NULL, 0);
+               blockmap->next_offset = next_offset + bytes;
+               hammer_modify_volume_done(root_volume);
+       }
        hammer_unlock(&hmp->blkmap_lock);
 failed:
 
@@ -413,7 +482,10 @@ again:
         * Check CRC.
         */
        if (layer1->layer1_crc != crc32(layer1, HAMMER_LAYER1_CRCSIZE)) {
-               Debugger("CRC FAILED: LAYER1");
+               hammer_lock_ex(&hmp->blkmap_lock);
+               if (layer1->layer1_crc != crc32(layer1, HAMMER_LAYER1_CRCSIZE))
+                       panic("CRC FAILED: LAYER1");
+               hammer_unlock(&hmp->blkmap_lock);
        }
 
        /*
@@ -443,7 +515,10 @@ again:
         * aren't when reserving space).
         */
        if (layer2->entry_crc != crc32(layer2, HAMMER_LAYER2_CRCSIZE)) {
-               Debugger("CRC FAILED: LAYER2");
+               hammer_lock_ex(&hmp->blkmap_lock);
+               if (layer2->entry_crc != crc32(layer2, HAMMER_LAYER2_CRCSIZE))
+                       panic("CRC FAILED: LAYER2");
+               hammer_unlock(&hmp->blkmap_lock);
        }
 
        /*
@@ -498,7 +573,7 @@ again:
                ++resv->refs;
                resx = NULL;
        } else {
-               resx = kmalloc(sizeof(*resv), M_HAMMER,
+               resx = kmalloc(sizeof(*resv), hmp->m_misc,
                               M_WAITOK | M_ZERO | M_USE_RESERVE);
                resx->refs = 1;
                resx->zone = zone;
@@ -544,19 +619,6 @@ failed:
        return(resv);
 }
 
-#if 0
-/*
- * Backend function - undo a portion of a reservation.
- */
-void
-hammer_blockmap_reserve_undo(hammer_mount_t hmp, hammer_reserve_t resv,
-                        hammer_off_t zone_offset, int bytes)
-{
-       resv->bytes_freed += bytes;
-}
-
-#endif
-
 /*
  * Dereference a reservation structure.  Upon the final release the
  * underlying big-block is checked and if it is entirely free we delete
@@ -567,6 +629,7 @@ void
 hammer_blockmap_reserve_complete(hammer_mount_t hmp, hammer_reserve_t resv)
 {
        hammer_off_t base_offset;
+       int error;
 
        KKASSERT(resv->refs > 0);
        KKASSERT((resv->zone_offset & HAMMER_OFF_ZONE_MASK) ==
@@ -576,19 +639,25 @@ hammer_blockmap_reserve_complete(hammer_mount_t hmp, hammer_reserve_t resv)
         * Setting append_off to the max prevents any new allocations
         * from occurring while we are trying to dispose of the reservation,
         * allowing us to safely delete any related HAMMER buffers.
+        *
+        * If we are unable to clean out all related HAMMER buffers we
+        * requeue the delay.
         */
        if (resv->refs == 1 && (resv->flags & HAMMER_RESF_LAYER2FREE)) {
                resv->append_off = HAMMER_LARGEBLOCK_SIZE;
-               resv->flags &= ~HAMMER_RESF_LAYER2FREE;
-               base_offset = resv->zone_offset & ~HAMMER_ZONE_RAW_BUFFER;
-               base_offset = HAMMER_ZONE_ENCODE(base_offset, resv->zone);
-               hammer_del_buffers(hmp, base_offset, resv->zone_offset,
-                                  HAMMER_LARGEBLOCK_SIZE);
+               base_offset = resv->zone_offset & ~HAMMER_OFF_ZONE_MASK;
+               base_offset = HAMMER_ZONE_ENCODE(resv->zone, base_offset);
+               error = hammer_del_buffers(hmp, base_offset,
+                                          resv->zone_offset,
+                                          HAMMER_LARGEBLOCK_SIZE,
+                                          0);
+               if (error)
+                       hammer_reserve_setdelay(hmp, resv);
        }
        if (--resv->refs == 0) {
                KKASSERT((resv->flags & HAMMER_RESF_ONDELAY) == 0);
                RB_REMOVE(hammer_res_rb_tree, &hmp->rb_resv_root, resv);
-               kfree(resv, M_HAMMER);
+               kfree(resv, hmp->m_misc);
                --hammer_count_reservations;
        }
 }
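
The reworked release path no longer clears HAMMER_RESF_LAYER2FREE up front: if
hammer_del_buffers() cannot flush every covering buffer, the reservation is
requeued via hammer_reserve_setdelay() and retried later instead of the
cleanup being lost. A generic sketch of that release-with-requeue shape, using
hypothetical names:

    #include <stdbool.h>
    #include <stdlib.h>

    struct resource {
            int  refs;
            bool cleanup_pending;
    };

    /* Hypothetical stand-in for hammer_del_buffers(); can fail when busy. */
    static int
    try_cleanup(struct resource *res)
    {
            (void)res;
            return (0);
    }

    /* Hypothetical stand-in for hammer_reserve_setdelay(). */
    static void
    requeue_delay(struct resource *res)
    {
            res->cleanup_pending = true;
    }

    static void
    release(struct resource *res)
    {
            /* Attempt deferred cleanup on the last ref; requeue on failure. */
            if (res->refs == 1 && res->cleanup_pending) {
                    if (try_cleanup(res) != 0)
                            requeue_delay(res);
                    else
                            res->cleanup_pending = false;
            }
            if (--res->refs == 0)
                    free(res);
    }

    int
    main(void)
    {
            struct resource *res = calloc(1, sizeof(*res));

            res->refs = 1;
            res->cleanup_pending = true;
            release(res);
            return (0);
    }
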
@@ -598,39 +667,54 @@ hammer_blockmap_reserve_complete(hammer_mount_t hmp, hammer_reserve_t resv)
  * the related flushes have completely cycled, otherwise crash recovery
  * could resurrect a data block that was already reused and overwritten.
  *
- * Return 0 if the layer2 entry is still completely free after the
- * reservation has been allocated.
+ * The caller might reset the underlying layer2 entry's append_off to 0, so
+ * our covering append_off must be set to max to prevent any reallocation
+ * until after the flush delays complete, and any underlying cached blocks
+ * must be properly invalidated.
  */
 static void
-hammer_reserve_setdelay(hammer_mount_t hmp, hammer_off_t base_offset,
-                       struct hammer_blockmap_layer2 *layer2)
+hammer_reserve_setdelay_offset(hammer_mount_t hmp, hammer_off_t base_offset,
+                       int zone, struct hammer_blockmap_layer2 *layer2)
 {
        hammer_reserve_t resv;
 
        /*
         * Allocate the reservation if necessary.
+        *
+        * NOTE: A lock will eventually be needed around the resv
+        * lookup/allocation and the setdelay call; currently refs is
+        * not bumped until the call.
         */
 again:
        resv = RB_LOOKUP(hammer_res_rb_tree, &hmp->rb_resv_root, base_offset);
        if (resv == NULL) {
-               resv = kmalloc(sizeof(*resv), M_HAMMER,
+               resv = kmalloc(sizeof(*resv), hmp->m_misc,
                               M_WAITOK | M_ZERO | M_USE_RESERVE);
+               resv->zone = zone;
                resv->zone_offset = base_offset;
                resv->refs = 0;
-               /* XXX inherent lock until refs bumped later on */
+               resv->append_off = HAMMER_LARGEBLOCK_SIZE;
+
                if (layer2->bytes_free == HAMMER_LARGEBLOCK_SIZE)
                        resv->flags |= HAMMER_RESF_LAYER2FREE;
                if (RB_INSERT(hammer_res_rb_tree, &hmp->rb_resv_root, resv)) {
-                       kfree(resv, M_HAMMER);
+                       kfree(resv, hmp->m_misc);
                        goto again;
                }
                ++hammer_count_reservations;
+       } else {
+               if (layer2->bytes_free == HAMMER_LARGEBLOCK_SIZE)
+                       resv->flags |= HAMMER_RESF_LAYER2FREE;
        }
+       hammer_reserve_setdelay(hmp, resv);
+}
 
-       /*
-        * Enter the reservation on the on-delay list, or move it if it
-        * is already on the list.
-        */
+/*
+ * Enter the reservation on the on-delay list, or move it if it
+ * is already on the list.
+ */
+static void
+hammer_reserve_setdelay(hammer_mount_t hmp, hammer_reserve_t resv)
+{
        if (resv->flags & HAMMER_RESF_ONDELAY) {
                TAILQ_REMOVE(&hmp->delay_list, resv, delay_entry);
                resv->flush_group = hmp->flusher.next + 1;
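
hammer_reserve_setdelay_offset() uses a classic optimistic lookup-or-insert:
allocate a fresh reservation, and if RB_INSERT reports a collision because
another thread inserted first (e.g. while kmalloc blocked), free the new one
and retry the lookup. A single-threaded sketch of the same control flow using
POSIX tsearch() in place of the red-black tree macros (the kernel version
relies on blkmap_lock around the tree):

    #include <search.h>
    #include <stdint.h>
    #include <stdio.h>
    #include <stdlib.h>

    struct resv {
            uint64_t base_offset;   /* tree key */
            int      refs;
    };

    static int
    resv_cmp(const void *a, const void *b)
    {
            const struct resv *r1 = a, *r2 = b;

            if (r1->base_offset < r2->base_offset)
                    return (-1);
            return (r1->base_offset > r2->base_offset);
    }

    /*
     * Allocate a candidate, try to insert it, and if an existing node
     * wins, free the candidate and use the winner instead.
     */
    static struct resv *
    resv_find_or_create(void **root, uint64_t base_offset)
    {
            struct resv *resv, **slot;

            resv = calloc(1, sizeof(*resv));
            resv->base_offset = base_offset;
            slot = tsearch(resv, root, resv_cmp);
            if (*slot != resv) {
                    free(resv);             /* lost the race: reuse existing */
                    resv = *slot;
            }
            return (resv);
    }

    int
    main(void)
    {
            void *root = NULL;
            struct resv *a = resv_find_or_create(&root, 0x800000);
            struct resv *b = resv_find_or_create(&root, 0x800000);

            printf("same node: %d\n", a == b);      /* prints 1 */
            return (0);
    }
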
@@ -711,7 +795,10 @@ hammer_blockmap_free(hammer_transaction_t trans,
        KKASSERT(layer1->phys_offset &&
                 layer1->phys_offset != HAMMER_BLOCKMAP_UNAVAIL);
        if (layer1->layer1_crc != crc32(layer1, HAMMER_LAYER1_CRCSIZE)) {
-               Debugger("CRC FAILED: LAYER1");
+               hammer_lock_ex(&hmp->blkmap_lock);
+               if (layer1->layer1_crc != crc32(layer1, HAMMER_LAYER1_CRCSIZE))
+                       panic("CRC FAILED: LAYER1");
+               hammer_unlock(&hmp->blkmap_lock);
        }
 
        /*
@@ -723,7 +810,10 @@ hammer_blockmap_free(hammer_transaction_t trans,
        if (error)
                goto failed;
        if (layer2->entry_crc != crc32(layer2, HAMMER_LAYER2_CRCSIZE)) {
-               Debugger("CRC FAILED: LAYER2");
+               hammer_lock_ex(&hmp->blkmap_lock);
+               if (layer2->entry_crc != crc32(layer2, HAMMER_LAYER2_CRCSIZE))
+                       panic("CRC FAILED: LAYER2");
+               hammer_unlock(&hmp->blkmap_lock);
        }
 
        hammer_lock_ex(&hmp->blkmap_lock);
@@ -757,7 +847,7 @@ hammer_blockmap_free(hammer_transaction_t trans,
        if (layer2->bytes_free == HAMMER_LARGEBLOCK_SIZE) {
                base_off = (zone_offset & (~HAMMER_LARGEBLOCK_MASK64 &
                                           ~HAMMER_OFF_ZONE_MASK)) |
                           HAMMER_ZONE_RAW_BUFFER;
 
-               hammer_reserve_setdelay(hmp, base_off, layer2);
+               hammer_reserve_setdelay_offset(hmp, base_off, zone, layer2);
                if (layer2->bytes_free == HAMMER_LARGEBLOCK_SIZE) {
                        layer2->zone = 0;
                        layer2->append_off = 0;
@@ -843,7 +933,10 @@ hammer_blockmap_finalize(hammer_transaction_t trans,
        KKASSERT(layer1->phys_offset &&
                 layer1->phys_offset != HAMMER_BLOCKMAP_UNAVAIL);
        if (layer1->layer1_crc != crc32(layer1, HAMMER_LAYER1_CRCSIZE)) {
-               Debugger("CRC FAILED: LAYER1");
+               hammer_lock_ex(&hmp->blkmap_lock);
+               if (layer1->layer1_crc != crc32(layer1, HAMMER_LAYER1_CRCSIZE))
+                       panic("CRC FAILED: LAYER1");
+               hammer_unlock(&hmp->blkmap_lock);
        }
 
        /*
@@ -855,7 +948,10 @@ hammer_blockmap_finalize(hammer_transaction_t trans,
        if (error)
                goto failed;
        if (layer2->entry_crc != crc32(layer2, HAMMER_LAYER2_CRCSIZE)) {
-               Debugger("CRC FAILED: LAYER2");
+               hammer_lock_ex(&hmp->blkmap_lock);
+               if (layer2->entry_crc != crc32(layer2, HAMMER_LAYER2_CRCSIZE))
+                       panic("CRC FAILED: LAYER2");
+               hammer_unlock(&hmp->blkmap_lock);
        }
 
        hammer_lock_ex(&hmp->blkmap_lock);
@@ -888,6 +984,7 @@ hammer_blockmap_finalize(hammer_transaction_t trans,
        if (layer2->zone != zone)
                kprintf("layer2 zone mismatch %d %d\n", layer2->zone, zone);
        KKASSERT(layer2->zone == zone);
+       KKASSERT(bytes != 0);
        layer2->bytes_free -= bytes;
        if (resv)
                resv->flags &= ~HAMMER_RESF_LAYER2FREE;
@@ -953,7 +1050,10 @@ hammer_blockmap_getfree(hammer_mount_t hmp, hammer_off_t zone_offset,
        }
        KKASSERT(layer1->phys_offset);
        if (layer1->layer1_crc != crc32(layer1, HAMMER_LAYER1_CRCSIZE)) {
-               Debugger("CRC FAILED: LAYER1");
+               hammer_lock_ex(&hmp->blkmap_lock);
+               if (layer1->layer1_crc != crc32(layer1, HAMMER_LAYER1_CRCSIZE))
+                       panic("CRC FAILED: LAYER1");
+               hammer_unlock(&hmp->blkmap_lock);
        }
 
        /*
@@ -969,7 +1069,10 @@ hammer_blockmap_getfree(hammer_mount_t hmp, hammer_off_t zone_offset,
                goto failed;
        }
        if (layer2->entry_crc != crc32(layer2, HAMMER_LAYER2_CRCSIZE)) {
-               Debugger("CRC FAILED: LAYER2");
+               hammer_lock_ex(&hmp->blkmap_lock);
+               if (layer2->entry_crc != crc32(layer2, HAMMER_LAYER2_CRCSIZE))
+                       panic("CRC FAILED: LAYER2");
+               hammer_unlock(&hmp->blkmap_lock);
        }
        KKASSERT(layer2->zone == zone);
 
@@ -985,7 +1088,7 @@ failed:
        hammer_rel_volume(root_volume, 0);
        if (hammer_debug_general & 0x0800) {
                kprintf("hammer_blockmap_getfree: %016llx -> %d\n",
-                       zone_offset, bytes);
+                       (long long)zone_offset, bytes);
        }
        return(bytes);
 }
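
The kprintf changes here and in hammer_blockmap_lookup() below add explicit
(long long) casts because hammer_off_t is a 64-bit typedef whose underlying
type varies by platform; the cast guarantees the argument matches the %016llx
conversion everywhere. A tiny illustration:

    #include <stdio.h>
    #include <stdint.h>

    typedef uint64_t hammer_off_t;  /* underlying type varies by platform */

    int
    main(void)
    {
            hammer_off_t zone_offset = 0x123456789abcULL;

            /* Without the cast, %llx may not match the typedef's real type. */
            printf("%016llx\n", (long long)zone_offset);
            return (0);
    }
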
@@ -1047,7 +1150,10 @@ hammer_blockmap_lookup(hammer_mount_t hmp, hammer_off_t zone_offset,
                goto failed;
        KKASSERT(layer1->phys_offset != HAMMER_BLOCKMAP_UNAVAIL);
        if (layer1->layer1_crc != crc32(layer1, HAMMER_LAYER1_CRCSIZE)) {
-               Debugger("CRC FAILED: LAYER1");
+               hammer_lock_ex(&hmp->blkmap_lock);
+               if (layer1->layer1_crc != crc32(layer1, HAMMER_LAYER1_CRCSIZE))
+                       panic("CRC FAILED: LAYER1");
+               hammer_unlock(&hmp->blkmap_lock);
        }
 
        /*
@@ -1070,7 +1176,10 @@ hammer_blockmap_lookup(hammer_mount_t hmp, hammer_off_t zone_offset,
                        layer2->zone, zone);
        }
        if (layer2->entry_crc != crc32(layer2, HAMMER_LAYER2_CRCSIZE)) {
-               Debugger("CRC FAILED: LAYER2");
+               hammer_lock_ex(&hmp->blkmap_lock);
+               if (layer2->entry_crc != crc32(layer2, HAMMER_LAYER2_CRCSIZE))
+                       panic("CRC FAILED: LAYER2");
+               hammer_unlock(&hmp->blkmap_lock);
        }
 
 failed:
@@ -1079,7 +1188,7 @@ failed:
        hammer_rel_volume(root_volume, 0);
        if (hammer_debug_general & 0x0800) {
                kprintf("hammer_blockmap_lookup: %016llx -> %016llx\n",
-                       zone_offset, result_offset);
+                       (long long)zone_offset, (long long)result_offset);
        }
        return(result_offset);
 }
@@ -1089,7 +1198,7 @@ failed:
  * Check space availability
  */
 int
-hammer_checkspace(hammer_mount_t hmp, int slop)
+_hammer_checkspace(hammer_mount_t hmp, int slop, int64_t *resp)
 {
        const int in_size = sizeof(struct hammer_inode_data) +
                            sizeof(union hammer_btree_elm);
@@ -1104,6 +1213,8 @@ hammer_checkspace(hammer_mount_t hmp, int slop)
                    (slop << HAMMER_LARGEBLOCK_BITS);
 
        hammer_count_extra_space_used = usedbytes;      /* debugging */
+       if (resp)
+               *resp = usedbytes;
 
        if (hmp->copy_stat_freebigblocks >=
            (usedbytes >> HAMMER_LARGEBLOCK_BITS)) {
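
The rename to _hammer_checkspace() with an optional int64_t *resp
out-parameter implies existing callers keep the old interface through a thin
wrapper. A plausible sketch of such a wrapper (hypothetical; the diff does not
show the hammer.h side):

    /*
     * Hypothetical compatibility wrapper; callers that don't care about
     * the used-bytes figure simply pass NULL for resp.
     */
    #define hammer_checkspace(hmp, slop) \
            _hammer_checkspace((hmp), (slop), NULL)
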