/*
 * Copyright (c) 2008 The DragonFly Project.  All rights reserved.
 *
 * This code is derived from software contributed to The DragonFly Project
 * by Matthew Dillon <dillon@backplane.com>
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in
 *    the documentation and/or other materials provided with the
 *    distribution.
 * 3. Neither the name of The DragonFly Project nor the names of its
 *    contributors may be used to endorse or promote products derived
 *    from this software without specific, prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE
 * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING,
 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 * $DragonFly: src/sys/vfs/hammer/hammer_undo.c,v 1.20 2008/07/18 00:19:53 dillon Exp $
 */

/*
 * HAMMER undo - undo buffer/FIFO management.
 */

#include "hammer.h"

static int hammer_und_rb_compare(hammer_undo_t node1, hammer_undo_t node2);

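/*
 * The undo history is a cache of offset ranges for which undo records
 * have recently been generated, indexed by zone offset in a red-black
 * tree.  See hammer_enter_undo_history().
 */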
RB_GENERATE2(hammer_und_rb_tree, hammer_undo, rb_node,
             hammer_und_rb_compare, hammer_off_t, offset);

/*
 * Convert a zone-3 undo offset into a zone-2 buffer offset.
 */
hammer_off_t
hammer_undo_lookup(hammer_mount_t hmp, hammer_off_t zone3_off, int *errorp)
{
        hammer_volume_t root_volume;
        hammer_blockmap_t undomap;
        hammer_off_t result_offset;
        int i;

        KKASSERT((zone3_off & HAMMER_OFF_ZONE_MASK) == HAMMER_ZONE_UNDO);
        root_volume = hammer_get_root_volume(hmp, errorp);
        if (*errorp)
                return(0);
        undomap = &hmp->blockmap[HAMMER_ZONE_UNDO_INDEX];
        KKASSERT(HAMMER_ZONE_DECODE(undomap->alloc_offset) ==
                 HAMMER_ZONE_UNDO_INDEX);
        KKASSERT(zone3_off < undomap->alloc_offset);

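        /*
         * The undo zone is mapped through vol0_undo_array, one entry per
         * large-block.  Index the array with the large-block number
         * encoded in the zone-3 offset, then add the byte offset within
         * that large-block.
         */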
        i = (zone3_off & HAMMER_OFF_SHORT_MASK) / HAMMER_LARGEBLOCK_SIZE;
        result_offset = root_volume->ondisk->vol0_undo_array[i] +
                        (zone3_off & HAMMER_LARGEBLOCK_MASK64);

        hammer_rel_volume(root_volume, 0);
        return(result_offset);
}

/*
 * Generate an UNDO record for the block of data at the specified zone-1
 * or zone-2 offset.
 *
 * The recovery code will execute UNDOs in reverse order, allowing
 * overlaps.  All the UNDOs are executed together, so if we have already
 * laid one down for a range we do not have to lay another one down for
 * the same range.
 */
int
hammer_generate_undo(hammer_transaction_t trans, hammer_io_t io,
                     hammer_off_t zone_off, void *base, int len)
{
        hammer_mount_t hmp;
        hammer_volume_t root_volume;
        hammer_blockmap_t undomap;
        hammer_buffer_t buffer = NULL;
        hammer_fifo_undo_t undo;
        hammer_fifo_tail_t tail;
        hammer_off_t next_offset;
        int error;
        int bytes;

        hmp = trans->hmp;

        /*
         * Enter the offset into our undo history.  If there is an
         * existing undo covering the range we do not have to generate
         * a new one.
         */
        if (hammer_enter_undo_history(hmp, zone_off, len) == EALREADY)
                return(0);

        root_volume = trans->rootvol;
        undomap = &hmp->blockmap[HAMMER_ZONE_UNDO_INDEX];

        /*
         * No undo recursion - the NULL transaction avoids generating
         * an undo for this volume modification itself.
         */
        hammer_modify_volume(NULL, root_volume, NULL, 0);

        hammer_lock_ex(&hmp->undo_lock);
again:
        /*
         * Allocate space in the FIFO.  The entry consists of an undo
         * header, the data payload aligned to HAMMER_HEAD_ALIGN, and a
         * fifo tail.  Require two spare buffers worth of slop as a
         * safety margin.
         */
        bytes = ((len + HAMMER_HEAD_ALIGN_MASK) & ~HAMMER_HEAD_ALIGN_MASK) +
                sizeof(struct hammer_fifo_undo) +
                sizeof(struct hammer_fifo_tail);
        if (hammer_undo_space(trans) < bytes + HAMMER_BUFSIZE*2)
                panic("hammer: insufficient undo FIFO space!");

        next_offset = undomap->next_offset;

        /*
         * Wrap next_offset back to the start of the undo zone when it
         * reaches the end of the allocated undo space.
         */
        if (undomap->next_offset == undomap->alloc_offset) {
                next_offset = HAMMER_ZONE_ENCODE(HAMMER_ZONE_UNDO_INDEX, 0);
                undomap->next_offset = next_offset;
        }

        /*
         * This is a tail-chasing FIFO; when we hit the start of a new
         * buffer we don't have to read it in.
         */
        if ((next_offset & HAMMER_BUFMASK) == 0)
                undo = hammer_bnew(hmp, next_offset, &error, &buffer);
        else
                undo = hammer_bread(hmp, next_offset, &error, &buffer);
        if (error)
                goto done;

        hammer_modify_buffer(NULL, buffer, NULL, 0);

        KKASSERT(undomap->next_offset == next_offset);

        /*
         * If the FIFO entry would cross a buffer boundary, PAD to the
         * end of the buffer and try again.  The XOR test below is true
         * when the first and last bytes of the entry fall in different
         * buffers.
         *
         * Due to our data alignment, the worst case (smallest) PAD
         * record is 8 bytes.  PAD records only populate the first 8
         * bytes of hammer_fifo_head and the tail may be at the same
         * offset as the head.
         */
        if ((next_offset ^ (next_offset + bytes)) & ~HAMMER_BUFMASK64) {
                bytes = HAMMER_BUFSIZE - ((int)next_offset & HAMMER_BUFMASK);
                tail = (void *)((char *)undo + bytes - sizeof(*tail));
                if ((void *)undo != (void *)tail) {
                        tail->tail_signature = HAMMER_TAIL_SIGNATURE;
                        tail->tail_type = HAMMER_HEAD_TYPE_PAD;
                        tail->tail_size = bytes;
                }
                undo->head.hdr_signature = HAMMER_HEAD_SIGNATURE;
                undo->head.hdr_type = HAMMER_HEAD_TYPE_PAD;
                undo->head.hdr_size = bytes;
                /* PAD records are not CRC'd */
                undomap->next_offset += bytes;
                hammer_modify_buffer_done(buffer);
                goto again;
        }
        if (hammer_debug_general & 0x0080) {
                kprintf("undo %016llx %d %d\n",
                        (long long)next_offset, bytes, len);
        }

        /*
         * We're good; create the entry.
         */
        undo->head.hdr_signature = HAMMER_HEAD_SIGNATURE;
        undo->head.hdr_type = HAMMER_HEAD_TYPE_UNDO;
        undo->head.hdr_size = bytes;
        undo->head.reserved01 = 0;
        undo->head.hdr_crc = 0;
        undo->undo_offset = zone_off;
        undo->undo_data_bytes = len;
        bcopy(base, undo + 1, len);

        tail = (void *)((char *)undo + bytes - sizeof(*tail));
        tail->tail_signature = HAMMER_TAIL_SIGNATURE;
        tail->tail_type = HAMMER_HEAD_TYPE_UNDO;
        tail->tail_size = bytes;

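        /*
         * CRC the entry in two pieces so the hdr_crc field itself is
         * excluded from the calculation, then commit the space by
         * advancing next_offset.
         */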
        KKASSERT(bytes >= sizeof(undo->head));
        undo->head.hdr_crc = crc32(undo, HAMMER_FIFO_HEAD_CRCOFF) ^
                             crc32(&undo->head + 1, bytes - sizeof(undo->head));
        undomap->next_offset += bytes;

        hammer_modify_buffer_done(buffer);
done:
        hammer_modify_volume_done(root_volume);
        hammer_unlock(&hmp->undo_lock);

        if (buffer)
                hammer_rel_buffer(buffer, 0);
        return(error);
}

/*
 * UNDO HISTORY API
 *
 * It is not necessary to lay out an undo record for the same address
 * space multiple times.  Maintain a cache of recent undos.
 */

/*
 * Enter an undo into the history.  Return EALREADY if the request is
 * completely covered by a previous request, in which case the caller
 * does not need to generate a new undo record.
 */
int
hammer_enter_undo_history(hammer_mount_t hmp, hammer_off_t offset, int bytes)
{
        hammer_undo_t node;
        hammer_undo_t onode;

        node = RB_LOOKUP(hammer_und_rb_tree, &hmp->rb_undo_root, offset);
        if (node) {
                /* refresh the node's position in the LRU list */
                TAILQ_REMOVE(&hmp->undo_lru_list, node, lru_entry);
                TAILQ_INSERT_TAIL(&hmp->undo_lru_list, node, lru_entry);
                if (bytes <= node->bytes)
                        return(EALREADY);
                node->bytes = bytes;
                return(0);
        }
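        /*
         * Allocate a new entry from the fixed pool, or recycle the
         * least-recently-used entry once the pool is exhausted.
         */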
        if (hmp->undo_alloc != HAMMER_MAX_UNDOS) {
                node = &hmp->undos[hmp->undo_alloc++];
        } else {
                node = TAILQ_FIRST(&hmp->undo_lru_list);
                TAILQ_REMOVE(&hmp->undo_lru_list, node, lru_entry);
                RB_REMOVE(hammer_und_rb_tree, &hmp->rb_undo_root, node);
        }
        node->offset = offset;
        node->bytes = bytes;
        TAILQ_INSERT_TAIL(&hmp->undo_lru_list, node, lru_entry);
        onode = RB_INSERT(hammer_und_rb_tree, &hmp->rb_undo_root, node);
        KKASSERT(onode == NULL);
        return(0);
}

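/*
 * Clear the undo history, forcing new undo records to be generated for
 * any subsequently modified ranges.
 */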
void
hammer_clear_undo_history(hammer_mount_t hmp)
{
        RB_INIT(&hmp->rb_undo_root);
        TAILQ_INIT(&hmp->undo_lru_list);
        hmp->undo_alloc = 0;
}

/*
 * Return how much of the undo FIFO has been used.
 *
 * The calculation includes undo FIFO space still reserved from a previous
 * flush (because it will still be run on recovery if a crash occurs and
 * we can't overwrite it yet).
 */
int64_t
hammer_undo_used(hammer_transaction_t trans)
{
        hammer_blockmap_t cundomap;
        hammer_blockmap_t dundomap;
        int64_t max_bytes;
        int64_t bytes;

        cundomap = &trans->hmp->blockmap[HAMMER_ZONE_UNDO_INDEX];
        dundomap = &trans->rootvol->ondisk->
                        vol0_blockmap[HAMMER_ZONE_UNDO_INDEX];

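        /*
         * The used space runs from the on-disk first_offset (the oldest
         * still-reserved undo) to the in-memory next_offset, accounting
         * for FIFO wrap.
         */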
        if (dundomap->first_offset <= cundomap->next_offset) {
                bytes = cundomap->next_offset - dundomap->first_offset;
        } else {
                bytes = cundomap->alloc_offset - dundomap->first_offset +
                        (cundomap->next_offset & HAMMER_OFF_LONG_MASK);
        }
        max_bytes = cundomap->alloc_offset & HAMMER_OFF_SHORT_MASK;
        KKASSERT(bytes <= max_bytes);
        return(bytes);
}

/*
 * Return how much of the undo FIFO is available for new records.
 */
int64_t
hammer_undo_space(hammer_transaction_t trans)
{
        hammer_blockmap_t rootmap;
        int64_t max_bytes;

        rootmap = &trans->hmp->blockmap[HAMMER_ZONE_UNDO_INDEX];
        max_bytes = rootmap->alloc_offset & HAMMER_OFF_SHORT_MASK;
        return(max_bytes - hammer_undo_used(trans));
}

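/*
 * Return the total size of the undo FIFO.
 */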
int64_t
hammer_undo_max(hammer_mount_t hmp)
{
        hammer_blockmap_t rootmap;
        int64_t max_bytes;

        rootmap = &hmp->blockmap[HAMMER_ZONE_UNDO_INDEX];
        max_bytes = rootmap->alloc_offset & HAMMER_OFF_SHORT_MASK;

        return(max_bytes);
}

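/*
 * RB tree comparison function for the undo history, ordered by offset.
 */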
static int
hammer_und_rb_compare(hammer_undo_t node1, hammer_undo_t node2)
{
        if (node1->offset < node2->offset)
                return(-1);
        if (node1->offset > node2->offset)
                return(1);
        return(0);
}