| Commit | Line | Data |
|---|---|---|
| bf686dbe MD |
1 | /* |
| 2 | * Copyright (c) 2008 The DragonFly Project. All rights reserved. | |
| 3 | * | |
| 4 | * This code is derived from software contributed to The DragonFly Project | |
| 5 | * by Matthew Dillon <dillon@backplane.com> | |
| 6 | * | |
| 7 | * Redistribution and use in source and binary forms, with or without | |
| 8 | * modification, are permitted provided that the following conditions | |
| 9 | * are met: | |
| 10 | * | |
| 11 | * 1. Redistributions of source code must retain the above copyright | |
| 12 | * notice, this list of conditions and the following disclaimer. | |
| 13 | * 2. Redistributions in binary form must reproduce the above copyright | |
| 14 | * notice, this list of conditions and the following disclaimer in | |
| 15 | * the documentation and/or other materials provided with the | |
| 16 | * distribution. | |
| 17 | * 3. Neither the name of The DragonFly Project nor the names of its | |
| 18 | * contributors may be used to endorse or promote products derived | |
| 19 | * from this software without specific, prior written permission. | |
| 20 | * | |
| 21 | * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS | |
| 22 | * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT | |
| 23 | * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS | |
| 24 | * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE | |
| 25 | * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, | |
| 26 | * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING, | |
| 27 | * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; | |
| 28 | * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED | |
| 29 | * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, | |
| 30 | * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT | |
| 31 | * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF | |
| 32 | * SUCH DAMAGE. | |
| 33 | * | |
| cdb6e4e6 | 34 | * $DragonFly: src/sys/vfs/hammer/hammer_undo.c,v 1.20 2008/07/18 00:19:53 dillon Exp $ |
| bf686dbe MD |
35 | */ |
| 36 | ||
| 37 | /* | |
| 38 | * HAMMER undo - undo buffer/FIFO management. | |
| 39 | */ | |
| 40 | ||
| 41 | #include "hammer.h" | |
| 42 | ||
| e8599db1 MD |
43 | static int hammer_und_rb_compare(hammer_undo_t node1, hammer_undo_t node2); |
| 44 | ||
| 45 | RB_GENERATE2(hammer_und_rb_tree, hammer_undo, rb_node, | |
| 46 | hammer_und_rb_compare, hammer_off_t, offset); | |
| 47 | ||
| bf686dbe MD |
48 | /* |
| 49 | * Convert a zone-3 undo offset into a zone-2 buffer offset. | |
| 50 | */ | |
| 51 | hammer_off_t | |
| 52 | hammer_undo_lookup(hammer_mount_t hmp, hammer_off_t zone3_off, int *errorp) | |
| 53 | { | |
| 54 | hammer_volume_t root_volume; | |
| 55 | hammer_blockmap_t undomap; | |
| bf686dbe MD |
56 | hammer_off_t result_offset; |
| 57 | int i; | |
| 58 | ||
| 59 | KKASSERT((zone3_off & HAMMER_OFF_ZONE_MASK) == HAMMER_ZONE_UNDO); | |
| 60 | root_volume = hammer_get_root_volume(hmp, errorp); | |
| 61 | if (*errorp) | |
| 62 | return(0); | |
| 0729c8c8 | 63 | undomap = &hmp->blockmap[HAMMER_ZONE_UNDO_INDEX]; |
| bf686dbe MD |
64 | KKASSERT(HAMMER_ZONE_DECODE(undomap->alloc_offset) == HAMMER_ZONE_UNDO_INDEX); |
| 65 | KKASSERT (zone3_off < undomap->alloc_offset); | |
| 66 | ||
| 67 | i = (zone3_off & HAMMER_OFF_SHORT_MASK) / HAMMER_LARGEBLOCK_SIZE; | |
| cb51be26 | 68 | result_offset = root_volume->ondisk->vol0_undo_array[i] + |
| bf686dbe MD |
69 | (zone3_off & HAMMER_LARGEBLOCK_MASK64); |
| 70 | ||
| 71 | hammer_rel_volume(root_volume, 0); | |
| 72 | return(result_offset); | |
| 73 | } | |
| 74 | ||
| 75 | /* | |
| 76 | * Generate an UNDO record for the block of data at the specified zone1 | |
| f90dde4c | 77 | * or zone2 offset. |
| e8599db1 MD |
78 | * |
| 79 | * The recovery code will execute UNDOs in reverse order, allowing overlaps. | |
| 80 | * All the UNDOs are executed together so if we already laid one down we | |
| 81 | * do not have to lay another one down for the same range. | |
| bf686dbe MD |
82 | */ |
| 83 | int | |
| 059819e3 | 84 | hammer_generate_undo(hammer_transaction_t trans, hammer_io_t io, |
| f90dde4c | 85 | hammer_off_t zone_off, void *base, int len) |
| bf686dbe | 86 | { |
| d99d6bf5 | 87 | hammer_mount_t hmp; |
| bf686dbe | 88 | hammer_volume_t root_volume; |
| bf686dbe MD |
89 | hammer_blockmap_t undomap; |
| 90 | hammer_buffer_t buffer = NULL; | |
| bf686dbe MD |
91 | hammer_fifo_undo_t undo; |
| 92 | hammer_fifo_tail_t tail; | |
| 93 | hammer_off_t next_offset; | |
| bf686dbe MD |
94 | int error; |
| 95 | int bytes; | |
| 96 | ||
| d99d6bf5 MD |
97 | hmp = trans->hmp; |
| 98 | ||
| e8599db1 MD |
99 | /* |
| 100 | * Enter the offset into our undo history. If there is an existing | |
| 101 | * undo we do not have to generate a new one. | |
| 102 | */ | |
| d99d6bf5 | 103 | if (hammer_enter_undo_history(hmp, zone_off, len) == EALREADY) |
| e8599db1 | 104 | return(0); |
| e8599db1 | 105 | |
| b58c6388 | 106 | root_volume = trans->rootvol; |
| d99d6bf5 | 107 | undomap = &hmp->blockmap[HAMMER_ZONE_UNDO_INDEX]; |
| 36f82b23 MD |
108 | |
| 109 | /* no undo recursion */ | |
| 110 | hammer_modify_volume(NULL, root_volume, NULL, 0); | |
| bf686dbe | 111 | |
| d99d6bf5 | 112 | hammer_lock_ex(&hmp->undo_lock); |
| f90dde4c | 113 | again: |
| bf686dbe MD |
114 | /* |
| 115 | * Allocate space in the FIFO | |
| 116 | */ | |
| f90dde4c MD |
117 | bytes = ((len + HAMMER_HEAD_ALIGN_MASK) & ~HAMMER_HEAD_ALIGN_MASK) + |
| 118 | sizeof(struct hammer_fifo_undo) + | |
| 119 | sizeof(struct hammer_fifo_tail); | |
| 06ad81ff | 120 | if (hammer_undo_space(trans) < bytes + HAMMER_BUFSIZE*2) |
| 1f07f686 | 121 | panic("hammer: insufficient undo FIFO space!"); |
| f90dde4c | 122 | |
| bf686dbe MD |
123 | next_offset = undomap->next_offset; |
| 124 | ||
| 125 | /* | |
| 126 | * Wrap next_offset | |
| 127 | */ | |
| 128 | if (undomap->next_offset == undomap->alloc_offset) { | |
| 129 | next_offset = HAMMER_ZONE_ENCODE(HAMMER_ZONE_UNDO_INDEX, 0); | |
| 130 | undomap->next_offset = next_offset; | |
| bf686dbe MD |
131 | } |
| 132 | ||
| bf686dbe | 133 | /* |
| d99d6bf5 MD |
134 | * This is a tail-chasing FIFO, when we hit the start of a new |
| 135 | * buffer we don't have to read it in. | |
| bf686dbe | 136 | */ |
| d99d6bf5 MD |
137 | if ((next_offset & HAMMER_BUFMASK) == 0) |
| 138 | undo = hammer_bnew(hmp, next_offset, &error, &buffer); | |
| 139 | else | |
| 140 | undo = hammer_bread(hmp, next_offset, &error, &buffer); | |
| cdb6e4e6 MD |
141 | if (error) |
| 142 | goto done; | |
| 143 | ||
| d99d6bf5 MD |
144 | hammer_modify_buffer(NULL, buffer, NULL, 0); |
| 145 | ||
| 146 | KKASSERT(undomap->next_offset == next_offset); | |
| c9b9e29d | 147 | |
| bf686dbe MD |
148 | /* |
| 149 | * The FIFO entry would cross a buffer boundary, PAD to the end | |
| 150 | * of the buffer and try again. Due to our data alignment, the | |
| 151 | * worst case (smallest) PAD record is 8 bytes. PAD records only | |
| 152 | * populate the first 8 bytes of hammer_fifo_head and the tail may | |
| 153 | * be at the same offset as the head. | |
| 154 | */ | |
| c9b9e29d | 155 | if ((next_offset ^ (next_offset + bytes)) & ~HAMMER_BUFMASK64) { |
| bf686dbe MD |
156 | bytes = HAMMER_BUFSIZE - ((int)next_offset & HAMMER_BUFMASK); |
| 157 | tail = (void *)((char *)undo + bytes - sizeof(*tail)); | |
| 158 | if ((void *)undo != (void *)tail) { | |
| 159 | tail->tail_signature = HAMMER_TAIL_SIGNATURE; | |
| 160 | tail->tail_type = HAMMER_HEAD_TYPE_PAD; | |
| 161 | tail->tail_size = bytes; | |
| 162 | } | |
| 163 | undo->head.hdr_signature = HAMMER_HEAD_SIGNATURE; | |
| 164 | undo->head.hdr_type = HAMMER_HEAD_TYPE_PAD; | |
| 165 | undo->head.hdr_size = bytes; | |
| 19619882 | 166 | /* NO CRC */ |
| bf686dbe | 167 | undomap->next_offset += bytes; |
| 10a5d1ba | 168 | hammer_modify_buffer_done(buffer); |
| bf686dbe MD |
169 | goto again; |
| 170 | } | |
| 973c11b9 MD |
171 | if (hammer_debug_general & 0x0080) { |
| 172 | kprintf("undo %016llx %d %d\n", | |
| 173 | (long long)next_offset, bytes, len); | |
| 174 | } | |
| bf686dbe MD |
175 | |
| 176 | /* | |
| 177 | * We're good, create the entry. | |
| 178 | */ | |
| 179 | undo->head.hdr_signature = HAMMER_HEAD_SIGNATURE; | |
| f90dde4c | 180 | undo->head.hdr_type = HAMMER_HEAD_TYPE_UNDO; |
| bf686dbe | 181 | undo->head.hdr_size = bytes; |
| 059819e3 MD |
182 | undo->head.reserved01 = 0; |
| 183 | undo->head.hdr_crc = 0; | |
| f90dde4c | 184 | undo->undo_offset = zone_off; |
| bf686dbe MD |
185 | undo->undo_data_bytes = len; |
| 186 | bcopy(base, undo + 1, len); | |
| f90dde4c MD |
187 | |
| 188 | tail = (void *)((char *)undo + bytes - sizeof(*tail)); | |
| 189 | tail->tail_signature = HAMMER_TAIL_SIGNATURE; | |
| 190 | tail->tail_type = HAMMER_HEAD_TYPE_UNDO; | |
| 191 | tail->tail_size = bytes; | |
| 192 | ||
| 19619882 MD |
193 | KKASSERT(bytes >= sizeof(undo->head)); |
| 194 | undo->head.hdr_crc = crc32(undo, HAMMER_FIFO_HEAD_CRCOFF) ^ | |
| 195 | crc32(&undo->head + 1, bytes - sizeof(undo->head)); | |
| bf686dbe | 196 | undomap->next_offset += bytes; |
| c9b9e29d MD |
197 | |
| 198 | hammer_modify_buffer_done(buffer); | |
| cdb6e4e6 | 199 | done: |
| 10a5d1ba | 200 | hammer_modify_volume_done(root_volume); |
| d99d6bf5 MD |
201 | hammer_unlock(&hmp->undo_lock); |
| 202 | ||
| bf686dbe MD |
203 | if (buffer) |
| 204 | hammer_rel_buffer(buffer, 0); | |
| bf686dbe MD |
205 | return(error); |
| 206 | } | |
| 207 | ||
| e8599db1 MD |
208 | /* |
| 209 | * UNDO HISTORY API | |
| 210 | * | |
| 211 | * It is not necessary to layout an undo record for the same address space | |
| 212 | * multiple times. Maintain a cache of recent undo's. | |
| 213 | */ | |
| 214 | ||
| 215 | /* | |
| 216 | * Enter an undo into the history. Return EALREADY if the request completely | |
| 217 | * covers a previous request. | |
| 218 | */ | |
| 219 | int | |
| 220 | hammer_enter_undo_history(hammer_mount_t hmp, hammer_off_t offset, int bytes) | |
| 221 | { | |
| 222 | hammer_undo_t node; | |
| 223 | hammer_undo_t onode; | |
| 224 | ||
| 225 | node = RB_LOOKUP(hammer_und_rb_tree, &hmp->rb_undo_root, offset); | |
| 226 | if (node) { | |
| 227 | TAILQ_REMOVE(&hmp->undo_lru_list, node, lru_entry); | |
| 228 | TAILQ_INSERT_TAIL(&hmp->undo_lru_list, node, lru_entry); | |
| 229 | if (bytes <= node->bytes) | |
| 230 | return(EALREADY); | |
| 231 | node->bytes = bytes; | |
| 232 | return(0); | |
| 233 | } | |
| 234 | if (hmp->undo_alloc != HAMMER_MAX_UNDOS) { | |
| 235 | node = &hmp->undos[hmp->undo_alloc++]; | |
| 236 | } else { | |
| 237 | node = TAILQ_FIRST(&hmp->undo_lru_list); | |
| 238 | TAILQ_REMOVE(&hmp->undo_lru_list, node, lru_entry); | |
| 239 | RB_REMOVE(hammer_und_rb_tree, &hmp->rb_undo_root, node); | |
| 240 | } | |
| 241 | node->offset = offset; | |
| 242 | node->bytes = bytes; | |
| 243 | TAILQ_INSERT_TAIL(&hmp->undo_lru_list, node, lru_entry); | |
| 244 | onode = RB_INSERT(hammer_und_rb_tree, &hmp->rb_undo_root, node); | |
| 245 | KKASSERT(onode == NULL); | |
| 246 | return(0); | |
| 247 | } | |
| 248 | ||
| 249 | void | |
| 250 | hammer_clear_undo_history(hammer_mount_t hmp) | |
| 251 | { | |
| 252 | RB_INIT(&hmp->rb_undo_root); | |
| 253 | TAILQ_INIT(&hmp->undo_lru_list); | |
| 254 | hmp->undo_alloc = 0; | |
| 255 | } | |
| 256 | ||
| 257 | /* | |
| 06ad81ff MD |
258 | * Return how much of the undo FIFO has been used |
| 259 | * | |
| 260 | * The calculation includes undo FIFO space still reserved from a previous | |
| 261 | * flush (because it will still be run on recovery if a crash occurs and | |
| 262 | * we can't overwrite it yet). | |
| e8599db1 | 263 | */ |
| 1f07f686 | 264 | int64_t |
| 06ad81ff | 265 | hammer_undo_used(hammer_transaction_t trans) |
| 1f07f686 | 266 | { |
| 06ad81ff MD |
267 | hammer_blockmap_t cundomap; |
| 268 | hammer_blockmap_t dundomap; | |
| 1f07f686 | 269 | int64_t max_bytes; |
| c9b9e29d | 270 | int64_t bytes; |
| 1f07f686 | 271 | |
| 06ad81ff MD |
272 | cundomap = &trans->hmp->blockmap[HAMMER_ZONE_UNDO_INDEX]; |
| 273 | dundomap = &trans->rootvol->ondisk-> | |
| 274 | vol0_blockmap[HAMMER_ZONE_UNDO_INDEX]; | |
| 1f07f686 | 275 | |
| 06ad81ff MD |
276 | if (dundomap->first_offset <= cundomap->next_offset) { |
| 277 | bytes = cundomap->next_offset - dundomap->first_offset; | |
| 1f07f686 | 278 | } else { |
| 06ad81ff MD |
279 | bytes = cundomap->alloc_offset - dundomap->first_offset + |
| 280 | (cundomap->next_offset & HAMMER_OFF_LONG_MASK); | |
| 1f07f686 | 281 | } |
| 06ad81ff | 282 | max_bytes = cundomap->alloc_offset & HAMMER_OFF_SHORT_MASK; |
| c9b9e29d MD |
283 | KKASSERT(bytes <= max_bytes); |
| 284 | return(bytes); | |
| 285 | } | |
| 286 | ||
| 06ad81ff MD |
287 | /* |
| 288 | * Return how much of the undo FIFO is available for new records. | |
| 289 | */ | |
| c9b9e29d | 290 | int64_t |
| 06ad81ff | 291 | hammer_undo_space(hammer_transaction_t trans) |
| c9b9e29d MD |
292 | { |
| 293 | hammer_blockmap_t rootmap; | |
| 294 | int64_t max_bytes; | |
| 295 | ||
| 06ad81ff | 296 | rootmap = &trans->hmp->blockmap[HAMMER_ZONE_UNDO_INDEX]; |
| c9b9e29d | 297 | max_bytes = rootmap->alloc_offset & HAMMER_OFF_SHORT_MASK; |
| 06ad81ff | 298 | return(max_bytes - hammer_undo_used(trans)); |
| 1f07f686 MD |
299 | } |
| 300 | ||
| 301 | int64_t | |
| 302 | hammer_undo_max(hammer_mount_t hmp) | |
| 303 | { | |
| 304 | hammer_blockmap_t rootmap; | |
| 305 | int64_t max_bytes; | |
| 306 | ||
| 307 | rootmap = &hmp->blockmap[HAMMER_ZONE_UNDO_INDEX]; | |
| c9b9e29d | 308 | max_bytes = rootmap->alloc_offset & HAMMER_OFF_SHORT_MASK; |
| 1f07f686 MD |
309 | |
| 310 | return(max_bytes); | |
| 311 | } | |
| 312 | ||
| e8599db1 MD |
313 | static int |
| 314 | hammer_und_rb_compare(hammer_undo_t node1, hammer_undo_t node2) | |
| 315 | { | |
| 316 | if (node1->offset < node2->offset) | |
| 317 | return(-1); | |
| 318 | if (node1->offset > node2->offset) | |
| 319 | return(1); | |
| 320 | return(0); | |
| 321 | } | |
| 322 |