/*
 * HAMMER VFS - Version 4 - UNDO FIFO layout work.
 * Source file: sys/vfs/hammer/hammer_recover.c (dragonfly.git)
 */
1/*
2 * Copyright (c) 2008 The DragonFly Project. All rights reserved.
3 *
4 * This code is derived from software contributed to The DragonFly Project
5 * by Matthew Dillon <dillon@backplane.com>
6 *
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
9 * are met:
10 *
11 * 1. Redistributions of source code must retain the above copyright
12 * notice, this list of conditions and the following disclaimer.
13 * 2. Redistributions in binary form must reproduce the above copyright
14 * notice, this list of conditions and the following disclaimer in
15 * the documentation and/or other materials provided with the
16 * distribution.
17 * 3. Neither the name of The DragonFly Project nor the names of its
18 * contributors may be used to endorse or promote products derived
19 * from this software without specific, prior written permission.
20 *
21 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
22 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
23 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
24 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
25 * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
26 * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING,
27 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
28 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
29 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
30 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
31 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
32 * SUCH DAMAGE.
33 *
00f16fad 34 * $DragonFly: src/sys/vfs/hammer/hammer_recover.c,v 1.29 2008/07/26 05:36:21 dillon Exp $
4d75d829
MD
35 */
36
37#include "hammer.h"
38
f90dde4c
MD
39static int hammer_check_tail_signature(hammer_fifo_tail_t tail,
40 hammer_off_t end_off);
02428fb6
MD
41static int hammer_check_head_signature(hammer_fifo_head_t head,
42 hammer_off_t beg_off);
f90dde4c
MD
43static void hammer_recover_copy_undo(hammer_off_t undo_offset,
44 char *src, char *dst, int bytes);
02428fb6
MD
45static hammer_fifo_any_t hammer_recover_scan_fwd(hammer_mount_t hmp,
46 hammer_volume_t root_volume,
47 hammer_off_t *scan_offsetp,
48 int *errorp, struct hammer_buffer **bufferp);
49static hammer_fifo_any_t hammer_recover_scan_rev(hammer_mount_t hmp,
50 hammer_volume_t root_volume,
51 hammer_off_t *scan_offsetp,
52 int *errorp, struct hammer_buffer **bufferp);
d36ec43b 53#if 0
f90dde4c 54static void hammer_recover_debug_dump(int w, char *buf, int bytes);
d36ec43b 55#endif
51c35492 56static int hammer_recover_undo(hammer_mount_t hmp, hammer_volume_t root_volume,
02428fb6 57 hammer_fifo_undo_t undo);
4d75d829
MD
58
59/*
02428fb6
MD
60 * Recover filesystem meta-data on mount. This procedure figures out the
61 * UNDO FIFO range and runs the UNDOs backwards. The FIFO pointers are not
62 * resynchronized by this procedure.
63 *
64 * This procedure is run near the beginning of the mount sequence, before
65 * any B-Tree or high-level accesses are enabled, and is responsible for
66 * restoring the meta-data to a consistent state. High level HAMMER data
67 * structures (such as the B-Tree) cannot be accessed here.
0729c8c8
MD
68 *
69 * NOTE: No information from the root volume has been cached in the
02428fb6
MD
70 * hammer_mount structure yet, so we need to access the root volume's
71 * buffer directly.
72 *
73 * NOTE:
4d75d829
MD
74 */
75int
02428fb6 76hammer_recover_stage1(hammer_mount_t hmp, hammer_volume_t root_volume)
4d75d829 77{
f90dde4c
MD
78 hammer_blockmap_t rootmap;
79 hammer_buffer_t buffer;
80 hammer_off_t scan_offset;
02428fb6 81 hammer_off_t scan_offset_save;
f90dde4c 82 hammer_off_t bytes;
02428fb6 83 hammer_fifo_any_t head;
9f5097dc
MD
84 hammer_off_t first_offset;
85 hammer_off_t last_offset;
02428fb6 86 u_int32_t seqno;
f90dde4c 87 int error;
b33e2cc0
MD
88
89 /*
02428fb6 90 * Examine the UNDO FIFO indices in the volume header.
4d75d829 91 */
f90dde4c 92 rootmap = &root_volume->ondisk->vol0_blockmap[HAMMER_ZONE_UNDO_INDEX];
9f5097dc
MD
93 first_offset = rootmap->first_offset;
94 last_offset = rootmap->next_offset;
02428fb6
MD
95 buffer = NULL;
96 error = 0;
97
98 if (first_offset > rootmap->alloc_offset ||
99 last_offset > rootmap->alloc_offset) {
100 kprintf("HAMMER(%s) Illegal UNDO FIFO index range "
101 "%016jx, %016jx limit %016jx\n",
102 root_volume->ondisk->vol_name,
103 (intmax_t)first_offset,
104 (intmax_t)last_offset,
105 (intmax_t)rootmap->alloc_offset);
106 error = EIO;
107 goto done;
108 }
109
110 /*
111 * In HAMMER version 4+ filesystems the volume header does NOT
112 * contain definitive UNDO FIFO state. In particular, the
113 * rootmap->next_offset may not be indexed completely to the
114 * end of the active UNDO FIFO.
115 */
116 if (hmp->version >= HAMMER_VOL_VERSION_FOUR) {
117 /*
118 * To find the definitive range we must first scan backwards
119 * from first_offset to locate the first real record and
120 * extract the sequence number from it. This record is not
121 * part of the active undo space.
122 */
123 scan_offset = first_offset;
124 seqno = 0;
125
126 for (;;) {
127 head = hammer_recover_scan_rev(hmp, root_volume,
128 &scan_offset,
129 &error, &buffer);
130 if (error)
131 break;
132 if (head->head.hdr_type != HAMMER_HEAD_TYPE_PAD) {
133 seqno = head->head.hdr_seq;
134 break;
135 }
136 }
137 if (error) {
138 kprintf("HAMMER(%s) meta-data recovery failure "
139 "during seqno backscan\n",
140 root_volume->ondisk->vol_name);
141 goto done;
142 }
143
144 /*
145 * Scan forwards from first_offset and (seqno+1) looking
146 * for a sequence space discontinuity. This denotes the
147 * end of the active FIFO area.
148 *
149 * NOTE: For the case where the FIFO is empty the very first
150 * record we find will be discontinuous.
151 *
152 * NOTE: Do not include trailing PADs in the scan range,
153 * and remember the returned scan_offset after a
154 * fwd iteration points to the end of the returned
155 * record.
156 */
157 kprintf("HAMMER(%s) meta-data recovery check seqno=%08x\n",
158 root_volume->ondisk->vol_name,
159 seqno);
160
161 scan_offset = first_offset;
162 scan_offset_save = scan_offset;
163 ++seqno;
164 for (;;) {
165 head = hammer_recover_scan_fwd(hmp, root_volume,
166 &scan_offset,
167 &error, &buffer);
168 if (error)
169 break;
170 if (head->head.hdr_type != HAMMER_HEAD_TYPE_PAD) {
171 if (seqno != head->head.hdr_seq) {
172 scan_offset = scan_offset_save;
173 break;
174 }
175 scan_offset_save = scan_offset;
176 ++seqno;
177 }
178
179#if 0
180 /*
181 * If the forward scan is grossly ahead of last_offset
182 * then something is wrong. last_offset is supposed
183 * to be flushed out
184 */
185 if (last_offset >= scan_offset) {
186 bytes = last_offset - scan_offset;
187 } else {
188 bytes = rootmap->alloc_offset - scan_offset +
189 (last_offset & HAMMER_OFF_LONG_MASK);
190 }
191 if (bytes >
192 (rootmap->alloc_offset & HAMMER_OFF_LONG_MASK) *
193 4 / 5) {
194 kprintf("HAMMER(%s) meta-data forward scan is "
195 "grossly beyond the last_offset in "
196 "the volume header, this can't be "
197 "right.\n",
198 root_volume->ondisk->vol_name);
199 error = EIO;
200 break;
201 }
202#endif
203 }
9f5097dc 204
02428fb6
MD
205 /*
206 * Store the seqno. This will be the next seqno we lay down
207 * when generating new UNDOs.
208 */
209 hmp->undo_seqno = seqno;
210 if (error) {
211 kprintf("HAMMER(%s) meta-data recovery failure "
212 "during seqno fwdscan\n",
213 root_volume->ondisk->vol_name);
214 goto done;
215 }
216 last_offset = scan_offset;
217 kprintf("HAMMER(%s) meta-data recovery range %016jx-%016jx "
218 "(invol %016jx) endseqno=%08x\n",
219 root_volume->ondisk->vol_name,
220 (intmax_t)first_offset,
221 (intmax_t)last_offset,
222 (intmax_t)rootmap->next_offset,
223 seqno);
224 }
225
226 /*
227 * Calculate the size of the active portion of the FIFO. If the
228 * FIFO is empty the filesystem is clean and no further action is
229 * needed.
230 */
9f5097dc
MD
231 if (last_offset >= first_offset) {
232 bytes = last_offset - first_offset;
c9b9e29d 233 } else {
9f5097dc
MD
234 bytes = rootmap->alloc_offset - first_offset +
235 (last_offset & HAMMER_OFF_LONG_MASK);
c9b9e29d 236 }
02428fb6
MD
237 if (bytes == 0) {
238 error = 0;
239 goto done;
240 }
241
242 kprintf("HAMMER(%s) Start meta-data recovery %016jx - %016jx "
243 "(%jd bytes of UNDO)%s\n",
09ac686b 244 root_volume->ondisk->vol_name,
02428fb6
MD
245 (intmax_t)first_offset,
246 (intmax_t)last_offset,
247 (intmax_t)bytes,
51c35492 248 (hmp->ronly ? " (RO)" : "(RW)"));
c9b9e29d
MD
249 if (bytes > (rootmap->alloc_offset & HAMMER_OFF_LONG_MASK)) {
250 kprintf("Undo size is absurd, unable to mount\n");
02428fb6
MD
251 error = EIO;
252 goto done;
c9b9e29d 253 }
4d75d829
MD
254
255 /*
f90dde4c 256 * Scan the UNDOs backwards.
4d75d829 257 */
9f5097dc 258 scan_offset = last_offset;
4d75d829 259
f90dde4c 260 while ((int64_t)bytes > 0) {
02428fb6
MD
261 KKASSERT(scan_offset != first_offset);
262 head = hammer_recover_scan_rev(hmp, root_volume,
263 &scan_offset, &error, &buffer);
264 if (error)
f90dde4c 265 break;
02428fb6 266 error = hammer_recover_undo(hmp, root_volume, &head->undo);
f90dde4c 267 if (error) {
02428fb6 268 kprintf("HAMMER(%s) UNDO record at %016jx failed\n",
f90dde4c 269 root_volume->ondisk->vol_name,
02428fb6 270 (intmax_t)scan_offset - head->head.hdr_size);
b33e2cc0 271 break;
f90dde4c 272 }
02428fb6 273 bytes -= head->head.hdr_size;
06ad81ff
MD
274
275 /*
312de84d
MD
276 * If too many dirty buffers have built up we have to flush'm
277 * out. As long as we do not flush out the volume header
278 * a crash here should not cause any problems.
279 *
280 * buffer must be released so the flush can assert that
281 * all buffers are idle.
06ad81ff
MD
282 */
283 if (hammer_flusher_meta_limit(hmp)) {
312de84d
MD
284 if (buffer) {
285 hammer_rel_buffer(buffer, 0);
286 buffer = NULL;
287 }
06ad81ff
MD
288 if (hmp->ronly == 0) {
289 hammer_recover_flush_buffers(hmp, root_volume,
290 0);
291 kprintf("HAMMER(%s) Continuing recovery\n",
292 root_volume->ondisk->vol_name);
00f16fad 293 } else {
06ad81ff
MD
294 kprintf("HAMMER(%s) Recovery failure: Insufficient buffer cache to hold dirty buffers on read-only mount!\n",
295 root_volume->ondisk->vol_name);
00f16fad
MD
296 error = EIO;
297 break;
06ad81ff
MD
298 }
299 }
4d75d829 300 }
c9b9e29d 301done:
02428fb6 302 if (buffer) {
f90dde4c 303 hammer_rel_buffer(buffer, 0);
02428fb6
MD
304 buffer = NULL;
305 }
51c35492
MD
306
307 /*
9f5097dc 308 * After completely flushing all the recovered buffers the volume
02428fb6 309 * header will also be flushed.
51c35492 310 */
9f5097dc
MD
311 if (root_volume->io.recovered == 0) {
312 hammer_ref_volume(root_volume);
313 root_volume->io.recovered = 1;
51c35492 314 }
9f5097dc
MD
315
316 /*
02428fb6
MD
317 * Finish up flushing (or discarding) recovered buffers. FIFO
318 * indices in the volume header are updated to the actual undo
319 * range but will not be collapsed until stage 2.
9f5097dc 320 */
00f16fad
MD
321 if (error == 0) {
322 hammer_modify_volume(NULL, root_volume, NULL, 0);
323 rootmap = &root_volume->ondisk->vol0_blockmap[HAMMER_ZONE_UNDO_INDEX];
02428fb6 324 rootmap->first_offset = first_offset;
00f16fad
MD
325 rootmap->next_offset = last_offset;
326 hammer_modify_volume_done(root_volume);
327 if (hmp->ronly == 0)
328 hammer_recover_flush_buffers(hmp, root_volume, 1);
329 } else {
330 hammer_recover_flush_buffers(hmp, root_volume, -1);
331 }
02428fb6
MD
332 kprintf("HAMMER(%s) End meta-data recovery\n",
333 root_volume->ondisk->vol_name);
f90dde4c 334 return (error);
4d75d829
MD
335}
336
02428fb6
MD
337/*
338 * Execute redo operations
339 *
340 * This procedure is run at the end of the mount sequence, after the hammer
341 * mount structure has been completely initialized but before the filesystem
342 * goes live. It can access standard cursors, the B-Tree, flush the
343 * filesystem, and so forth.
344 *
345 * This code may only be called for read-write mounts or when a mount
346 * switches from read-only to read-write.
347 *
348 * The stage1 code will have already calculated the correct FIFO range
349 * and stored it in the rootmap.
350 */
351int
352hammer_recover_stage2(hammer_mount_t hmp, hammer_volume_t root_volume)
4d75d829 353{
02428fb6
MD
354 hammer_blockmap_t rootmap;
355 hammer_buffer_t buffer;
356 hammer_off_t scan_offset;
357 hammer_off_t bytes;
358 hammer_fifo_any_t head;
359 hammer_off_t first_offset;
360 hammer_off_t last_offset;
361 int error;
362
363 /*
364 * Stage 2 can only be run on a RW mount, or when the mount is
365 * switched from RO to RW. It must be run only once.
366 */
367 KKASSERT(hmp->ronly == 0);
4d75d829 368
02428fb6
MD
369 if (hmp->hflags & HMNT_STAGE2)
370 return(0);
371 hmp->hflags |= HMNT_STAGE2;
b33e2cc0
MD
372
373 /*
02428fb6
MD
374 * Examine the UNDO FIFO. If it is empty the filesystem is clean
375 * and no action need be taken.
b33e2cc0 376 */
02428fb6
MD
377 rootmap = &root_volume->ondisk->vol0_blockmap[HAMMER_ZONE_UNDO_INDEX];
378 first_offset = rootmap->first_offset;
379 last_offset = rootmap->next_offset;
380 if (first_offset == last_offset)
381 return(0);
382
383 if (last_offset >= first_offset) {
384 bytes = last_offset - first_offset;
385 } else {
386 bytes = rootmap->alloc_offset - first_offset +
387 (last_offset & HAMMER_OFF_LONG_MASK);
388 }
389 kprintf("HAMMER(%s) Start redo recovery %016jx - %016jx "
390 "(%jd bytes of UNDO)%s\n",
391 root_volume->ondisk->vol_name,
392 (intmax_t)first_offset,
393 (intmax_t)last_offset,
394 (intmax_t)bytes,
395 (hmp->ronly ? " (RO)" : "(RW)"));
396 if (bytes > (rootmap->alloc_offset & HAMMER_OFF_LONG_MASK)) {
397 kprintf("Undo size is absurd, unable to mount\n");
398 return(EIO);
b33e2cc0
MD
399 }
400
b33e2cc0 401 /*
02428fb6 402 * Scan the REDOs forwards.
b33e2cc0 403 */
02428fb6
MD
404 scan_offset = first_offset;
405 buffer = NULL;
406
407 while (bytes) {
408 KKASSERT(scan_offset != last_offset);
409
410 head = hammer_recover_scan_fwd(hmp, root_volume,
411 &scan_offset, &error, &buffer);
412 if (error)
413 break;
414
415#if 0
416 error = hammer_recover_redo(hmp, root_volume, &head->redo);
417#endif
418 if (error) {
419 kprintf("HAMMER(%s) UNDO record at %016jx failed\n",
420 root_volume->ondisk->vol_name,
421 (intmax_t)scan_offset - head->head.hdr_size);
422 break;
f90dde4c 423 }
02428fb6
MD
424 bytes -= head->head.hdr_size;
425 }
426 if (buffer) {
427 hammer_rel_buffer(buffer, 0);
428 buffer = NULL;
f90dde4c 429 }
4d75d829
MD
430
431 /*
02428fb6
MD
432 * Finish up flushing (or discarding) recovered buffers by executing
433 * a normal flush cycle. Setting HMNT_UNDO_DIRTY bypasses degenerate
434 * case tests and forces the flush in order to update the FIFO indices.
435 *
436 * If a crash occurs during the flush the entire undo/redo will be
437 * re-run during recovery on the next mount.
4d75d829 438 */
02428fb6
MD
439 if (error == 0) {
440 if (rootmap->first_offset != rootmap->next_offset)
441 hmp->hflags |= HMNT_UNDO_DIRTY;
442 hammer_flusher_sync(hmp);
4d75d829 443 }
02428fb6
MD
444 kprintf("HAMMER(%s) End redo recovery\n",
445 root_volume->ondisk->vol_name);
446 return (error);
4d75d829
MD
447}
448
02428fb6
MD
449/*
450 * Scan backwards from *scan_offsetp, return the FIFO record prior to the
451 * record at *scan_offsetp or NULL if an error occured.
452 *
453 * On return *scan_offsetp will be the offset of the returned record.
454 */
455hammer_fifo_any_t
456hammer_recover_scan_rev(hammer_mount_t hmp, hammer_volume_t root_volume,
457 hammer_off_t *scan_offsetp,
458 int *errorp, struct hammer_buffer **bufferp)
4d75d829 459{
02428fb6
MD
460 hammer_off_t scan_offset;
461 hammer_blockmap_t rootmap;
462 hammer_fifo_any_t head;
f90dde4c 463 hammer_fifo_tail_t tail;
4d75d829 464
02428fb6
MD
465 rootmap = &root_volume->ondisk->vol0_blockmap[HAMMER_ZONE_UNDO_INDEX];
466 scan_offset = *scan_offsetp;
467
468 if (hammer_debug_general & 0x0080)
469 kprintf("rev scan_offset %016jx\n", (intmax_t)scan_offset);
470 if (scan_offset == HAMMER_ZONE_ENCODE(HAMMER_ZONE_UNDO_INDEX, 0))
471 scan_offset = rootmap->alloc_offset;
472 if (scan_offset - sizeof(*tail) <
473 HAMMER_ZONE_ENCODE(HAMMER_ZONE_UNDO_INDEX, 0)) {
474 kprintf("HAMMER(%s) UNDO record at %016jx FIFO underflow\n",
475 root_volume->ondisk->vol_name,
476 (intmax_t)scan_offset);
477 *errorp = EIO;
478 return (NULL);
9944ae54 479 }
02428fb6
MD
480 tail = hammer_bread(hmp, scan_offset - sizeof(*tail),
481 errorp, bufferp);
482 if (*errorp) {
483 kprintf("HAMMER(%s) Unable to read UNDO TAIL "
484 "at %016jx\n",
485 root_volume->ondisk->vol_name,
486 (intmax_t)scan_offset - sizeof(*tail));
487 return (NULL);
f90dde4c 488 }
02428fb6
MD
489
490 if (hammer_check_tail_signature(tail, scan_offset) != 0) {
491 kprintf("HAMMER(%s) Illegal UNDO TAIL signature "
492 "at %016jx\n",
493 root_volume->ondisk->vol_name,
494 (intmax_t)scan_offset - sizeof(*tail));
495 *errorp = EIO;
496 return (NULL);
4d75d829 497 }
02428fb6
MD
498 head = (void *)((char *)tail + sizeof(*tail) - tail->tail_size);
499 *scan_offsetp = scan_offset - head->head.hdr_size;
500
501 return (head);
502}
503
504/*
505 * Scan forwards from *scan_offsetp, return the FIFO record or NULL if
506 * an error occured.
507 *
508 * On return *scan_offsetp will be the offset of the record following
509 * the returned record.
510 */
511hammer_fifo_any_t
512hammer_recover_scan_fwd(hammer_mount_t hmp, hammer_volume_t root_volume,
513 hammer_off_t *scan_offsetp,
514 int *errorp, struct hammer_buffer **bufferp)
515{
516 hammer_off_t scan_offset;
517 hammer_blockmap_t rootmap;
518 hammer_fifo_any_t head;
519
520 rootmap = &root_volume->ondisk->vol0_blockmap[HAMMER_ZONE_UNDO_INDEX];
521 scan_offset = *scan_offsetp;
522
523 if (hammer_debug_general & 0x0080)
524 kprintf("fwd scan_offset %016jx\n", (intmax_t)scan_offset);
525 if (scan_offset == rootmap->alloc_offset)
526 scan_offset = HAMMER_ZONE_ENCODE(HAMMER_ZONE_UNDO_INDEX, 0);
527
528 head = hammer_bread(hmp, scan_offset, errorp, bufferp);
529 if (*errorp) {
530 kprintf("HAMMER(%s) Unable to read UNDO HEAD at %016jx\n",
531 root_volume->ondisk->vol_name,
532 (intmax_t)scan_offset);
533 return (NULL);
534 }
535
536 if (hammer_check_head_signature(&head->head, scan_offset) != 0) {
537 kprintf("HAMMER(%s) Illegal UNDO TAIL signature "
538 "at %016jx\n",
539 root_volume->ondisk->vol_name,
540 (intmax_t)scan_offset);
541 *errorp = EIO;
542 return (NULL);
543 }
544 scan_offset += head->head.hdr_size;
545 if (scan_offset == rootmap->alloc_offset)
546 scan_offset = HAMMER_ZONE_ENCODE(HAMMER_ZONE_UNDO_INDEX, 0);
547 *scan_offsetp = scan_offset;
548
549 return (head);
550}
551
552/*
553 * Helper function for hammer_check_{head,tail}_signature(). Check stuff
554 * once the head and tail has been established.
555 *
556 * This function validates the entire FIFO record wrapper.
557 */
558static __inline
559int
560_hammer_check_signature(hammer_fifo_head_t head, hammer_fifo_tail_t tail,
561 hammer_off_t beg_off)
562{
563 hammer_off_t end_off;
564 u_int32_t crc;
565 int bytes;
4d75d829
MD
566
567 /*
02428fb6
MD
568 * Check signatures. The tail signature is allowed to be the
569 * head signature only for 8-byte PADs.
4d75d829 570 */
02428fb6
MD
571 if (head->hdr_signature != HAMMER_HEAD_SIGNATURE) {
572 kprintf("HAMMER: FIFO record bad head signature "
573 "%04x at %016jx\n",
574 head->hdr_signature,
575 (intmax_t)beg_off);
576 return(2);
577 }
578 if (head->hdr_size < HAMMER_HEAD_ALIGN ||
579 (head->hdr_size & HAMMER_HEAD_ALIGN_MASK)) {
580 kprintf("HAMMER: FIFO record unaligned or bad size"
581 "%04x at %016jx\n",
582 head->hdr_size,
583 (intmax_t)beg_off);
584 return(2);
585 }
586 end_off = beg_off + head->hdr_size;
587
588 if (head->hdr_type != HAMMER_HEAD_TYPE_PAD ||
589 (size_t)(end_off - beg_off) != sizeof(*tail)) {
590 if (head->hdr_type != tail->tail_type) {
591 kprintf("HAMMER: FIFO record head/tail type mismatch "
592 "%04x %04x at %016jx\n",
593 head->hdr_type, tail->tail_type,
594 (intmax_t)beg_off);
595 return(2);
596 }
597 if (head->hdr_size != tail->tail_size) {
598 kprintf("HAMMER: FIFO record head/tail size mismatch "
599 "%04x %04x at %016jx\n",
600 head->hdr_size, tail->tail_size,
601 (intmax_t)beg_off);
602 return(2);
603 }
604 if (tail->tail_signature != HAMMER_TAIL_SIGNATURE) {
605 kprintf("HAMMER: FIFO record bad tail signature "
606 "%04x at %016jx\n",
607 tail->tail_signature,
608 (intmax_t)beg_off);
609 return(3);
610 }
611 }
9944ae54
MD
612
613 /*
02428fb6
MD
614 * Non-PAD records must have a CRC and must be sized at
615 * least large enough to fit the head and tail.
09ac686b 616 */
02428fb6
MD
617 if (head->hdr_type != HAMMER_HEAD_TYPE_PAD) {
618 crc = crc32(head, HAMMER_FIFO_HEAD_CRCOFF) ^
619 crc32(head + 1, head->hdr_size - sizeof(*head));
620 if (head->hdr_crc != crc) {
621 kprintf("HAMMER: FIFO record CRC failed %08x %08x "
622 "at %016jx\n",
623 head->hdr_crc, crc,
624 (intmax_t)beg_off);
625 return(EIO);
626 }
627 if (head->hdr_size < sizeof(*head) + sizeof(*tail)) {
628 kprintf("HAMMER: FIFO record too small "
629 "%04x at %016jx\n",
630 head->hdr_size,
631 (intmax_t)beg_off);
632 return(EIO);
633 }
09ac686b
MD
634 }
635
09ac686b 636 /*
f90dde4c 637 * Check the tail
9944ae54 638 */
02428fb6
MD
639 bytes = head->hdr_size;
640 tail = (void *)((char *)head + bytes - sizeof(*tail));
641 if (tail->tail_size != head->hdr_size) {
642 kprintf("HAMMER: Bad tail size %04x vs %04x at %016jx\n",
643 tail->tail_size, head->hdr_size,
644 (intmax_t)beg_off);
f90dde4c
MD
645 return(EIO);
646 }
02428fb6
MD
647 if (tail->tail_type != head->hdr_type) {
648 kprintf("HAMMER: Bad tail type %04x vs %04x at %016jx\n",
649 tail->tail_type, head->hdr_type,
650 (intmax_t)beg_off);
f90dde4c 651 return(EIO);
4d75d829
MD
652 }
653
02428fb6
MD
654 return(0);
655}
656
657/*
658 * Check that the FIFO record is in-bounds given the head and the
659 * hammer offset.
660 *
661 * Also checks that the head and tail structures agree with each other,
662 * but does not check beyond the signature, type, and size.
663 */
664static int
665hammer_check_head_signature(hammer_fifo_head_t head, hammer_off_t beg_off)
666{
667 hammer_fifo_tail_t tail;
668 hammer_off_t end_off;
669
670 /*
671 * head overlaps buffer boundary. This could be a PAD so only
672 * check the minimum PAD size here.
673 */
674 if (((beg_off + sizeof(*tail) - 1) ^ (beg_off)) & ~HAMMER_BUFMASK64)
675 return(1);
676
677 /*
678 * Calculate the ending offset and make sure the record does
679 * not cross a buffer boundary.
680 */
681 end_off = beg_off + head->hdr_size;
682 if ((beg_off ^ (end_off - 1)) & ~HAMMER_BUFMASK64)
683 return(1);
684 tail = (void *)((char *)head + head->hdr_size - sizeof(*tail));
685 return (_hammer_check_signature(head, tail, beg_off));
686}
687
688/*
689 * Check that the FIFO record is in-bounds given the tail and the
690 * hammer offset. The offset is pointing at the ending boundary of the
691 * record.
692 *
693 * Also checks that the head and tail structures agree with each other,
694 * but does not check beyond the signature, type, and size.
695 */
696static int
697hammer_check_tail_signature(hammer_fifo_tail_t tail, hammer_off_t end_off)
698{
699 hammer_fifo_head_t head;
700 hammer_off_t beg_off;
701
4d75d829 702 /*
02428fb6
MD
703 * tail overlaps buffer boundary
704 */
705 if (((end_off - sizeof(*tail)) ^ (end_off - 1)) & ~HAMMER_BUFMASK64)
706 return(1);
707
708 /*
709 * Calculate the begining offset and make sure the record does
710 * not cross a buffer boundary.
4d75d829 711 */
02428fb6
MD
712 beg_off = end_off - tail->tail_size;
713 if ((beg_off ^ (end_off - 1)) & ~HAMMER_BUFMASK64)
714 return(1);
715 head = (void *)((char *)tail + sizeof(*tail) - tail->tail_size);
716 return (_hammer_check_signature(head, tail, beg_off));
717}
718
719static int
720hammer_recover_undo(hammer_mount_t hmp, hammer_volume_t root_volume,
721 hammer_fifo_undo_t undo)
722{
723 hammer_volume_t volume;
724 hammer_buffer_t buffer;
725 hammer_off_t buf_offset;
726 int zone;
727 int error;
728 int vol_no;
729 int bytes;
730 u_int32_t offset;
731
732 /*
733 * Only process UNDO records. Flag if we find other records to
734 * optimize stage2 recovery.
735 */
736 if (undo->head.hdr_type != HAMMER_HEAD_TYPE_UNDO) {
737 if (undo->head.hdr_type == HAMMER_HEAD_TYPE_REDO)
738 hmp->hflags |= HMNT_HASREDO;
f90dde4c 739 return(0);
02428fb6 740 }
4d75d829
MD
741
742 /*
f90dde4c 743 * Validate the UNDO record.
4d75d829 744 */
02428fb6
MD
745 bytes = undo->head.hdr_size - sizeof(*undo) -
746 sizeof(struct hammer_fifo_tail);
747 if (bytes < 0 || undo->undo_data_bytes < 0 ||
748 undo->undo_data_bytes > bytes) {
f90dde4c 749 kprintf("HAMMER: Corrupt UNDO record, undo_data_bytes %d/%d\n",
02428fb6 750 undo->undo_data_bytes, bytes);
f90dde4c 751 return(EIO);
4d75d829
MD
752 }
753
02428fb6
MD
754 bytes = undo->undo_data_bytes;
755
4d75d829 756 /*
f90dde4c
MD
757 * The undo offset may only be a zone-1 or zone-2 offset.
758 *
759 * Currently we only support a zone-1 offset representing the
760 * volume header.
4d75d829 761 */
f90dde4c
MD
762 zone = HAMMER_ZONE_DECODE(undo->undo_offset);
763 offset = undo->undo_offset & HAMMER_BUFMASK;
4d75d829 764
02428fb6 765 if (offset + bytes > HAMMER_BUFSIZE) {
f90dde4c
MD
766 kprintf("HAMMER: Corrupt UNDO record, bad offset\n");
767 return (EIO);
768 }
4d75d829 769
f90dde4c
MD
770 switch(zone) {
771 case HAMMER_ZONE_RAW_VOLUME_INDEX:
772 vol_no = HAMMER_VOL_DECODE(undo->undo_offset);
773 volume = hammer_get_volume(hmp, vol_no, &error);
774 if (volume == NULL) {
775 kprintf("HAMMER: UNDO record, "
776 "cannot access volume %d\n", vol_no);
777 break;
4d75d829 778 }
f90dde4c
MD
779 hammer_modify_volume(NULL, volume, NULL, 0);
780 hammer_recover_copy_undo(undo->undo_offset,
781 (char *)(undo + 1),
782 (char *)volume->ondisk + offset,
02428fb6 783 bytes);
f90dde4c 784 hammer_modify_volume_done(volume);
51c35492
MD
785
786 /*
9f5097dc
MD
787 * Multiple modifications may be made to the same buffer.
788 * Also, the volume header cannot be written out until
789 * everything else has been flushed. This also
51c35492
MD
790 * covers the read-only case by preventing the kernel from
791 * flushing the buffer.
792 */
793 if (volume->io.recovered == 0)
794 volume->io.recovered = 1;
795 else
796 hammer_rel_volume(volume, 0);
f90dde4c
MD
797 break;
798 case HAMMER_ZONE_RAW_BUFFER_INDEX:
2f85fa4d 799 buf_offset = undo->undo_offset & ~HAMMER_BUFMASK64;
4a2796f3
MD
800 buffer = hammer_get_buffer(hmp, buf_offset, HAMMER_BUFSIZE,
801 0, &error);
f90dde4c
MD
802 if (buffer == NULL) {
803 kprintf("HAMMER: UNDO record, "
02428fb6
MD
804 "cannot access buffer %016jx\n",
805 (intmax_t)undo->undo_offset);
f90dde4c 806 break;
4d75d829 807 }
f90dde4c
MD
808 hammer_modify_buffer(NULL, buffer, NULL, 0);
809 hammer_recover_copy_undo(undo->undo_offset,
810 (char *)(undo + 1),
811 (char *)buffer->ondisk + offset,
02428fb6 812 bytes);
f90dde4c 813 hammer_modify_buffer_done(buffer);
51c35492
MD
814
815 /*
816 * Multiple modifications may be made to the same buffer,
817 * improve performance by delaying the flush. This also
818 * covers the read-only case by preventing the kernel from
819 * flushing the buffer.
820 */
821 if (buffer->io.recovered == 0)
822 buffer->io.recovered = 1;
823 else
824 hammer_rel_buffer(buffer, 0);
f90dde4c
MD
825 break;
826 default:
827 kprintf("HAMMER: Corrupt UNDO record\n");
828 error = EIO;
4d75d829 829 }
f90dde4c 830 return (error);
4d75d829
MD
831}
832
f90dde4c
MD
833static void
834hammer_recover_copy_undo(hammer_off_t undo_offset,
835 char *src, char *dst, int bytes)
4d75d829 836{
973c11b9 837 if (hammer_debug_general & 0x0080) {
02428fb6
MD
838 kprintf("UNDO %016jx: %d\n",
839 (intmax_t)undo_offset, bytes);
973c11b9 840 }
ec4e8497 841#if 0
02428fb6 842 kprintf("UNDO %016jx:", (intmax_t)undo_offset);
f90dde4c
MD
843 hammer_recover_debug_dump(22, dst, bytes);
844 kprintf("%22s", "to:");
845 hammer_recover_debug_dump(22, src, bytes);
ec4e8497 846#endif
f90dde4c 847 bcopy(src, dst, bytes);
4d75d829
MD
848}
849
d36ec43b
MD
850#if 0
851
f90dde4c
MD
852static void
853hammer_recover_debug_dump(int w, char *buf, int bytes)
4d75d829 854{
f90dde4c 855 int i;
4d75d829 856
f90dde4c
MD
857 for (i = 0; i < bytes; ++i) {
858 if (i && (i & 15) == 0)
859 kprintf("\n%*.*s", w, w, "");
860 kprintf(" %02x", (unsigned char)buf[i]);
b33e2cc0 861 }
f90dde4c 862 kprintf("\n");
4d75d829
MD
863}
864
d36ec43b 865#endif
51c35492
MD
866
867/*
9f5097dc
MD
868 * Flush recovered buffers from recovery operations. The call to this
869 * routine may be delayed if a read-only mount was made and then later
870 * upgraded to read-write.
871 *
872 * The volume header is always written last. The UNDO FIFO will be forced
873 * to zero-length by setting next_offset to first_offset. This leaves the
874 * (now stale) UNDO information used to recover the disk available for
875 * forensic analysis.
00f16fad
MD
876 *
877 * final is typically 0 or 1. The volume header is only written if final
878 * is 1. If final is -1 the recovered buffers are discarded instead of
879 * written and root_volume can also be passed as NULL in that case.
51c35492
MD
880 */
881static int hammer_recover_flush_volume_callback(hammer_volume_t, void *);
882static int hammer_recover_flush_buffer_callback(hammer_buffer_t, void *);
883
884void
06ad81ff
MD
885hammer_recover_flush_buffers(hammer_mount_t hmp, hammer_volume_t root_volume,
886 int final)
51c35492 887{
af209b0f
MD
888 /*
889 * Flush the buffers out asynchronously, wait for all the I/O to
890 * complete, then do it again to destroy the buffer cache buffer
891 * so it doesn't alias something later on.
892 */
893 RB_SCAN(hammer_buf_rb_tree, &hmp->rb_bufs_root, NULL,
00f16fad 894 hammer_recover_flush_buffer_callback, &final);
af209b0f 895 hammer_io_wait_all(hmp, "hmrrcw");
0832c9bb 896 RB_SCAN(hammer_buf_rb_tree, &hmp->rb_bufs_root, NULL,
00f16fad 897 hammer_recover_flush_buffer_callback, &final);
9f5097dc 898
00f16fad
MD
899 /*
900 * Flush all volume headers except the root volume. If final < 0
901 * we discard all volume headers including the root volume.
902 */
903 if (final >= 0) {
904 RB_SCAN(hammer_vol_rb_tree, &hmp->rb_vols_root, NULL,
905 hammer_recover_flush_volume_callback, root_volume);
906 } else {
907 RB_SCAN(hammer_vol_rb_tree, &hmp->rb_vols_root, NULL,
908 hammer_recover_flush_volume_callback, NULL);
909 }
9f5097dc 910
af209b0f 911 /*
00f16fad 912 * Finalize the root volume header.
af209b0f 913 */
00f16fad 914 if (root_volume && root_volume->io.recovered && final > 0) {
51c35492 915 crit_enter();
f5a07a7a
MD
916 while (hmp->io_running_space > 0)
917 tsleep(&hmp->io_running_space, 0, "hmrflx", 0);
51c35492
MD
918 crit_exit();
919 root_volume->io.recovered = 0;
710733a6 920 hammer_io_flush(&root_volume->io, 0);
51c35492
MD
921 hammer_rel_volume(root_volume, 0);
922 }
923}
924
00f16fad
MD
925/*
926 * Callback to flush volume headers. If discarding data will be NULL and
927 * all volume headers (including the root volume) will be discarded.
928 * Otherwise data is the root_volume and we flush all volume headers
929 * EXCEPT the root_volume.
930 */
51c35492
MD
931static
932int
933hammer_recover_flush_volume_callback(hammer_volume_t volume, void *data)
934{
935 hammer_volume_t root_volume = data;
936
51c35492
MD
937 if (volume->io.recovered && volume != root_volume) {
938 volume->io.recovered = 0;
00f16fad 939 if (root_volume != NULL)
710733a6 940 hammer_io_flush(&volume->io, 0);
00f16fad
MD
941 else
942 hammer_io_clear_modify(&volume->io, 1);
51c35492
MD
943 hammer_rel_volume(volume, 0);
944 }
945 return(0);
946}
947
948static
949int
950hammer_recover_flush_buffer_callback(hammer_buffer_t buffer, void *data)
951{
00f16fad
MD
952 int final = *(int *)data;
953
51c35492
MD
954 if (buffer->io.recovered) {
955 buffer->io.recovered = 0;
af209b0f 956 buffer->io.reclaim = 1;
00f16fad
MD
957 if (final < 0)
958 hammer_io_clear_modify(&buffer->io, 1);
959 else
710733a6 960 hammer_io_flush(&buffer->io, 0);
af209b0f
MD
961 hammer_rel_buffer(buffer, 0);
962 } else {
a99b9ea2
MD
963 KKASSERT(buffer->io.lock.refs == 0);
964 ++hammer_count_refedbufs;
af209b0f
MD
965 hammer_ref(&buffer->io.lock);
966 buffer->io.reclaim = 1;
967 hammer_rel_buffer(buffer, 1);
51c35492
MD
968 }
969 return(0);
970}
971