/*
 * Copyright (c) 2008 The DragonFly Project. All rights reserved.
 *
 * This code is derived from software contributed to The DragonFly Project
 * by Matthew Dillon <dillon@backplane.com>
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in
 *    the documentation and/or other materials provided with the
 *    distribution.
 * 3. Neither the name of The DragonFly Project nor the names of its
 *    contributors may be used to endorse or promote products derived
 *    from this software without specific, prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
 * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING,
 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 * $DragonFly: src/sys/vfs/hammer/hammer_recover.c,v 1.25 2008/06/20 05:38:26 dillon Exp $
 */

#include "hammer.h"

static int hammer_check_tail_signature(hammer_fifo_tail_t tail,
			hammer_off_t end_off);
static void hammer_recover_copy_undo(hammer_off_t undo_offset,
			char *src, char *dst, int bytes);
#if 0
static void hammer_recover_debug_dump(int w, char *buf, int bytes);
#endif
static int hammer_recover_undo(hammer_mount_t hmp, hammer_volume_t root_volume,
			hammer_fifo_undo_t undo, int bytes);

/*
 * Recover a filesystem on mount
 *
 * NOTE: No information from the root volume has been cached in the
 *	 hammer_mount structure yet, so we need to access the root volume's
 *	 buffer directly.
 */
int
hammer_recover(hammer_mount_t hmp, hammer_volume_t root_volume)
{
	hammer_blockmap_t rootmap;
	hammer_buffer_t buffer;
	hammer_off_t scan_offset;
	hammer_off_t bytes;
	hammer_fifo_tail_t tail;
	hammer_fifo_undo_t undo;
	hammer_off_t first_offset;
	hammer_off_t last_offset;
	int error;

	/*
	 * Examine the UNDO FIFO. If it is empty the filesystem is clean
	 * and no action need be taken.
	 */
	rootmap = &root_volume->ondisk->vol0_blockmap[HAMMER_ZONE_UNDO_INDEX];

	if (rootmap->first_offset == rootmap->next_offset)
		return(0);

	first_offset = rootmap->first_offset;
	last_offset = rootmap->next_offset;

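	/*
	 * Calculate the number of bytes of UNDO data in the FIFO.  The
	 * FIFO is circular; if last_offset has wrapped around below
	 * first_offset the live range runs from first_offset to the end
	 * of the allocated UNDO space plus from the beginning of the
	 * UNDO zone to last_offset.
	 */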
	if (last_offset >= first_offset) {
		bytes = last_offset - first_offset;
	} else {
		bytes = rootmap->alloc_offset - first_offset +
			(last_offset & HAMMER_OFF_LONG_MASK);
	}
	kprintf("HAMMER(%s) Start Recovery %016llx - %016llx "
		"(%lld bytes of UNDO)%s\n",
		root_volume->ondisk->vol_name,
		first_offset, last_offset,
		bytes,
		(hmp->ronly ? " (RO)" : "(RW)"));
	if (bytes > (rootmap->alloc_offset & HAMMER_OFF_LONG_MASK)) {
		kprintf("Undo size is absurd, unable to mount\n");
		return(EIO);
	}

	/*
	 * Scan the UNDOs backwards.
	 */
	scan_offset = last_offset;
	buffer = NULL;
	if (scan_offset > rootmap->alloc_offset) {
		kprintf("HAMMER(%s) UNDO record at %016llx FIFO overflow\n",
			root_volume->ondisk->vol_name,
			scan_offset);
		error = EIO;
		goto done;
	}

	while ((int64_t)bytes > 0) {
		if (hammer_debug_general & 0x0080)
			kprintf("scan_offset %016llx\n", scan_offset);
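		/*
		 * The backwards scan wraps: when it reaches the very
		 * beginning of the UNDO zone, continue from the top of
		 * the allocated UNDO space.
		 */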
		if (scan_offset == HAMMER_ZONE_ENCODE(HAMMER_ZONE_UNDO_INDEX, 0)) {
			scan_offset = rootmap->alloc_offset;
			continue;
		}
		if (scan_offset - sizeof(*tail) <
		    HAMMER_ZONE_ENCODE(HAMMER_ZONE_UNDO_INDEX, 0)) {
			kprintf("HAMMER(%s) UNDO record at %016llx FIFO "
				"underflow\n",
				root_volume->ondisk->vol_name,
				scan_offset);
			error = EIO;
			break;
		}
		tail = hammer_bread(hmp, scan_offset - sizeof(*tail),
				    &error, &buffer);
		if (error) {
			kprintf("HAMMER(%s) Unable to read UNDO TAIL "
				"at %016llx\n",
				root_volume->ondisk->vol_name,
				scan_offset - sizeof(*tail));
			break;
		}

		if (hammer_check_tail_signature(tail, scan_offset) != 0) {
			kprintf("HAMMER(%s) Illegal UNDO TAIL signature "
				"at %016llx\n",
				root_volume->ondisk->vol_name,
				scan_offset - sizeof(*tail));
			error = EIO;
			break;
		}
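		/*
		 * The tail is the last structure in the record; step back
		 * tail_size bytes from the end of the record to locate the
		 * head (hammer_fifo_undo) of the record.
		 */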
		undo = (void *)((char *)tail + sizeof(*tail) - tail->tail_size);

		error = hammer_recover_undo(hmp, root_volume, undo,
				HAMMER_BUFSIZE -
				(int)((char *)undo - (char *)buffer->ondisk));
		if (error) {
			kprintf("HAMMER(%s) UNDO record at %016llx failed\n",
				root_volume->ondisk->vol_name,
				scan_offset - tail->tail_size);
			break;
		}
		scan_offset -= tail->tail_size;
		bytes -= tail->tail_size;
	}
done:
	if (buffer)
		hammer_rel_buffer(buffer, 0);

	/*
	 * After completely flushing all the recovered buffers the volume
	 * header will also be flushed. Force the UNDO FIFO to 0-length.
	 */
	if (root_volume->io.recovered == 0) {
		hammer_ref_volume(root_volume);
		root_volume->io.recovered = 1;
	}
	hammer_modify_volume(NULL, root_volume, NULL, 0);
	rootmap = &root_volume->ondisk->vol0_blockmap[HAMMER_ZONE_UNDO_INDEX];
	rootmap->first_offset = last_offset;
	rootmap->next_offset = last_offset;
	hammer_modify_volume_done(root_volume);

	/*
	 * We have collected a large number of dirty buffers during the
	 * recovery, flush them all out. The root volume header will
	 * be flushed out last.
	 */
	if (hmp->ronly == 0 && error == 0)
		hammer_recover_flush_buffers(hmp, root_volume);
	kprintf("HAMMER(%s) End Recovery\n", root_volume->ondisk->vol_name);
	return (error);
}

static int
hammer_check_tail_signature(hammer_fifo_tail_t tail, hammer_off_t end_off)
{
	int max_bytes;

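	/*
	 * max_bytes is the distance from the beginning of the buffer
	 * containing the tail to end_off, i.e. the largest record size
	 * that can end at end_off without crossing a buffer boundary.
	 */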
	max_bytes = ((end_off - sizeof(*tail)) & HAMMER_BUFMASK);
	max_bytes += sizeof(*tail);

	/*
	 * tail overlaps buffer boundary
	 */
	if (((end_off - sizeof(*tail)) ^ (end_off - 1)) & ~HAMMER_BUFMASK64) {
		return(1);
	}

	/*
	 * Signature check.  The tail signature is allowed to be the head
	 * signature only for 8-byte PADs.  Any other signature indicates
	 * corruption.
	 */
	switch(tail->tail_signature) {
	case HAMMER_TAIL_SIGNATURE:
		break;
	case HAMMER_HEAD_SIGNATURE:
		if (tail->tail_type != HAMMER_HEAD_TYPE_PAD ||
		    tail->tail_size != sizeof(*tail)) {
			return(2);
		}
		break;
	default:
		return(2);
	}

	/*
	 * The undo structure must not overlap a buffer boundary.
	 */
	if (tail->tail_size < 0 || tail->tail_size > max_bytes) {
		return(3);
	}
	return(0);
}

static int
hammer_recover_undo(hammer_mount_t hmp, hammer_volume_t root_volume,
		    hammer_fifo_undo_t undo, int bytes)
{
	hammer_fifo_tail_t tail;
	hammer_volume_t volume;
	hammer_buffer_t buffer;
	hammer_off_t buf_offset;
	int zone;
	int error;
	int vol_no;
	int max_bytes;
	u_int32_t offset;
	u_int32_t crc;

	/*
	 * Basic sanity checks
	 */
	if (bytes < HAMMER_HEAD_ALIGN) {
		kprintf("HAMMER: Undo alignment error (%d)\n", bytes);
		return(EIO);
	}
	if (undo->head.hdr_signature != HAMMER_HEAD_SIGNATURE) {
		kprintf("HAMMER: Bad head signature %04x\n",
			undo->head.hdr_signature);
		return(EIO);
	}
	if (undo->head.hdr_size < HAMMER_HEAD_ALIGN ||
	    undo->head.hdr_size > bytes) {
		kprintf("HAMMER: Bad size %d\n", bytes);
		return(EIO);
	}

	/*
	 * Skip PAD records.  Note that PAD records also do not require
	 * a tail and may have a truncated structure.
	 */
	if (undo->head.hdr_type == HAMMER_HEAD_TYPE_PAD)
		return(0);

	/*
	 * Check the CRC
	 */
	crc = crc32(undo, HAMMER_FIFO_HEAD_CRCOFF) ^
	      crc32(&undo->head + 1, undo->head.hdr_size - sizeof(undo->head));
	if (undo->head.hdr_crc != crc) {
		kprintf("HAMMER: Undo record CRC failed %08x %08x\n",
			undo->head.hdr_crc, crc);
		return(EIO);
	}

	/*
	 * Check the tail
	 */
	bytes = undo->head.hdr_size;
	tail = (void *)((char *)undo + bytes - sizeof(*tail));
	if (tail->tail_size != undo->head.hdr_size) {
		kprintf("HAMMER: Bad tail size %d\n", tail->tail_size);
		return(EIO);
	}
	if (tail->tail_type != undo->head.hdr_type) {
		kprintf("HAMMER: Bad tail type %d\n", tail->tail_type);
		return(EIO);
	}

	/*
	 * Only process UNDO records
	 */
	if (undo->head.hdr_type != HAMMER_HEAD_TYPE_UNDO)
		return(0);

	/*
	 * Validate the UNDO record.
	 */
	max_bytes = undo->head.hdr_size - sizeof(*undo) - sizeof(*tail);
	if (undo->undo_data_bytes < 0 || undo->undo_data_bytes > max_bytes) {
		kprintf("HAMMER: Corrupt UNDO record, undo_data_bytes %d/%d\n",
			undo->undo_data_bytes, max_bytes);
		return(EIO);
	}

	/*
	 * The undo offset may only be a zone-1 (raw volume) or zone-2
	 * (raw buffer) offset.
	 *
	 * Currently the only zone-1 offsets supported are those which
	 * reference a volume header.
	 */
	zone = HAMMER_ZONE_DECODE(undo->undo_offset);
	offset = undo->undo_offset & HAMMER_BUFMASK;

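	/*
	 * The undo data must fit entirely within the buffer (or volume
	 * header) it applies to; it may not cross a HAMMER_BUFSIZE
	 * boundary.
	 */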
	if (offset + undo->undo_data_bytes > HAMMER_BUFSIZE) {
		kprintf("HAMMER: Corrupt UNDO record, bad offset\n");
		return (EIO);
	}

	switch(zone) {
	case HAMMER_ZONE_RAW_VOLUME_INDEX:
		vol_no = HAMMER_VOL_DECODE(undo->undo_offset);
		volume = hammer_get_volume(hmp, vol_no, &error);
		if (volume == NULL) {
			kprintf("HAMMER: UNDO record, "
				"cannot access volume %d\n", vol_no);
			break;
		}
		hammer_modify_volume(NULL, volume, NULL, 0);
		hammer_recover_copy_undo(undo->undo_offset,
					 (char *)(undo + 1),
					 (char *)volume->ondisk + offset,
					 undo->undo_data_bytes);
		hammer_modify_volume_done(volume);

		/*
		 * Multiple modifications may be made to the same volume
		 * header.  Also, the volume header cannot be written out
		 * until everything else has been flushed.  This also
		 * covers the read-only case by preventing the kernel from
		 * flushing the volume header.
		 */
		if (volume->io.recovered == 0)
			volume->io.recovered = 1;
		else
			hammer_rel_volume(volume, 0);
		break;
	case HAMMER_ZONE_RAW_BUFFER_INDEX:
		buf_offset = undo->undo_offset & ~HAMMER_BUFMASK64;
		buffer = hammer_get_buffer(hmp, buf_offset, HAMMER_BUFSIZE,
					   0, &error);
		if (buffer == NULL) {
			kprintf("HAMMER: UNDO record, "
				"cannot access buffer %016llx\n",
				undo->undo_offset);
			break;
		}
		hammer_modify_buffer(NULL, buffer, NULL, 0);
		hammer_recover_copy_undo(undo->undo_offset,
					 (char *)(undo + 1),
					 (char *)buffer->ondisk + offset,
					 undo->undo_data_bytes);
		hammer_modify_buffer_done(buffer);

		/*
		 * Multiple modifications may be made to the same buffer,
		 * improve performance by delaying the flush.  This also
		 * covers the read-only case by preventing the kernel from
		 * flushing the buffer.
		 */
		if (buffer->io.recovered == 0)
			buffer->io.recovered = 1;
		else
			hammer_rel_buffer(buffer, 0);
		break;
	default:
		kprintf("HAMMER: Corrupt UNDO record\n");
		error = EIO;
	}
	return (error);
}

static void
hammer_recover_copy_undo(hammer_off_t undo_offset,
			 char *src, char *dst, int bytes)
{
	if (hammer_debug_general & 0x0080)
		kprintf("UNDO %016llx: %d\n", undo_offset, bytes);
#if 0
	kprintf("UNDO %016llx:", undo_offset);
	hammer_recover_debug_dump(22, dst, bytes);
	kprintf("%22s", "to:");
	hammer_recover_debug_dump(22, src, bytes);
#endif
	bcopy(src, dst, bytes);
}

#if 0

static void
hammer_recover_debug_dump(int w, char *buf, int bytes)
{
	int i;

	for (i = 0; i < bytes; ++i) {
		if (i && (i & 15) == 0)
			kprintf("\n%*.*s", w, w, "");
		kprintf(" %02x", (unsigned char)buf[i]);
	}
	kprintf("\n");
}

#endif

/*
 * Flush recovered buffers from recovery operations.  The call to this
 * routine may be delayed if a read-only mount was made and then later
 * upgraded to read-write.
 *
 * The volume header is always written last.  The UNDO FIFO will be forced
 * to zero-length by setting next_offset to first_offset.  This leaves the
 * (now stale) UNDO information used to recover the disk available for
 * forensic analysis.
 */
static int hammer_recover_flush_volume_callback(hammer_volume_t, void *);
static int hammer_recover_flush_buffer_callback(hammer_buffer_t, void *);

void
hammer_recover_flush_buffers(hammer_mount_t hmp, hammer_volume_t root_volume)
{
	/*
	 * Flush the buffers out asynchronously, wait for all the I/O to
	 * complete, then do it again to destroy the buffer cache buffer
	 * so it doesn't alias something later on.
	 */
	RB_SCAN(hammer_buf_rb_tree, &hmp->rb_bufs_root, NULL,
		hammer_recover_flush_buffer_callback, NULL);
	hammer_io_wait_all(hmp, "hmrrcw");
	RB_SCAN(hammer_buf_rb_tree, &hmp->rb_bufs_root, NULL,
		hammer_recover_flush_buffer_callback, NULL);

	RB_SCAN(hammer_vol_rb_tree, &hmp->rb_vols_root, NULL,
		hammer_recover_flush_volume_callback, root_volume);

	/*
	 * Finally, deal with the volume header.
	 */
	if (root_volume->io.recovered) {
		crit_enter();
		while (hmp->io_running_count)
			tsleep(&hmp->io_running_count, 0, "hmrflx", 0);
		crit_exit();
		root_volume->io.recovered = 0;
		hammer_io_flush(&root_volume->io);
		hammer_rel_volume(root_volume, 0);
	}
}

static
int
hammer_recover_flush_volume_callback(hammer_volume_t volume, void *data)
{
	hammer_volume_t root_volume = data;

	if (volume->io.recovered && volume != root_volume) {
		volume->io.recovered = 0;
		hammer_io_flush(&volume->io);
		hammer_rel_volume(volume, 0);
	}
	return(0);
}

static
int
hammer_recover_flush_buffer_callback(hammer_buffer_t buffer, void *data)
{
	if (buffer->io.recovered) {
		buffer->io.recovered = 0;
		buffer->io.reclaim = 1;
		hammer_io_flush(&buffer->io);
		hammer_rel_buffer(buffer, 0);
	} else {
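		/*
		 * The buffer was brought in during recovery but never
		 * modified.  Reference it and release it with the reclaim
		 * flag set so the underlying buffer cache buffer is
		 * destroyed and does not alias something later on.
		 */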
		KKASSERT(buffer->io.lock.refs == 0);
		++hammer_count_refedbufs;
		hammer_ref(&buffer->io.lock);
		buffer->io.reclaim = 1;
		hammer_rel_buffer(buffer, 1);
	}
	return(0);
}