a0364f92cfd78841cdb2bd89facb593c74e5517f
[dragonfly.git] / sys / vfs / hammer / hammer_recover.c
1 /*
2  * Copyright (c) 2008 The DragonFly Project.  All rights reserved.
3  * 
4  * This code is derived from software contributed to The DragonFly Project
5  * by Matthew Dillon <dillon@backplane.com>
6  * 
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions
9  * are met:
10  * 
11  * 1. Redistributions of source code must retain the above copyright
12  *    notice, this list of conditions and the following disclaimer.
13  * 2. Redistributions in binary form must reproduce the above copyright
14  *    notice, this list of conditions and the following disclaimer in
15  *    the documentation and/or other materials provided with the
16  *    distribution.
17  * 3. Neither the name of The DragonFly Project nor the names of its
18  *    contributors may be used to endorse or promote products derived
19  *    from this software without specific, prior written permission.
20  * 
21  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
22  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
23  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
24  * FOR A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE
25  * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
26  * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING,
27  * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
28  * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
29  * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
30  * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
31  * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
32  * SUCH DAMAGE.
33  * 
34  * $DragonFly: src/sys/vfs/hammer/hammer_recover.c,v 1.26 2008/06/27 20:56:59 dillon Exp $
35  */
36
37 #include "hammer.h"
38
39 static int hammer_check_tail_signature(hammer_fifo_tail_t tail,
40                         hammer_off_t end_off);
41 static void hammer_recover_copy_undo(hammer_off_t undo_offset,
42                         char *src, char *dst, int bytes);
43 #if 0
44 static void hammer_recover_debug_dump(int w, char *buf, int bytes);
45 #endif
46 static int hammer_recover_undo(hammer_mount_t hmp, hammer_volume_t root_volume,
47                         hammer_fifo_undo_t undo, int bytes);
48
49 /*
50  * Recover a filesystem on mount
51  *
52  * NOTE: No information from the root volume has been cached in the
53  * hammer_mount structure yet, so we need to access the root volume's
54  * buffer directly.
55  */
int
hammer_recover(hammer_mount_t hmp, hammer_volume_t root_volume)
{
	hammer_blockmap_t rootmap;
	hammer_buffer_t buffer;
	hammer_off_t scan_offset;
	hammer_off_t bytes;
	hammer_fifo_tail_t tail;
	hammer_fifo_undo_t undo;
	hammer_off_t first_offset;
	hammer_off_t last_offset;
	int error;
	int reported = 0;

	/*
	 * Examine the UNDO FIFO.  If it is empty the filesystem is clean
	 * and no action need be taken.
	 */
	rootmap = &root_volume->ondisk->vol0_blockmap[HAMMER_ZONE_UNDO_INDEX];

	if (rootmap->first_offset == rootmap->next_offset)
		return(0);

	first_offset = rootmap->first_offset;
	last_offset  = rootmap->next_offset;

	/*
	 * Compute the number of live UNDO bytes.  The FIFO is circular:
	 * when next_offset has wrapped below first_offset the live region
	 * runs from first_offset to alloc_offset plus the wrapped prefix.
	 */
	if (last_offset >= first_offset) {
		bytes = last_offset - first_offset;
	} else {
		bytes = rootmap->alloc_offset - first_offset +
			(last_offset & HAMMER_OFF_LONG_MASK);
	}
	kprintf("HAMMER(%s) Start Recovery %016llx - %016llx "
		"(%lld bytes of UNDO)%s\n",
		root_volume->ondisk->vol_name,
		first_offset, last_offset,
		bytes,
		(hmp->ronly ? " (RO)" : "(RW)"));
	if (bytes > (rootmap->alloc_offset & HAMMER_OFF_LONG_MASK)) {
		kprintf("Undo size is absurd, unable to mount\n");
		return(EIO);
	}

	/*
	 * Scan the UNDOs backwards, from the most recent record (at
	 * last_offset) towards the oldest (at first_offset), applying
	 * each one as we go.
	 */
	scan_offset = last_offset;
	buffer = NULL;
	if (scan_offset > rootmap->alloc_offset) {
		kprintf("HAMMER(%s) UNDO record at %016llx FIFO overflow\n",
			root_volume->ondisk->vol_name,
			scan_offset);
		error = EIO;
		goto done;
	}

	while ((int64_t)bytes > 0) {
		if (hammer_debug_general & 0x0080)
			kprintf("scan_offset %016llx\n", scan_offset);

		/*
		 * Backwards wrap: when the scan reaches the beginning of
		 * the UNDO zone, continue from the end (alloc_offset).
		 */
		if (scan_offset == HAMMER_ZONE_ENCODE(HAMMER_ZONE_UNDO_INDEX, 0)) {
			scan_offset = rootmap->alloc_offset;
			continue;
		}
		if (scan_offset - sizeof(*tail) <
		    HAMMER_ZONE_ENCODE(HAMMER_ZONE_UNDO_INDEX, 0)) {
			kprintf("HAMMER(%s) UNDO record at %016llx FIFO "
				"underflow\n",
				root_volume->ondisk->vol_name,
				scan_offset);
			error = EIO;
			break;
		}

		/*
		 * Every FIFO record ends with a tail; read it to locate
		 * the start of the record that ends at scan_offset.
		 */
		tail = hammer_bread(hmp, scan_offset - sizeof(*tail),
				    &error, &buffer);
		if (error) {
			kprintf("HAMMER(%s) Unable to read UNDO TAIL "
				"at %016llx\n",
				root_volume->ondisk->vol_name,
				scan_offset - sizeof(*tail));
			break;
		}

		if (hammer_check_tail_signature(tail, scan_offset) != 0) {
			kprintf("HAMMER(%s) Illegal UNDO TAIL signature "
				"at %016llx\n",
				root_volume->ondisk->vol_name,
				scan_offset - sizeof(*tail));
			error = EIO;
			break;
		}
		undo = (void *)((char *)tail + sizeof(*tail) - tail->tail_size);

		/*
		 * The second argument bounds how many bytes of the record
		 * are addressable within this buffer.
		 */
		error = hammer_recover_undo(hmp, root_volume, undo,
				HAMMER_BUFSIZE -
				(int)((char *)undo - (char *)buffer->ondisk));
		if (error) {
			kprintf("HAMMER(%s) UNDO record at %016llx failed\n",
				root_volume->ondisk->vol_name,
				scan_offset - tail->tail_size);
			break;
		}
		scan_offset -= tail->tail_size;
		bytes -= tail->tail_size;

		/*
		 * If too many dirty buffers have built up, flush them out
		 * mid-recovery (R/W mounts only).  On a read-only mount we
		 * cannot flush; warn once and keep going.
		 */
		if (hammer_flusher_meta_limit(hmp)) {
			if (hmp->ronly == 0) {
				hammer_recover_flush_buffers(hmp, root_volume,
							     0);
				kprintf("HAMMER(%s) Continuing recovery\n",
					root_volume->ondisk->vol_name);
			} else if (reported == 0) {
				reported = 1;
				kprintf("HAMMER(%s) Recovery failure: Insufficient buffer cache to hold dirty buffers on read-only mount!\n",
					root_volume->ondisk->vol_name);
			}
		}
	}
done:
	if (buffer)
		hammer_rel_buffer(buffer, 0);

	/*
	 * After completely flushing all the recovered buffers the volume
	 * header will also be flushed.  Force the UNDO FIFO to 0-length.
	 */
	if (root_volume->io.recovered == 0) {
		hammer_ref_volume(root_volume);
		root_volume->io.recovered = 1;
	}
	hammer_modify_volume(NULL, root_volume, NULL, 0);
	rootmap = &root_volume->ondisk->vol0_blockmap[HAMMER_ZONE_UNDO_INDEX];
	rootmap->first_offset = last_offset;
	rootmap->next_offset = last_offset;
	hammer_modify_volume_done(root_volume);

	/*
	 * We have collected a large number of dirty buffers during the
	 * recovery, flush them all out.  The root volume header will
	 * be flushed out last.
	 */
	if (hmp->ronly == 0 && error == 0)
		hammer_recover_flush_buffers(hmp, root_volume, 1);
	kprintf("HAMMER(%s) End Recovery\n", root_volume->ondisk->vol_name);
	return (error);
}
204
205 static int
206 hammer_check_tail_signature(hammer_fifo_tail_t tail, hammer_off_t end_off)
207 {
208         int max_bytes;
209
210         max_bytes = ((end_off - sizeof(*tail)) & HAMMER_BUFMASK);
211         max_bytes += sizeof(*tail);
212
213         /*
214          * tail overlaps buffer boundary
215          */
216         if (((end_off - sizeof(*tail)) ^ (end_off - 1)) & ~HAMMER_BUFMASK64) {
217                 return(1);
218         }
219
220         /*
221          * signature check, the tail signature is allowed to be the head
222          * signature only for 8-byte PADs.
223          */
224         switch(tail->tail_signature) {
225         case HAMMER_TAIL_SIGNATURE:
226                 break;
227         case HAMMER_HEAD_SIGNATURE:
228                 if (tail->tail_type != HAMMER_HEAD_TYPE_PAD ||
229                     tail->tail_size != sizeof(*tail)) {
230                         return(2);
231                 }
232                 break;
233         }
234
235         /*
236          * The undo structure must not overlap a buffer boundary.
237          */
238         if (tail->tail_size < 0 || tail->tail_size > max_bytes) {
239                 return(3);
240         }
241         return(0);
242 }
243
/*
 * Validate and apply a single UNDO record.  'bytes' is the number of
 * bytes of the record addressable within the caller's buffer, used only
 * for sanity-checking hdr_size.  Returns 0 on success (including benign
 * skips of PAD and non-UNDO records) or EIO on any validation failure.
 */
static int
hammer_recover_undo(hammer_mount_t hmp, hammer_volume_t root_volume,
		    hammer_fifo_undo_t undo, int bytes)
{
	hammer_fifo_tail_t tail;
	hammer_volume_t volume;
	hammer_buffer_t buffer;
	hammer_off_t buf_offset;
	int zone;
	int error;
	int vol_no;
	int max_bytes;
	u_int32_t offset;
	u_int32_t crc;

	/*
	 * Basic sanity checks
	 */
	if (bytes < HAMMER_HEAD_ALIGN) {
		kprintf("HAMMER: Undo alignment error (%d)\n", bytes);
		return(EIO);
	}
	if (undo->head.hdr_signature != HAMMER_HEAD_SIGNATURE) {
		kprintf("HAMMER: Bad head signature %04x\n", 
			undo->head.hdr_signature);
		return(EIO);
	}
	if (undo->head.hdr_size < HAMMER_HEAD_ALIGN ||
	    undo->head.hdr_size > bytes) {
		kprintf("HAMMER: Bad size %d\n", bytes);
		return(EIO);
	}

	/*
	 * Skip PAD records.  Note that PAD records also do not require
	 * a tail and may have a truncated structure.
	 */
	if (undo->head.hdr_type == HAMMER_HEAD_TYPE_PAD)
		return(0);

	/*
	 * Check the CRC.  The CRC covers the head up to the crc field
	 * plus everything after the head, XORed together.
	 */
	crc = crc32(undo, HAMMER_FIFO_HEAD_CRCOFF) ^
	      crc32(&undo->head + 1, undo->head.hdr_size - sizeof(undo->head));
	if (undo->head.hdr_crc != crc) {
		kprintf("HAMMER: Undo record CRC failed %08x %08x\n",
			undo->head.hdr_crc, crc);
		return(EIO);
	}


	/*
	 * Check the tail.  The tail's size and type must mirror the head.
	 */
	bytes = undo->head.hdr_size;
	tail = (void *)((char *)undo + bytes - sizeof(*tail));
	if (tail->tail_size != undo->head.hdr_size) {
		kprintf("HAMMER: Bad tail size %d\n", tail->tail_size);
		return(EIO);
	}
	if (tail->tail_type != undo->head.hdr_type) {
		kprintf("HAMMER: Bad tail type %d\n", tail->tail_type);
		return(EIO);
	}

	/*
	 * Only process UNDO records
	 */
	if (undo->head.hdr_type != HAMMER_HEAD_TYPE_UNDO)
		return(0);

	/*
	 * Validate the UNDO record: the payload must fit between the
	 * fixed-size undo header and the tail.
	 */
	max_bytes = undo->head.hdr_size - sizeof(*undo) - sizeof(*tail);
	if (undo->undo_data_bytes < 0 || undo->undo_data_bytes > max_bytes) {
		kprintf("HAMMER: Corrupt UNDO record, undo_data_bytes %d/%d\n",
			undo->undo_data_bytes, max_bytes);
		return(EIO);
	}

	/*
	 * The undo offset may only be a zone-1 or zone-2 offset.
	 *
	 * Currently we only support a zone-1 offset representing the
	 * volume header.
	 */
	zone = HAMMER_ZONE_DECODE(undo->undo_offset);
	offset = undo->undo_offset & HAMMER_BUFMASK;

	/* the copy target must not cross a buffer boundary */
	if (offset + undo->undo_data_bytes > HAMMER_BUFSIZE) {
		kprintf("HAMMER: Corrupt UNDO record, bad offset\n");
		return (EIO);
	}

	switch(zone) {
	case HAMMER_ZONE_RAW_VOLUME_INDEX:
		vol_no = HAMMER_VOL_DECODE(undo->undo_offset);
		volume = hammer_get_volume(hmp, vol_no, &error);
		if (volume == NULL) {
			kprintf("HAMMER: UNDO record, "
				"cannot access volume %d\n", vol_no);
			break;
		}
		hammer_modify_volume(NULL, volume, NULL, 0);
		hammer_recover_copy_undo(undo->undo_offset,
					 (char *)(undo + 1),
					 (char *)volume->ondisk + offset,
					 undo->undo_data_bytes);
		hammer_modify_volume_done(volume);

		/*
		 * Multiple modifications may be made to the same buffer.
		 * Also, the volume header cannot be written out until
		 * everything else has been flushed.  This also
		 * covers the read-only case by preventing the kernel from
		 * flushing the buffer.
		 */
		if (volume->io.recovered == 0)
			volume->io.recovered = 1;
		else
			hammer_rel_volume(volume, 0);
		break;
	case HAMMER_ZONE_RAW_BUFFER_INDEX:
		buf_offset = undo->undo_offset & ~HAMMER_BUFMASK64;
		buffer = hammer_get_buffer(hmp, buf_offset, HAMMER_BUFSIZE,
					   0, &error);
		if (buffer == NULL) {
			kprintf("HAMMER: UNDO record, "
				"cannot access buffer %016llx\n",
				undo->undo_offset);
			break;
		}
		hammer_modify_buffer(NULL, buffer, NULL, 0);
		hammer_recover_copy_undo(undo->undo_offset,
					 (char *)(undo + 1),
					 (char *)buffer->ondisk + offset,
					 undo->undo_data_bytes);
		hammer_modify_buffer_done(buffer);

		/*
		 * Multiple modifications may be made to the same buffer,
		 * improve performance by delaying the flush.  This also
		 * covers the read-only case by preventing the kernel from
		 * flushing the buffer.
		 */
		if (buffer->io.recovered == 0)
			buffer->io.recovered = 1;
		else
			hammer_rel_buffer(buffer, 0);
		break;
	default:
		kprintf("HAMMER: Corrupt UNDO record\n");
		error = EIO;
	}
	return (error);
}
402
403 static void
404 hammer_recover_copy_undo(hammer_off_t undo_offset, 
405                          char *src, char *dst, int bytes)
406 {
407         if (hammer_debug_general & 0x0080)
408                 kprintf("UNDO %016llx: %d\n", undo_offset, bytes);
409 #if 0
410         kprintf("UNDO %016llx:", undo_offset);
411         hammer_recover_debug_dump(22, dst, bytes);
412         kprintf("%22s", "to:");
413         hammer_recover_debug_dump(22, src, bytes);
414 #endif
415         bcopy(src, dst, bytes);
416 }
417
#if 0

/*
 * Debug helper (currently compiled out): hex-dump 'bytes' bytes of buf,
 * 16 bytes per line, indenting continuation lines to column 'w'.
 */
static void
hammer_recover_debug_dump(int w, char *buf, int bytes)
{
	int i;

	for (i = 0; i < bytes; ++i) {
		if (i && (i & 15) == 0)
			kprintf("\n%*.*s", w, w, "");
		kprintf(" %02x", (unsigned char)buf[i]);
	}
	kprintf("\n");
}

#endif
434
/*
 * Flush recovered buffers from recovery operations.  The call to this
 * routine may be delayed if a read-only mount was made and then later
 * upgraded to read-write.
 *
 * The volume header is always written last.  The UNDO FIFO will be forced
 * to zero-length by setting next_offset to first_offset.  This leaves the
 * (now stale) UNDO information used to recover the disk available for
 * forensic analysis.
 */
static int hammer_recover_flush_volume_callback(hammer_volume_t, void *);
static int hammer_recover_flush_buffer_callback(hammer_buffer_t, void *);

void
hammer_recover_flush_buffers(hammer_mount_t hmp, hammer_volume_t root_volume,
			     int final)
{
	/*
	 * Flush the buffers out asynchronously, wait for all the I/O to
	 * complete, then do it again to destroy the buffer cache buffer
	 * so it doesn't alias something later on.
	 */
	RB_SCAN(hammer_buf_rb_tree, &hmp->rb_bufs_root, NULL,
		hammer_recover_flush_buffer_callback, NULL);
	hammer_io_wait_all(hmp, "hmrrcw");
	RB_SCAN(hammer_buf_rb_tree, &hmp->rb_bufs_root, NULL,
		hammer_recover_flush_buffer_callback, NULL);

	/*
	 * Flush all recovered volume headers except the root volume's,
	 * which is deliberately skipped by the callback and handled below.
	 */
	RB_SCAN(hammer_vol_rb_tree, &hmp->rb_vols_root, NULL,
		hammer_recover_flush_volume_callback, root_volume);

	/*
	 * Finally, deal with the volume header.  Wait for all other
	 * pending I/O to drain first so the header truly goes out last.
	 */
	if (root_volume->io.recovered && final) {
		crit_enter();
		while (hmp->io_running_count)
			tsleep(&hmp->io_running_count, 0, "hmrflx", 0);
		crit_exit();
		root_volume->io.recovered = 0;
		hammer_io_flush(&root_volume->io);
		hammer_rel_volume(root_volume, 0);
	}
}
479
480 static
481 int
482 hammer_recover_flush_volume_callback(hammer_volume_t volume, void *data)
483 {
484         hammer_volume_t root_volume = data;
485
486         if (volume->io.recovered && volume != root_volume) {
487                 volume->io.recovered = 0;
488                 hammer_io_flush(&volume->io);
489                 hammer_rel_volume(volume, 0);
490         }
491         return(0);
492 }
493
494 static
495 int
496 hammer_recover_flush_buffer_callback(hammer_buffer_t buffer, void *data)
497 {
498         if (buffer->io.recovered) {
499                 buffer->io.recovered = 0;
500                 buffer->io.reclaim = 1;
501                 hammer_io_flush(&buffer->io);
502                 hammer_rel_buffer(buffer, 0);
503         } else {
504                 KKASSERT(buffer->io.lock.refs == 0);
505                 ++hammer_count_refedbufs;
506                 hammer_ref(&buffer->io.lock);
507                 buffer->io.reclaim = 1;
508                 hammer_rel_buffer(buffer, 1);
509         }
510         return(0);
511 }
512