HAMMER 56A/Many: Performance tuning - MEDIA STRUCTURES CHANGED!
[dragonfly.git] / sys / vfs / hammer / hammer_recover.c
1 /*
2  * Copyright (c) 2008 The DragonFly Project.  All rights reserved.
3  * 
4  * This code is derived from software contributed to The DragonFly Project
5  * by Matthew Dillon <dillon@backplane.com>
6  * 
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions
9  * are met:
10  * 
11  * 1. Redistributions of source code must retain the above copyright
12  *    notice, this list of conditions and the following disclaimer.
13  * 2. Redistributions in binary form must reproduce the above copyright
14  *    notice, this list of conditions and the following disclaimer in
15  *    the documentation and/or other materials provided with the
16  *    distribution.
17  * 3. Neither the name of The DragonFly Project nor the names of its
18  *    contributors may be used to endorse or promote products derived
19  *    from this software without specific, prior written permission.
20  * 
21  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
22  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
23  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
24  * FOR A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE
25  * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
26  * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING,
27  * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
28  * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
29  * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
30  * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
31  * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
32  * SUCH DAMAGE.
33  * 
34  * $DragonFly: src/sys/vfs/hammer/hammer_recover.c,v 1.24 2008/06/11 22:33:21 dillon Exp $
35  */
36
37 #include "hammer.h"
38
39 static int hammer_check_tail_signature(hammer_fifo_tail_t tail,
40                         hammer_off_t end_off);
41 static void hammer_recover_copy_undo(hammer_off_t undo_offset,
42                         char *src, char *dst, int bytes);
43 #if 0
44 static void hammer_recover_debug_dump(int w, char *buf, int bytes);
45 #endif
46 static int hammer_recover_undo(hammer_mount_t hmp, hammer_volume_t root_volume,
47                         hammer_fifo_undo_t undo, int bytes);
48
49 /*
50  * Recover a filesystem on mount
51  *
52  * NOTE: No information from the root volume has been cached in the
53  * hammer_mount structure yet, so we need to access the root volume's
54  * buffer directly.
55  */
56 int
57 hammer_recover(hammer_mount_t hmp, hammer_volume_t root_volume)
58 {
59         hammer_blockmap_t rootmap;
60         hammer_buffer_t buffer;
61         hammer_off_t scan_offset;
62         hammer_off_t bytes;
63         hammer_fifo_tail_t tail;
64         hammer_fifo_undo_t undo;
65         hammer_off_t first_offset;
66         hammer_off_t last_offset;
67         int error;
68
69         /*
70          * Examine the UNDO FIFO.  If it is empty the filesystem is clean
71          * and no action need be taken.
72          */
73         rootmap = &root_volume->ondisk->vol0_blockmap[HAMMER_ZONE_UNDO_INDEX];
74
75         if (rootmap->first_offset == rootmap->next_offset)
76                 return(0);
77
78         first_offset = rootmap->first_offset;
79         last_offset  = rootmap->next_offset;
80
81         if (last_offset >= first_offset) {
82                 bytes = last_offset - first_offset;
83         } else {
84                 bytes = rootmap->alloc_offset - first_offset +
85                         (last_offset & HAMMER_OFF_LONG_MASK);
86         }
87         kprintf("HAMMER(%s) Start Recovery %016llx - %016llx "
88                 "(%lld bytes of UNDO)%s\n",
89                 root_volume->ondisk->vol_name,
90                 first_offset, last_offset,
91                 bytes,
92                 (hmp->ronly ? " (RO)" : "(RW)"));
93         if (bytes > (rootmap->alloc_offset & HAMMER_OFF_LONG_MASK)) {
94                 kprintf("Undo size is absurd, unable to mount\n");
95                 return(EIO);
96         }
97
98         /*
99          * Scan the UNDOs backwards.
100          */
101         scan_offset = last_offset;
102         buffer = NULL;
103         if (scan_offset > rootmap->alloc_offset) {
104                 kprintf("HAMMER(%s) UNDO record at %016llx FIFO overflow\n",
105                         root_volume->ondisk->vol_name,
106                         scan_offset);
107                 error = EIO;
108                 goto done;
109         }
110
111         while ((int64_t)bytes > 0) {
112                 if (hammer_debug_general & 0x0080)
113                         kprintf("scan_offset %016llx\n", scan_offset);
114                 if (scan_offset == HAMMER_ZONE_ENCODE(HAMMER_ZONE_UNDO_INDEX, 0)) {
115                         scan_offset = rootmap->alloc_offset;
116                         continue;
117                 }
118                 if (scan_offset - sizeof(*tail) <
119                     HAMMER_ZONE_ENCODE(HAMMER_ZONE_UNDO_INDEX, 0)) {
120                         kprintf("HAMMER(%s) UNDO record at %016llx FIFO "
121                                 "underflow\n",
122                                 root_volume->ondisk->vol_name,
123                                 scan_offset);
124                         error = EIO;
125                         break;
126                 }
127                 tail = hammer_bread(hmp, scan_offset - sizeof(*tail),
128                                     &error, &buffer);
129                 if (error) {
130                         kprintf("HAMMER(%s) Unable to read UNDO TAIL "
131                                 "at %016llx\n",
132                                 root_volume->ondisk->vol_name,
133                                 scan_offset - sizeof(*tail));
134                         break;
135                 }
136
137                 if (hammer_check_tail_signature(tail, scan_offset) != 0) {
138                         kprintf("HAMMER(%s) Illegal UNDO TAIL signature "
139                                 "at %016llx\n",
140                                 root_volume->ondisk->vol_name,
141                                 scan_offset - sizeof(*tail));
142                         error = EIO;
143                         break;
144                 }
145                 undo = (void *)((char *)tail + sizeof(*tail) - tail->tail_size);
146
147                 error = hammer_recover_undo(hmp, root_volume, undo,
148                                 HAMMER_BUFSIZE -
149                                 (int)((char *)undo - (char *)buffer->ondisk));
150                 if (error) {
151                         kprintf("HAMMER(%s) UNDO record at %016llx failed\n",
152                                 root_volume->ondisk->vol_name,
153                                 scan_offset - tail->tail_size);
154                         break;
155                 }
156                 scan_offset -= tail->tail_size;
157                 bytes -= tail->tail_size;
158         }
159 done:
160         if (buffer)
161                 hammer_rel_buffer(buffer, 0);
162
163         /*
164          * After completely flushing all the recovered buffers the volume
165          * header will also be flushed.  Force the UNDO FIFO to 0-length.
166          */
167         if (root_volume->io.recovered == 0) {
168                 hammer_ref_volume(root_volume);
169                 root_volume->io.recovered = 1;
170         }
171         hammer_modify_volume(NULL, root_volume, NULL, 0);
172         rootmap = &root_volume->ondisk->vol0_blockmap[HAMMER_ZONE_UNDO_INDEX];
173         rootmap->first_offset = last_offset;
174         rootmap->next_offset = last_offset;
175         hammer_modify_volume_done(root_volume);
176
177         /*
178          * We have collected a large number of dirty buffers during the
179          * recovery, flush them all out.  The root volume header will
180          * be flushed out last.
181          */
182         if (hmp->ronly == 0 && error == 0)
183                 hammer_recover_flush_buffers(hmp, root_volume);
184         kprintf("HAMMER(%s) End Recovery\n", root_volume->ondisk->vol_name);
185         return (error);
186 }
187
188 static int
189 hammer_check_tail_signature(hammer_fifo_tail_t tail, hammer_off_t end_off)
190 {
191         int max_bytes;
192
193         max_bytes = ((end_off - sizeof(*tail)) & HAMMER_BUFMASK);
194         max_bytes += sizeof(*tail);
195
196         /*
197          * tail overlaps buffer boundary
198          */
199         if (((end_off - sizeof(*tail)) ^ (end_off - 1)) & ~HAMMER_BUFMASK64) {
200                 return(1);
201         }
202
203         /*
204          * signature check, the tail signature is allowed to be the head
205          * signature only for 8-byte PADs.
206          */
207         switch(tail->tail_signature) {
208         case HAMMER_TAIL_SIGNATURE:
209                 break;
210         case HAMMER_HEAD_SIGNATURE:
211                 if (tail->tail_type != HAMMER_HEAD_TYPE_PAD ||
212                     tail->tail_size != sizeof(*tail)) {
213                         return(2);
214                 }
215                 break;
216         }
217
218         /*
219          * The undo structure must not overlap a buffer boundary.
220          */
221         if (tail->tail_size < 0 || tail->tail_size > max_bytes) {
222                 return(3);
223         }
224         return(0);
225 }
226
227 static int
228 hammer_recover_undo(hammer_mount_t hmp, hammer_volume_t root_volume,
229                     hammer_fifo_undo_t undo, int bytes)
230 {
231         hammer_fifo_tail_t tail;
232         hammer_volume_t volume;
233         hammer_buffer_t buffer;
234         hammer_off_t buf_offset;
235         int zone;
236         int error;
237         int vol_no;
238         int max_bytes;
239         u_int32_t offset;
240         u_int32_t crc;
241
242         /*
243          * Basic sanity checks
244          */
245         if (bytes < HAMMER_HEAD_ALIGN) {
246                 kprintf("HAMMER: Undo alignment error (%d)\n", bytes);
247                 return(EIO);
248         }
249         if (undo->head.hdr_signature != HAMMER_HEAD_SIGNATURE) {
250                 kprintf("HAMMER: Bad head signature %04x\n", 
251                         undo->head.hdr_signature);
252                 return(EIO);
253         }
254         if (undo->head.hdr_size < HAMMER_HEAD_ALIGN ||
255             undo->head.hdr_size > bytes) {
256                 kprintf("HAMMER: Bad size %d\n", bytes);
257                 return(EIO);
258         }
259
260         /*
261          * Skip PAD records.  Note that PAD records also do not require
262          * a tail and may have a truncated structure.
263          */
264         if (undo->head.hdr_type == HAMMER_HEAD_TYPE_PAD)
265                 return(0);
266
267         /*
268          * Check the CRC
269          */
270         crc = crc32(undo, HAMMER_FIFO_HEAD_CRCOFF) ^
271               crc32(&undo->head + 1, undo->head.hdr_size - sizeof(undo->head));
272         if (undo->head.hdr_crc != crc) {
273                 kprintf("HAMMER: Undo record CRC failed %08x %08x\n",
274                         undo->head.hdr_crc, crc);
275                 return(EIO);
276         }
277
278
279         /*
280          * Check the tail
281          */
282         bytes = undo->head.hdr_size;
283         tail = (void *)((char *)undo + bytes - sizeof(*tail));
284         if (tail->tail_size != undo->head.hdr_size) {
285                 kprintf("HAMMER: Bad tail size %d\n", tail->tail_size);
286                 return(EIO);
287         }
288         if (tail->tail_type != undo->head.hdr_type) {
289                 kprintf("HAMMER: Bad tail type %d\n", tail->tail_type);
290                 return(EIO);
291         }
292
293         /*
294          * Only process UNDO records
295          */
296         if (undo->head.hdr_type != HAMMER_HEAD_TYPE_UNDO)
297                 return(0);
298
299         /*
300          * Validate the UNDO record.
301          */
302         max_bytes = undo->head.hdr_size - sizeof(*undo) - sizeof(*tail);
303         if (undo->undo_data_bytes < 0 || undo->undo_data_bytes > max_bytes) {
304                 kprintf("HAMMER: Corrupt UNDO record, undo_data_bytes %d/%d\n",
305                         undo->undo_data_bytes, max_bytes);
306                 return(EIO);
307         }
308
309         /*
310          * The undo offset may only be a zone-1 or zone-2 offset.
311          *
312          * Currently we only support a zone-1 offset representing the
313          * volume header.
314          */
315         zone = HAMMER_ZONE_DECODE(undo->undo_offset);
316         offset = undo->undo_offset & HAMMER_BUFMASK;
317
318         if (offset + undo->undo_data_bytes > HAMMER_BUFSIZE) {
319                 kprintf("HAMMER: Corrupt UNDO record, bad offset\n");
320                 return (EIO);
321         }
322
323         switch(zone) {
324         case HAMMER_ZONE_RAW_VOLUME_INDEX:
325                 vol_no = HAMMER_VOL_DECODE(undo->undo_offset);
326                 volume = hammer_get_volume(hmp, vol_no, &error);
327                 if (volume == NULL) {
328                         kprintf("HAMMER: UNDO record, "
329                                 "cannot access volume %d\n", vol_no);
330                         break;
331                 }
332                 hammer_modify_volume(NULL, volume, NULL, 0);
333                 hammer_recover_copy_undo(undo->undo_offset,
334                                          (char *)(undo + 1),
335                                          (char *)volume->ondisk + offset,
336                                          undo->undo_data_bytes);
337                 hammer_modify_volume_done(volume);
338
339                 /*
340                  * Multiple modifications may be made to the same buffer.
341                  * Also, the volume header cannot be written out until
342                  * everything else has been flushed.  This also
343                  * covers the read-only case by preventing the kernel from
344                  * flushing the buffer.
345                  */
346                 if (volume->io.recovered == 0)
347                         volume->io.recovered = 1;
348                 else
349                         hammer_rel_volume(volume, 0);
350                 break;
351         case HAMMER_ZONE_RAW_BUFFER_INDEX:
352                 buf_offset = undo->undo_offset & ~HAMMER_BUFMASK64;
353                 buffer = hammer_get_buffer(hmp, buf_offset, 0, &error);
354                 if (buffer == NULL) {
355                         kprintf("HAMMER: UNDO record, "
356                                 "cannot access buffer %016llx\n",
357                                 undo->undo_offset);
358                         break;
359                 }
360                 hammer_modify_buffer(NULL, buffer, NULL, 0);
361                 hammer_recover_copy_undo(undo->undo_offset,
362                                          (char *)(undo + 1),
363                                          (char *)buffer->ondisk + offset,
364                                          undo->undo_data_bytes);
365                 hammer_modify_buffer_done(buffer);
366
367                 /*
368                  * Multiple modifications may be made to the same buffer,
369                  * improve performance by delaying the flush.  This also
370                  * covers the read-only case by preventing the kernel from
371                  * flushing the buffer.
372                  */
373                 if (buffer->io.recovered == 0)
374                         buffer->io.recovered = 1;
375                 else
376                         hammer_rel_buffer(buffer, 0);
377                 break;
378         default:
379                 kprintf("HAMMER: Corrupt UNDO record\n");
380                 error = EIO;
381         }
382         return (error);
383 }
384
385 static void
386 hammer_recover_copy_undo(hammer_off_t undo_offset, 
387                          char *src, char *dst, int bytes)
388 {
389         if (hammer_debug_general & 0x0080)
390                 kprintf("UNDO %016llx: %d\n", undo_offset, bytes);
391 #if 0
392         kprintf("UNDO %016llx:", undo_offset);
393         hammer_recover_debug_dump(22, dst, bytes);
394         kprintf("%22s", "to:");
395         hammer_recover_debug_dump(22, src, bytes);
396 #endif
397         bcopy(src, dst, bytes);
398 }
399
#if 0

/*
 * Hex dump bytes bytes of buf, 16 per line.  Continuation lines are
 * indented by w blanks.  Compiled out; only referenced from the disabled
 * trace code in hammer_recover_copy_undo().
 */
static void
hammer_recover_debug_dump(int w, char *buf, int bytes)
{
	int pos;

	pos = 0;
	while (pos < bytes) {
		/* start a new, indented line every 16 bytes */
		if (pos != 0 && (pos & 15) == 0) {
			kprintf("\n%*.*s", w, w, "");
		}
		kprintf(" %02x", (unsigned char)buf[pos]);
		++pos;
	}
	kprintf("\n");
}

#endif
416
417 /*
418  * Flush recovered buffers from recovery operations.  The call to this
419  * routine may be delayed if a read-only mount was made and then later
420  * upgraded to read-write.
421  *
422  * The volume header is always written last.  The UNDO FIFO will be forced
423  * to zero-length by setting next_offset to first_offset.  This leaves the
424  * (now stale) UNDO information used to recover the disk available for
425  * forensic analysis.
426  */
427 static int hammer_recover_flush_volume_callback(hammer_volume_t, void *);
428 static int hammer_recover_flush_buffer_callback(hammer_buffer_t, void *);
429
430 void
431 hammer_recover_flush_buffers(hammer_mount_t hmp, hammer_volume_t root_volume)
432 {
433         /*
434          * Flush the buffers out asynchronously, wait for all the I/O to
435          * complete, then do it again to destroy the buffer cache buffer
436          * so it doesn't alias something later on.
437          */
438         RB_SCAN(hammer_buf_rb_tree, &hmp->rb_bufs_root, NULL,
439                 hammer_recover_flush_buffer_callback, NULL);
440         hammer_io_wait_all(hmp, "hmrrcw");
441         RB_SCAN(hammer_buf_rb_tree, &hmp->rb_bufs_root, NULL,
442                 hammer_recover_flush_buffer_callback, NULL);
443
444         RB_SCAN(hammer_vol_rb_tree, &hmp->rb_vols_root, NULL,
445                 hammer_recover_flush_volume_callback, root_volume);
446
447         /*
448          * Finaly, deal with the volume header.
449          */
450         if (root_volume->io.recovered) {
451                 crit_enter();
452                 while (hmp->io_running_count)
453                         tsleep(&hmp->io_running_count, 0, "hmrflx", 0);
454                 crit_exit();
455                 root_volume->io.recovered = 0;
456                 hammer_io_flush(&root_volume->io);
457                 hammer_rel_volume(root_volume, 0);
458         }
459 }
460
461 static
462 int
463 hammer_recover_flush_volume_callback(hammer_volume_t volume, void *data)
464 {
465         hammer_volume_t root_volume = data;
466
467         if (volume->io.recovered && volume != root_volume) {
468                 volume->io.recovered = 0;
469                 hammer_io_flush(&volume->io);
470                 hammer_rel_volume(volume, 0);
471         }
472         return(0);
473 }
474
475 static
476 int
477 hammer_recover_flush_buffer_callback(hammer_buffer_t buffer, void *data)
478 {
479         if (buffer->io.recovered) {
480                 buffer->io.recovered = 0;
481                 buffer->io.reclaim = 1;
482                 hammer_io_flush(&buffer->io);
483                 hammer_rel_buffer(buffer, 0);
484         } else {
485                 KKASSERT(buffer->io.lock.refs == 0);
486                 ++hammer_count_refedbufs;
487                 hammer_ref(&buffer->io.lock);
488                 buffer->io.reclaim = 1;
489                 hammer_rel_buffer(buffer, 1);
490         }
491         return(0);
492 }
493