HAMMER 53A/Many: Read and write performance enhancements, etc.
[dragonfly.git] / sys / vfs / hammer / hammer_recover.c
1 /*
2  * Copyright (c) 2008 The DragonFly Project.  All rights reserved.
3  * 
4  * This code is derived from software contributed to The DragonFly Project
5  * by Matthew Dillon <dillon@backplane.com>
6  * 
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions
9  * are met:
10  * 
11  * 1. Redistributions of source code must retain the above copyright
12  *    notice, this list of conditions and the following disclaimer.
13  * 2. Redistributions in binary form must reproduce the above copyright
14  *    notice, this list of conditions and the following disclaimer in
15  *    the documentation and/or other materials provided with the
16  *    distribution.
17  * 3. Neither the name of The DragonFly Project nor the names of its
18  *    contributors may be used to endorse or promote products derived
19  *    from this software without specific, prior written permission.
20  * 
21  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
22  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
23  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
24  * FOR A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE
25  * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
26  * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING,
27  * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
28  * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
29  * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
30  * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
31  * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
32  * SUCH DAMAGE.
33  * 
34  * $DragonFly: src/sys/vfs/hammer/hammer_recover.c,v 1.20 2008/06/07 07:41:51 dillon Exp $
35  */
36
37 #include "hammer.h"
38
39 static int hammer_check_tail_signature(hammer_fifo_tail_t tail,
40                         hammer_off_t end_off);
41 static void hammer_recover_copy_undo(hammer_off_t undo_offset,
42                         char *src, char *dst, int bytes);
43 #if 0
44 static void hammer_recover_debug_dump(int w, char *buf, int bytes);
45 #endif
46 static int hammer_recover_undo(hammer_mount_t hmp, hammer_volume_t root_volume,
47                         hammer_fifo_undo_t undo, int bytes);
48
49 /*
50  * Recover a filesystem on mount
51  *
52  * NOTE: No information from the root volume has been cached in the
53  * hammer_mount structure yet, so we need to access the root volume's
54  * buffer directly.
55  */
56 int
57 hammer_recover(hammer_mount_t hmp, hammer_volume_t root_volume)
58 {
59         hammer_blockmap_t rootmap;
60         hammer_buffer_t buffer;
61         hammer_off_t scan_offset;
62         hammer_off_t bytes;
63         hammer_fifo_tail_t tail;
64         hammer_fifo_undo_t undo;
65         int error;
66
67         /*
68          * Examine the UNDO FIFO.  If it is empty the filesystem is clean
69          * and no action need be taken.
70          *
71          * NOTE: hmp->blockmap has not been initialized yet so use the
72          * root volume's ondisk buffer directly.
73          */
74         rootmap = &root_volume->ondisk->vol0_blockmap[HAMMER_ZONE_UNDO_INDEX];
75         hmp->flusher_undo_start = rootmap->next_offset;
76
77         if (rootmap->first_offset == rootmap->next_offset)
78                 return(0);
79
80         if (rootmap->next_offset >= rootmap->first_offset) {
81                 bytes = rootmap->next_offset - rootmap->first_offset;
82         } else {
83                 bytes = rootmap->alloc_offset - rootmap->first_offset +
84                         (rootmap->next_offset & HAMMER_OFF_LONG_MASK);
85         }
86         kprintf("HAMMER(%s) Start Recovery %016llx - %016llx "
87                 "(%lld bytes of UNDO)%s\n",
88                 root_volume->ondisk->vol_name,
89                 rootmap->first_offset, rootmap->next_offset,
90                 bytes,
91                 (hmp->ronly ? " (RO)" : "(RW)"));
92         if (bytes > (rootmap->alloc_offset & HAMMER_OFF_LONG_MASK)) {
93                 kprintf("Undo size is absurd, unable to mount\n");
94                 return(EIO);
95         }
96
97         /*
98          * Scan the UNDOs backwards.
99          */
100         scan_offset = rootmap->next_offset;
101         buffer = NULL;
102         if (scan_offset > rootmap->alloc_offset) {
103                 kprintf("HAMMER(%s) UNDO record at %016llx FIFO overflow\n",
104                         root_volume->ondisk->vol_name,
105                         scan_offset);
106                 error = EIO;
107                 goto done;
108         }
109
110         while ((int64_t)bytes > 0) {
111                 if (hammer_debug_general & 0x0080)
112                         kprintf("scan_offset %016llx\n", scan_offset);
113                 if (scan_offset == HAMMER_ZONE_ENCODE(HAMMER_ZONE_UNDO_INDEX, 0)) {
114                         scan_offset = rootmap->alloc_offset;
115                         continue;
116                 }
117                 if (scan_offset - sizeof(*tail) <
118                     HAMMER_ZONE_ENCODE(HAMMER_ZONE_UNDO_INDEX, 0)) {
119                         kprintf("HAMMER(%s) UNDO record at %016llx FIFO "
120                                 "underflow\n",
121                                 root_volume->ondisk->vol_name,
122                                 scan_offset);
123                         error = EIO;
124                         break;
125                 }
126                 tail = hammer_bread(hmp, scan_offset - sizeof(*tail),
127                                     &error, &buffer);
128                 if (error) {
129                         kprintf("HAMMER(%s) Unable to read UNDO TAIL "
130                                 "at %016llx\n",
131                                 root_volume->ondisk->vol_name,
132                                 scan_offset - sizeof(*tail));
133                         break;
134                 }
135
136                 if (hammer_check_tail_signature(tail, scan_offset) != 0) {
137                         kprintf("HAMMER(%s) Illegal UNDO TAIL signature "
138                                 "at %016llx\n",
139                                 root_volume->ondisk->vol_name,
140                                 scan_offset - sizeof(*tail));
141                         error = EIO;
142                         break;
143                 }
144                 undo = (void *)((char *)tail + sizeof(*tail) - tail->tail_size);
145
146                 error = hammer_recover_undo(hmp, root_volume, undo,
147                                 HAMMER_BUFSIZE -
148                                 (int)((char *)undo - (char *)buffer->ondisk));
149                 if (error) {
150                         kprintf("HAMMER(%s) UNDO record at %016llx failed\n",
151                                 root_volume->ondisk->vol_name,
152                                 scan_offset - tail->tail_size);
153                         break;
154                 }
155                 scan_offset -= tail->tail_size;
156                 bytes -= tail->tail_size;
157         }
158 done:
159         /*
160          * Reload flusher_undo_start to kick off the UNDO sequencing.
161          */
162         hmp->flusher_undo_start = rootmap->next_offset;
163         if (buffer)
164                 hammer_rel_buffer(buffer, 0);
165
166         /*
167          * Flush out the root volume header after all other flushes have
168          * completed.
169          */
170         if (hmp->ronly == 0 && error == 0 && root_volume->io.recovered) {
171                 hammer_recover_flush_buffers(hmp, root_volume);
172         }
173         kprintf("HAMMER(%s) End Recovery\n",
174                 root_volume->ondisk->vol_name);
175         return (error);
176 }
177
178 static int
179 hammer_check_tail_signature(hammer_fifo_tail_t tail, hammer_off_t end_off)
180 {
181         int max_bytes;
182
183         max_bytes = ((end_off - sizeof(*tail)) & HAMMER_BUFMASK);
184         max_bytes += sizeof(*tail);
185
186         /*
187          * tail overlaps buffer boundary
188          */
189         if (((end_off - sizeof(*tail)) ^ (end_off - 1)) & ~HAMMER_BUFMASK64) {
190                 return(1);
191         }
192
193         /*
194          * signature check, the tail signature is allowed to be the head
195          * signature only for 8-byte PADs.
196          */
197         switch(tail->tail_signature) {
198         case HAMMER_TAIL_SIGNATURE:
199                 break;
200         case HAMMER_HEAD_SIGNATURE:
201                 if (tail->tail_type != HAMMER_HEAD_TYPE_PAD ||
202                     tail->tail_size != sizeof(*tail)) {
203                         return(2);
204                 }
205                 break;
206         }
207
208         /*
209          * The undo structure must not overlap a buffer boundary.
210          */
211         if (tail->tail_size < 0 || tail->tail_size > max_bytes) {
212                 return(3);
213         }
214         return(0);
215 }
216
217 static int
218 hammer_recover_undo(hammer_mount_t hmp, hammer_volume_t root_volume,
219                     hammer_fifo_undo_t undo, int bytes)
220 {
221         hammer_fifo_tail_t tail;
222         hammer_volume_t volume;
223         hammer_buffer_t buffer;
224         hammer_off_t buf_offset;
225         int zone;
226         int error;
227         int vol_no;
228         int max_bytes;
229         u_int32_t offset;
230         u_int32_t crc;
231
232         /*
233          * Basic sanity checks
234          */
235         if (bytes < HAMMER_HEAD_ALIGN) {
236                 kprintf("HAMMER: Undo alignment error (%d)\n", bytes);
237                 return(EIO);
238         }
239         if (undo->head.hdr_signature != HAMMER_HEAD_SIGNATURE) {
240                 kprintf("HAMMER: Bad head signature %04x\n", 
241                         undo->head.hdr_signature);
242                 return(EIO);
243         }
244         if (undo->head.hdr_size < HAMMER_HEAD_ALIGN ||
245             undo->head.hdr_size > bytes) {
246                 kprintf("HAMMER: Bad size %d\n", bytes);
247                 return(EIO);
248         }
249
250         /*
251          * Skip PAD records.  Note that PAD records also do not require
252          * a tail and may have a truncated structure.
253          */
254         if (undo->head.hdr_type == HAMMER_HEAD_TYPE_PAD)
255                 return(0);
256
257         /*
258          * Check the CRC
259          */
260         crc = crc32(undo, HAMMER_FIFO_HEAD_CRCOFF) ^
261               crc32(&undo->head + 1, undo->head.hdr_size - sizeof(undo->head));
262         if (undo->head.hdr_crc != crc) {
263                 kprintf("HAMMER: Undo record CRC failed %08x %08x\n",
264                         undo->head.hdr_crc, crc);
265                 return(EIO);
266         }
267
268
269         /*
270          * Check the tail
271          */
272         bytes = undo->head.hdr_size;
273         tail = (void *)((char *)undo + bytes - sizeof(*tail));
274         if (tail->tail_size != undo->head.hdr_size) {
275                 kprintf("HAMMER: Bad tail size %d\n", tail->tail_size);
276                 return(EIO);
277         }
278         if (tail->tail_type != undo->head.hdr_type) {
279                 kprintf("HAMMER: Bad tail type %d\n", tail->tail_type);
280                 return(EIO);
281         }
282
283         /*
284          * Only process UNDO records
285          */
286         if (undo->head.hdr_type != HAMMER_HEAD_TYPE_UNDO)
287                 return(0);
288
289         /*
290          * Validate the UNDO record.
291          */
292         max_bytes = undo->head.hdr_size - sizeof(*undo) - sizeof(*tail);
293         if (undo->undo_data_bytes < 0 || undo->undo_data_bytes > max_bytes) {
294                 kprintf("HAMMER: Corrupt UNDO record, undo_data_bytes %d/%d\n",
295                         undo->undo_data_bytes, max_bytes);
296                 return(EIO);
297         }
298
299         /*
300          * The undo offset may only be a zone-1 or zone-2 offset.
301          *
302          * Currently we only support a zone-1 offset representing the
303          * volume header.
304          */
305         zone = HAMMER_ZONE_DECODE(undo->undo_offset);
306         offset = undo->undo_offset & HAMMER_BUFMASK;
307
308         if (offset + undo->undo_data_bytes > HAMMER_BUFSIZE) {
309                 kprintf("HAMMER: Corrupt UNDO record, bad offset\n");
310                 return (EIO);
311         }
312
313         switch(zone) {
314         case HAMMER_ZONE_RAW_VOLUME_INDEX:
315                 vol_no = HAMMER_VOL_DECODE(undo->undo_offset);
316                 volume = hammer_get_volume(hmp, vol_no, &error);
317                 if (volume == NULL) {
318                         kprintf("HAMMER: UNDO record, "
319                                 "cannot access volume %d\n", vol_no);
320                         break;
321                 }
322                 hammer_modify_volume(NULL, volume, NULL, 0);
323                 hammer_recover_copy_undo(undo->undo_offset,
324                                          (char *)(undo + 1),
325                                          (char *)volume->ondisk + offset,
326                                          undo->undo_data_bytes);
327                 hammer_modify_volume_done(volume);
328
329                 /*
330                  * Multiple modifications may be made to the same buffer,
331                  * improve performance by delaying the flush.  This also
332                  * covers the read-only case by preventing the kernel from
333                  * flushing the buffer.
334                  */
335                 if (volume->io.recovered == 0)
336                         volume->io.recovered = 1;
337                 else
338                         hammer_rel_volume(volume, 0);
339                 break;
340         case HAMMER_ZONE_RAW_BUFFER_INDEX:
341                 buf_offset = undo->undo_offset & ~HAMMER_BUFMASK64;
342                 buffer = hammer_get_buffer(hmp, buf_offset, 0, &error);
343                 if (buffer == NULL) {
344                         kprintf("HAMMER: UNDO record, "
345                                 "cannot access buffer %016llx\n",
346                                 undo->undo_offset);
347                         break;
348                 }
349                 hammer_modify_buffer(NULL, buffer, NULL, 0);
350                 hammer_recover_copy_undo(undo->undo_offset,
351                                          (char *)(undo + 1),
352                                          (char *)buffer->ondisk + offset,
353                                          undo->undo_data_bytes);
354                 hammer_modify_buffer_done(buffer);
355
356                 /*
357                  * Multiple modifications may be made to the same buffer,
358                  * improve performance by delaying the flush.  This also
359                  * covers the read-only case by preventing the kernel from
360                  * flushing the buffer.
361                  */
362                 if (buffer->io.recovered == 0)
363                         buffer->io.recovered = 1;
364                 else
365                         hammer_rel_buffer(buffer, 0);
366                 break;
367         default:
368                 kprintf("HAMMER: Corrupt UNDO record\n");
369                 error = EIO;
370         }
371         return (error);
372 }
373
374 static void
375 hammer_recover_copy_undo(hammer_off_t undo_offset, 
376                          char *src, char *dst, int bytes)
377 {
378         if (hammer_debug_general & 0x0080)
379                 kprintf("UNDO %016llx: %d\n", undo_offset, bytes);
380 #if 0
381         kprintf("UNDO %016llx:", undo_offset);
382         hammer_recover_debug_dump(22, dst, bytes);
383         kprintf("%22s", "to:");
384         hammer_recover_debug_dump(22, src, bytes);
385 #endif
386         bcopy(src, dst, bytes);
387 }
388
#if 0

/*
 * Hex dump bytes of buf, 16 bytes per line.  Continuation lines are
 * indented by w columns.  Compiled out along with its callers.
 */
static void
hammer_recover_debug_dump(int w, char *buf, int bytes)
{
	int idx;

	idx = 0;
	while (idx < bytes) {
		/* start a new indented line before every 16th byte */
		if (idx != 0 && (idx & 15) == 0) {
			kprintf("\n%*.*s", w, w, "");
		}
		kprintf(" %02x", (unsigned char)buf[idx]);
		++idx;
	}
	kprintf("\n");
}

#endif
405
406 /*
407  * Flush unwritten buffers from undo recovery operations on a read-only mount
408  * when the mount is updated to read-write.
409  */
410 static int hammer_recover_flush_volume_callback(hammer_volume_t, void *);
411 static int hammer_recover_flush_buffer_callback(hammer_buffer_t, void *);
412
413 void
414 hammer_recover_flush_buffers(hammer_mount_t hmp, hammer_volume_t root_volume)
415 {
416         RB_SCAN(hammer_vol_rb_tree, &hmp->rb_vols_root, NULL,
417                 hammer_recover_flush_volume_callback, root_volume);
418         if (root_volume->io.recovered) {
419                 crit_enter();
420                 while (hmp->io_running_count)
421                         tsleep(&hmp->io_running_count, 0, "hmrflx", 0);
422                 crit_exit();
423                 root_volume->io.recovered = 0;
424                 hammer_io_flush(&root_volume->io);
425                 hammer_rel_volume(root_volume, 0);
426         }
427 }
428
429 static
430 int
431 hammer_recover_flush_volume_callback(hammer_volume_t volume, void *data)
432 {
433         hammer_volume_t root_volume = data;
434
435         RB_SCAN(hammer_buf_rb_tree, &volume->rb_bufs_root, NULL,
436                 hammer_recover_flush_buffer_callback, NULL);
437         if (volume->io.recovered && volume != root_volume) {
438                 volume->io.recovered = 0;
439                 hammer_io_flush(&volume->io);
440                 hammer_rel_volume(volume, 0);
441         }
442         return(0);
443 }
444
445 static
446 int
447 hammer_recover_flush_buffer_callback(hammer_buffer_t buffer, void *data)
448 {
449         if (buffer->io.recovered) {
450                 buffer->io.recovered = 0;
451                 hammer_io_flush(&buffer->io);
452                 hammer_rel_buffer(buffer, 0);
453         }
454         return(0);
455 }
456