Merge from vendor branch GDB:
[dragonfly.git] / sys / vfs / hammer / hammer_recover.c
1 /*
2  * Copyright (c) 2008 The DragonFly Project.  All rights reserved.
3  * 
4  * This code is derived from software contributed to The DragonFly Project
5  * by Matthew Dillon <dillon@backplane.com>
6  * 
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions
9  * are met:
10  * 
11  * 1. Redistributions of source code must retain the above copyright
12  *    notice, this list of conditions and the following disclaimer.
13  * 2. Redistributions in binary form must reproduce the above copyright
14  *    notice, this list of conditions and the following disclaimer in
15  *    the documentation and/or other materials provided with the
16  *    distribution.
17  * 3. Neither the name of The DragonFly Project nor the names of its
18  *    contributors may be used to endorse or promote products derived
19  *    from this software without specific, prior written permission.
20  * 
21  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
22  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
23  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
24  * FOR A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE
25  * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
26  * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING,
27  * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
28  * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
29  * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
30  * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
31  * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
32  * SUCH DAMAGE.
33  * 
34  * $DragonFly: src/sys/vfs/hammer/hammer_recover.c,v 1.2 2008/01/10 07:41:03 dillon Exp $
35  */
36
37 #include "hammer.h"
38
39 static void hammer_recover_buffer_stage1(hammer_cluster_t cluster,
40                                 int32_t buf_no);
41 static void hammer_recover_buffer_stage2(hammer_cluster_t cluster,
42                                 int32_t buf_no);
43 static int  hammer_recover_record(hammer_cluster_t cluster,
44                                 hammer_buffer_t buffer, int32_t rec_offset,
45                                 hammer_record_ondisk_t rec);
46 static int hammer_recover_btree(hammer_cluster_t cluster,
47                                 hammer_buffer_t buffer, int32_t rec_offset,
48                                 hammer_record_ondisk_t rec);
49
/*
 * Recover a cluster.  The caller has referenced and locked the cluster.
 *
 * Generally returns 0 on success and EIO if the recovery was unsuccessful.
 *
 * Recovery runs in three passes over the cluster's record-buffer bitmap:
 *   1. Reserve each record buffer in the master A-list and validate its
 *      records (stage1), recovering referenced data blocks as we go.
 *   2. Build an empty B-Tree root now that allocations are possible.
 *   3. Re-index every surviving record into the new B-Tree (stage2).
 */
int
hammer_recover(hammer_cluster_t cluster)
{
        int buf_no;             /* buffer index within the cluster */
        int nbuffers;           /* number of buffers covered by the cluster */
        int32_t r;
        u_int32_t bitmap;       /* one 32-bit word of clu_record_buf_bitmap */

        /*
         * NOTE(review): recovery is intentionally stubbed out; everything
         * below this early return is compiled but unreachable until the
         * code is re-enabled.
         */
        return(0); /* XXX temporarily disabled */
        Debugger("hammer_recover");
        KKASSERT(cluster->ondisk->synchronized_tid);

        nbuffers = cluster->ondisk->clu_limit / HAMMER_BUFSIZE;
        hammer_modify_cluster(cluster);

        /*
         * Re-initialize the A-lists.  Buffer 0 (the cluster header) is
         * excluded from the master list; the sub-allocation lists start
         * in the all-allocated state and are freed back selectively.
         */
        hammer_alist_init(&cluster->alist_master, 1, nbuffers - 1,
                          HAMMER_ASTATE_FREE);
        hammer_alist_init(&cluster->alist_btree,
                          HAMMER_FSBUF_MAXBLKS,
                          (nbuffers - 1) * HAMMER_FSBUF_MAXBLKS,
                          HAMMER_ASTATE_ALLOC);
        hammer_alist_init(&cluster->alist_mdata,
                          HAMMER_FSBUF_MAXBLKS,
                          (nbuffers - 1) * HAMMER_FSBUF_MAXBLKS,
                          HAMMER_ASTATE_ALLOC);
        hammer_alist_init(&cluster->alist_record,
                          HAMMER_FSBUF_MAXBLKS,
                          (nbuffers - 1) * HAMMER_FSBUF_MAXBLKS,
                          HAMMER_ASTATE_ALLOC);

        /*
         * Scan the cluster's clu_record_buf_bitmap, reserve record buffers
         * from the master bitmap before we try to recover their data.  Free
         * the block of records to alist_record.
         *
         * We need to mark the blocks as free in alist_record so future
         * allocations will dive into the buffer A-list's, but we don't 
         * want to destroy the underlying buffer A-list's.  Because the
         * meta data in cluster->alist_record indicates state 00 (all-allocated
         * but not initialized), it will not dive down into the buffer when
         * freeing the entire buffer.
         */
        for (buf_no = 1; buf_no < nbuffers; ++buf_no) {
                bitmap = cluster->ondisk->clu_record_buf_bitmap[buf_no >> 5];
                if (bitmap == 0) {
                        /*
                         * Whole 32-buffer word is empty: jump to the last
                         * index of this word so the loop's ++ lands on the
                         * first index of the next word.
                         */
                        buf_no = ((buf_no + 32) & ~31) - 1;
                        continue;
                }
                if ((bitmap & (1 << (buf_no & 31))) == 0)
                        continue;
                /* reserve the buffer; it must have been free */
                r = hammer_alist_alloc_fwd(&cluster->alist_master, 1, buf_no);
                KKASSERT(r == buf_no);
                hammer_alist_free(&cluster->alist_record,
                                  buf_no * HAMMER_FSBUF_MAXBLKS,
                                  HAMMER_FSBUF_MAXBLKS);
        }

        /*
         * Scan the cluster's clu_record_buf_bitmap, reassign buffers
         * from alist_master to alist_record, and reallocate individual
         * records and any related data reference if they meet the critera.
         */
        for (buf_no = 1; buf_no < nbuffers; ++buf_no) {
                bitmap = cluster->ondisk->clu_record_buf_bitmap[buf_no >> 5];
                if (bitmap == 0) {
                        /* skip an all-zero 32-buffer word (see above) */
                        buf_no = ((buf_no + 32) & ~31) - 1;
                        continue;
                }
                if ((bitmap & (1 << (buf_no & 31))) == 0)
                        continue;
                hammer_recover_buffer_stage1(cluster, buf_no);
        }

        /*
         * The cluster is now in good enough shape that general allocations
         * are possible.  Construct an empty B-Tree root.
         */
        {
                hammer_node_t croot;
                int error;

                croot = hammer_alloc_btree(cluster, &error);
                if (error == 0) {
                        hammer_modify_node(croot);
                        bzero(croot->ondisk, sizeof(*croot->ondisk));
                        croot->ondisk->count = 0;
                        croot->ondisk->type = HAMMER_BTREE_TYPE_LEAF;
                        cluster->ondisk->clu_btree_root = croot->node_offset;
                }
        }

        /*
         * Scan the cluster's clu_record_buf_bitmap again and regenerate
         * the B-Tree.
         *
         * XXX B-tree record for cluster-push
         */
        for (buf_no = 1; buf_no < nbuffers; ++buf_no) {
                bitmap = cluster->ondisk->clu_record_buf_bitmap[buf_no >> 5];
                if (bitmap == 0) {
                        /* skip an all-zero 32-buffer word (see above) */
                        buf_no = ((buf_no + 32) & ~31) - 1;
                        continue;
                }
                if ((bitmap & (1 << (buf_no & 31))) == 0)
                        continue;
                hammer_recover_buffer_stage2(cluster, buf_no);
        }

        /*
         * Validate the parent cluster pointer. XXX
         */
        return(0);
}
171
172 /*
173  * Reassign buf_no as a record buffer and recover any related data
174  * references.
175  */
176 static void
177 hammer_recover_buffer_stage1(hammer_cluster_t cluster, int32_t buf_no)
178 {
179         hammer_record_ondisk_t rec;
180         hammer_buffer_t buffer;
181         int32_t rec_no;
182         int32_t rec_offset;
183         int error;
184
185         buffer = hammer_get_buffer(cluster, buf_no, 0, &error);
186         if (error) {
187                 /*
188                  * If we are unable to access the buffer leave it in a
189                  * reserved state on the master alist.
190                  */
191                 kprintf("hammer_recover_buffer_stage1: error "
192                         "recovering %d:%d:%d\n",
193                         cluster->volume->vol_no, cluster->clu_no, buf_no);
194                 return;
195         }
196
197         /*
198          * Recover the buffer, scan and validate allocated records.  Records
199          * which cannot be recovered are freed.
200          */
201         hammer_modify_buffer(buffer);
202         hammer_alist_recover(&buffer->alist, 0, 0, HAMMER_RECORD_NODES);
203         rec_no = -1;
204         for (;;) {
205                 rec_no = hammer_alist_find(&buffer->alist, rec_no + 1);
206                 if (rec_no == HAMMER_ALIST_BLOCK_NONE)
207                         break;
208                 rec_offset = offsetof(union hammer_fsbuf_ondisk,
209                                       record.recs[rec_no]);
210                 rec_offset += buf_no * HAMMER_BUFSIZE;
211                 rec = &buffer->ondisk->record.recs[rec_no];
212                 error = hammer_recover_record(cluster, buffer, rec_offset, rec);
213                 if (error) {
214                         kprintf("hammer_recover_record: failed %d:%d@%d\n",
215                                 cluster->clu_no, buffer->buf_no, rec_offset);
216                         hammer_alist_free(&buffer->alist, rec_no, 1);
217                 }
218         }
219         hammer_rel_buffer(buffer, 0);
220 }
221
/*
 * Recover a record, at least into a state that doesn't blow up the
 * filesystem.  Returns 0 on success, non-zero if the record is
 * unrecoverable.
 *
 * Validation steps: discard post-sync records, range-check the B-Tree
 * key against the cluster bounds, sanity-check the data reference, and
 * finally re-reserve the record's data space out of the appropriate
 * A-list (master list for whole-buffer data, alist_mdata for
 * sub-allocated data).
 */
static int
hammer_recover_record(hammer_cluster_t cluster, hammer_buffer_t buffer,
                             int32_t rec_offset, hammer_record_ondisk_t rec)
{
        hammer_buffer_t dbuf;           /* buffer holding the record's data */
        hammer_tid_t syncid = cluster->ondisk->synchronized_tid;
        int32_t data_offset;            /* cluster-relative data offset */
        int32_t data_len;
        int32_t nblks;                  /* buffers or data blocks to reserve */
        int32_t dbuf_no;                /* buffer index of the data */
        int32_t dblk_no;                /* data block index within dbuf */
        int32_t r;
        int error = 0;

        /*
         * Discard records created after the synchronization point and
         * undo any deletions made after the synchronization point.
         */
        if (rec->base.base.create_tid > syncid)
                return(EINVAL);

        if (rec->base.base.delete_tid > syncid)
                rec->base.base.delete_tid = 0;

        /*
         * Validate the record's B-Tree key - it must fall inside the
         * key range this cluster is responsible for.
         */
        if (hammer_btree_cmp(&rec->base.base,
                             &cluster->ondisk->clu_btree_beg) < 0)  {
                return(EINVAL);
        }
        if (hammer_btree_cmp(&rec->base.base,
                             &cluster->ondisk->clu_btree_end) >= 0)  {
                return(EINVAL);
        }

        /*
         * Validate the record's data.  If the offset is 0 there is no data
         * (or it is zero-fill) and we can return success immediately.
         * Otherwise make sure everything is ok.
         */
        data_offset = rec->base.data_offset;
        data_len = rec->base.data_len;

        if (data_len == 0)
                rec->base.data_offset = 0;
        if (data_offset == 0)
                return(0);
        /* reject references outside the cluster or with a bogus length */
        if (data_offset < HAMMER_BUFSIZE ||
            data_offset >= cluster->ondisk->clu_limit ||
            data_len < 0 || data_len > HAMMER_MAXDATA ||
            data_offset + data_len > cluster->ondisk->clu_limit) {
                return(EINVAL);
        }

        /*
         * Check data_offset relative to rec_offset: the data must not
         * straddle the start of the record or overlay its base header.
         */
        if (data_offset < rec_offset && data_offset + data_len > rec_offset)
                return(EINVAL);
        if (data_offset >= rec_offset &&
            data_offset < rec_offset + sizeof(struct hammer_base_record)) {
                return(EINVAL);
        }

        /*
         * Check for data embedded in the record itself - it must fit
         * entirely within the record slot, and needs no separate
         * allocation.
         */
        if (data_offset >= rec_offset &&
            data_offset < rec_offset + HAMMER_RECORD_SIZE) {
                if (data_offset + data_len > rec_offset + HAMMER_RECORD_SIZE)
                        return(EINVAL);
                return(0);
        }

        /*
         * Recover the allocated data either out of the cluster's master alist
         * or as a buffer sub-allocation.
         */
        if ((data_len & HAMMER_BUFMASK) == 0) {
                /*
                 * Whole-buffer data: reserve the buffers directly from
                 * the master A-list.
                 */
                if (data_offset & HAMMER_BUFMASK)
                        return(EINVAL);
                nblks = data_len / HAMMER_BUFSIZE;
                dbuf_no = data_offset / HAMMER_BUFSIZE;

                r = hammer_alist_alloc_fwd(&cluster->alist_master,
                                           nblks, dbuf_no);
                if (r == HAMMER_ALIST_BLOCK_NONE)
                        return(EINVAL);
                if (r != dbuf_no) {
                        /* wrong slot came back - undo and fail */
                        hammer_alist_free(&cluster->alist_master, r, nblks);
                        return(EINVAL);
                }
        } else {
                /*
                 * Sub-allocated data: must not cross a buffer boundary,
                 * must not overlap the fsbuf header, and must be data
                 * block aligned.
                 */
                if ((data_offset & ~HAMMER_BUFMASK) !=
                    ((data_offset + data_len) & ~HAMMER_BUFMASK)) {
                        return(EINVAL);
                }
                if ((data_offset & HAMMER_BUFMASK) <
                    sizeof(struct hammer_fsbuf_head)) {
                        return(EINVAL);
                }
                if (data_offset & HAMMER_DATA_BLKMASK)
                        return(EINVAL);

                /*
                 * Ok, recover the space in the data buffer.
                 */
                dbuf_no = data_offset / HAMMER_BUFSIZE;
                r = hammer_alist_alloc_fwd(&cluster->alist_master, 1, dbuf_no);
                if (r != dbuf_no && r != HAMMER_ALIST_BLOCK_NONE)
                        hammer_alist_free(&cluster->alist_master, r, 1);
                if (r == dbuf_no) {
                        /*
                         * This is the first time we've tried to recover
                         * data in this data buffer, reinit it (but don't
                         * zero it out, obviously).
                         *
                         * Calling initbuffer marks the data blocks within
                         * the buffer as being free.
                         */
                        dbuf = hammer_get_buffer(cluster, dbuf_no,
                                                 0, &error);
                        if (error == 0) {
                                hammer_modify_buffer(dbuf);
                                hammer_initbuffer(&dbuf->alist, 
                                                  &dbuf->ondisk->head,
                                                  HAMMER_FSBUF_DATA);
                                dbuf->buf_type = HAMMER_FSBUF_DATA;
                        }
                } else {
                        /*
                         * We've seen this data buffer before.
                         */
                        dbuf = hammer_get_buffer(cluster, dbuf_no,
                                                 0, &error);
                }
                if (error)
                        return(EINVAL);

                /* the buffer must already be typed as a data buffer */
                if (dbuf->buf_type != HAMMER_FSBUF_DATA) {
                        hammer_rel_buffer(dbuf, 0);
                        return(EINVAL);
                }

                /*
                 * Figure out the data block number and number of blocks,
                 * then cross-check the computed block index against the
                 * on-disk layout before reserving it in alist_mdata.
                 */
                nblks = (data_len + HAMMER_DATA_BLKMASK) & ~HAMMER_DATA_BLKMASK;
                nblks /= HAMMER_DATA_BLKSIZE;
                dblk_no = ((data_offset & HAMMER_BUFMASK) - offsetof(union hammer_fsbuf_ondisk, data.data)) / HAMMER_DATA_BLKSIZE;
                if ((data_offset & HAMMER_BUFMASK) != offsetof(union hammer_fsbuf_ondisk, data.data[dblk_no])) {
                        kprintf("dblk_no %d does not match data_offset %d/%d\n",
                                dblk_no,
                                offsetof(union hammer_fsbuf_ondisk, data.data[dblk_no]),
                                (data_offset & HAMMER_BUFMASK));
                        hammer_rel_buffer(dbuf, 0);
                        Debugger("bad data");
                        return(EINVAL);
                }
                /* convert to the global (per-cluster) block numbering */
                dblk_no *= HAMMER_FSBUF_MAXBLKS;
                r = hammer_alist_alloc_fwd(&cluster->alist_mdata, nblks, dblk_no);
                if (r != dblk_no) {
                        if (r != HAMMER_ALIST_BLOCK_NONE)
                                hammer_alist_free(&cluster->alist_mdata, r, nblks);
                        hammer_rel_buffer(dbuf, 0);
                        return(EINVAL);
                }
                hammer_rel_buffer(dbuf, 0);
        }
        return(0);
}
399
400 /*
401  * Rebuild the B-Ttree for the records residing in the specified buffer.
402  */
403 static void
404 hammer_recover_buffer_stage2(hammer_cluster_t cluster, int32_t buf_no)
405 {
406         hammer_record_ondisk_t rec;
407         hammer_buffer_t buffer;
408         int32_t rec_no;
409         int32_t rec_offset;
410         int error;
411
412         buffer = hammer_get_buffer(cluster, buf_no, 0, &error);
413         if (error) {
414                 /*
415                  * If we are unable to access the buffer leave it in a
416                  * reserved state on the master alist.
417                  */
418                 kprintf("hammer_recover_buffer_stage2: error "
419                         "recovering %d:%d:%d\n",
420                         cluster->volume->vol_no, cluster->clu_no, buf_no);
421                 return;
422         }
423
424         /*
425          * Recover the buffer, scan and validate allocated records.  Records
426          * which cannot be recovered are freed.
427          */
428         rec_no = -1;
429         for (;;) {
430                 rec_no = hammer_alist_find(&buffer->alist, rec_no + 1);
431                 if (rec_no == HAMMER_ALIST_BLOCK_NONE)
432                         break;
433                 rec_offset = offsetof(union hammer_fsbuf_ondisk,
434                                       record.recs[rec_no]);
435                 rec_offset += buf_no * HAMMER_BUFSIZE;
436                 rec = &buffer->ondisk->record.recs[rec_no];
437                 error = hammer_recover_btree(cluster, buffer, rec_offset, rec);
438                 if (error) {
439                         kprintf("hammer_recover_btree: failed %d:%d@%d\n",
440                                 cluster->clu_no, buffer->buf_no, rec_offset);
441                         /* XXX free the record and its data? */
442                         /*hammer_alist_free(&buffer->alist, rec_no, 1);*/
443                 }
444         }
445         hammer_rel_buffer(buffer, 0);
446 }
447
448 static int
449 hammer_recover_btree(hammer_cluster_t cluster, hammer_buffer_t buffer,
450                       int32_t rec_offset, hammer_record_ondisk_t rec)
451 {
452         struct hammer_cursor cursor;
453         union hammer_btree_elm elm;
454         int error;
455
456         error = hammer_init_cursor_cluster(&cursor, cluster);
457         if (error)
458                 goto failed;
459         cursor.key_beg = rec->base.base;
460         cursor.flags = HAMMER_CURSOR_INSERT;
461         error = hammer_btree_lookup(&cursor);
462         if (error == 0) {
463                 kprintf("hammer_recover_btree: Duplicate record\n");
464                 hammer_print_btree_elm(&cursor.node->ondisk->elms[cursor.index], HAMMER_BTREE_TYPE_LEAF, cursor.index);
465                 Debugger("duplicate record");
466         }
467         if (error != ENOENT)
468                 goto failed;
469
470         elm.leaf.base = rec->base.base;
471         elm.leaf.rec_offset = rec_offset;
472         elm.leaf.data_offset = rec->base.data_offset;
473         elm.leaf.data_len = rec->base.data_len;
474         elm.leaf.data_crc = rec->base.data_crc;
475
476         error = hammer_btree_insert(&cursor, &elm);
477         if (error) {
478                 kprintf("hammer_recover_btree: insertion failed\n");
479         }
480         /* XXX cluster pushes? */
481
482 failed:
483         hammer_done_cursor(&cursor);
484         return(error);
485 }
486