ipfw: Reduce dependency on ifnet threads.
[dragonfly.git] / sbin / hammer / cmd_recover.c
1 /*
2  * Copyright (c) 2010 The DragonFly Project.  All rights reserved.
3  *
4  * This code is derived from software contributed to The DragonFly Project
5  * by Matthew Dillon <dillon@backplane.com>
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions
9  * are met:
10  *
11  * 1. Redistributions of source code must retain the above copyright
12  *    notice, this list of conditions and the following disclaimer.
13  * 2. Redistributions in binary form must reproduce the above copyright
14  *    notice, this list of conditions and the following disclaimer in
15  *    the documentation and/or other materials provided with the
16  *    distribution.
17  * 3. Neither the name of The DragonFly Project nor the names of its
18  *    contributors may be used to endorse or promote products derived
19  *    from this software without specific, prior written permission.
20  *
21  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
22  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
23  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
24  * FOR A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE
25  * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
26  * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING,
27  * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
28  * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
29  * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
30  * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
31  * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
32  * SUCH DAMAGE.
33  */
34
35 #include "hammer.h"
36
/*
 * One dictionary entry per (obj_id, pfs_id) pair seen during recovery.
 * Entries are created on demand by get_dict() and are used to work out
 * the path under the target directory where an object is recreated.
 */
37 struct recover_dict {
38         struct recover_dict *next;      /* next entry on the same RDHash chain */
39         struct recover_dict *parent;    /* directory this object lives in (may be NULL) */
40         int64_t obj_id;                 /* lookup key, together with pfs_id */
41         uint8_t obj_type;               /* 0 until learned from an inode or data record */
42         uint8_t flags;                  /* DICTF_* bits */
43         uint16_t pfs_id;                /* lookup key, together with obj_id */
44         int64_t size;                   /* -1 until an inode record supplies the size */
45         char    *name;                  /* malloc'd name from a direntry, NULL if unknown */
46 };
47
/* Bits for recover_dict.flags */
48 #define DICTF_MADEDIR   0x01    /* mkdir() has been issued for this entry */
49 #define DICTF_MADEFILE  0x02    /* "mkfile" has been reported for this entry */
50 #define DICTF_PARENT    0x04    /* parent attached for real */
51 #define DICTF_TRAVERSED 0x80    /* set while recover_path_helper() walks through this entry */
52
53 typedef struct bigblock *bigblock_t;
54
55 static void recover_top(char *ptr, hammer_off_t offset);
56 static void recover_elm(hammer_btree_leaf_elm_t leaf);
57 static struct recover_dict *get_dict(int64_t obj_id, uint16_t pfs_id);
58 static char *recover_path(struct recover_dict *dict);
59 static void sanitize_string(char *str);
60 static hammer_off_t scan_raw_limit(void);
61 static void scan_bigblocks(int target_zone);
62 static void free_bigblocks(void);
63 static void add_bigblock_entry(hammer_off_t offset,
64         hammer_blockmap_layer1_t layer1, hammer_blockmap_layer2_t layer2);
65 static bigblock_t get_bigblock_entry(hammer_off_t offset);
66
67 static const char *TargetDir;
68 static int CachedFd = -1;
69 static char *CachedPath;
70
/*
 * Node of ZoneTree, keyed by phys_offset (see bigblock_cmp()).  It keeps
 * by-value copies of the layer1/layer2 blockmap entries so that
 * hammer_cmd_recover() can test their CRCs and read layer2.append_off.
 */
71 typedef struct bigblock {
72         RB_ENTRY(bigblock) entry;       /* red-black tree linkage */
73         hammer_off_t phys_offset; /* zone-2 */
74         struct hammer_blockmap_layer1 layer1;
75         struct hammer_blockmap_layer2 layer2;
76 } *bigblock_t;
77
78 static int
79 bigblock_cmp(bigblock_t b1, bigblock_t b2)
80 {
81         if (b1->phys_offset < b2->phys_offset)
82                 return(-1);
83         if (b1->phys_offset > b2->phys_offset)
84                 return(1);
85         return(0);
86 }
87
88 RB_HEAD(bigblock_rb_tree, bigblock) ZoneTree = RB_INITIALIZER(&ZoneTree);
89 RB_PROTOTYPE2(bigblock_rb_tree, bigblock, entry, bigblock_cmp, hammer_off_t);
90 RB_GENERATE2(bigblock_rb_tree, bigblock, entry, bigblock_cmp, hammer_off_t,
91         phys_offset);
92
93 /*
94  * There was a hidden bug here while iterating zone-2 offset as
95  * shown in an example below.
96  *
97  * If a volume was once used as HAMMER filesystem which consists of
98  * multiple volumes whose usage has reached beyond the first volume,
99  * and then later re-formatted only using 1 volume, hammer recover is
100  * likely to hit assertion in get_buffer() due to having access to
101  * invalid volume (vol1,2,...) from old filesystem data.
102  *
103  * To avoid this, now the command only scans up to the last big-block
104  * that's actually used for filesystem data or meta-data at the moment,
105  * if all layer1/2 entries have correct CRC values. This also avoids
106  * recovery of irrelevant files from old filesystem.
107  *
108  * It also doesn't scan beyond append offset of big-blocks in B-Tree
109  * zone to avoid recovery of irrelevant files from old filesystem,
110  * if layer1/2 entries for those big-blocks have correct CRC values.
111  *
112  * |-----vol0-----|-----vol1-----|-----vol2-----| old filesystem
113  * <-----------------------> used by old filesystem
114  *
115  * |-----vol0-----| new filesystem
116  * <-----> used by new filesystem
117  *        <-------> unused, invalid data from old filesystem
118  *              <-> B-Tree nodes likely to point to vol1
119  */
120
121 void
122 hammer_cmd_recover(char **av, int ac)
123 {
124         struct buffer_info *data_buffer;
125         struct volume_info *volume;
126         bigblock_t b = NULL;
127         hammer_off_t off;
128         hammer_off_t off_end;
129         hammer_off_t off_blk;
130         hammer_off_t raw_limit = 0;
131         hammer_off_t zone_limit = 0;
132         char *ptr;
133         int i;
134         int target_zone = HAMMER_ZONE_BTREE_INDEX;
135         int full = 0;
136         int quick = 0;
137
138         if (ac < 1)
139                 errx(1, "hammer recover <target_dir> [full|quick]");
140
141         TargetDir = av[0];
142         if (ac > 1) {
143                 if (!strcmp(av[1], "full"))
144                         full = 1;
145                 if (!strcmp(av[1], "quick"))
146                         quick = 1;
147         }
148         assert(!full || !quick);
149
150         if (mkdir(TargetDir, 0777) == -1) {
151                 if (errno != EEXIST)
152                         err(1, "mkdir");
153         }
154
155         printf("Running %sraw scan of HAMMER image, recovering to %s\n",
156                 full ? "full " : quick ? "quick " : "",
157                 TargetDir);
158
159         if (!full) {
160                 scan_bigblocks(target_zone);
161                 raw_limit = scan_raw_limit();
162                 if (raw_limit) {
163                         raw_limit += HAMMER_BIGBLOCK_SIZE;
164                         assert(hammer_is_zone_raw_buffer(raw_limit));
165                 }
166         }
167
168         if (quick) {
169                 assert(!full);
170                 if (!RB_EMPTY(&ZoneTree)) {
171                         printf("Found zone-%d big-blocks at\n", target_zone);
172                         RB_FOREACH(b, bigblock_rb_tree, &ZoneTree)
173                                 printf("%016jx\n", b->phys_offset);
174
175                         b = RB_MAX(bigblock_rb_tree, &ZoneTree);
176                         zone_limit = b->phys_offset + HAMMER_BIGBLOCK_SIZE;
177                         assert(hammer_is_zone_raw_buffer(zone_limit));
178                 }
179         }
180
181         if (raw_limit || zone_limit) {
182 #define _fmt "Scanning zone-%d big-blocks till %016jx"
183                 if (!raw_limit) /* unlikely */
184                         printf(_fmt" ???", target_zone, zone_limit);
185                 else if (!zone_limit)
186                         printf(_fmt, HAMMER_ZONE_RAW_BUFFER_INDEX, raw_limit);
187                 else if (raw_limit >= zone_limit)
188                         printf(_fmt, target_zone, zone_limit);
189                 else /* unlikely */
190                         printf(_fmt" ???", HAMMER_ZONE_RAW_BUFFER_INDEX, raw_limit);
191                 printf("\n");
192         }
193
194         data_buffer = NULL;
195         for (i = 0; i < HAMMER_MAX_VOLUMES; i++) {
196                 volume = get_volume(i);
197                 if (volume == NULL)
198                         continue;
199
200                 printf("Scanning volume %d size %s\n",
201                         volume->vol_no, sizetostr(volume->size));
202                 off = HAMMER_ENCODE_RAW_BUFFER(volume->vol_no, 0);
203                 off_end = off + HAMMER_VOL_BUF_SIZE(volume->ondisk);
204
205                 while (off < off_end) {
206                         off_blk = off & HAMMER_BIGBLOCK_MASK64;
207                         if (off_blk == 0)
208                                 b = get_bigblock_entry(off);
209
210                         if (raw_limit) {
211                                 if (off >= raw_limit) {
212                                         printf("Done %016jx\n", (uintmax_t)off);
213                                         goto end;
214                                 }
215                         }
216                         if (zone_limit) {
217                                 if (off >= zone_limit) {
218                                         printf("Done %016jx\n", (uintmax_t)off);
219                                         goto end;
220                                 }
221                                 if (b == NULL) {
222                                         off = HAMMER_ZONE_LAYER2_NEXT_OFFSET(off);
223                                         continue;
224                                 }
225                         }
226
227                         if (b) {
228                                 if (hammer_crc_test_layer1(HammerVersion,
229                                                            &b->layer1) &&
230                                     hammer_crc_test_layer2(HammerVersion,
231                                                            &b->layer2) &&
232                                     off_blk >= b->layer2.append_off) {
233                                         off = HAMMER_ZONE_LAYER2_NEXT_OFFSET(off);
234                                         continue;
235                                 }
236                         }
237
238                         ptr = get_buffer_data(off, &data_buffer, 0);
239                         if (ptr)
240                                 recover_top(ptr, off);
241                         off += HAMMER_BUFSIZE;
242                 }
243         }
244 end:
245         rel_buffer(data_buffer);
246         free_bigblocks();
247
248         if (CachedPath) {
249                 free(CachedPath);
250                 close(CachedFd);
251                 CachedPath = NULL;
252                 CachedFd = -1;
253         }
254 }
255
256 static __inline
257 void
258 print_node(hammer_node_ondisk_t node, hammer_off_t offset)
259 {
260         char buf[HAMMER_BTREE_LEAF_ELMS + 1];
261         int maxcount = hammer_node_max_elements(node->type);
262         int i;
263
264         for (i = 0; i < node->count && i < maxcount; ++i)
265                 buf[i] = hammer_elm_btype(&node->elms[i]);
266         buf[i] = '\0';
267
268         printf("%016jx %c %d %s\n", offset, node->type, node->count, buf);
269 }
270
271 /*
272  * Top level recovery processor.  Assume the data is a B-Tree node.
273  * If the CRC is good we attempt to process the node, building the
274  * object space and creating the dictionary as we go.
275  */
276 static void
277 recover_top(char *ptr, hammer_off_t offset)
278 {
279         hammer_node_ondisk_t node;
280         hammer_btree_elm_t elm;
281         int maxcount;
282         int i;
283         int isnode;
284
285         for (node = (void *)ptr; (char *)node < ptr + HAMMER_BUFSIZE; ++node) {
286                 isnode = hammer_crc_test_btree(HammerVersion, node);
287                 maxcount = hammer_node_max_elements(node->type);
288
289                 if (DebugOpt) {
290                         if (isnode)
291                                 print_node(node, offset);
292                         else if (DebugOpt > 1)
293                                 printf("%016jx -\n", offset);
294                 }
295                 offset += sizeof(*node);
296
297                 if (isnode && node->type == HAMMER_BTREE_TYPE_LEAF) {
298                         for (i = 0; i < node->count && i < maxcount; ++i) {
299                                 elm = &node->elms[i];
300                                 if (elm->base.btype == HAMMER_BTREE_TYPE_RECORD)
301                                         recover_elm(&elm->leaf);
302                         }
303                 }
304         }
305 }
306
307 static void
308 recover_elm(hammer_btree_leaf_elm_t leaf)
309 {
310         struct buffer_info *data_buffer = NULL;
311         struct recover_dict *dict;
312         struct recover_dict *dict2;
313         hammer_data_ondisk_t ondisk;
314         hammer_off_t data_offset;
315         struct stat st;
316         int chunk;
317         int len;
318         int zfill;
319         int64_t file_offset;
320         uint16_t pfs_id;
321         size_t nlen;
322         int fd;
323         char *name;
324         char *path1;
325         char *path2;
326
327         /*
328          * Ignore deleted records
329          */
330         if (leaf->delete_ts)
331                 return;
332
333         /*
334          * If we're running full scan, it's possible that data_offset
335          * refers to old filesystem data that we can't physically access.
336          */
337         data_offset = leaf->data_offset;
338         if (get_volume(HAMMER_VOL_DECODE(data_offset)) == NULL)
339                 return;
340
341         if (data_offset != 0)
342                 ondisk = get_buffer_data(data_offset, &data_buffer, 0);
343         else
344                 ondisk = NULL;
345         if (ondisk == NULL)
346                 goto done;
347
348         len = leaf->data_len;
349         chunk = HAMMER_BUFSIZE - ((int)data_offset & HAMMER_BUFMASK);
350         if (chunk > len)
351                 chunk = len;
352
353         if (len < 0 || len > HAMMER_XBUFSIZE || len > chunk)
354                 goto done;
355
356         pfs_id = lo_to_pfs(leaf->base.localization);
357
358         /*
359          * Note that meaning of leaf->base.obj_id differs depending
360          * on record type.  For a direntry, leaf->base.obj_id points
361          * to its parent inode that this entry is a part of, but not
362          * its corresponding inode.
363          */
364         dict = get_dict(leaf->base.obj_id, pfs_id);
365
366         switch(leaf->base.rec_type) {
367         case HAMMER_RECTYPE_INODE:
368                 /*
369                  * We found an inode which also tells us where the file
370                  * or directory is in the directory hierarchy.
371                  */
372                 if (VerboseOpt) {
373                         printf("inode %016jx:%05d found\n",
374                                 (uintmax_t)leaf->base.obj_id, pfs_id);
375                 }
376                 path1 = recover_path(dict);
377
378                 /*
379                  * Attach the inode to its parent.  This isn't strictly
380                  * necessary because the information is also in the
381                  * directory entries, but if we do not find the directory
382                  * entry this ensures that the files will still be
383                  * reasonably well organized in their proper directories.
384                  */
385                 if ((dict->flags & DICTF_PARENT) == 0 &&
386                     dict->obj_id != HAMMER_OBJID_ROOT &&
387                     ondisk->inode.parent_obj_id != 0) {
388                         dict->flags |= DICTF_PARENT;
389                         dict->parent = get_dict(ondisk->inode.parent_obj_id,
390                                                 pfs_id);
391                         if (dict->parent &&
392                             (dict->parent->flags & DICTF_MADEDIR) == 0) {
393                                 dict->parent->flags |= DICTF_MADEDIR;
394                                 path2 = recover_path(dict->parent);
395                                 printf("mkdir %s\n", path2);
396                                 mkdir(path2, 0777);
397                                 free(path2);
398                                 path2 = NULL;
399                         }
400                 }
401                 if (dict->obj_type == 0)
402                         dict->obj_type = ondisk->inode.obj_type;
403                 dict->size = ondisk->inode.size;
404                 path2 = recover_path(dict);
405
406                 if (lstat(path1, &st) == 0) {
407                         if (ondisk->inode.obj_type == HAMMER_OBJTYPE_REGFILE) {
408                                 truncate(path1, dict->size);
409                                 /* chmod(path1, 0666); */
410                         }
411                         if (strcmp(path1, path2)) {
412                                 printf("Rename (inode) %s -> %s\n", path1, path2);
413                                 rename(path1, path2);
414                         }
415                 } else if (ondisk->inode.obj_type == HAMMER_OBJTYPE_REGFILE) {
416                         printf("mkinode (file) %s\n", path2);
417                         fd = open(path2, O_RDWR|O_CREAT, 0666);
418                         if (fd > 0)
419                                 close(fd);
420                 } else if (ondisk->inode.obj_type == HAMMER_OBJTYPE_DIRECTORY) {
421                         printf("mkinode (dir) %s\n", path2);
422                         mkdir(path2, 0777);
423                         dict->flags |= DICTF_MADEDIR;
424                 }
425                 free(path1);
426                 free(path2);
427                 break;
428         case HAMMER_RECTYPE_DATA:
429                 /*
430                  * File record data
431                  */
432                 if (leaf->base.obj_id == 0)
433                         break;
434                 if (VerboseOpt) {
435                         printf("inode %016jx:%05d data %016jx,%d\n",
436                                 (uintmax_t)leaf->base.obj_id,
437                                 pfs_id,
438                                 (uintmax_t)leaf->base.key - len,
439                                 len);
440                 }
441
442                 /*
443                  * Update the dictionary entry
444                  */
445                 if (dict->obj_type == 0)
446                         dict->obj_type = HAMMER_OBJTYPE_REGFILE;
447
448                 /*
449                  * If the parent directory has not been created we
450                  * have to create it (typically a PFS%05d)
451                  */
452                 if (dict->parent &&
453                     (dict->parent->flags & DICTF_MADEDIR) == 0) {
454                         dict->parent->flags |= DICTF_MADEDIR;
455                         path2 = recover_path(dict->parent);
456                         printf("mkdir %s\n", path2);
457                         mkdir(path2, 0777);
458                         free(path2);
459                         path2 = NULL;
460                 }
461
462                 /*
463                  * Create the file if necessary, report file creations
464                  */
465                 path1 = recover_path(dict);
466                 if (CachedPath && strcmp(CachedPath, path1) == 0) {
467                         fd = CachedFd;
468                 } else {
469                         fd = open(path1, O_CREAT|O_RDWR, 0666);
470                 }
471                 if (fd < 0) {
472                         printf("Unable to create %s: %s\n",
473                                 path1, strerror(errno));
474                         free(path1);
475                         break;
476                 }
477                 if ((dict->flags & DICTF_MADEFILE) == 0) {
478                         dict->flags |= DICTF_MADEFILE;
479                         printf("mkfile %s\n", path1);
480                 }
481
482                 /*
483                  * And write the record.  A HAMMER data block is aligned
484                  * and may contain trailing zeros after the file EOF.  The
485                  * inode record is required to get the actual file size.
486                  *
487                  * However, when the inode record is not available
488                  * we can do a sparse write and that will get it right
489                  * most of the time even if the inode record is never
490                  * found.
491                  */
492                 file_offset = (int64_t)leaf->base.key - len;
493                 lseek(fd, (off_t)file_offset, SEEK_SET);
494                 while (len) {
495                         if (dict->size == -1) {
496                                 for (zfill = chunk - 1; zfill >= 0; --zfill) {
497                                         if (((char *)ondisk)[zfill])
498                                                 break;
499                                 }
500                                 ++zfill;
501                         } else {
502                                 zfill = chunk;
503                         }
504
505                         if (zfill)
506                                 write(fd, ondisk, zfill);
507                         if (zfill < chunk)
508                                 lseek(fd, chunk - zfill, SEEK_CUR);
509
510                         len -= chunk;
511                         data_offset += chunk;
512                         file_offset += chunk;
513                         ondisk = get_buffer_data(data_offset, &data_buffer, 0);
514                         if (ondisk == NULL)
515                                 break;
516                         chunk = HAMMER_BUFSIZE -
517                                 ((int)data_offset & HAMMER_BUFMASK);
518                         if (chunk > len)
519                                 chunk = len;
520                 }
521                 if (dict->size >= 0 && file_offset > dict->size) {
522                         ftruncate(fd, dict->size);
523                         /* fchmod(fd, 0666); */
524                 }
525
526                 if (fd == CachedFd) {
527                         free(path1);
528                 } else if (CachedPath) {
529                         free(CachedPath);
530                         close(CachedFd);
531                         CachedPath = path1;
532                         CachedFd = fd;
533                 } else {
534                         CachedPath = path1;
535                         CachedFd = fd;
536                 }
537                 break;
538         case HAMMER_RECTYPE_DIRENTRY:
539                 nlen = len - HAMMER_ENTRY_NAME_OFF;
540                 if ((int)nlen < 0)      /* illegal length */
541                         break;
542                 if (ondisk->entry.obj_id == 0 ||
543                     ondisk->entry.obj_id == HAMMER_OBJID_ROOT)
544                         break;
545                 name = malloc(nlen + 1);
546                 bcopy(ondisk->entry.name, name, nlen);
547                 name[nlen] = 0;
548                 sanitize_string(name);
549
550                 if (VerboseOpt) {
551                         printf("dir %016jx:%05d entry %016jx \"%s\"\n",
552                                 (uintmax_t)leaf->base.obj_id,
553                                 pfs_id,
554                                 (uintmax_t)ondisk->entry.obj_id,
555                                 name);
556                 }
557
558                 /*
559                  * We can't deal with hardlinks so if the object already
560                  * has a name assigned to it we just keep using that name.
561                  */
562                 dict2 = get_dict(ondisk->entry.obj_id, pfs_id);
563                 path1 = recover_path(dict2);
564
565                 if (dict2->name == NULL)
566                         dict2->name = name;
567                 else
568                         free(name);
569
570                 /*
571                  * Attach dict2 to its directory (dict), create the
572                  * directory (dict) if necessary.  We must ensure
573                  * that the directory entry exists in order to be
574                  * able to properly rename() the file without creating
575                  * a namespace conflict.
576                  */
577                 if ((dict2->flags & DICTF_PARENT) == 0) {
578                         dict2->flags |= DICTF_PARENT;
579                         dict2->parent = dict;
580                         if ((dict->flags & DICTF_MADEDIR) == 0) {
581                                 dict->flags |= DICTF_MADEDIR;
582                                 path2 = recover_path(dict);
583                                 printf("mkdir %s\n", path2);
584                                 mkdir(path2, 0777);
585                                 free(path2);
586                                 path2 = NULL;
587                         }
588                 }
589                 path2 = recover_path(dict2);
590                 if (strcmp(path1, path2) != 0 && lstat(path1, &st) == 0) {
591                         printf("Rename (entry) %s -> %s\n", path1, path2);
592                         rename(path1, path2);
593                 }
594                 free(path1);
595                 free(path2);
596                 break;
597         default:
598                 /*
599                  * Ignore any other record types
600                  */
601                 break;
602         }
603 done:
604         rel_buffer(data_buffer);
605 }
606
607 #define RD_HSIZE        32768
608 #define RD_HMASK        (RD_HSIZE - 1)
609
610 struct recover_dict *RDHash[RD_HSIZE];
611
612 static
613 struct recover_dict *
614 get_dict(int64_t obj_id, uint16_t pfs_id)
615 {
616         struct recover_dict *dict;
617         int i;
618
619         if (obj_id == 0)
620                 return(NULL);
621
622         i = crc32(&obj_id, sizeof(obj_id)) & RD_HMASK;
623         for (dict = RDHash[i]; dict; dict = dict->next) {
624                 if (dict->obj_id == obj_id &&
625                     dict->pfs_id == pfs_id) {
626                         break;
627                 }
628         }
629         if (dict == NULL) {
630                 dict = malloc(sizeof(*dict));
631                 bzero(dict, sizeof(*dict));
632                 dict->obj_id = obj_id;
633                 dict->pfs_id = pfs_id;
634                 dict->next = RDHash[i];
635                 dict->size = -1;
636                 RDHash[i] = dict;
637
638                 /*
639                  * Always connect dangling dictionary entries to object 1
640                  * (the root of the PFS).
641                  *
642                  * DICTF_PARENT will not be set until we know what the
643                  * real parent directory object is.
644                  */
645                 if (dict->obj_id != HAMMER_OBJID_ROOT)
646                         dict->parent = get_dict(HAMMER_OBJID_ROOT, pfs_id);
647         }
648         return(dict);
649 }
650
/*
 * Working state shared by recover_path() and recover_path_helper().
 * A path is built in two passes over the same dictionary chain: the
 * PI_FIGURE pass adds up the bytes needed into len, and the PI_LOAD
 * pass writes the path into the buffer at base.
 */
651 struct path_info {
652         enum { PI_FIGURE, PI_LOAD } state;      /* which pass is running */
653         uint16_t pfs_id;        /* printed as "PFS%05d" for the root object */
654         char *base;             /* start of the allocated path buffer */
655         char *next;             /* write position inside the buffer */
656         int len;                /* buffer size computed by the PI_FIGURE pass */
657 };
658
659 static void recover_path_helper(struct recover_dict *, struct path_info *);
660
661 static
662 char *
663 recover_path(struct recover_dict *dict)
664 {
665         struct path_info info;
666
667         /* Find info.len first */
668         bzero(&info, sizeof(info));
669         info.state = PI_FIGURE;
670         recover_path_helper(dict, &info);
671
672         /* Fill in the path */
673         info.pfs_id = dict->pfs_id;
674         info.base = malloc(info.len);
675         info.next = info.base;
676         info.state = PI_LOAD;
677         recover_path_helper(dict, &info);
678
679         /* Return the path */
680         return(info.base);
681 }
682
683 #define STRLEN_OBJID    22      /* "obj_0x%016jx" */
684 #define STRLEN_PFSID    8       /* "PFS%05d" */
685
686 static
687 void
688 recover_path_helper(struct recover_dict *dict, struct path_info *info)
689 {
690         /*
691          * Calculate path element length
692          */
693         dict->flags |= DICTF_TRAVERSED;
694
695         switch(info->state) {
696         case PI_FIGURE:
697                 if (dict->obj_id == HAMMER_OBJID_ROOT)
698                         info->len += STRLEN_PFSID;
699                 else if (dict->name)
700                         info->len += strlen(dict->name);
701                 else
702                         info->len += STRLEN_OBJID;
703                 ++info->len;
704
705                 if (dict->parent &&
706                     (dict->parent->flags & DICTF_TRAVERSED) == 0) {
707                         recover_path_helper(dict->parent, info);
708                 } else {
709                         info->len += strlen(TargetDir) + 1;
710                 }
711                 break;
712         case PI_LOAD:
713                 if (dict->parent &&
714                     (dict->parent->flags & DICTF_TRAVERSED) == 0) {
715                         recover_path_helper(dict->parent, info);
716                 } else {
717                         strcpy(info->next, TargetDir);
718                         info->next += strlen(info->next);
719                 }
720
721                 *info->next++ = '/';
722                 if (dict->obj_id == HAMMER_OBJID_ROOT) {
723                         snprintf(info->next, STRLEN_PFSID + 1,
724                                 "PFS%05d", info->pfs_id);
725                 } else if (dict->name) {
726                         strcpy(info->next, dict->name);
727                 } else {
728                         snprintf(info->next, STRLEN_OBJID + 1,
729                                 "obj_0x%016jx", (uintmax_t)dict->obj_id);
730                 }
731                 info->next += strlen(info->next);
732                 break;
733         }
734         dict->flags &= ~DICTF_TRAVERSED;
735 }
736
/*
 * Replace every non-printable character of the NUL-terminated string
 * str with 'x', in place.
 *
 * The character is converted to unsigned char before it is handed to
 * isprint(): passing a negative plain char (any byte >= 0x80 where char
 * is signed) is undefined behavior.
 */
static
void
sanitize_string(char *str)
{
        while (*str) {
                if (!isprint((unsigned char)*str))
                        *str = 'x';
                ++str;
        }
}
747
748 static
749 hammer_off_t
750 scan_raw_limit(void)
751 {
752         struct volume_info *volume;
753         hammer_blockmap_t rootmap;
754         hammer_blockmap_layer1_t layer1;
755         hammer_blockmap_layer2_t layer2;
756         struct buffer_info *buffer1 = NULL;
757         struct buffer_info *buffer2 = NULL;
758         hammer_off_t layer1_offset;
759         hammer_off_t layer2_offset;
760         hammer_off_t phys_offset;
761         hammer_off_t block_offset;
762         hammer_off_t offset = 0;
763         int zone = HAMMER_ZONE_FREEMAP_INDEX;
764
765         volume = get_root_volume();
766         rootmap = &volume->ondisk->vol0_blockmap[zone];
767         assert(rootmap->phys_offset != 0);
768
769         for (phys_offset = HAMMER_ZONE_ENCODE(zone, 0);
770              phys_offset < HAMMER_ZONE_ENCODE(zone, HAMMER_OFF_LONG_MASK);
771              phys_offset += HAMMER_BLOCKMAP_LAYER2) {
772                 /*
773                  * Dive layer 1.
774                  */
775                 layer1_offset = rootmap->phys_offset +
776                                 HAMMER_BLOCKMAP_LAYER1_OFFSET(phys_offset);
777                 layer1 = get_buffer_data(layer1_offset, &buffer1, 0);
778
779                 if (!hammer_crc_test_layer1(HammerVersion, layer1)) {
780                         offset = 0; /* failed */
781                         goto end;
782                 }
783                 if (layer1->phys_offset == HAMMER_BLOCKMAP_UNAVAIL)
784                         continue;
785
786                 for (block_offset = 0;
787                      block_offset < HAMMER_BLOCKMAP_LAYER2;
788                      block_offset += HAMMER_BIGBLOCK_SIZE) {
789                         /*
790                          * Dive layer 2, each entry represents a big-block.
791                          */
792                         layer2_offset = layer1->phys_offset +
793                                         HAMMER_BLOCKMAP_LAYER2_OFFSET(block_offset);
794                         layer2 = get_buffer_data(layer2_offset, &buffer2, 0);
795
796                         if (!hammer_crc_test_layer2(HammerVersion, layer2)) {
797                                 offset = 0; /* failed */
798                                 goto end;
799                         }
800                         if (layer2->zone == HAMMER_ZONE_UNAVAIL_INDEX) {
801                                 break;
802                         } else if (layer2->zone && layer2->zone != zone) {
803                                 offset = phys_offset + block_offset;
804                         }
805                 }
806         }
807 end:
808         rel_buffer(buffer1);
809         rel_buffer(buffer2);
810
811         return(hammer_xlate_to_zone2(offset));
812 }
813
814 static
815 void
816 scan_bigblocks(int target_zone)
817 {
818         struct volume_info *volume;
819         hammer_blockmap_t rootmap;
820         hammer_blockmap_layer1_t layer1;
821         hammer_blockmap_layer2_t layer2;
822         struct buffer_info *buffer1 = NULL;
823         struct buffer_info *buffer2 = NULL;
824         hammer_off_t layer1_offset;
825         hammer_off_t layer2_offset;
826         hammer_off_t phys_offset;
827         hammer_off_t block_offset;
828         hammer_off_t offset = 0;
829         int zone = HAMMER_ZONE_FREEMAP_INDEX;
830
831         volume = get_root_volume();
832         rootmap = &volume->ondisk->vol0_blockmap[zone];
833         assert(rootmap->phys_offset != 0);
834
835         for (phys_offset = HAMMER_ZONE_ENCODE(zone, 0);
836              phys_offset < HAMMER_ZONE_ENCODE(zone, HAMMER_OFF_LONG_MASK);
837              phys_offset += HAMMER_BLOCKMAP_LAYER2) {
838                 /*
839                  * Dive layer 1.
840                  */
841                 layer1_offset = rootmap->phys_offset +
842                                 HAMMER_BLOCKMAP_LAYER1_OFFSET(phys_offset);
843                 layer1 = get_buffer_data(layer1_offset, &buffer1, 0);
844
845                 /*
846                 if (!hammer_crc_test_layer1(HammerVersion, layer1)) {
847                 }
848                 */
849                 if (layer1->phys_offset == HAMMER_BLOCKMAP_UNAVAIL)
850                         continue;
851
852                 for (block_offset = 0;
853                      block_offset < HAMMER_BLOCKMAP_LAYER2;
854                      block_offset += HAMMER_BIGBLOCK_SIZE) {
855                         offset = phys_offset + block_offset;
856                         /*
857                          * Dive layer 2, each entry represents a big-block.
858                          */
859                         layer2_offset = layer1->phys_offset +
860                                         HAMMER_BLOCKMAP_LAYER2_OFFSET(block_offset);
861                         layer2 = get_buffer_data(layer2_offset, &buffer2, 0);
862
863                         /*
864                         if (!hammer_crc_test_layer2(HammerVersion, layer2)) {
865                         }
866                         */
867                         if (layer2->zone == target_zone) {
868                                 add_bigblock_entry(offset, layer1, layer2);
869                         } else if (layer2->zone == HAMMER_ZONE_UNAVAIL_INDEX) {
870                                 break;
871                         }
872                 }
873         }
874         rel_buffer(buffer1);
875         rel_buffer(buffer2);
876 }
877
878 static
879 void
880 free_bigblocks(void)
881 {
882         bigblock_t b;
883
884         while ((b = RB_ROOT(&ZoneTree)) != NULL) {
885                 RB_REMOVE(bigblock_rb_tree, &ZoneTree, b);
886                 free(b);
887         }
888         assert(RB_EMPTY(&ZoneTree));
889 }
890
891 static
892 void
893 add_bigblock_entry(hammer_off_t offset,
894         hammer_blockmap_layer1_t layer1, hammer_blockmap_layer2_t layer2)
895 {
896         bigblock_t b;
897
898         b = calloc(1, sizeof(*b));
899         b->phys_offset = hammer_xlate_to_zone2(offset);
900         assert((b->phys_offset & HAMMER_BIGBLOCK_MASK64) == 0);
901         bcopy(layer1, &b->layer1, sizeof(*layer1));
902         bcopy(layer2, &b->layer2, sizeof(*layer2));
903
904         RB_INSERT(bigblock_rb_tree, &ZoneTree, b);
905 }
906
907 static
908 bigblock_t
909 get_bigblock_entry(hammer_off_t offset)
910 {
911         bigblock_t b;
912
913         offset = hammer_xlate_to_zone2(offset);
914         offset &= ~HAMMER_BIGBLOCK_MASK64;
915
916         b = RB_LOOKUP(bigblock_rb_tree, &ZoneTree, offset);
917         if (b)
918                 return(b);
919         return(NULL);
920 }