0a19b5a5fa5226125de6b09bf32ae99ef47db351
[dragonfly.git] / sbin / hammer / cmd_recover.c
1 /*
2  * Copyright (c) 2010 The DragonFly Project.  All rights reserved.
3  *
4  * This code is derived from software contributed to The DragonFly Project
5  * by Matthew Dillon <dillon@backplane.com>
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions
9  * are met:
10  *
11  * 1. Redistributions of source code must retain the above copyright
12  *    notice, this list of conditions and the following disclaimer.
13  * 2. Redistributions in binary form must reproduce the above copyright
14  *    notice, this list of conditions and the following disclaimer in
15  *    the documentation and/or other materials provided with the
16  *    distribution.
17  * 3. Neither the name of The DragonFly Project nor the names of its
18  *    contributors may be used to endorse or promote products derived
19  *    from this software without specific, prior written permission.
20  *
21  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
22  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
23  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
24  * FOR A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE
25  * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
26  * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING,
27  * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
28  * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
29  * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
30  * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
31  * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
32  * SUCH DAMAGE.
33  */
34
35 #include "hammer.h"
36
/*
 * Dictionary entry for one object seen during the scan.  Entries are
 * looked up by (obj_id, pfs_id) and chained through 'next' within a
 * hash bucket.
 */
struct recover_dict {
        struct recover_dict *next;      /* next entry in the same hash bucket */
        struct recover_dict *parent;    /* entry of the containing directory */
        int64_t obj_id;                 /* object id */
        uint8_t obj_type;               /* object type, 0 while unknown */
        uint8_t flags;                  /* DICTF_* bits */
        uint16_t pfs_id;                /* PFS id the object was found in */
        int64_t size;                   /* size from the inode, -1 while unknown */
        char    *name;                  /* name from a directory entry, or NULL */
};
47
/*
 * Bits for recover_dict.flags.
 */
#define DICTF_MADEDIR   0x01    /* mkdir has been issued for this entry */
#define DICTF_MADEFILE  0x02    /* file creation has been reported */
#define DICTF_PARENT    0x04    /* parent attached for real */
#define DICTF_TRAVERSED 0x80    /* entry is on the current path walk */
52
53 static void recover_top(char *ptr);
54 static void recover_elm(hammer_btree_leaf_elm_t leaf);
55 static struct recover_dict *get_dict(int64_t obj_id, uint16_t pfs_id);
56 static char *recover_path(struct recover_dict *dict);
57 static void sanitize_string(char *str);
58
59 static const char *TargetDir;
60 static int CachedFd = -1;
61 static char *CachedPath;
62
63 void
64 hammer_cmd_recover(const char *target_dir)
65 {
66         struct buffer_info *data_buffer;
67         struct volume_info *volume;
68         hammer_off_t off;
69         hammer_off_t off_end;
70         char *ptr;
71
72         AssertOnFailure = 0;
73         TargetDir = target_dir;
74
75         printf("Running raw scan of HAMMER image, recovering to %s\n",
76                 TargetDir);
77         mkdir(TargetDir, 0777);
78
79         data_buffer = NULL;
80         TAILQ_FOREACH(volume, &VolList, entry) {
81                 off = HAMMER_ENCODE_RAW_BUFFER(volume->vol_no, 0);
82                 off_end = off + HAMMER_VOL_BUF_SIZE(volume->ondisk);
83                 while (off < off_end) {
84                         ptr = get_buffer_data(off, &data_buffer, 0);
85                         if (ptr) {
86                                 recover_top(ptr);
87                                 off += HAMMER_BUFSIZE;
88                         }
89                 }
90         }
91         rel_buffer(data_buffer);
92
93         if (CachedPath) {
94                 free(CachedPath);
95                 close(CachedFd);
96                 CachedPath = NULL;
97                 CachedFd = -1;
98         }
99
100         AssertOnFailure = 1;
101 }
102
103 /*
104  * Top level recovery processor.  Assume the data is a B-Tree node.
105  * If the CRC is good we attempt to process the node, building the
106  * object space and creating the dictionary as we go.
107  */
108 static void
109 recover_top(char *ptr)
110 {
111         struct hammer_node_ondisk *node;
112         hammer_btree_elm_t elm;
113         int maxcount;
114         int i;
115
116         for (node = (void *)ptr; (char *)node < ptr + HAMMER_BUFSIZE; ++node) {
117                 if (crc32(&node->crc + 1, HAMMER_BTREE_CRCSIZE) ==
118                     node->crc &&
119                     node->type == HAMMER_BTREE_TYPE_LEAF) {
120                         /*
121                          * Scan elements
122                          */
123                         maxcount = HAMMER_BTREE_LEAF_ELMS;
124                         for (i = 0; i < node->count && i < maxcount; ++i) {
125                                 elm = &node->elms[i];
126                                 if (elm->base.btype != 'R')
127                                         continue;
128                                 recover_elm(&elm->leaf);
129                         }
130                 }
131         }
132 }
133
134 static void
135 recover_elm(hammer_btree_leaf_elm_t leaf)
136 {
137         struct buffer_info *data_buffer = NULL;
138         struct recover_dict *dict;
139         struct recover_dict *dict2;
140         hammer_data_ondisk_t ondisk;
141         hammer_off_t data_offset;
142         struct stat st;
143         int chunk;
144         int len;
145         int zfill;
146         int64_t file_offset;
147         uint16_t pfs_id;
148         size_t nlen;
149         int fd;
150         char *name;
151         char *path1;
152         char *path2;
153
154         /*
155          * Ignore deleted records
156          */
157         if (leaf->delete_ts)
158                 return;
159         if ((data_offset = leaf->data_offset) != 0)
160                 ondisk = get_buffer_data(data_offset, &data_buffer, 0);
161         else
162                 ondisk = NULL;
163         if (ondisk == NULL)
164                 goto done;
165
166         len = leaf->data_len;
167         chunk = HAMMER_BUFSIZE - ((int)data_offset & HAMMER_BUFMASK);
168         if (chunk > len)
169                 chunk = len;
170
171         if (len < 0 || len > HAMMER_XBUFSIZE || len > chunk)
172                 goto done;
173
174         pfs_id = lo_to_pfs(leaf->base.localization);
175
176         dict = get_dict(leaf->base.obj_id, pfs_id);
177
178         switch(leaf->base.rec_type) {
179         case HAMMER_RECTYPE_INODE:
180                 /*
181                  * We found an inode which also tells us where the file
182                  * or directory is in the directory hierarchy.
183                  */
184                 if (VerboseOpt) {
185                         printf("file %016jx:%05d inode found\n",
186                                 (uintmax_t)leaf->base.obj_id, pfs_id);
187                 }
188                 path1 = recover_path(dict);
189
190                 /*
191                  * Attach the inode to its parent.  This isn't strictly
192                  * necessary because the information is also in the
193                  * directory entries, but if we do not find the directory
194                  * entry this ensures that the files will still be
195                  * reasonably well organized in their proper directories.
196                  */
197                 if ((dict->flags & DICTF_PARENT) == 0 &&
198                     dict->obj_id != HAMMER_OBJID_ROOT &&
199                     ondisk->inode.parent_obj_id != 0) {
200                         dict->flags |= DICTF_PARENT;
201                         dict->parent = get_dict(ondisk->inode.parent_obj_id,
202                                                 pfs_id);
203                         if (dict->parent &&
204                             (dict->parent->flags & DICTF_MADEDIR) == 0) {
205                                 dict->parent->flags |= DICTF_MADEDIR;
206                                 path2 = recover_path(dict->parent);
207                                 printf("mkdir %s\n", path2);
208                                 mkdir(path2, 0777);
209                                 free(path2);
210                                 path2 = NULL;
211                         }
212                 }
213                 if (dict->obj_type == 0)
214                         dict->obj_type = ondisk->inode.obj_type;
215                 dict->size = ondisk->inode.size;
216                 path2 = recover_path(dict);
217
218                 if (lstat(path1, &st) == 0) {
219                         if (ondisk->inode.obj_type == HAMMER_OBJTYPE_REGFILE) {
220                                 truncate(path1, dict->size);
221                                 /* chmod(path1, 0666); */
222                         }
223                         if (strcmp(path1, path2)) {
224                                 printf("Rename %s -> %s\n", path1, path2);
225                                 rename(path1, path2);
226                         }
227                 } else if (ondisk->inode.obj_type == HAMMER_OBJTYPE_REGFILE) {
228                         printf("mkinode (file) %s\n", path2);
229                         fd = open(path2, O_RDWR|O_CREAT, 0666);
230                         if (fd > 0)
231                                 close(fd);
232                 } else if (ondisk->inode.obj_type == HAMMER_OBJTYPE_DIRECTORY) {
233                         printf("mkinode (dir) %s\n", path2);
234                         mkdir(path2, 0777);
235                         dict->flags |= DICTF_MADEDIR;
236                 }
237                 free(path1);
238                 free(path2);
239                 break;
240         case HAMMER_RECTYPE_DATA:
241                 /*
242                  * File record data
243                  */
244                 if (leaf->base.obj_id == 0)
245                         break;
246                 if (VerboseOpt) {
247                         printf("file %016jx:%05d data %016jx,%d\n",
248                                 (uintmax_t)leaf->base.obj_id,
249                                 pfs_id,
250                                 (uintmax_t)leaf->base.key - len,
251                                 len);
252                 }
253
254                 /*
255                  * Update the dictionary entry
256                  */
257                 if (dict->obj_type == 0)
258                         dict->obj_type = HAMMER_OBJTYPE_REGFILE;
259
260                 /*
261                  * If the parent directory has not been created we
262                  * have to create it (typically a PFS%05d)
263                  */
264                 if (dict->parent &&
265                     (dict->parent->flags & DICTF_MADEDIR) == 0) {
266                         dict->parent->flags |= DICTF_MADEDIR;
267                         path2 = recover_path(dict->parent);
268                         printf("mkdir %s\n", path2);
269                         mkdir(path2, 0777);
270                         free(path2);
271                         path2 = NULL;
272                 }
273
274                 /*
275                  * Create the file if necessary, report file creations
276                  */
277                 path1 = recover_path(dict);
278                 if (CachedPath && strcmp(CachedPath, path1) == 0) {
279                         fd = CachedFd;
280                 } else {
281                         fd = open(path1, O_CREAT|O_RDWR, 0666);
282                 }
283                 if (fd < 0) {
284                         printf("Unable to create %s: %s\n",
285                                 path1, strerror(errno));
286                         free(path1);
287                         break;
288                 }
289                 if ((dict->flags & DICTF_MADEFILE) == 0) {
290                         dict->flags |= DICTF_MADEFILE;
291                         printf("mkfile %s\n", path1);
292                 }
293
294                 /*
295                  * And write the record.  A HAMMER data block is aligned
296                  * and may contain trailing zeros after the file EOF.  The
297                  * inode record is required to get the actual file size.
298                  *
299                  * However, when the inode record is not available
300                  * we can do a sparse write and that will get it right
301                  * most of the time even if the inode record is never
302                  * found.
303                  */
304                 file_offset = (int64_t)leaf->base.key - len;
305                 lseek(fd, (off_t)file_offset, SEEK_SET);
306                 while (len) {
307                         if (dict->size == -1) {
308                                 for (zfill = chunk - 1; zfill >= 0; --zfill) {
309                                         if (((char *)ondisk)[zfill])
310                                                 break;
311                                 }
312                                 ++zfill;
313                         } else {
314                                 zfill = chunk;
315                         }
316
317                         if (zfill)
318                                 write(fd, ondisk, zfill);
319                         if (zfill < chunk)
320                                 lseek(fd, chunk - zfill, SEEK_CUR);
321
322                         len -= chunk;
323                         data_offset += chunk;
324                         file_offset += chunk;
325                         ondisk = get_buffer_data(data_offset, &data_buffer, 0);
326                         if (ondisk == NULL)
327                                 break;
328                         chunk = HAMMER_BUFSIZE -
329                                 ((int)data_offset & HAMMER_BUFMASK);
330                         if (chunk > len)
331                                 chunk = len;
332                 }
333                 if (dict->size >= 0 && file_offset > dict->size) {
334                         ftruncate(fd, dict->size);
335                         /* fchmod(fd, 0666); */
336                 }
337
338                 if (fd == CachedFd) {
339                         free(path1);
340                 } else if (CachedPath) {
341                         free(CachedPath);
342                         close(CachedFd);
343                         CachedPath = path1;
344                         CachedFd = fd;
345                 } else {
346                         CachedPath = path1;
347                         CachedFd = fd;
348                 }
349                 break;
350         case HAMMER_RECTYPE_DIRENTRY:
351                 nlen = len - offsetof(struct hammer_direntry_data, name[0]);
352                 if ((int)nlen < 0)      /* illegal length */
353                         break;
354                 if (ondisk->entry.obj_id == 0 ||
355                     ondisk->entry.obj_id == HAMMER_OBJID_ROOT)
356                         break;
357                 name = malloc(nlen + 1);
358                 bcopy(ondisk->entry.name, name, nlen);
359                 name[nlen] = 0;
360                 sanitize_string(name);
361
362                 /*
363                  * We can't deal with hardlinks so if the object already
364                  * has a name assigned to it we just keep using that name.
365                  */
366                 dict2 = get_dict(ondisk->entry.obj_id, pfs_id);
367                 path1 = recover_path(dict2);
368
369                 if (dict2->name == NULL)
370                         dict2->name = name;
371                 else
372                         free(name);
373
374                 /*
375                  * Attach dict2 to its directory (dict), create the
376                  * directory (dict) if necessary.  We must ensure
377                  * that the directory entry exists in order to be
378                  * able to properly rename() the file without creating
379                  * a namespace conflict.
380                  */
381                 if ((dict2->flags & DICTF_PARENT) == 0) {
382                         dict2->flags |= DICTF_PARENT;
383                         dict2->parent = dict;
384                         if ((dict->flags & DICTF_MADEDIR) == 0) {
385                                 dict->flags |= DICTF_MADEDIR;
386                                 path2 = recover_path(dict);
387                                 printf("mkdir %s\n", path2);
388                                 mkdir(path2, 0777);
389                                 free(path2);
390                                 path2 = NULL;
391                         }
392                 }
393                 path2 = recover_path(dict2);
394                 if (strcmp(path1, path2) != 0 && lstat(path1, &st) == 0) {
395                         printf("Rename %s -> %s\n", path1, path2);
396                         rename(path1, path2);
397                 }
398                 free(path1);
399                 free(path2);
400
401                 printf("dir  %016jx:%05d entry %016jx \"%s\"\n",
402                         (uintmax_t)leaf->base.obj_id,
403                         pfs_id,
404                         (uintmax_t)ondisk->entry.obj_id,
405                         name);
406                 break;
407         default:
408                 /*
409                  * Ignore any other record types
410                  */
411                 break;
412         }
413 done:
414         rel_buffer(data_buffer);
415 }
416
/*
 * Hash table of dictionary entries.  RD_HSIZE is a power of 2, which
 * lets RD_HMASK reduce a hash value to a bucket index.
 */
#define RD_HSIZE        32768
#define RD_HMASK        (RD_HSIZE - 1)

struct recover_dict *RDHash[RD_HSIZE];  /* bucket heads, chained via ->next */
421
422 static
423 struct recover_dict *
424 get_dict(int64_t obj_id, uint16_t pfs_id)
425 {
426         struct recover_dict *dict;
427         int i;
428
429         if (obj_id == 0)
430                 return(NULL);
431
432         i = crc32(&obj_id, sizeof(obj_id)) & RD_HMASK;
433         for (dict = RDHash[i]; dict; dict = dict->next) {
434                 if (dict->obj_id == obj_id &&
435                     dict->pfs_id == pfs_id) {
436                         break;
437                 }
438         }
439         if (dict == NULL) {
440                 dict = malloc(sizeof(*dict));
441                 bzero(dict, sizeof(*dict));
442                 dict->obj_id = obj_id;
443                 dict->pfs_id = pfs_id;
444                 dict->next = RDHash[i];
445                 dict->size = -1;
446                 RDHash[i] = dict;
447
448                 /*
449                  * Always connect dangling dictionary entries to object 1
450                  * (the root of the PFS).
451                  *
452                  * DICTF_PARENT will not be set until we know what the
453                  * real parent directory object is.
454                  */
455                 if (dict->obj_id != HAMMER_OBJID_ROOT)
456                         dict->parent = get_dict(1, pfs_id);
457         }
458         return(dict);
459 }
460
/*
 * Working state for building a path.  The same walk is done twice:
 * once in PI_FIGURE state to add up the required length and once in
 * PI_LOAD state to write the path into the buffer.
 */
struct path_info {
        enum { PI_FIGURE, PI_LOAD } state;      /* which pass is running */
        uint16_t pfs_id;        /* PFS id printed in the PFS%05d element */
        char *base;             /* start of the path buffer */
        char *next;             /* current write position in the buffer */
        int len;                /* length accumulated by the PI_FIGURE pass */
};
468
469 static void recover_path_helper(struct recover_dict *, struct path_info *);
470
471 static
472 char *
473 recover_path(struct recover_dict *dict)
474 {
475         struct path_info info;
476
477         bzero(&info, sizeof(info));
478         info.pfs_id = dict->pfs_id;
479         info.state = PI_FIGURE;
480         recover_path_helper(dict, &info);
481         info.base = malloc(info.len);
482         info.next = info.base;
483         info.state = PI_LOAD;
484         recover_path_helper(dict, &info);
485
486         return(info.base);
487 }
488
489 static
490 void
491 recover_path_helper(struct recover_dict *dict, struct path_info *info)
492 {
493         /*
494          * Calculate path element length
495          */
496         dict->flags |= DICTF_TRAVERSED;
497
498         switch(info->state) {
499         case PI_FIGURE:
500                 if (dict->obj_id == HAMMER_OBJID_ROOT)
501                         info->len += 8;
502                 else if (dict->name)
503                         info->len += strlen(dict->name);
504                 else
505                         info->len += 6 + 16;
506                 ++info->len;
507
508                 if (dict->parent &&
509                     (dict->parent->flags & DICTF_TRAVERSED) == 0) {
510                         recover_path_helper(dict->parent, info);
511                 } else {
512                         info->len += strlen(TargetDir) + 1;
513                 }
514                 break;
515         case PI_LOAD:
516                 if (dict->parent &&
517                     (dict->parent->flags & DICTF_TRAVERSED) == 0) {
518                         recover_path_helper(dict->parent, info);
519                 } else {
520                         strcpy(info->next, TargetDir);
521                         info->next += strlen(info->next);
522                 }
523
524                 *info->next++ = '/';
525                 if (dict->obj_id == HAMMER_OBJID_ROOT) {
526                         snprintf(info->next, 8+1, "PFS%05d", info->pfs_id);
527                 } else if (dict->name) {
528                         strcpy(info->next, dict->name);
529                 } else {
530                         snprintf(info->next, 6+16+1, "obj_0x%016jx",
531                                 (uintmax_t)dict->obj_id);
532                 }
533                 info->next += strlen(info->next);
534                 break;
535         }
536         dict->flags &= ~DICTF_TRAVERSED;
537 }
538
/*
 * Replace every non-printable character of a NUL terminated string with
 * 'x', in place.
 *
 * The character is converted to unsigned char before it is classified:
 * passing a negative plain char value to isprint() is undefined.
 */
static
void
sanitize_string(char *str)
{
        while (*str) {
                if (!isprint((unsigned char)*str))
                        *str = 'x';
                ++str;
        }
}