sbin/hammer: Minor fix for hammer(8) manpage
[dragonfly.git] / sbin / hammer / cmd_recover.c
1 /*
2  * Copyright (c) 2010 The DragonFly Project.  All rights reserved.
3  *
4  * This code is derived from software contributed to The DragonFly Project
5  * by Matthew Dillon <dillon@backplane.com>
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions
9  * are met:
10  *
11  * 1. Redistributions of source code must retain the above copyright
12  *    notice, this list of conditions and the following disclaimer.
13  * 2. Redistributions in binary form must reproduce the above copyright
14  *    notice, this list of conditions and the following disclaimer in
15  *    the documentation and/or other materials provided with the
16  *    distribution.
17  * 3. Neither the name of The DragonFly Project nor the names of its
18  *    contributors may be used to endorse or promote products derived
19  *    from this software without specific, prior written permission.
20  *
21  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
22  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
23  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
24  * FOR A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE
25  * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
26  * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING,
27  * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
28  * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
29  * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
30  * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
31  * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
32  * SUCH DAMAGE.
33  */
34
35 #include "hammer.h"
36
/*
 * One dictionary entry per (obj_id, pfs_id) pair seen during the raw scan.
 * Entries are created on demand by get_dict() and chained into RDHash.
 */
struct recover_dict {
	struct recover_dict	*next;		/* next entry in the same hash chain */
	struct recover_dict	*parent;	/* containing directory (PFS root until known) */
	int64_t			obj_id;		/* object id this entry describes */
	uint8_t			obj_type;	/* HAMMER_OBJTYPE_*, 0 while unknown */
	uint8_t			flags;		/* DICTF_* bits */
	uint16_t		pfs_id;		/* PFS the object was found in */
	int64_t			size;		/* file size from the inode, -1 until seen */
	char			*name;		/* malloc'd name from a directory entry, or NULL */
};
47
/*
 * Bits stored in recover_dict.flags.
 */
#define DICTF_MADEDIR	0x01	/* mkdir() already issued for this object */
#define DICTF_MADEFILE	0x02	/* "mkfile" already reported for this object */
#define DICTF_PARENT	0x04	/* parent attached for real */
#define DICTF_TRAVERSED	0x80	/* set while a path walk is inside this entry */
52
53 static void recover_top(char *ptr);
54 static void recover_elm(hammer_btree_leaf_elm_t leaf);
55 static struct recover_dict *get_dict(int64_t obj_id, uint16_t pfs_id);
56 static char *recover_path(struct recover_dict *dict);
57 static void sanitize_string(char *str);
58
/*
 * Recovery state shared by the helpers in this file.
 */
static const char	*TargetDir;	/* directory recovered objects are written under */
static int		CachedFd = -1;	/* descriptor kept open for the last data file, -1 if none */
static char		*CachedPath;	/* malloc'd path that CachedFd refers to, NULL if none */
62
63 void
64 hammer_cmd_recover(const char *target_dir)
65 {
66         struct buffer_info *data_buffer;
67         struct volume_info *scan;
68         struct volume_info *volume;
69         hammer_off_t off;
70         hammer_off_t off_end;
71         char *ptr;
72
73         AssertOnFailure = 0;
74         TargetDir = target_dir;
75
76         printf("Running raw scan of HAMMER image, recovering to %s\n",
77                 TargetDir);
78         mkdir(TargetDir, 0777);
79
80         data_buffer = NULL;
81         TAILQ_FOREACH(scan, &VolList, entry) {
82                 volume = get_volume(scan->vol_no);
83
84                 off = HAMMER_ENCODE_RAW_BUFFER(volume->vol_no, 0);
85                 off_end = off + (volume->ondisk->vol_buf_end - volume->ondisk->vol_buf_beg);
86                 while (off < off_end) {
87                         ptr = get_buffer_data(off, &data_buffer, 0);
88                         if (ptr) {
89                                 recover_top(ptr);
90                                 off += HAMMER_BUFSIZE;
91                         }
92                 }
93         }
94         rel_buffer(data_buffer);
95
96         if (CachedPath) {
97                 free(CachedPath);
98                 close(CachedFd);
99                 CachedPath = NULL;
100                 CachedFd = -1;
101         }
102
103         AssertOnFailure = 1;
104 }
105
106 /*
107  * Top level recovery processor.  Assume the data is a B-Tree node.
108  * If the CRC is good we attempt to process the node, building the
109  * object space and creating the dictionary as we go.
110  */
111 static void
112 recover_top(char *ptr)
113 {
114         struct hammer_node_ondisk *node;
115         hammer_btree_elm_t elm;
116         int maxcount;
117         int i;
118
119         for (node = (void *)ptr; (char *)node < ptr + HAMMER_BUFSIZE; ++node) {
120                 if (crc32(&node->crc + 1, HAMMER_BTREE_CRCSIZE) ==
121                     node->crc &&
122                     node->type == HAMMER_BTREE_TYPE_LEAF) {
123                         /*
124                          * Scan elements
125                          */
126                         maxcount = HAMMER_BTREE_LEAF_ELMS;
127                         for (i = 0; i < node->count && i < maxcount; ++i) {
128                                 elm = &node->elms[i];
129                                 if (elm->base.btype != 'R')
130                                         continue;
131                                 recover_elm(&elm->leaf);
132                         }
133                 }
134         }
135 }
136
137 static void
138 recover_elm(hammer_btree_leaf_elm_t leaf)
139 {
140         struct buffer_info *data_buffer = NULL;
141         struct recover_dict *dict;
142         struct recover_dict *dict2;
143         hammer_data_ondisk_t ondisk;
144         hammer_off_t data_offset;
145         struct stat st;
146         int chunk;
147         int len;
148         int zfill;
149         int64_t file_offset;
150         uint16_t pfs_id;
151         size_t nlen;
152         int fd;
153         char *name;
154         char *path1;
155         char *path2;
156
157         /*
158          * Ignore deleted records
159          */
160         if (leaf->delete_ts)
161                 return;
162         if ((data_offset = leaf->data_offset) != 0)
163                 ondisk = get_buffer_data(data_offset, &data_buffer, 0);
164         else
165                 ondisk = NULL;
166         if (ondisk == NULL)
167                 goto done;
168
169         len = leaf->data_len;
170         chunk = HAMMER_BUFSIZE - ((int)data_offset & HAMMER_BUFMASK);
171         if (chunk > len)
172                 chunk = len;
173
174         if (len < 0 || len > HAMMER_XBUFSIZE || len > chunk)
175                 goto done;
176
177         pfs_id = lo_to_pfs(leaf->base.localization);
178
179         dict = get_dict(leaf->base.obj_id, pfs_id);
180
181         switch(leaf->base.rec_type) {
182         case HAMMER_RECTYPE_INODE:
183                 /*
184                  * We found an inode which also tells us where the file
185                  * or directory is in the directory hierarchy.
186                  */
187                 if (VerboseOpt) {
188                         printf("file %016jx:%05d inode found\n",
189                                 (uintmax_t)leaf->base.obj_id, pfs_id);
190                 }
191                 path1 = recover_path(dict);
192
193                 /*
194                  * Attach the inode to its parent.  This isn't strictly
195                  * necessary because the information is also in the
196                  * directory entries, but if we do not find the directory
197                  * entry this ensures that the files will still be
198                  * reasonably well organized in their proper directories.
199                  */
200                 if ((dict->flags & DICTF_PARENT) == 0 &&
201                     dict->obj_id != HAMMER_OBJID_ROOT &&
202                     ondisk->inode.parent_obj_id != 0) {
203                         dict->flags |= DICTF_PARENT;
204                         dict->parent = get_dict(ondisk->inode.parent_obj_id,
205                                                 pfs_id);
206                         if (dict->parent &&
207                             (dict->parent->flags & DICTF_MADEDIR) == 0) {
208                                 dict->parent->flags |= DICTF_MADEDIR;
209                                 path2 = recover_path(dict->parent);
210                                 printf("mkdir %s\n", path2);
211                                 mkdir(path2, 0777);
212                                 free(path2);
213                                 path2 = NULL;
214                         }
215                 }
216                 if (dict->obj_type == 0)
217                         dict->obj_type = ondisk->inode.obj_type;
218                 dict->size = ondisk->inode.size;
219                 path2 = recover_path(dict);
220
221                 if (lstat(path1, &st) == 0) {
222                         if (ondisk->inode.obj_type == HAMMER_OBJTYPE_REGFILE) {
223                                 truncate(path1, dict->size);
224                                 /* chmod(path1, 0666); */
225                         }
226                         if (strcmp(path1, path2)) {
227                                 printf("Rename %s -> %s\n", path1, path2);
228                                 rename(path1, path2);
229                         }
230                 } else if (ondisk->inode.obj_type == HAMMER_OBJTYPE_REGFILE) {
231                         printf("mkinode (file) %s\n", path2);
232                         fd = open(path2, O_RDWR|O_CREAT, 0666);
233                         if (fd > 0)
234                                 close(fd);
235                 } else if (ondisk->inode.obj_type == HAMMER_OBJTYPE_DIRECTORY) {
236                         printf("mkinode (dir) %s\n", path2);
237                         mkdir(path2, 0777);
238                         dict->flags |= DICTF_MADEDIR;
239                 }
240                 free(path1);
241                 free(path2);
242                 break;
243         case HAMMER_RECTYPE_DATA:
244                 /*
245                  * File record data
246                  */
247                 if (leaf->base.obj_id == 0)
248                         break;
249                 if (VerboseOpt) {
250                         printf("file %016jx:%05d data %016jx,%d\n",
251                                 (uintmax_t)leaf->base.obj_id,
252                                 pfs_id,
253                                 (uintmax_t)leaf->base.key - len,
254                                 len);
255                 }
256
257                 /*
258                  * Update the dictionary entry
259                  */
260                 if (dict->obj_type == 0)
261                         dict->obj_type = HAMMER_OBJTYPE_REGFILE;
262
263                 /*
264                  * If the parent directory has not been created we
265                  * have to create it (typically a PFS%05d)
266                  */
267                 if (dict->parent &&
268                     (dict->parent->flags & DICTF_MADEDIR) == 0) {
269                         dict->parent->flags |= DICTF_MADEDIR;
270                         path2 = recover_path(dict->parent);
271                         printf("mkdir %s\n", path2);
272                         mkdir(path2, 0777);
273                         free(path2);
274                         path2 = NULL;
275                 }
276
277                 /*
278                  * Create the file if necessary, report file creations
279                  */
280                 path1 = recover_path(dict);
281                 if (CachedPath && strcmp(CachedPath, path1) == 0) {
282                         fd = CachedFd;
283                 } else {
284                         fd = open(path1, O_CREAT|O_RDWR, 0666);
285                 }
286                 if (fd < 0) {
287                         printf("Unable to create %s: %s\n",
288                                 path1, strerror(errno));
289                         free(path1);
290                         break;
291                 }
292                 if ((dict->flags & DICTF_MADEFILE) == 0) {
293                         dict->flags |= DICTF_MADEFILE;
294                         printf("mkfile %s\n", path1);
295                 }
296
297                 /*
298                  * And write the record.  A HAMMER data block is aligned
299                  * and may contain trailing zeros after the file EOF.  The
300                  * inode record is required to get the actual file size.
301                  *
302                  * However, when the inode record is not available
303                  * we can do a sparse write and that will get it right
304                  * most of the time even if the inode record is never
305                  * found.
306                  */
307                 file_offset = (int64_t)leaf->base.key - len;
308                 lseek(fd, (off_t)file_offset, SEEK_SET);
309                 while (len) {
310                         if (dict->size == -1) {
311                                 for (zfill = chunk - 1; zfill >= 0; --zfill) {
312                                         if (((char *)ondisk)[zfill])
313                                                 break;
314                                 }
315                                 ++zfill;
316                         } else {
317                                 zfill = chunk;
318                         }
319
320                         if (zfill)
321                                 write(fd, ondisk, zfill);
322                         if (zfill < chunk)
323                                 lseek(fd, chunk - zfill, SEEK_CUR);
324
325                         len -= chunk;
326                         data_offset += chunk;
327                         file_offset += chunk;
328                         ondisk = get_buffer_data(data_offset, &data_buffer, 0);
329                         if (ondisk == NULL)
330                                 break;
331                         chunk = HAMMER_BUFSIZE -
332                                 ((int)data_offset & HAMMER_BUFMASK);
333                         if (chunk > len)
334                                 chunk = len;
335                 }
336                 if (dict->size >= 0 && file_offset > dict->size) {
337                         ftruncate(fd, dict->size);
338                         /* fchmod(fd, 0666); */
339                 }
340
341                 if (fd == CachedFd) {
342                         free(path1);
343                 } else if (CachedPath) {
344                         free(CachedPath);
345                         close(CachedFd);
346                         CachedPath = path1;
347                         CachedFd = fd;
348                 } else {
349                         CachedPath = path1;
350                         CachedFd = fd;
351                 }
352                 break;
353         case HAMMER_RECTYPE_DIRENTRY:
354                 nlen = len - offsetof(struct hammer_direntry_data, name[0]);
355                 if ((int)nlen < 0)      /* illegal length */
356                         break;
357                 if (ondisk->entry.obj_id == 0 ||
358                     ondisk->entry.obj_id == HAMMER_OBJID_ROOT)
359                         break;
360                 name = malloc(nlen + 1);
361                 bcopy(ondisk->entry.name, name, nlen);
362                 name[nlen] = 0;
363                 sanitize_string(name);
364
365                 /*
366                  * We can't deal with hardlinks so if the object already
367                  * has a name assigned to it we just keep using that name.
368                  */
369                 dict2 = get_dict(ondisk->entry.obj_id, pfs_id);
370                 path1 = recover_path(dict2);
371
372                 if (dict2->name == NULL)
373                         dict2->name = name;
374                 else
375                         free(name);
376
377                 /*
378                  * Attach dict2 to its directory (dict), create the
379                  * directory (dict) if necessary.  We must ensure
380                  * that the directory entry exists in order to be
381                  * able to properly rename() the file without creating
382                  * a namespace conflict.
383                  */
384                 if ((dict2->flags & DICTF_PARENT) == 0) {
385                         dict2->flags |= DICTF_PARENT;
386                         dict2->parent = dict;
387                         if ((dict->flags & DICTF_MADEDIR) == 0) {
388                                 dict->flags |= DICTF_MADEDIR;
389                                 path2 = recover_path(dict);
390                                 printf("mkdir %s\n", path2);
391                                 mkdir(path2, 0777);
392                                 free(path2);
393                                 path2 = NULL;
394                         }
395                 }
396                 path2 = recover_path(dict2);
397                 if (strcmp(path1, path2) != 0 && lstat(path1, &st) == 0) {
398                         printf("Rename %s -> %s\n", path1, path2);
399                         rename(path1, path2);
400                 }
401                 free(path1);
402                 free(path2);
403
404                 printf("dir  %016jx:%05d entry %016jx \"%s\"\n",
405                         (uintmax_t)leaf->base.obj_id,
406                         pfs_id,
407                         (uintmax_t)ondisk->entry.obj_id,
408                         name);
409                 break;
410         default:
411                 /*
412                  * Ignore any other record types
413                  */
414                 break;
415         }
416 done:
417         rel_buffer(data_buffer);
418 }
419
/*
 * Object dictionary hash table.  RD_HSIZE is a power of two so that a
 * hash value can be reduced to a bucket index by masking with RD_HMASK.
 */
#define RD_HSIZE	32768
#define RD_HMASK	(RD_HSIZE - 1)

/* Bucket heads; entries are chained through recover_dict.next. */
struct recover_dict *RDHash[RD_HSIZE];
424
425 static
426 struct recover_dict *
427 get_dict(int64_t obj_id, uint16_t pfs_id)
428 {
429         struct recover_dict *dict;
430         int i;
431
432         if (obj_id == 0)
433                 return(NULL);
434
435         i = crc32(&obj_id, sizeof(obj_id)) & RD_HMASK;
436         for (dict = RDHash[i]; dict; dict = dict->next) {
437                 if (dict->obj_id == obj_id &&
438                     dict->pfs_id == pfs_id) {
439                         break;
440                 }
441         }
442         if (dict == NULL) {
443                 dict = malloc(sizeof(*dict));
444                 bzero(dict, sizeof(*dict));
445                 dict->obj_id = obj_id;
446                 dict->pfs_id = pfs_id;
447                 dict->next = RDHash[i];
448                 dict->size = -1;
449                 RDHash[i] = dict;
450
451                 /*
452                  * Always connect dangling dictionary entries to object 1
453                  * (the root of the PFS).
454                  *
455                  * DICTF_PARENT will not be set until we know what the
456                  * real parent directory object is.
457                  */
458                 if (dict->obj_id != HAMMER_OBJID_ROOT)
459                         dict->parent = get_dict(1, pfs_id);
460         }
461         return(dict);
462 }
463
/*
 * Working state for building a recovery path.  The same walk is made
 * twice: once in PI_FIGURE state to add up the required length, and once
 * in PI_LOAD state to write the path into the allocated buffer.
 */
struct path_info {
	enum {
		PI_FIGURE,		/* pass 1: accumulate len */
		PI_LOAD			/* pass 2: emit text at next */
	} state;
	uint16_t pfs_id;		/* PFS number used for the root element */
	char *base;			/* start of the path buffer */
	char *next;			/* current write position within base */
	int len;			/* bytes required, including the NUL */
};

static void recover_path_helper(struct recover_dict *dict,
				struct path_info *info);
473
474 static
475 char *
476 recover_path(struct recover_dict *dict)
477 {
478         struct path_info info;
479
480         bzero(&info, sizeof(info));
481         info.pfs_id = dict->pfs_id;
482         info.state = PI_FIGURE;
483         recover_path_helper(dict, &info);
484         info.base = malloc(info.len);
485         info.next = info.base;
486         info.state = PI_LOAD;
487         recover_path_helper(dict, &info);
488
489         return(info.base);
490 }
491
492 static
493 void
494 recover_path_helper(struct recover_dict *dict, struct path_info *info)
495 {
496         /*
497          * Calculate path element length
498          */
499         dict->flags |= DICTF_TRAVERSED;
500
501         switch(info->state) {
502         case PI_FIGURE:
503                 if (dict->obj_id == HAMMER_OBJID_ROOT)
504                         info->len += 8;
505                 else if (dict->name)
506                         info->len += strlen(dict->name);
507                 else
508                         info->len += 6 + 16;
509                 ++info->len;
510
511                 if (dict->parent &&
512                     (dict->parent->flags & DICTF_TRAVERSED) == 0) {
513                         recover_path_helper(dict->parent, info);
514                 } else {
515                         info->len += strlen(TargetDir) + 1;
516                 }
517                 break;
518         case PI_LOAD:
519                 if (dict->parent &&
520                     (dict->parent->flags & DICTF_TRAVERSED) == 0) {
521                         recover_path_helper(dict->parent, info);
522                 } else {
523                         strcpy(info->next, TargetDir);
524                         info->next += strlen(info->next);
525                 }
526
527                 *info->next++ = '/';
528                 if (dict->obj_id == HAMMER_OBJID_ROOT) {
529                         snprintf(info->next, 8+1, "PFS%05d", info->pfs_id);
530                 } else if (dict->name) {
531                         strcpy(info->next, dict->name);
532                 } else {
533                         snprintf(info->next, 6+16+1, "obj_0x%016jx",
534                                 (uintmax_t)dict->obj_id);
535                 }
536                 info->next += strlen(info->next);
537                 break;
538         }
539         dict->flags &= ~DICTF_TRAVERSED;
540 }
541
/*
 * Replace every non-printable character of the NUL-terminated string str
 * with 'x', in place.  The length of the string is unchanged.
 */
static
void
sanitize_string(char *str)
{
	for (; *str != '\0'; ++str) {
		/*
		 * isprint() is only defined for EOF and values that fit in
		 * an unsigned char.  A plain char may be negative for bytes
		 * >= 0x80, so convert before the call.
		 */
		if (!isprint((unsigned char)*str))
			*str = 'x';
	}
}