Initial import of binutils 2.22 on the new vendor branch
[dragonfly.git] / sbin / hammer / cmd_recover.c
1 /*
2  * Copyright (c) 2010 The DragonFly Project.  All rights reserved.
3  *
4  * This code is derived from software contributed to The DragonFly Project
5  * by Matthew Dillon <dillon@backplane.com>
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions
9  * are met:
10  *
11  * 1. Redistributions of source code must retain the above copyright
12  *    notice, this list of conditions and the following disclaimer.
13  * 2. Redistributions in binary form must reproduce the above copyright
14  *    notice, this list of conditions and the following disclaimer in
15  *    the documentation and/or other materials provided with the
16  *    distribution.
17  * 3. Neither the name of The DragonFly Project nor the names of its
18  *    contributors may be used to endorse or promote products derived
19  *    from this software without specific, prior written permission.
20  *
21  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
22  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
23  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
24  * FOR A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE
25  * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
26  * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING,
27  * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
28  * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
29  * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
30  * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
31  * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
32  * SUCH DAMAGE.
33  */
34
35 #include "hammer.h"
36
37 struct recover_dict {        /* one entry per (obj_id, llid) pair, created on demand by get_dict() */
38         struct recover_dict *next;      /* next entry on the same RDHash[] chain */
39         struct recover_dict *parent;    /* parent used for path building; get_dict() defaults it to the object-1 entry of the same llid */
40         int64_t obj_id;                 /* object id, first half of the lookup key */
41         uint8_t obj_type;               /* 0 until an inode or data record supplies a type */
42         uint8_t flags;                  /* DICTF_* bits below */
43         uint16_t llid;                  /* upper 16 bits of the record localization, second half of the lookup key */
44         int64_t size;                   /* -1 until an inode record supplies the size */
45         char    *name;                  /* sanitized directory-entry name, NULL until one is found */
46 };
47
48 #define DICTF_MADEDIR   0x01    /* mkdir() has been issued for this entry's path */
49 #define DICTF_MADEFILE  0x02    /* the "mkfile" message has been printed for this entry */
50 #define DICTF_PARENT    0x04    /* parent attached for real */
51 #define DICTF_TRAVERSED 0x80    /* set while recover_path_helper() visits the entry; stops parent-chain loops */
52
53 static void recover_top(char *ptr);     /* scan one raw buffer for B-Tree leaf nodes */
54 static void recover_elm(hammer_btree_leaf_elm_t leaf);  /* recover one leaf element */
55 static struct recover_dict *get_dict(int64_t obj_id, uint16_t llid);    /* lookup-or-create; NULL when obj_id is 0 */
56 static char *recover_path(struct recover_dict *dict);   /* returns a malloc()ed path the caller frees */
57 static void sanitize_string(char *str); /* replaces non-printable characters in place */
58
59 static const char *TargetDir;   /* recovery destination, set by hammer_cmd_recover() */
60 static int CachedFd = -1;       /* descriptor of the file most recently written by recover_elm(), -1 if none */
61 static char *CachedPath;        /* malloc()ed path that CachedFd refers to, NULL if none */
62
63 void
64 hammer_cmd_recover(const char *target_dir)
65 {
66         struct buffer_info *data_buffer;
67         struct volume_info *scan;
68         struct volume_info *volume;
69         hammer_off_t off;
70         hammer_off_t off_end;
71         char *ptr;
72
73         AssertOnFailure = 0;
74         TargetDir = target_dir;
75
76         printf("Running raw scan of HAMMER image, recovering to %s\n",
77                 TargetDir);
78         mkdir(TargetDir, 0777);
79
80         data_buffer = NULL;
81         TAILQ_FOREACH(scan, &VolList, entry) {
82                 volume = get_volume(scan->vol_no);
83
84                 off = HAMMER_ZONE_RAW_BUFFER + 0;
85                 off |= HAMMER_VOL_ENCODE(volume->vol_no);
86                 off_end = off + (volume->ondisk->vol_buf_end - volume->ondisk->vol_buf_beg);
87                 while (off < off_end) {
88                         ptr = get_buffer_data(off, &data_buffer, 0);
89                         if (ptr) {
90                                 recover_top(ptr);
91                                 off += HAMMER_BUFSIZE;
92                         }
93                 }
94         }
95         if (data_buffer)
96                 rel_buffer(data_buffer);
97
98         if (CachedPath) {
99                 free(CachedPath);
100                 close(CachedFd);
101                 CachedPath = NULL;
102                 CachedFd = -1;
103         }
104
105         AssertOnFailure = 1;
106 }
107
108 /*
109  * Top level recovery processor.  Assume the data is a B-Tree node.
110  * If the CRC is good we attempt to process the node, building the
111  * object space and creating the dictionary as we go.
112  */
113 static void
114 recover_top(char *ptr)
115 {
116         struct hammer_node_ondisk *node;
117         hammer_btree_elm_t elm;
118         int maxcount;
119         int i;
120
121         for (node = (void *)ptr; (char *)node < ptr + HAMMER_BUFSIZE; ++node) {
122                 if (crc32(&node->crc + 1, HAMMER_BTREE_CRCSIZE) ==
123                     node->crc &&
124                     node->type == HAMMER_BTREE_TYPE_LEAF) {
125                         /*
126                          * Scan elements
127                          */
128                         maxcount = HAMMER_BTREE_LEAF_ELMS;
129                         for (i = 0; i < node->count && i < maxcount; ++i) {
130                                 elm = &node->elms[i];
131                                 if (elm->base.btype != 'R')
132                                         continue;
133                                 recover_elm(&elm->leaf);
134                         }
135                 }
136         }
137 }
138
139 static void
140 recover_elm(hammer_btree_leaf_elm_t leaf)
141 {
142         struct buffer_info *data_buffer = NULL;
143         struct recover_dict *dict;
144         struct recover_dict *dict2;
145         hammer_data_ondisk_t ondisk;
146         hammer_off_t data_offset;
147         struct stat st;
148         int chunk;
149         int len;
150         int zfill;
151         int64_t file_offset;
152         uint16_t llid;
153         size_t nlen;
154         int fd;
155         char *name;
156         char *path1;
157         char *path2;
158
159         /*
160          * Ignore deleted records
161          */
162         if (leaf->delete_ts)
163                 return;
164         if ((data_offset = leaf->data_offset) != 0)
165                 ondisk = get_buffer_data(data_offset, &data_buffer, 0);
166         else
167                 ondisk = NULL;
168         if (ondisk == NULL)
169                 goto done;
170
171         len = leaf->data_len;
172         chunk = HAMMER_BUFSIZE - ((int)data_offset & HAMMER_BUFMASK);
173         if (chunk > len)
174                 chunk = len;
175
176         if (len < 0 || len > HAMMER_XBUFSIZE || len > chunk)
177                 goto done;
178
179         llid = leaf->base.localization >> 16;
180
181         dict = get_dict(leaf->base.obj_id, llid);
182
183         switch(leaf->base.rec_type) {
184         case HAMMER_RECTYPE_INODE:
185                 /*
186                  * We found an inode which also tells us where the file
187                  * or directory is in the directory hierarchy.
188                  */
189                 if (VerboseOpt) {
190                         printf("file %016jx:%05d inode found\n",
191                                 (uintmax_t)leaf->base.obj_id, llid);
192                 }
193                 path1 = recover_path(dict);
194
195                 /*
196                  * Attach the inode to its parent.  This isn't strictly
197                  * necessary because the information is also in the
198                  * directory entries, but if we do not find the directory
199                  * entry this ensures that the files will still be
200                  * reasonably well organized in their proper directories.
201                  */
202                 if ((dict->flags & DICTF_PARENT) == 0 &&
203                     dict->obj_id != 1 && ondisk->inode.parent_obj_id != 0) {
204                         dict->flags |= DICTF_PARENT;
205                         dict->parent = get_dict(ondisk->inode.parent_obj_id,
206                                                 llid);
207                         if (dict->parent &&
208                             (dict->parent->flags & DICTF_MADEDIR) == 0) {
209                                 dict->parent->flags |= DICTF_MADEDIR;
210                                 path2 = recover_path(dict->parent);
211                                 printf("mkdir %s\n", path2);
212                                 mkdir(path2, 0777);
213                                 free(path2);
214                                 path2 = NULL;
215                         }
216                 }
217                 if (dict->obj_type == 0)
218                         dict->obj_type = ondisk->inode.obj_type;
219                 dict->size = ondisk->inode.size;
220                 path2 = recover_path(dict);
221
222                 if (lstat(path1, &st) == 0) {
223                         if (ondisk->inode.obj_type == HAMMER_OBJTYPE_REGFILE) {
224                                 truncate(path1, dict->size);
225                                 /* chmod(path1, 0666); */
226                         }
227                         if (strcmp(path1, path2)) {
228                                 printf("Rename %s -> %s\n", path1, path2);
229                                 rename(path1, path2);
230                         }
231                 } else if (ondisk->inode.obj_type == HAMMER_OBJTYPE_REGFILE) {
232                         printf("mkinode (file) %s\n", path2);
233                         fd = open(path2, O_RDWR|O_CREAT, 0666);
234                         if (fd > 0)
235                                 close(fd);
236                 } else if (ondisk->inode.obj_type == HAMMER_OBJTYPE_DIRECTORY) {
237                         printf("mkinode (dir) %s\n", path2);
238                         mkdir(path2, 0777);
239                         dict->flags |= DICTF_MADEDIR;
240                 }
241                 free(path1);
242                 free(path2);
243                 break;
244         case HAMMER_RECTYPE_DATA:
245                 /*
246                  * File record data
247                  */
248                 if (leaf->base.obj_id == 0)
249                         break;
250                 if (VerboseOpt) {
251                         printf("file %016jx:%05d data %016jx,%d\n",
252                                 (uintmax_t)leaf->base.obj_id,
253                                 llid,
254                                 (uintmax_t)leaf->base.key - len,
255                                 len);
256                 }
257
258                 /*
259                  * Update the dictionary entry
260                  */
261                 if (dict->obj_type == 0)
262                         dict->obj_type = HAMMER_OBJTYPE_REGFILE;
263
264                 /*
265                  * If the parent directory has not been created we
266                  * have to create it (typically a PFS%05d)
267                  */
268                 if (dict->parent &&
269                     (dict->parent->flags & DICTF_MADEDIR) == 0) {
270                         dict->parent->flags |= DICTF_MADEDIR;
271                         path2 = recover_path(dict->parent);
272                         printf("mkdir %s\n", path2);
273                         mkdir(path2, 0777);
274                         free(path2);
275                         path2 = NULL;
276                 }
277
278                 /*
279                  * Create the file if necessary, report file creations
280                  */
281                 path1 = recover_path(dict);
282                 if (CachedPath && strcmp(CachedPath, path1) == 0) {
283                         fd = CachedFd;
284                 } else {
285                         fd = open(path1, O_CREAT|O_RDWR, 0666);
286                 }
287                 if (fd < 0) {
288                         printf("Unable to create %s: %s\n",
289                                 path1, strerror(errno));
290                         free(path1);
291                         break;
292                 }
293                 if ((dict->flags & DICTF_MADEFILE) == 0) {
294                         dict->flags |= DICTF_MADEFILE;
295                         printf("mkfile %s\n", path1);
296                 }
297
298                 /*
299                  * And write the record.  A HAMMER data block is aligned
300                  * and may contain trailing zeros after the file EOF.  The
301                  * inode record is required to get the actual file size.
302                  *
303                  * However, when the inode record is not available
304                  * we can do a sparse write and that will get it right
305                  * most of the time even if the inode record is never
306                  * found.
307                  */
308                 file_offset = (int64_t)leaf->base.key - len;
309                 lseek(fd, (off_t)file_offset, SEEK_SET);
310                 while (len) {
311                         if (dict->size == -1) {
312                                 for (zfill = chunk - 1; zfill >= 0; --zfill) {
313                                         if (((char *)ondisk)[zfill])
314                                                 break;
315                                 }
316                                 ++zfill;
317                         } else {
318                                 zfill = chunk;
319                         }
320
321                         if (zfill)
322                                 write(fd, ondisk, zfill);
323                         if (zfill < chunk)
324                                 lseek(fd, chunk - zfill, SEEK_CUR);
325
326                         len -= chunk;
327                         data_offset += chunk;
328                         file_offset += chunk;
329                         ondisk = get_buffer_data(data_offset, &data_buffer, 0);
330                         if (ondisk == NULL)
331                                 break;
332                         chunk = HAMMER_BUFSIZE -
333                                 ((int)data_offset & HAMMER_BUFMASK);
334                         if (chunk > len)
335                                 chunk = len;
336                 }
337                 if (dict->size >= 0 && file_offset > dict->size) {
338                         ftruncate(fd, dict->size);
339                         /* fchmod(fd, 0666); */
340                 }
341
342                 if (fd == CachedFd) {
343                         free(path1);
344                 } else if (CachedPath) {
345                         free(CachedPath);
346                         close(CachedFd);
347                         CachedPath = path1;
348                         CachedFd = fd;
349                 } else {
350                         CachedPath = path1;
351                         CachedFd = fd;
352                 }
353                 break;
354         case HAMMER_RECTYPE_DIRENTRY:
355                 nlen = len - offsetof(struct hammer_entry_data, name[0]);
356                 if ((int)nlen < 0)      /* illegal length */
357                         break;
358                 if (ondisk->entry.obj_id == 0 || ondisk->entry.obj_id == 1)
359                         break;
360                 name = malloc(nlen + 1);
361                 bcopy(ondisk->entry.name, name, nlen);
362                 name[nlen] = 0;
363                 sanitize_string(name);
364
365                 /*
366                  * We can't deal with hardlinks so if the object already
367                  * has a name assigned to it we just keep using that name.
368                  */
369                 dict2 = get_dict(ondisk->entry.obj_id, llid);
370                 path1 = recover_path(dict2);
371
372                 if (dict2->name == NULL)
373                         dict2->name = name;
374                 else
375                         free(name);
376
377                 /*
378                  * Attach dict2 to its directory (dict), create the
379                  * directory (dict) if necessary.  We must ensure
380                  * that the directory entry exists in order to be
381                  * able to properly rename() the file without creating
382                  * a namespace conflict.
383                  */
384                 if ((dict2->flags & DICTF_PARENT) == 0) {
385                         dict2->flags |= DICTF_PARENT;
386                         dict2->parent = dict;
387                         if ((dict->flags & DICTF_MADEDIR) == 0) {
388                                 dict->flags |= DICTF_MADEDIR;
389                                 path2 = recover_path(dict);
390                                 printf("mkdir %s\n", path2);
391                                 mkdir(path2, 0777);
392                                 free(path2);
393                                 path2 = NULL;
394                         }
395                 }
396                 path2 = recover_path(dict2);
397                 if (strcmp(path1, path2) != 0 && lstat(path1, &st) == 0) {
398                         printf("Rename %s -> %s\n", path1, path2);
399                         rename(path1, path2);
400                 }
401                 free(path1);
402                 free(path2);
403
404                 printf("dir  %016jx:%05d entry %016jx \"%s\"\n",
405                         (uintmax_t)leaf->base.obj_id,
406                         llid,
407                         (uintmax_t)ondisk->entry.obj_id,
408                         name);
409                 break;
410         default:
411                 /*
412                  * Ignore any other record types
413                  */
414                 break;
415         }
416 done:
417         if (data_buffer)
418                 rel_buffer(data_buffer);
419 }
420
421 #define RD_HSIZE        32768
422 #define RD_HMASK        (RD_HSIZE - 1)
423
424 struct recover_dict *RDHash[RD_HSIZE];
425
426 static
427 struct recover_dict *
428 get_dict(int64_t obj_id, uint16_t llid)
429 {
430         struct recover_dict *dict;
431         int i;
432
433         if (obj_id == 0)
434                 return(NULL);
435
436         i = crc32(&obj_id, sizeof(obj_id)) & RD_HMASK;
437         for (dict = RDHash[i]; dict; dict = dict->next) {
438                 if (dict->obj_id == obj_id &&
439                     dict->llid == llid) {
440                         break;
441                 }
442         }
443         if (dict == NULL) {
444                 dict = malloc(sizeof(*dict));
445                 bzero(dict, sizeof(*dict));
446                 dict->obj_id = obj_id;
447                 dict->llid = llid;
448                 dict->next = RDHash[i];
449                 dict->size = -1;
450                 RDHash[i] = dict;
451
452                 /*
453                  * Always connect dangling dictionary entries to object 1
454                  * (the root of the PFS).
455                  *
456                  * DICTF_PARENT will not be set until we know what the
457                  * real parent directory object is.
458                  */
459                 if (dict->obj_id != 1)
460                         dict->parent = get_dict(1, llid);
461         }
462         return(dict);
463 }
464
465 struct path_info {
466         enum { PI_FIGURE, PI_LOAD } state;
467         uint16_t llid;
468         char *base;
469         char *next;
470         int len;
471 };
472
473 static void recover_path_helper(struct recover_dict *, struct path_info *);
474
475 static
476 char *
477 recover_path(struct recover_dict *dict)
478 {
479         struct path_info info;
480
481         bzero(&info, sizeof(info));
482         info.llid = dict->llid;
483         info.state = PI_FIGURE;
484         recover_path_helper(dict, &info);
485         info.base = malloc(info.len);
486         info.next = info.base;
487         info.state = PI_LOAD;
488         recover_path_helper(dict, &info);
489
490         return(info.base);
491 }
492
493 static
494 void
495 recover_path_helper(struct recover_dict *dict, struct path_info *info)
496 {
497         /*
498          * Calculate path element length
499          */
500         dict->flags |= DICTF_TRAVERSED;
501
502         switch(info->state) {
503         case PI_FIGURE:
504                 if (dict->obj_id == 1)
505                         info->len += 8;
506                 else if (dict->name)
507                         info->len += strlen(dict->name);
508                 else
509                         info->len += 6 + 16;
510                 ++info->len;
511
512                 if (dict->parent &&
513                     (dict->parent->flags & DICTF_TRAVERSED) == 0) {
514                         recover_path_helper(dict->parent, info);
515                 } else {
516                         info->len += strlen(TargetDir) + 1;
517                 }
518                 break;
519         case PI_LOAD:
520                 if (dict->parent &&
521                     (dict->parent->flags & DICTF_TRAVERSED) == 0) {
522                         recover_path_helper(dict->parent, info);
523                 } else {
524                         strcpy(info->next, TargetDir);
525                         info->next += strlen(info->next);
526                 }
527
528                 *info->next++ = '/';
529                 if (dict->obj_id == 1) {
530                         snprintf(info->next, 8+1, "PFS%05d", info->llid);
531                 } else if (dict->name) {
532                         strcpy(info->next, dict->name);
533                 } else {
534                         snprintf(info->next, 6+16+1, "obj_0x%016jx",
535                                 (uintmax_t)dict->obj_id);
536                 }
537                 info->next += strlen(info->next);
538                 break;
539         }
540         dict->flags &= ~DICTF_TRAVERSED;
541 }
542
/*
 * Replace every non-printable character of the NUL-terminated string
 * str with 'x', in place.  The length of the string does not change.
 *
 * Each byte is converted to unsigned char before it is classified:
 * the <ctype.h> functions are only defined for EOF and values that fit
 * in an unsigned char, and plain char may be signed, so bytes with the
 * high bit set would otherwise be passed as negative values.
 */
static
void
sanitize_string(char *str)
{
	for (; *str != '\0'; ++str) {
		if (!isprint((unsigned char)*str))
			*str = 'x';
	}
}
552 }