2 * Copyright (c) 2003-2007 Tim Kientzle
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
8 * 1. Redistributions of source code must retain the above copyright
9 * notice, this list of conditions and the following disclaimer.
10 * 2. Redistributions in binary form must reproduce the above copyright
11 * notice, this list of conditions and the following disclaimer in the
12 * documentation and/or other materials provided with the distribution.
14 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR(S) ``AS IS'' AND ANY EXPRESS OR
15 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
16 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
17 * IN NO EVENT SHALL THE AUTHOR(S) BE LIABLE FOR ANY DIRECT, INDIRECT,
18 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
19 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
20 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
22 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
23 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
24 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
28 * This is a new directory-walking system that addresses a number
29 * of problems I've had with fts(3). In particular, it has no
30 * pathname-length limits (other than the size of 'int'), handles
31 * deep logical traversals, uses considerably less memory, and has
32 * an opaque interface (easier to modify in the future).
34 * Internally, it keeps a single list of "tree_entry" items that
35 * represent filesystem objects that require further attention.
36 * Non-directories are not kept in memory: they are pulled from
37 * readdir(), returned to the client, then freed as soon as possible.
38 * Any directory entry to be traversed gets pushed onto the stack.
40 * There is surprisingly little information that needs to be kept for
41 * each item on the stack. Just the name, depth (represented here as the
42 * string length of the parent directory's pathname), and some markers
43 * indicating how to get back to the parent (via chdir("..") for a
44 * regular dir or via fchdir(2) for a symlink).
46 #include "bsdtar_platform.h"
47 __FBSDID("$FreeBSD: src/usr.bin/tar/tree.c,v 1.9 2008/11/27 05:49:52 kientzle Exp $");
49 #ifdef HAVE_SYS_STAT_H
73 #if defined(HAVE_WINDOWS_H) && !defined(__CYGWIN__)
82 * 3) Arbitrary logical traversals by closing/reopening intermediate fds.
/* Next entry down the stack; tree_dump() follows this link to list every entry. */
87 struct tree_entry *next;
/* Entry that was current when this one was pushed (assigned in tree_push()). */
88 struct tree_entry *parent;
/* Copy of tree.dirname_length taken at push time; tree_pop() restores it. */
90 size_t dirname_length;
94 /* How to return back to the parent of a symlink. */
/* Descriptor opened on the working directory before descending; tree_ascend() passes it to fchdir() and then closes it. */
96 int symlink_parent_fd;
97 #elif defined(_WIN32) && !defined(__CYGWIN__)
/* Working directory string obtained from _getcwd(); tree_ascend() hands it to SetCurrentDirectory() and then frees it. */
98 char *symlink_parent_path;
100 #error fchdir function required.
104 /* Definitions for tree_entry.flags bitmap. */
/*
 * Each value is a distinct bit: the code combines them with | (see
 * tree_push()) and tests or clears them with & (see tree_next()).
 */
105 #define isDir 1 /* This entry is a regular directory. */
106 #define isDirLink 2 /* This entry is a symbolic link to a directory. */
107 #define needsFirstVisit 4 /* This is an initial entry. */
108 #define needsDescent 8 /* This entry needs to be previsited. */
109 #define needsOpen 16 /* This is a directory that needs to be opened. */
110 #define needsAscent 32 /* This entry needs to be postvisited. */
113 * On Windows, "first visit" is handled as a pattern to be handed to
114 * _findfirst(). This is consistent with Windows conventions that
115 * file patterns are handled within the application. On Posix,
116 * "first visit" is just returned to the client.
120 * Local data for this package.
/* Top of the list of entries still needing attention; tree_next() loops until it is NULL. */
123 struct tree_entry *stack;
/* Entry most recently selected by tree_next() for a visit. */
124 struct tree_entry *current;
125 #if defined(_WIN32) && !defined(__CYGWIN__)
/* Cache filled by tree_current_file_information(); treated as valid while hasFileInfo is set. */
127 BY_HANDLE_FILE_INFORMATION fileInfo;
/* Value stored in the directory handle d when no directory is open. */
128 #define INVALID_DIR_HANDLE INVALID_HANDLE_VALUE
/* Storage that FindFirstFile() and FindNextFile() write into. */
129 WIN32_FIND_DATA _findData;
/* Set to point at _findData once a search has been started (see tree_dir_next_windows()). */
130 WIN32_FIND_DATA *findData;
/* Same sentinel for the opendir()-based code, where a NULL handle means no open directory. */
133 #define INVALID_DIR_HANDLE NULL
138 int tree_errno; /* Error code from last failed operation. */
140 /* Dynamically-sized buffer for holding path */
144 const char *basename; /* Last path element */
145 size_t dirname_length; /* Leading dir length */
146 size_t path_length; /* Total path length */
156 /* Definitions for tree.flags bitmap. */
/*
 * hasStat and hasLstat are cleared whenever tree_dir_next_posix() or
 * tree_dir_next_windows() moves to a new entry, and again in tree_next().
 * NOTE(review): no visible line clears hasFileInfo after it is set in
 * tree_current_file_information() -- confirm it is reset per entry.
 */
157 #define hasStat 16 /* The st entry is valid. */
158 #define hasLstat 32 /* The lst entry is valid. */
159 #define hasFileInfo 64 /* The Windows fileInfo entry is valid. */
161 #if defined(_WIN32) && !defined(__CYGWIN__)
/* Forward declaration: tree_next() calls this before the definition appears. */
163 tree_dir_next_windows(struct tree *t, const char *pattern);
/* Forward declaration of the readdir()-based counterpart. */
166 tree_dir_next_posix(struct tree *t);
/* D_NAMELEN(dp): length of the name held in directory entry dp; used by tree_dir_next_posix(). */
169 #ifdef HAVE_DIRENT_D_NAMLEN
170 /* BSD extension; avoids need for a strlen() call. */
171 #define D_NAMELEN(dp) (dp)->d_namlen
/* Fallback when d_namlen is unavailable: measure d_name with strlen(). */
173 #define D_NAMELEN(dp) (strlen((dp)->d_name))
/*
 * Debugging aid: print the traversal state of t to out -- depth, path
 * buffer, working directory, basename and one line per stack entry.
 */
178 tree_dump(struct tree *t, FILE *out)
181 struct tree_entry *te;
183 fprintf(out, "\tdepth: %d\n", t->depth);
184 fprintf(out, "\tbuff: %s\n", t->buff);
/* NOTE(review): getcwd() returns NULL on failure and its result is passed straight to %s. */
185 fprintf(out, "\tpwd: %s\n", getcwd(buff, sizeof(buff)));
186 fprintf(out, "\tbasename: %s\n", t->basename);
187 fprintf(out, "\tstack:\n");
/*
 * Legend for each stack line: a leading * marks the current entry;
 * V, D, O and A mean the first-visit, descent, open and ascent flags are
 * still set; L marks a link to a directory; + is printed for the
 * current entry when t->d is nonzero.
 */
188 for (te = t->stack; te != NULL; te = te->next) {
189 fprintf(out, "\t\t%s%d:\"%s\" %s%s%s%s%s%s\n",
190 t->current == te ? "*" : " ",
193 te->flags & needsFirstVisit ? "V" : "",
194 te->flags & needsDescent ? "D" : "",
195 te->flags & needsOpen ? "O" : "",
196 te->flags & needsAscent ? "A" : "",
197 te->flags & isDirLink ? "L" : "",
/* NOTE(review): this tests t->d for truth, while tree_next() compares it with INVALID_DIR_HANDLE. */
198 (t->current == te && t->d) ? "+" : ""
204 * Add a directory path to the current stack.
/*
 * Create a new stack entry for path.
 * The entry is zero-filled, gets a private copy of path from strdup(),
 * is flagged for descent, open and ascent, and records the current
 * dirname_length so that tree_pop() can restore it later.
 * NOTE(review): the malloc() result is handed to memset() and the
 * strdup() result is stored without any visible NULL check.
 */
207 tree_push(struct tree *t, const char *path)
209 struct tree_entry *te;
211 te = malloc(sizeof(*te));
212 memset(te, 0, sizeof(*te));
/* Remember which entry was current when this one was created. */
214 te->parent = t->current;
216 te->depth = te->parent->depth + 1;
/* Presumably -1 means no parent descriptor is open yet; tree_next() opens one for links. */
219 te->symlink_parent_fd = -1;
220 te->name = strdup(path);
221 #elif defined(_WIN32) && !defined(__CYGWIN__)
222 te->symlink_parent_path = NULL;
223 te->name = strdup(path);
/* A freshly pushed directory still has all three traversal steps pending. */
225 te->flags = needsDescent | needsOpen | needsAscent;
226 te->dirname_length = t->dirname_length;
230 * Append a name to the current dir path.
/*
 * Place the first name_length bytes of name after the directory part of
 * the path buffer, growing the buffer when it is too small, and update
 * path_length. Trailing slashes on name are not counted.
 */
233 tree_append(struct tree *t, const char *name, size_t name_length)
/* Cut the buffer back to the directory part before adding the new element. */
239 t->buff[t->dirname_length] = '\0';
240 /* Strip trailing '/' from name, unless entire name is "/". */
241 while (name_length > 1 && name[name_length - 1] == '/')
244 /* Resize pathname buffer as needed. */
/*
 * NOTE(review): size_needed covers the directory part, the name and one
 * extra byte. If the separator branch below inserts a byte, nothing is
 * left for the terminating NUL -- confirm the sizing.
 */
245 size_needed = name_length + 1 + t->dirname_length;
246 if (t->buff_length < size_needed) {
/* Start from at least 1024 bytes, then enlarge until size_needed fits. */
247 if (t->buff_length < 1024)
248 t->buff_length = 1024;
249 while (t->buff_length < size_needed)
/* NOTE(review): the realloc() result overwrites t->buff with no visible NULL check; on failure the old buffer would be lost. */
251 t->buff = realloc(t->buff, t->buff_length);
/* p is the write position just past the directory part. */
255 p = t->buff + t->dirname_length;
256 t->path_length = t->dirname_length + name_length;
257 /* Add a separating '/' if it's needed. */
258 if (t->dirname_length > 0 && p[-1] != '/') {
/* Copy exactly name_length bytes; the terminator is stored explicitly afterwards. */
263 strncpy_s(p, t->buff_length - (p - t->buff), name, name_length);
265 strncpy(p, name, name_length);
267 p[name_length] = '\0';
272 * Open a directory tree for traversal.
/*
 * Begin a traversal rooted at path.
 * Allocates a zeroed struct tree, sets up the first stack entry with
 * needsFirstVisit | isDirLink | needsAscent, records how to get back to
 * the starting directory (a descriptor, or on Windows the working
 * directory string) and marks the directory handle as closed.
 */
275 tree_open(const char *path)
/* NOTE(review): the malloc() result goes to memset() without a visible NULL check. */
280 t = malloc(sizeof(*t));
281 memset(t, 0, sizeof(*t));
282 /* First item is set up a lot like a symlink traversal. */
284 t->stack->flags = needsFirstVisit | isDirLink | needsAscent;
/*
 * Keep a descriptor on the starting directory so tree_ascend() can
 * fchdir() back to it.
 * NOTE(review): the open() result is stored without a visible check.
 */
285 t->stack->symlink_parent_fd = open(".", O_RDONLY);
287 t->d = INVALID_DIR_HANDLE;
289 #elif defined(_WIN32) && !defined(__CYGWIN__)
/*
 * _getcwd(NULL, 0) returns an allocated copy of the working directory;
 * it is stored in symlink_parent_path further down and released in
 * tree_ascend().
 */
291 char *cwd = _getcwd(NULL, 0);
292 char *pathname, *p, *base;
296 /* Take care of '\' character in multi-byte character-set.
297 * Some multi-byte character-set have been using '\' character
298 * for a part of its character code. */
/* First call, with no output buffer, only measures the wide-character length. */
299 l = MultiByteToWideChar(CP_OEMCP, 0, path, strlen(path), NULL, 0);
302 wcs = malloc(sizeof(*wcs) * (l+1));
/* Second call performs the conversion into wcs. */
305 l = MultiByteToWideChar(CP_OEMCP, 0, path, strlen(path), wcs, l);
308 for (wp = wcs; *wp != L'\0'; ++wp) {
/* Same two-step pattern back to the OEM code page: size query, then conversion into pathname. */
312 l = WideCharToMultiByte(CP_OEMCP, 0, wcs, wlen, NULL, 0, NULL, NULL);
315 pathname = malloc(l+1);
316 if (pathname == NULL)
318 l = WideCharToMultiByte(CP_OEMCP, 0, wcs, wlen, pathname, l, NULL, NULL);
322 #if defined(_WIN32) && !defined(__CYGWIN__)
323 /* ASCII version APIs do not accept the path which begin with
325 if (strncmp(base, "//?/", 4) == 0)
329 t = malloc(sizeof(*t));
330 memset(t, 0, sizeof(*t));
331 /* First item is set up a lot like a symlink traversal. */
332 /* printf("Looking for wildcard in %s\n", path); */
333 /* TODO: wildcard detection here screws up on \\?\c:\ UNC names */
334 if (strchr(base, '*') || strchr(base, '?')) {
335 /* It has a wildcard in it... */
336 /* Separate the last element. */
337 p = strrchr(base, '/');
/* Put the part before the last slash into the path buffer and treat it as the leading directory. */
341 tree_append(t, base, p - base);
342 t->dirname_length = t->path_length;
348 t->stack->flags = needsFirstVisit | isDirLink | needsAscent;
/* Ownership of cwd passes to the stack entry; tree_ascend() frees it. */
349 t->stack->symlink_parent_path = cwd;
350 t->d = INVALID_DIR_HANDLE;
356 * We've finished a directory; ascend back to the parent.
/*
 * Change the working directory back to the parent of the entry being
 * finished. Entries flagged isDirLink return through the location saved
 * at descent time; the other visible path changes to the parent
 * directory. On failure tree_errno is set and r becomes TREE_ERROR_FATAL.
 */
359 tree_ascend(struct tree *t)
361 struct tree_entry *te;
366 if (te->flags & isDirLink) {
368 if (fchdir(te->symlink_parent_fd) != 0) {
369 t->tree_errno = errno;
370 r = TREE_ERROR_FATAL;
/* The saved descriptor is closed on this path after the fchdir() attempt. */
372 close(te->symlink_parent_fd);
373 #elif defined(_WIN32) && !defined(__CYGWIN__)
/*
 * NOTE(review): SetCurrentDirectory() reports failure through
 * GetLastError(); errno may not describe this error -- confirm.
 */
374 if (SetCurrentDirectory(te->symlink_parent_path) == 0) {
375 t->tree_errno = errno;
376 r = TREE_ERROR_FATAL;
/* Release the saved path and clear the pointer so it is not reused. */
378 free(te->symlink_parent_path);
379 te->symlink_parent_path = NULL;
383 #if defined(_WIN32) && !defined(__CYGWIN__)
384 if (SetCurrentDirectory("..") == 0) {
386 if (chdir("..") != 0) {
388 t->tree_errno = errno;
389 r = TREE_ERROR_FATAL;
396 * Pop the working stack.
/*
 * Remove the top stack entry and roll the path bookkeeping back to the
 * state recorded when that entry was pushed.
 */
399 tree_pop(struct tree *t)
401 struct tree_entry *te;
/* Cut the path buffer back to the directory part. */
404 t->buff[t->dirname_length] = '\0';
/* If the entry being removed is the current one, its parent becomes current. */
405 if (t->stack == t->current && t->current != NULL)
406 t->current = t->current->parent;
/* Restore the directory length saved in the entry by tree_push(). */
409 t->dirname_length = te->dirname_length;
/* Recompute basename from the restored length; the loop deals with leading slashes (presumably by skipping them). */
411 t->basename = t->buff + t->dirname_length;
412 while (t->basename[0] == '/')
420 * Get the next item in the tree traversal.
/*
 * Advance the traversal by one step and report what kind of visit it is.
 * The visible return values are TREE_REGULAR, TREE_POSTDESCENT,
 * TREE_POSTASCENT, TREE_ERROR_DIR, an error code from tree_ascend(), and
 * 0 once the stack is empty. The value is also stored in t->visit_type.
 */
423 tree_next(struct tree *t)
/* NOTE(review): the message printed below has no trailing newline. */
427 /* If we're called again after a fatal error, that's an API
428 * violation. Just crash now. */
429 if (t->visit_type == TREE_ERROR_FATAL) {
430 fprintf(stderr, "Unable to continue traversing"
431 " directory hierarchy after a fatal error.");
/* Work on the top stack entry until nothing is left. */
435 while (t->stack != NULL) {
436 /* If there's an open dir, get the next entry from there. */
437 if (t->d != INVALID_DIR_HANDLE) {
438 #if defined(_WIN32) && !defined(__CYGWIN__)
/* A NULL pattern continues the search that is already open. */
439 r = tree_dir_next_windows(t, NULL);
441 r = tree_dir_next_posix(t);
/* The flags of the top entry decide which traversal step comes next. */
448 if (t->stack->flags & needsFirstVisit) {
449 #if defined(_WIN32) && !defined(__CYGWIN__)
450 char *d = t->stack->name;
451 t->stack->flags &= ~needsFirstVisit;
/* On Windows a name containing * or ? is handed to the find API as a pattern. */
452 if (strchr(d, '*') || strchr(d, '?')) {
453 r = tree_dir_next_windows(t, d);
458 /* Not a pattern, handle it as-is... */
460 /* Top stack item needs a regular visit. */
461 t->current = t->stack;
462 tree_append(t, t->stack->name, strlen(t->stack->name));
463 /* t->dirname_length = t->path_length; */
465 t->stack->flags &= ~needsFirstVisit;
466 return (t->visit_type = TREE_REGULAR);
467 } else if (t->stack->flags & needsDescent) {
468 /* Top stack item is dir to descend into. */
469 t->current = t->stack;
470 tree_append(t, t->stack->name, strlen(t->stack->name));
471 t->stack->flags &= ~needsDescent;
472 /* If it is a link, set up fd for the ascent. */
473 if (t->stack->flags & isDirLink) {
475 t->stack->symlink_parent_fd = open(".", O_RDONLY);
/* Track the largest openCount seen so far. */
477 if (t->openCount > t->maxOpenCount)
478 t->maxOpenCount = t->openCount;
479 #elif defined(_WIN32) && !defined(__CYGWIN__)
480 t->stack->symlink_parent_path = _getcwd(NULL, 0);
/* From here on the whole path so far counts as the directory part. */
483 t->dirname_length = t->path_length;
484 #if defined(_WIN32) && !defined(__CYGWIN__)
/*
 * NOTE(review): the second operand parses as (!call) != 0, which is
 * true exactly when SetCurrentDirectory() fails. The constant 259 is
 * hard-coded -- presumably a Windows path-length limit; confirm.
 */
485 if (t->path_length == 259 || !SetCurrentDirectory(t->stack->name) != 0)
487 if (chdir(t->stack->name) != 0)
490 /* chdir() failed; return error */
492 t->tree_errno = errno;
493 return (t->visit_type = TREE_ERROR_DIR);
496 return (t->visit_type = TREE_POSTDESCENT);
497 } else if (t->stack->flags & needsOpen) {
498 t->stack->flags &= ~needsOpen;
499 #if defined(_WIN32) && !defined(__CYGWIN__)
/* The pattern * starts a listing of the directory just entered. */
500 r = tree_dir_next_windows(t, "*");
502 r = tree_dir_next_posix(t);
507 } else if (t->stack->flags & needsAscent) {
508 /* Top stack item is dir and we're done with it. */
/* A nonzero r takes precedence over the normal TREE_POSTASCENT result. */
511 t->visit_type = r != 0 ? r : TREE_POSTASCENT;
512 return (t->visit_type);
514 /* Top item on stack is dead. */
516 t->flags &= ~hasLstat;
517 t->flags &= ~hasStat;
/* Stack exhausted: 0 tells the caller the traversal is over. */
520 return (t->visit_type = 0);
523 #if defined(_WIN32) && !defined(__CYGWIN__)
/*
 * Fetch the next directory entry through the Win32 find API.
 * A non-NULL pattern starts a new search with FindFirstFile(); a NULL
 * pattern continues the open search with FindNextFile().
 * Returns TREE_REGULAR once an entry name has been appended to the path.
 * If the search cannot be started, returns the tree_ascend() error when
 * there is one, otherwise TREE_ERROR_DIR.
 */
525 tree_dir_next_windows(struct tree *t, const char *pattern)
532 if (pattern != NULL) {
533 t->d = FindFirstFile(pattern, &t->_findData);
534 if (t->d == INVALID_DIR_HANDLE) {
535 r = tree_ascend(t); /* Undo "chdir" */
/*
 * NOTE(review): errno is read after tree_ascend() has run, and Win32
 * calls report failure through GetLastError() -- confirm this value
 * is meaningful.
 */
537 t->tree_errno = errno;
538 t->visit_type = r != 0 ? r : TREE_ERROR_DIR;
539 return (t->visit_type);
541 t->findData = &t->_findData;
543 } else if (!FindNextFile(t->d, &t->_findData)) {
/* FindNextFile() reported failure (which includes end of listing): mark the handle as closed. */
545 t->d = INVALID_DIR_HANDLE;
549 name = t->findData->cFileName;
550 namelen = strlen(name);
/* New entry: cached stat() and lstat() results belong to the previous one. */
551 t->flags &= ~hasLstat;
552 t->flags &= ~hasStat;
/* The next two tests single out the . and .. entries, presumably so they can be skipped. */
553 if (name[0] == '.' && name[1] == '\0')
555 if (name[0] == '.' && name[1] == '.' && name[2] == '\0')
557 tree_append(t, name, namelen);
558 return (t->visit_type = TREE_REGULAR);
/*
 * Fetch the next directory entry with opendir() and readdir() on the
 * working directory.
 * Returns TREE_REGULAR once an entry name has been appended to the path.
 * If the directory cannot be opened, returns the tree_ascend() error
 * when there is one, otherwise TREE_ERROR_DIR.
 */
563 tree_dir_next_posix(struct tree *t)
570 if ((t->d = opendir(".")) == NULL) {
571 r = tree_ascend(t); /* Undo "chdir" */
/* NOTE(review): errno is read after tree_ascend() has run, so it may no longer hold the opendir() error -- confirm. */
573 t->tree_errno = errno;
574 t->visit_type = r != 0 ? r : TREE_ERROR_DIR;
575 return (t->visit_type);
579 t->de = readdir(t->d);
/* Mark the directory handle as closed (presumably once readdir() has nothing more to return). */
582 t->d = INVALID_DIR_HANDLE;
585 name = t->de->d_name;
586 namelen = D_NAMELEN(t->de);
/* New entry: cached stat() and lstat() results belong to the previous one. */
587 t->flags &= ~hasLstat;
588 t->flags &= ~hasStat;
/* The next two tests single out the . and .. entries, presumably so they can be skipped. */
589 if (name[0] == '.' && name[1] == '\0')
591 if (name[0] == '.' && name[1] == '.' && name[2] == '\0')
593 tree_append(t, name, namelen);
594 return (t->visit_type = TREE_REGULAR);
/*
 * Return the error code stored in t->tree_errno, which the traversal
 * code sets when an operation fails.
 */
603 tree_errno(struct tree *t)
605 return (t->tree_errno);
609 * Called by the client to mark the directory just returned from
610 * tree_next() as needing to be visited.
/*
 * Queue the entry just returned by tree_next() for traversal.
 * A physical directory is pushed with isDir; anything else that
 * tree_current_is_dir() accepts (a link to a directory) is pushed with
 * isDirLink. Entries that are not directories push nothing.
 */
613 tree_descend(struct tree *t)
/* Only a TREE_REGULAR visit is eligible; other visit types are handled first (presumably by returning). */
615 if (t->visit_type != TREE_REGULAR)
618 if (tree_current_is_physical_dir(t)) {
619 tree_push(t, t->basename);
620 t->stack->flags |= isDir;
621 } else if (tree_current_is_dir(t)) {
622 tree_push(t, t->basename);
623 t->stack->flags |= isDirLink;
628 * Get the stat() data for the entry just returned from tree_next().
/*
 * Lazily obtain stat() data for the current entry: stat() is called on
 * the access path, filling t->st, only while hasStat is not set.
 * Callers in this file test the result for failure before using it
 * (presumably a NULL return -- confirm).
 */
631 tree_current_stat(struct tree *t)
633 if (!(t->flags & hasStat)) {
634 if (stat(tree_current_access_path(t), &t->st) != 0)
641 #if defined(_WIN32) && !defined(__CYGWIN__)
/*
 * Windows only: return a pointer to the BY_HANDLE_FILE_INFORMATION of
 * the current entry, cached in t->fileInfo. The file is opened and
 * queried only while hasFileInfo is not set.
 */
642 const BY_HANDLE_FILE_INFORMATION *
643 tree_current_file_information(struct tree *t)
645 if (!(t->flags & hasFileInfo)) {
/*
 * Per the CreateFile documentation, FILE_FLAG_BACKUP_SEMANTICS is
 * required to get a handle to a directory, and
 * FILE_FLAG_OPEN_REPARSE_POINT opens a reparse point itself instead of
 * following it.
 */
646 HANDLE h = CreateFile(tree_current_access_path(t),
649 FILE_FLAG_BACKUP_SEMANTICS | FILE_FLAG_OPEN_REPARSE_POINT,
651 if (h == INVALID_HANDLE_VALUE)
653 if (!GetFileInformationByHandle(h, &t->fileInfo)) {
/* Mark the cache as valid so later calls skip the query. */
658 t->flags |= hasFileInfo;
660 return (&t->fileInfo);
664 * Get the lstat() data for the entry just returned from tree_next().
/*
 * Lazily obtain lstat() data for the current entry.
 * On Windows this simply forwards to tree_current_stat(). Elsewhere
 * lstat() is called on the access path, filling t->lst, only while
 * hasLstat is not set; hasLstat is then set to mark the cache valid.
 */
667 tree_current_lstat(struct tree *t)
669 #if defined(_WIN32) && !defined(__CYGWIN__)
670 return (tree_current_stat(t));
672 if (!(t->flags & hasLstat)) {
673 if (lstat(tree_current_access_path(t), &t->lst) != 0)
675 t->flags |= hasLstat;
682 * Test whether current entry is a dir or link to a dir.
/*
 * Report whether the current entry is a directory or a link to one.
 * The result is a truth value: the Windows branch returns the masked
 * FILE_ATTRIBUTE_DIRECTORY bit, which is nonzero but not necessarily 1.
 */
685 tree_current_is_dir(struct tree *t)
687 #if defined(_WIN32) && !defined(__CYGWIN__)
/* Windows: use the attributes from the find data, or from the file information record. */
689 return (t->findData->dwFileAttributes & FILE_ATTRIBUTE_DIRECTORY);
690 if (tree_current_file_information(t))
691 return (t->fileInfo.dwFileAttributes & FILE_ATTRIBUTE_DIRECTORY);
694 const struct stat *st;
696 * If we already have lstat() info, then try some
697 * cheap tests to determine if this is a dir.
699 if (t->flags & hasLstat) {
700 /* If lstat() says it's a dir, it must be a dir. */
701 if (S_ISDIR(tree_current_lstat(t)->st_mode))
703 /* Not a dir; might be a link to a dir. */
704 /* If it's not a link, then it's not a link to a dir. */
705 if (!S_ISLNK(tree_current_lstat(t)->st_mode))
708 * It's a link, but we don't know what it's a link to,
709 * so we'll have to use stat().
713 st = tree_current_stat(t);
714 /* If we can't stat it, it's not a dir. */
717 /* Use the definitive test. Hopefully this is cached. */
718 return (S_ISDIR(st->st_mode));
723 * Test whether current entry is a physical directory. Usually, we
724 * already have at least one of stat() or lstat() in memory, so we
725 * use tricks to try to avoid an extra trip to the disk.
/*
 * Report whether the current entry is itself a directory, as opposed to
 * a link that points at one. The non-Windows branch decides with
 * S_ISDIR() on the lstat() data.
 */
728 tree_current_is_physical_dir(struct tree *t)
730 #if defined(_WIN32) && !defined(__CYGWIN__)
/* Windows: symbolic links are singled out first (presumably reported as not physical); everything else uses tree_current_is_dir(). */
731 if (tree_current_is_physical_link(t))
733 return (tree_current_is_dir(t));
735 const struct stat *st;
738 * If stat() says it isn't a dir, then it's not a dir.
739 * If stat() data is cached, this check is free, so do it first.
/* The stat() shortcut is taken only when hasStat is already set, so it costs no extra system call. */
741 if ((t->flags & hasStat)
742 && (!S_ISDIR(tree_current_stat(t)->st_mode)))
746 * Either stat() said it was a dir (in which case, we have
747 * to determine whether it's really a link to a dir) or
748 * stat() info wasn't available. So we use lstat(), which
749 * hopefully is already cached.
752 st = tree_current_lstat(t);
753 /* If we can't stat it, it's not a dir. */
756 /* Use the definitive test. Hopefully this is cached. */
757 return (S_ISDIR(st->st_mode));
762 * Test whether current entry is a symbolic link.
/*
 * Report whether the current entry is a symbolic link.
 * Windows: true when the find data carries the reparse-point attribute
 * and its reparse tag (held in dwReserved0) is IO_REPARSE_TAG_SYMLINK.
 * Elsewhere: S_ISLNK() applied to the lstat() data.
 */
765 tree_current_is_physical_link(struct tree *t)
767 #if defined(_WIN32) && !defined(__CYGWIN__)
768 #ifndef IO_REPARSE_TAG_SYMLINK
769 /* Old SDKs do not provide IO_REPARSE_TAG_SYMLINK */
770 #define IO_REPARSE_TAG_SYMLINK 0xA000000CL
773 return ((t->findData->dwFileAttributes & FILE_ATTRIBUTE_REPARSE_POINT)
774 && (t->findData->dwReserved0 == IO_REPARSE_TAG_SYMLINK));
777 const struct stat *st = tree_current_lstat(t);
780 return (S_ISLNK(st->st_mode));
785 * Return the access path for the entry just returned from tree_next().
/*
 * Return the name to use for system calls on the current entry.
 * This is only the last path element (t->basename); tree_next() changes
 * into each directory it descends, so the name is presumably meant to be
 * used relative to the working directory.
 */
788 tree_current_access_path(struct tree *t)
790 return (t->basename);
794 * Return the full path for the entry just returned from tree_next().
797 tree_current_path(struct tree *t)
803 * Return the length of the path for the entry just returned from tree_next().
/*
 * Return t->path_length, the total path length that tree_append()
 * maintains for the current entry.
 */
806 tree_current_pathlen(struct tree *t)
808 return (t->path_length);
812 * Return the nesting depth of the entry just returned from tree_next().
815 tree_current_depth(struct tree *t)
821 * Terminate the traversal and release any resources.
/*
 * End the traversal: drain whatever is left on the stack and try to
 * return to the directory recorded in initialDirFd (or initialDir on
 * Windows).
 */
824 tree_close(struct tree *t)
826 /* Release anything remaining in the stack. */
827 while (t->stack != NULL)
830 /* TODO: Ensure that premature close() resets cwd */
833 if (t->initialDirFd >= 0) {
/* Best effort: the fchdir() result is deliberately ignored. */
834 int s = fchdir(t->initialDirFd);
835 (void)s; /* UNUSED */
836 close(t->initialDirFd);
837 t->initialDirFd = -1;
839 #elif defined(_WIN32) && !defined(__CYGWIN__)
840 if (t->initialDir != NULL) {
/*
 * NOTE(review): every other call in this file uses
 * SetCurrentDirectory(); SetCurrentDir looks like a typo -- confirm.
 */
841 SetCurrentDir(t->initialDir);
843 t->initialDir = NULL;