2 * Copyright (c) 2003-2007 Tim Kientzle
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
8 * 1. Redistributions of source code must retain the above copyright
9 * notice, this list of conditions and the following disclaimer.
10 * 2. Redistributions in binary form must reproduce the above copyright
11 * notice, this list of conditions and the following disclaimer in the
12 * documentation and/or other materials provided with the distribution.
14 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR(S) ``AS IS'' AND ANY EXPRESS OR
15 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
16 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
17 * IN NO EVENT SHALL THE AUTHOR(S) BE LIABLE FOR ANY DIRECT, INDIRECT,
18 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
19 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
20 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
21 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
22 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
23 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
26 #include "bsdtar_platform.h"
27 __FBSDID("$FreeBSD: src/usr.bin/tar/util.c,v 1.23 2008/12/15 06:00:25 kientzle Exp $");
29 #ifdef HAVE_SYS_STAT_H
32 #ifdef HAVE_SYS_TYPES_H
33 #include <sys/types.h> /* Linux doesn't define mode_t, etc. in sys/stat.h. */
58 /* If we don't have wctype, we need to hack up some version of iswprint(). */
59 #define iswprint isprint
64 #include "passphrase.h"
66 static size_t bsdtar_expand_char(char *, size_t, char);
67 static const char *strip_components(const char *path, int elements);
69 #if defined(_WIN32) && !defined(__CYGWIN__)
73 /* TODO: Hack up a version of mbtowc for platforms with no wide
74 * character support at all. I think the following might suffice,
75 * but it needs careful testing.
77 * #define mbtowc(wcp, p, n) ((*wcp = *p), 1)
/*
 * safe_fprintf(): printf-style output to `f` in which bytes that are not
 * printable in the current locale, and backslashes, are written in an
 * escaped form (via bsdtar_expand_char()) instead of being emitted raw.
 * The message is formatted with vsnprintf() into a 256-byte stack buffer
 * first; a heap buffer is only tried when that buffer is too small.
 * NOTE(review): several declarations and statements of this function are
 * not visible in this view, so the notes below cover only what is shown.
 */
82 * Print a string, taking care with any non-printable characters.
84 * Note that we use a stack-allocated buffer to receive the formatted
85 * string if we can. This is partly performance (avoiding a call to
86 * malloc()), partly out of expedience (we have to call vsnprintf()
87 * before malloc() anyway to find out how big a buffer we need; we may
88 * as well point that first call at a small local buffer in case it
89 * works), but mostly for safety (so we can use this to print messages
90 * about out-of-memory conditions).
94 safe_fprintf(FILE *f, const char *fmt, ...)
96 char fmtbuff_stack[256]; /* Place to format the printf() string. */
97 char outbuff[256]; /* Buffer for outgoing characters. */
98 char *fmtbuff_heap; /* If fmtbuff_stack is too small, we use malloc */
99 char *fmtbuff; /* Pointer to fmtbuff_stack or fmtbuff_heap. */
108 /* Use a stack-allocated buffer if we can, for speed and safety. */
110 fmtbuff_length = sizeof(fmtbuff_stack);
111 fmtbuff = fmtbuff_stack;
113 /* Try formatting into the stack buffer. */
115 length = vsnprintf(fmtbuff, fmtbuff_length, fmt, ap);
118 /* If the result was too large, allocate a buffer on the heap. */
/* A negative length or one that does not fit both mean "retry with a
 * bigger buffer". */
119 while (length < 0 || length >= fmtbuff_length) {
/* vsnprintf() reported the size it needs: request that plus one byte. */
120 if (length >= fmtbuff_length)
121 fmtbuff_length = length+1;
/* Otherwise the needed size is unknown, so the buffer is grown in steps
 * until it reaches the 1000000-byte cap. */
122 else if (fmtbuff_length < 8192)
124 else if (fmtbuff_length < 1000000)
125 fmtbuff_length += fmtbuff_length / 4;
/* Cap reached: keep what is in the heap buffer, forcibly terminated. */
127 length = fmtbuff_length;
128 fmtbuff_heap[length-1] = '\0';
132 fmtbuff_heap = malloc(fmtbuff_length);
134 /* Reformat the result into the heap buffer if we can. */
135 if (fmtbuff_heap != NULL) {
136 fmtbuff = fmtbuff_heap;
138 length = vsnprintf(fmtbuff, fmtbuff_length, fmt, ap);
141 /* Leave fmtbuff pointing to the truncated
142 * string in fmtbuff_stack. */
143 length = sizeof(fmtbuff_stack) - 1;
148 /* Note: mbrtowc() has a cleaner API, but mbtowc() seems a bit
149 * more portable, so we use that here instead. */
150 if (mbtowc(NULL, NULL, 1) == -1) { /* Reset the shift state. */
151 /* mbtowc() should never fail in practice, but
152 * handle the theoretical error anyway. */
157 /* Write data, expanding unprintable characters. */
163 /* Convert to wide char, test if the wide
164 * char is printable in the current locale. */
165 if (try_wc && (n = mbtowc(&wc, p, length)) != -1) {
/* A backslash is escaped even though it is printable, so the escaped
 * output stays unambiguous. */
167 if (iswprint(wc) && wc != L'\\') {
168 /* Printable, copy the bytes through. */
172 /* Not printable, format the bytes. */
174 i += (unsigned)bsdtar_expand_char(
178 /* After any conversion failure, don't bother
179 * trying to convert the rest. */
180 i += (unsigned)bsdtar_expand_char(outbuff, i, *p++);
184 /* If our output buffer is full, dump it and keep going. */
/* "Full" means fewer than 20 bytes of outbuff remain unused. */
185 if (i > (sizeof(outbuff) - 20)) {
187 fprintf(f, "%s", outbuff);
/* Final flush of whatever is left in outbuff. */
192 fprintf(f, "%s", outbuff);
194 /* If we allocated a heap-based formatting buffer, free it now. */
/*
 * Render one byte into printable ASCII characters.
 *
 * The rendering of `c` is stored starting at buff[offset]:
 *   - a printable byte other than backslash is copied unchanged (1 byte);
 *   - the common control characters and backslash become a two-character
 *     C-style escape such as "\n" or "\\";
 *   - any other byte becomes a backslash followed by three octal digits.
 *
 * The octal form is written with sprintf(), which also stores a
 * terminating NUL, so the caller must leave at least 5 bytes available at
 * buff + offset.
 *
 * Returns the number of characters added, not counting any NUL.
 */
static size_t
bsdtar_expand_char(char *buff, size_t offset, char c)
{
	size_t i = offset;

	if (isprint((unsigned char)c) && c != '\\') {
		buff[i++] = c;
		return (i - offset);
	}

	buff[i++] = '\\';
	switch (c) {
	case '\a': buff[i++] = 'a'; break;
	case '\b': buff[i++] = 'b'; break;
	case '\f': buff[i++] = 'f'; break;
	case '\n': buff[i++] = 'n'; break;
#if '\r' != '\n'
	/* On some platforms, \n and \r are the same. */
	case '\r': buff[i++] = 'r'; break;
#endif
	case '\t': buff[i++] = 't'; break;
	case '\v': buff[i++] = 'v'; break;
	case '\\': buff[i++] = '\\'; break;
	default:
		/* Mask to 8 bits so a negative char does not sign-extend. */
		sprintf(buff + i, "%03o", 0xFF & (int)c);
		i += 3;
		break;
	}

	return (i - offset);
}
/*
 * yes(): write the printf-style prompt built from `fmt`, followed by
 * " (y/N)? ", to stderr, then read the reply with read() and scan it,
 * skipping whitespace characters.
 * NOTE(review): the code that interprets the first non-space character
 * and produces the return value is not visible in this view.
 */
232 yes(const char *fmt, ...)
240 vfprintf(stderr, fmt, ap);
242 fprintf(stderr, " (y/N)? ");
/* The reply is read from file descriptor 2, not 0; one byte of buff is
 * held back (presumably for a terminating NUL -- confirm). */
245 l = read(2, buff, sizeof(buff) - 1);
247 fprintf(stderr, "Keyboard read failed\n");
/* Walk the reply up to its NUL terminator. */
254 for (p = buff; *p != '\0'; p++) {
/* The cast keeps isspace() defined for bytes with the high bit set. */
255 if (isspace((unsigned char)*p))
271 * The logic here for -C <dir> attempts to avoid
272 * chdir() as long as possible. For example:
273 * "-C /foo -C /bar file" needs chdir("/bar") but not chdir("/foo")
274 * "-C /foo -C bar file" needs chdir("/foo/bar")
275 * "-C /foo -C bar /file1" does not need chdir()
276 * "-C /foo -C bar /file1 file2" needs chdir("/foo/bar") before file2
278 * The only correct way to handle this is to record a "pending" chdir
279 * request and combine multiple requests intelligently until we
280 * need to process a non-absolute file. set_chdir() adds the new dir
281 * to the pending list; do_chdir() actually executes any pending chdir.
283 * This way, programs that build tar command lines don't have to worry
284 * about -C with non-existent directories; such requests will only
285 * fail if the directory must be accessed.
289 set_chdir(struct bsdtar *bsdtar, const char *newdir)
291 #if defined(_WIN32) && !defined(__CYGWIN__)
292 if (newdir[0] == '/' || newdir[0] == '\\' ||
293 /* Detect this type, for example, "C:\" or "C:/" */
294 (((newdir[0] >= 'a' && newdir[0] <= 'z') ||
295 (newdir[0] >= 'A' && newdir[0] <= 'Z')) &&
296 newdir[1] == ':' && (newdir[2] == '/' || newdir[2] == '\\'))) {
298 if (newdir[0] == '/') {
300 /* The -C /foo -C /bar case; dump first one. */
301 free(bsdtar->pending_chdir);
302 bsdtar->pending_chdir = NULL;
304 if (bsdtar->pending_chdir == NULL)
305 /* Easy case: no previously-saved dir. */
306 bsdtar->pending_chdir = strdup(newdir);
308 /* The -C /foo -C bar case; concatenate */
309 char *old_pending = bsdtar->pending_chdir;
310 size_t old_len = strlen(old_pending);
311 bsdtar->pending_chdir = malloc(old_len + strlen(newdir) + 2);
312 if (old_pending[old_len - 1] == '/')
313 old_pending[old_len - 1] = '\0';
314 if (bsdtar->pending_chdir != NULL)
315 sprintf(bsdtar->pending_chdir, "%s/%s",
316 old_pending, newdir);
319 if (bsdtar->pending_chdir == NULL)
320 lafe_errc(1, errno, "No memory");
324 do_chdir(struct bsdtar *bsdtar)
326 if (bsdtar->pending_chdir == NULL)
329 if (chdir(bsdtar->pending_chdir) != 0) {
330 lafe_errc(1, 0, "could not chdir to '%s'\n",
331 bsdtar->pending_chdir);
333 free(bsdtar->pending_chdir);
334 bsdtar->pending_chdir = NULL;
/*
 * Skip the first `elements` path components of `p`.
 *
 * Returns a pointer into `p` at the first character after the skipped
 * components and any separators that follow them, or NULL when the path
 * has too few components or nothing is left after stripping.
 */
static const char *
strip_components(const char *p, int elements)
{
	/* Skip as many elements as necessary. */
	while (elements > 0) {
		switch (*p++) {
		case '/':
#if defined(_WIN32) && !defined(__CYGWIN__)
		case '\\': /* Support \ path sep on Windows ONLY. */
#endif
			elements--;
			break;
		case '\0':
			/* Path is too short, skip it. */
			return (NULL);
		default:
			break;
		}
	}

	/* Skip any / characters.  This handles short paths that have
	 * additional / termination.  This also handles the case where
	 * the logic above stops in the middle of a duplicate //
	 * sequence (which would otherwise get converted to an
	 * absolute path). */
	for (;;) {
		switch (*p) {
		case '/':
#if defined(_WIN32) && !defined(__CYGWIN__)
		case '\\': /* Support \ path sep on Windows ONLY. */
#endif
			++p;
			break;
		case '\0':
			/* Nothing left after the stripped components. */
			return (NULL);
		default:
			return (p);
		}
	}
}
/*
 * warn_strip_leading_char(): report that a leading character is being
 * removed from member names.  The warned_lead_slash flag in `bsdtar`
 * limits the warning to its first occurrence.
 * NOTE(review): the call that emits the message, and the argument
 * supplied for '%c', are not visible in this view.
 */
377 warn_strip_leading_char(struct bsdtar *bsdtar, const char *c)
/* Only warn while the flag is still clear. */
379 if (!bsdtar->warned_lead_slash) {
381 "Removing leading '%c' from member names",
/* Remember that the warning has been given. */
383 bsdtar->warned_lead_slash = 1;
/*
 * warn_strip_drive_letter(): report that a leading drive letter is being
 * removed from member names.  It tests and sets the same
 * warned_lead_slash flag as warn_strip_leading_char(), so whichever of
 * the two warnings fires first suppresses the other.
 * NOTE(review): the call that emits the message is not visible in this
 * view.
 */
388 warn_strip_drive_letter(struct bsdtar *bsdtar)
/* Only warn while the shared flag is still clear. */
390 if (!bsdtar->warned_lead_slash) {
392 "Removing leading drive letter from "
/* Remember that a warning has been given. */
394 bsdtar->warned_lead_slash = 1;
/*
 * strip_absolute_path(): skip over absolute-path prefixes at the start
 * of `p`.  The visible checks cover Windows API prefixes ("//./", "//?/",
 * "//?/UNC/"), drive letters, and leading "/", "/./" and "/../"
 * sequences; both '/' and '\\' are accepted as separators.  A warning
 * helper is called when something is removed.
 * NOTE(review): the loop structure, the pointer advances for the Windows
 * prefixes and the return statement are not visible in this view.
 */
399 * Convert absolute path to non-absolute path by skipping leading
400 * absolute path prefixes.
403 strip_absolute_path(struct bsdtar *bsdtar, const char *p)
407 /* Remove leading "//./" or "//?/" or "//?/UNC/"
408 * (absolute path prefixes used by Windows API) */
409 if ((p[0] == '/' || p[0] == '\\') &&
410 (p[1] == '/' || p[1] == '\\') &&
411 (p[2] == '.' || p[2] == '?') &&
412 (p[3] == '/' || p[3] == '\\'))
/* "UNC" is matched case-insensitively, one letter at a time. */
415 (p[4] == 'U' || p[4] == 'u') &&
416 (p[5] == 'N' || p[5] == 'n') &&
417 (p[6] == 'C' || p[6] == 'c') &&
418 (p[7] == '/' || p[7] == '\\'))
422 warn_strip_drive_letter(bsdtar);
425 /* Remove multiple leading slashes and Windows drive letters. */
/* A drive letter is any ASCII letter in p[0]; the rest of this test is
 * not visible here. */
428 if (((p[0] >= 'a' && p[0] <= 'z') ||
429 (p[0] >= 'A' && p[0] <= 'Z')) &&
432 warn_strip_drive_letter(bsdtar);
435 /* Remove leading "/../", "/./", "//", etc. */
436 while (p[0] == '/' || p[0] == '\\') {
439 (p[3] == '/' || p[3] == '\\')) {
440 p += 3; /* Remove "/..", leave "/" for next pass. */
441 } else if (p[1] == '.' &&
442 (p[2] == '/' || p[2] == '\\')) {
443 p += 2; /* Remove "/.", leave "/" for next pass. */
445 p += 1; /* Remove "/". */
/* NOTE(review): `rp` is set outside the visible lines. */
446 warn_strip_leading_char(bsdtar, rp);
/*
 * edit_pathname(): rewrite the pathname and hardlink name of `entry`
 * according to the options held in `bsdtar`.  The visible steps are, in
 * order: user-specified substitutions (only when a regex header is
 * available) applied to the pathname, hardlink target and symlink body;
 * --strip-components; removal of absolute-path prefixes unless
 * option_absolute_paths is set; and finally storing the edited names
 * back into the entry.
 * NOTE(review): the return statements and parts of the branch structure
 * are not visible in this view.
 */
454 * Handle --strip-components and any future path-rewriting options.
455 * Returns non-zero if the pathname should not be extracted.
457 * Note: The rewrites are applied uniformly to pathnames and hardlink
458 * names but not to symlink bodies. This is deliberate: Symlink
459 * bodies are not necessarily filenames. Even when they are, they
460 * need to be interpreted relative to the directory containing them,
461 * so simple rewrites like this are rarely appropriate.
463 * TODO: Support pax-style regex path rewrites.
466 edit_pathname(struct bsdtar *bsdtar, struct archive_entry *entry)
/* The original pointers are kept so the end of the function can tell
 * whether a name was actually changed. */
468 const char *name = archive_entry_pathname(entry);
469 const char *original_name = name;
470 const char *hardlinkname = archive_entry_hardlink(entry);
471 const char *original_hardlinkname = hardlinkname;
472 #if defined(HAVE_REGEX_H) || defined(HAVE_PCREPOSIX_H)
476 /* Apply user-specified substitution to pathname. */
477 r = apply_substitution(bsdtar, name, &subst_name, 0, 0);
479 lafe_warnc(0, "Invalid substitution, skipping entry");
483 archive_entry_copy_pathname(entry, subst_name);
/* A substitution that leaves an empty name is handled separately. */
484 if (*subst_name == '\0') {
/* Re-read the name from the entry, which now holds its own copy. */
489 name = archive_entry_pathname(entry);
490 original_name = name;
493 /* Apply user-specified substitution to hardlink target. */
494 if (hardlinkname != NULL) {
495 r = apply_substitution(bsdtar, hardlinkname, &subst_name, 0, 1);
497 lafe_warnc(0, "Invalid substitution, skipping entry");
501 archive_entry_copy_hardlink(entry, subst_name);
504 hardlinkname = archive_entry_hardlink(entry);
505 original_hardlinkname = hardlinkname;
508 /* Apply user-specified substitution to symlink body. */
509 if (archive_entry_symlink(entry) != NULL) {
510 r = apply_substitution(bsdtar, archive_entry_symlink(entry), &subst_name, 1, 0);
512 lafe_warnc(0, "Invalid substitution, skipping entry");
516 archive_entry_copy_symlink(entry, subst_name);
522 /* Strip leading dir names as per --strip-components option. */
523 if (bsdtar->strip_components > 0) {
524 name = strip_components(name, bsdtar->strip_components);
528 if (hardlinkname != NULL) {
529 hardlinkname = strip_components(hardlinkname,
530 bsdtar->strip_components);
/* The result is checked for NULL before it is used further. */
531 if (hardlinkname == NULL)
536 if (!bsdtar->option_absolute_paths) {
537 /* By default, don't write or restore absolute pathnames. */
538 name = strip_absolute_path(bsdtar, name);
542 if (hardlinkname != NULL) {
543 hardlinkname = strip_absolute_path(bsdtar, hardlinkname);
/* Here the result is checked for an empty string instead. */
544 if (*hardlinkname == '\0')
548 /* Strip redundant leading '/' characters. */
549 while (name[0] == '/' && name[1] == '/')
553 /* Replace name in archive_entry. */
/* Pointer comparison: copy back only when an edit moved the pointer. */
554 if (name != original_name) {
555 archive_entry_copy_pathname(entry, name);
557 if (hardlinkname != original_hardlinkname) {
558 archive_entry_copy_hardlink(entry, hardlinkname);
/*
 * It would be nice to just use printf() for formatting large numbers,
 * but the compatibility problems are quite a headache.  Hence the
 * following simple utility function.
 *
 * Formats `n0` as a signed decimal string and returns a pointer to it.
 * The string lives in a static buffer that is overwritten by the next
 * call, so the result must be used or copied before calling again, and
 * the function is not reentrant.
 */
const char *
tar_i64toa(int64_t n0)
{
	static char buff[24];
	/*
	 * Take the magnitude in unsigned arithmetic: negating INT64_MIN
	 * as a signed value would overflow, which is undefined behavior.
	 */
	uint64_t n = n0 < 0 ? (uint64_t)0 - (uint64_t)n0 : (uint64_t)n0;
	char *p = buff + sizeof(buff);

	/* Build the digits backwards from the end of the buffer. */
	*--p = '\0';
	do {
		*--p = (char)('0' + (int)(n % 10));
		n /= 10;
	} while (n != 0);
	if (n0 < 0)
		*--p = '-';
	return p;
}
/*
 * Like strcmp(), but try to be a little more aware of the fact that
 * we're comparing two paths.  Right now, it just handles leading
 * "./" and trailing '/' specially, so that "a/b/" == "./a/b"
 *
 * Returns 0 when the paths are considered equal, otherwise the
 * difference of the first mismatching bytes compared as unsigned char.
 *
 * TODO: Make this better, so that "./a//b/./c/" == "a/b/c"
 * TODO: After this works, push it down into libarchive.
 * TODO: Publish the path normalization routines in libarchive so
 * that bsdtar can normalize paths and use fast strcmp() instead
 * of this function.
 *
 * Note: This is currently only used within write.c, so should
 * not handle \ path separators.
 */
int
pathcmp(const char *a, const char *b)
{
	/* Skip leading './' (but leave a bare "./" alone). */
	if (a[0] == '.' && a[1] == '/' && a[2] != '\0')
		a += 2;
	if (b[0] == '.' && b[1] == '/' && b[2] != '\0')
		b += 2;
	/* Find the first difference, or return (0) if none. */
	while (*a == *b) {
		if (*a == '\0')
			return (0);
		a++;
		b++;
	}
	/*
	 * If one ends in '/' and the other one doesn't,
	 * they're the same.
	 */
	if (a[0] == '/' && a[1] == '\0' && b[0] == '\0')
		return (0);
	if (a[0] == '\0' && b[0] == '/' && b[1] == '\0')
		return (0);
	/* They're really different, return the correct sign. */
	return (*(const unsigned char *)a - *(const unsigned char *)b);
}
/* Size in bytes of the buffer that receives a passphrase. */
626 #define PPBUFF_SIZE 1024
/*
 * passphrase_callback(): prompt with "Enter passphrase:" through
 * lafe_readpassphrase() and return its result.  `_client_data` is the
 * struct bsdtar; its ppbuff member is allocated on first use and receives
 * the passphrase.  `a` is unused.
 */
628 passphrase_callback(struct archive *a, void *_client_data)
630 struct bsdtar *bsdtar = (struct bsdtar *)_client_data;
631 (void)a; /* UNUSED */
/* Allocate the buffer only once; later calls reuse it. */
633 if (bsdtar->ppbuff == NULL) {
634 bsdtar->ppbuff = malloc(PPBUFF_SIZE);
635 if (bsdtar->ppbuff == NULL)
636 lafe_errc(1, errno, "Out of memory");
638 return lafe_readpassphrase("Enter passphrase:",
639 bsdtar->ppbuff, PPBUFF_SIZE);
643 passphrase_free(char *ppbuff)
645 if (ppbuff != NULL) {
646 memset(ppbuff, 0, PPBUFF_SIZE);
/*
 * list_item_verbose(): write one 'ls -l'-style description of `entry`
 * to `out`.  The visible fields, in order, are: mode string and link
 * count, owner (name or numeric uid), group (name or numeric gid), device
 * numbers or file size, modification time, pathname, and the hardlink or
 * symlink target when there is one.  Column widths are kept in `bsdtar`
 * and only ever grow.
 * NOTE(review): local declarations and several branches are not visible
 * in this view; in particular the size of `tmp`, which sprintf() and
 * strcpy() write into without a bound, cannot be checked from here.
 */
652 * Display information about the current file.
654 * The format here roughly duplicates the output of 'ls -l'.
655 * This is based on SUSv2, where 'tar tv' is documented as
656 * listing additional information in an "unspecified format,"
657 * and 'pax -l' is documented as using the same format as 'ls -l'.
660 list_item_verbose(struct bsdtar *bsdtar, FILE *out, struct archive_entry *entry)
670 * We avoid collecting the entire list in memory at once by
671 * listing things as we see them. However, that also means we can't
672 * just pre-compute the field widths. Instead, we start with guesses
673 * and just widen them as necessary. These numbers are completely
/* A zero u_width means the widths have not been initialized yet. */
676 if (!bsdtar->u_width) {
678 bsdtar->gs_width = 13;
682 fprintf(out, "%s %d ",
683 archive_entry_strmode(entry),
684 archive_entry_nlink(entry));
686 /* Use uname if it's present, else uid. */
687 p = archive_entry_uname(entry);
688 if ((p == NULL) || (*p == '\0')) {
690 (unsigned long)archive_entry_uid(entry));
/* Widen the owner column when this name is longer than any seen so far. */
694 if (w > bsdtar->u_width)
696 fprintf(out, "%-*s ", (int)bsdtar->u_width, p);
698 /* Use gname if it's present, else gid. */
699 p = archive_entry_gname(entry);
700 if (p != NULL && p[0] != '\0') {
701 fprintf(out, "%s", p);
705 (unsigned long)archive_entry_gid(entry));
707 fprintf(out, "%s", tmp);
711 * Print device number or file size, right-aligned so as to make
712 * total width of group and devnum/filesize fields be gs_width.
713 * If gs_width is too small, grow it.
/* Character and block devices show "major,minor" instead of a size. */
715 if (archive_entry_filetype(entry) == AE_IFCHR
716 || archive_entry_filetype(entry) == AE_IFBLK) {
717 sprintf(tmp, "%lu,%lu",
718 (unsigned long)archive_entry_rdevmajor(entry),
719 (unsigned long)archive_entry_rdevminor(entry));
721 strcpy(tmp, tar_i64toa(archive_entry_size(entry)));
/* Grow gs_width so that at least one pad character precedes tmp. */
723 if (w + strlen(tmp) >= bsdtar->gs_width)
724 bsdtar->gs_width = w+strlen(tmp)+1;
725 fprintf(out, "%*s", (int)(bsdtar->gs_width - w), tmp);
727 /* Format the time using 'ls -l' conventions. */
728 tim = archive_entry_mtime(entry);
/* NOTE(review): this macro body is not parenthesized; it is only used
 * as the right operand of '+' and '-' below. */
729 #define HALF_YEAR (time_t)365 * 86400 / 2
730 #if defined(_WIN32) && !defined(__CYGWIN__)
731 #define DAY_FMT "%d" /* Windows' strftime function does not support %e format. */
733 #define DAY_FMT "%e" /* Day number without leading zeros */
/* Times more than half a year from `now`, in either direction, show
 * the year; nearer times show hours and minutes instead. */
735 if (tim < now - HALF_YEAR || tim > now + HALF_YEAR)
736 fmt = bsdtar->day_first ? DAY_FMT " %b %Y" : "%b " DAY_FMT " %Y";
738 fmt = bsdtar->day_first ? DAY_FMT " %b %H:%M" : "%b " DAY_FMT " %H:%M";
739 strftime(tmp, sizeof(tmp), fmt, localtime(&tim));
740 fprintf(out, " %s ", tmp);
/* Names go through safe_fprintf() so unprintable bytes are escaped. */
741 safe_fprintf(out, "%s", archive_entry_pathname(entry));
743 /* Extra information for links. */
744 if (archive_entry_hardlink(entry)) /* Hard link */
745 safe_fprintf(out, " link to %s",
746 archive_entry_hardlink(entry));
747 else if (archive_entry_symlink(entry)) /* Symbolic link */
748 safe_fprintf(out, " -> %s", archive_entry_symlink(entry));