4 * CPDUP <options> source destination
6 * (c) Copyright 1997-1999 by Matthew Dillon and Dima Ruban. Permission to
7 * use and distribute based on the FreeBSD copyright. Supplied as-is,
8 * USE WITH EXTREME CAUTION.
10 * This program attempts to duplicate the source onto the destination as
11 * exactly as possible, retaining modify times, flags, perms, uid, and gid.
12 * It can duplicate devices, files (including hardlinks), softlinks,
13 * directories, and so forth. It is recursive by default! The duplication
14 * is inclusive of removal of files/directories on the destination that do
15 * not exist on the source. This program supports a per-directory exception
16 * file called .cpignore, or a user-specified exception file.
20 * - does not cross partition boundaries on source
21 * - asks for confirmation on deletions unless -i0 is specified
22 * - refuses to replace a destination directory with a source file
23 * unless -s0 is specified.
24 * - terminates on error
28 * - does not copy file if mtime, flags, perms, and size match unless
31 * - copies to temporary and renames-over the original, allowing
32 * you to update live systems
34 * - copies uid, gid, mtime, perms, flags, softlinks, devices, hardlinks,
35 * and recurses through directories.
37 * - accesses a per-directory exclusion file, .cpignore, containing
38 * standard wildcarded ( ? / * style, NOT regex) exclusions.
40 * - tries to handle permissions and flags smartly with regard to overwriting
41 * schg files and doing related stuff.
43 * - Can do MD5 consistency checks
45 * $DragonFly: src/bin/cpdup/cpdup.c,v 1.10 2006/04/25 21:30:45 dillon Exp $
49 * Example: cc -O cpdup.c -o cpdup -lmd
51 * ".MD5.CHECKSUMS" contains md5 checksums for the current directory.
52 * This file is stored on the source.
58 #define HMASK (HSIZE-1)
63 struct Node *no_HNext;
82 struct hlink *hltable[HASHF];
84 void RemoveRecur(const char *dpath, dev_t devNo);
85 void InitList(List *list);
86 void ResetList(List *list);
87 int AddList(List *list, const char *name, int n);
88 static struct hlink *hltlookup(struct stat *);
89 static struct hlink *hltadd(struct stat *, const char *);
90 static int shash(const char *s);
91 static void hltdelete(struct hlink *);
92 int YesNo(const char *path);
93 static int xrename(const char *src, const char *dst, u_long flags);
94 static int xlink(const char *src, const char *dst, u_long flags);
95 int WildCmp(const char *s1, const char *s2);
96 int DoCopy(const char *spath, const char *dpath, dev_t sdevNo, dev_t ddevNo);
98 int AskConfirmation = 1;
107 int EnableDirectoryRetries;
108 const char *UseCpFile;
109 const char *MD5CacheFile;
110 const char *FSMIDCacheFile;
112 int64_t CountSourceBytes;
113 int64_t CountSourceItems;
114 int64_t CountCopiedItems;
115 int64_t CountReadBytes;
116 int64_t CountWriteBytes;
117 int64_t CountRemovedItems;
120 main(int ac, char **av)
125 struct timeval start;
127 gettimeofday(&start, NULL);
128 for (i = 1; i < ac; ++i) {
135 } else if (dst == NULL) {
138 fatal("too many arguments");
146 v = strtol(ptr, NULL, 0);
151 while (*ptr == 'v') {
155 if (*ptr >= '0' && *ptr <= '9')
156 VerboseOpt = strtol(ptr, NULL, 0);
165 UseCpFile = ".cpignore";
168 UseCpFile = (*ptr) ? ptr : av[++i];
184 FSMIDCacheFile = ".FSMID.CHECK";
188 FSMIDCacheFile = av[++i];
192 MD5CacheFile = av[++i];
196 MD5CacheFile = ".MD5.CHECKSUMS";
199 setvbuf(stdout, NULL, _IOLBF, 0);
202 fatal("illegal option: %s\n", ptr - 2);
209 * dst may be NULL only if -m option is specified,
210 * which forces an update of the MD5 checksums
213 if (dst == NULL && UseMD5Opt == 0) {
218 i = DoCopy(src, dst, (dev_t)-1, (dev_t)-1);
220 i = DoCopy(src, NULL, (dev_t)-1, (dev_t)-1);
225 if (SummaryOpt && i == 0) {
229 gettimeofday(&end, NULL);
230 CountSourceBytes += sizeof(struct stat) * CountSourceItems;
231 CountReadBytes += sizeof(struct stat) * CountSourceItems;
232 CountWriteBytes += sizeof(struct stat) * CountCopiedItems;
233 CountWriteBytes += sizeof(struct stat) * CountRemovedItems;
235 duration = end.tv_sec - start.tv_sec;
237 duration += end.tv_usec - start.tv_usec;
238 if (duration == 0) duration = 1;
239 logstd("cpdup completed successfully\n");
240 logstd("%lld bytes source %lld bytes read %lld bytes written (%.1fX speedup)\n",
241 (long long)CountSourceBytes,
242 (long long)CountReadBytes,
243 (long long)CountWriteBytes,
244 ((double)CountSourceBytes * 2.0) / ((double)(CountReadBytes + CountWriteBytes)));
245 logstd("%lld source items %lld items copied %lld things deleted\n",
246 (long long)CountSourceItems,
247 (long long)CountCopiedItems,
248 (long long)CountRemovedItems);
249 logstd("%.1f seconds %5d Kbytes/sec synced %5d Kbytes/sec scanned\n",
250 (float)duration / (float)1000000,
251 (long)((long)1000000 * (CountReadBytes + CountWriteBytes) / duration / 1024.0),
252 (long)((long)1000000 * CountSourceBytes / duration / 1024.0));
254 exit((i == 0) ? 0 : 1);
257 static struct hlink *
258 hltlookup(struct stat *stp)
263 n = stp->st_ino % HASHF;
265 for (hl = hltable[n]; hl; hl = hl->next)
266 if (hl->ino == stp->st_ino)
272 static struct hlink *
273 hltadd(struct stat *stp, const char *path)
278 if (!(new = malloc(sizeof (struct hlink)))) {
279 fprintf(stderr, "out of memory\n");
283 /* initialize and link the new element into the table */
284 new->ino = stp->st_ino;
286 strncpy(new->name, path, 2048);
289 n = stp->st_ino % HASHF;
290 new->next = hltable[n];
292 hltable[n]->prev = new;
299 hltdelete(struct hlink *hl)
303 hl->next->prev = hl->prev;
304 hl->prev->next = hl->next;
307 hl->next->prev = NULL;
309 hltable[hl->ino % HASHF] = hl->next;
316 DoCopy(const char *spath, const char *dpath, dev_t sdevNo, dev_t ddevNo)
320 int r, mres, fres, st2Valid;
326 r = mres = fres = st2Valid = 0;
330 if (lstat(spath, &st1) != 0)
332 st2.st_mode = 0; /* in case lstat fails */
333 st2.st_flags = 0; /* in case lstat fails */
334 if (dpath && lstat(dpath, &st2) == 0)
337 if (S_ISREG(st1.st_mode)) {
338 size = st1.st_blocks * 512;
339 if (st1.st_size % 512)
340 size += st1.st_size % 512 - 512;
347 if (S_ISREG(st1.st_mode) && st1.st_nlink > 1 && dpath) {
348 if ((hln = hltlookup(&st1)) != NULL) {
352 if (st2.st_ino == hln->dino) {
354 * hard link is already correct, nothing to do
357 logstd("%-32s nochange\n", (dpath) ? dpath : spath);
358 if (hln->nlinked == st1.st_nlink)
364 * hard link is not correct, attempt to unlink it
366 if (unlink(dpath) < 0) {
367 logerr("%-32s hardlink: unable to unlink: %s\n",
368 ((dpath) ? dpath : spath), strerror(errno));
375 if (xlink(hln->name, dpath, st1.st_flags) < 0) {
376 logerr("%-32s hardlink: unable to link to %s: %s\n",
377 (dpath ? dpath : spath), hln->name, strerror(errno)
383 if (hln->nlinked == st1.st_nlink) {
389 logstd("%-32s hardlink: %s\n",
390 (dpath ? dpath : spath),
391 (st2Valid ? "relinked" : "linked")
401 * first instance of hardlink must be copied normally
403 hln = hltadd(&st1, dpath);
408 * Do we need to copy the file/dir/link/whatever? Early termination
409 * if we do not. Always redo links. Directories are always traversed
410 * except when the FSMID options are used.
412 * NOTE: st2Valid is true only if dpath != NULL *and* dpath stats good.
417 st1.st_mode == st2.st_mode &&
418 st1.st_flags == st2.st_flags
420 if (S_ISLNK(st1.st_mode) || S_ISDIR(st1.st_mode)) {
422 * If FSMID tracking is turned on we can avoid recursing through
423 * an entire directory subtree if the FSMID matches.
425 #ifdef _ST_FSMID_PRESENT_
427 (UseFSMIDOpt && (fres = fsmid_check(st1.st_fsmid, dpath)) == 0)
429 if (VerboseOpt >= 3) {
431 logstd("%-32s fsmid-nochange\n", (dpath ? dpath : spath));
433 logstd("%-32s nochange\n", (dpath ? dpath : spath));
440 st1.st_size == st2.st_size &&
441 st1.st_uid == st2.st_uid &&
442 st1.st_gid == st2.st_gid &&
443 st1.st_mtime == st2.st_mtime
444 && (UseMD5Opt == 0 || (mres = md5_check(spath, dpath)) == 0)
445 #ifdef _ST_FSMID_PRESENT_
446 && (UseFSMIDOpt == 0 || (fres = fsmid_check(st1.st_fsmid, dpath)) == 0)
450 hln->dino = st2.st_ino;
451 if (VerboseOpt >= 3) {
453 logstd("%-32s md5-nochange\n", (dpath ? dpath : spath));
454 else if (UseFSMIDOpt)
455 logstd("%-32s fsmid-nochange\n", (dpath ? dpath : spath));
457 logstd("%-32s nochange\n", (dpath ? dpath : spath));
459 CountSourceBytes += size;
466 if (st2Valid && !S_ISDIR(st1.st_mode) && S_ISDIR(st2.st_mode)) {
468 logerr("%-32s SAFETY - refusing to copy file over directory\n",
469 (dpath ? dpath : spath)
472 return(0); /* continue with the cpdup anyway */
474 if (QuietOpt == 0 || AskConfirmation) {
475 logstd("%-32s WARNING: non-directory source will blow away\n"
476 "%-32s preexisting dest directory, continuing anyway!\n",
477 ((dpath) ? dpath : spath), "");
480 RemoveRecur(dpath, ddevNo);
484 * The various comparisons failed, copy it.
486 if (S_ISDIR(st1.st_mode)) {
490 logerr("%-32s/ fsmid-CHECK-FAILED\n", (dpath) ? dpath : spath);
491 if ((dir = opendir(spath)) != NULL) {
496 if (S_ISDIR(st2.st_mode) == 0) {
498 if (mkdir(dpath, st1.st_mode | 0700) != 0) {
499 logerr("%s: mkdir failed: %s\n",
500 (dpath ? dpath : spath), strerror(errno));
505 * Matt: why don't you check error codes here?
508 chown(dpath, st1.st_uid, st1.st_gid);
512 * Directory must be scannable by root for cpdup to
513 * work. We'll fix it later if the directory isn't
514 * supposed to be readable ( which is why we fixup
515 * st2.st_mode to match what we did ).
517 if ((st2.st_mode & 0700) != 0700) {
518 chmod(dpath, st2.st_mode | 0700);
522 logstd("%s\n", dpath ? dpath : spath);
526 if ((int)sdevNo >= 0 && st1.st_dev != sdevNo) {
532 if ((int)ddevNo >= 0 && st2.st_dev != ddevNo) {
539 * scan .cpignore file for files/directories
548 if (UseCpFile[0] == '/') {
549 fpath = mprintf("%s", UseCpFile);
551 fpath = mprintf("%s/%s", spath, UseCpFile);
553 AddList(&list, strrchr(fpath, '/') + 1, 1);
554 if ((fi = fopen(fpath, "r")) != NULL) {
555 while (fgets(buf, sizeof(buf), fi) != NULL) {
558 if (l && buf[l-1] == '\n')
560 if (buf[0] && buf[0] != '#')
561 AddList(&list, buf, 1);
569 * Automatically exclude MD5CacheFile that we create on the
570 * source from the copy to the destination.
572 * Automatically exclude a FSMIDCacheFile on the source that
573 * would otherwise overwrite the one we maintain on the target.
576 AddList(&list, MD5CacheFile, 1);
578 AddList(&list, FSMIDCacheFile, 1);
580 while (noLoop == 0 && (den = readdir(dir)) != NULL) {
587 if (strcmp(den->d_name, ".") == 0 ||
588 strcmp(den->d_name, "..") == 0
593 * ignore if on .cpignore list
595 if (AddList(&list, den->d_name, 0) == 1) {
598 nspath = mprintf("%s/%s", spath, den->d_name);
600 ndpath = mprintf("%s/%s", dpath, den->d_name);
615 * Remove files/directories from destination that do not appear
618 if (dpath && (dir = opendir(dpath)) != NULL) {
619 while (noLoop == 0 && (den = readdir(dir)) != NULL) {
623 if (strcmp(den->d_name, ".") == 0 ||
624 strcmp(den->d_name, "..") == 0
629 * If object does not exist in source or .cpignore
630 * then recursively remove it.
632 if (AddList(&list, den->d_name, 3) == 3) {
635 ndpath = mprintf("%s/%s", dpath, den->d_name);
636 RemoveRecur(ndpath, ddevNo);
646 st1.st_uid != st2.st_uid ||
647 st1.st_gid != st2.st_gid
649 chown(dpath, st1.st_uid, st1.st_gid);
651 if (st2Valid == 0 || st1.st_mode != st2.st_mode) {
652 chmod(dpath, st1.st_mode);
654 if (st2Valid == 0 || st1.st_flags != st2.st_flags) {
655 chflags(dpath, st1.st_flags);
659 } else if (dpath == NULL) {
661 * If dpath is NULL, we are just updating the MD5
663 if (UseMD5Opt && S_ISREG(st1.st_mode)) {
664 mres = md5_check(spath, NULL);
666 if (VerboseOpt > 1) {
668 logstd("%-32s md5-update\n", (dpath) ? dpath : spath);
670 logstd("%-32s md5-ok\n", (dpath) ? dpath : spath);
671 } else if (!QuietOpt && mres < 0) {
672 logstd("%-32s md5-update\n", (dpath) ? dpath : spath);
675 } else if (S_ISREG(st1.st_mode)) {
680 path = mprintf("%s.tmp", dpath);
683 * Handle check failure message.
686 logerr("%-32s md5-CHECK-FAILED\n", (dpath) ? dpath : spath);
688 logerr("%-32s fsmid-CHECK-FAILED\n", (dpath) ? dpath : spath);
690 if ((fd1 = open(spath, O_RDONLY)) >= 0) {
691 if ((fd2 = open(path, O_WRONLY|O_CREAT|O_EXCL, 0600)) < 0) {
693 * There could be a .tmp file from a previously interrupted
694 * run, delete and retry. Fail if we still can't get at it.
698 fd2 = open(path, O_WRONLY|O_CREAT|O_EXCL|O_TRUNC, 0600);
702 * Matt: I think 64k would be faster here
709 * Matt: What about holes?
712 while ((n = read(fd1, buf, sizeof(buf))) > 0) {
714 if (write(fd2, buf, n) != n)
720 struct timeval tv[2];
722 bzero(tv, sizeof(tv));
723 tv[0].tv_sec = st1.st_mtime;
724 tv[1].tv_sec = st1.st_mtime;
727 chown(path, st1.st_uid, st1.st_gid);
728 chmod(path, st1.st_mode);
729 if (xrename(path, dpath, st2.st_flags) != 0) {
730 logerr("%-32s rename-after-copy failed: %s\n",
731 (dpath ? dpath : spath), strerror(errno)
736 logstd("%-32s copy-ok\n", (dpath ? dpath : spath));
738 chflags(dpath, st1.st_flags);
740 CountReadBytes += size;
741 CountWriteBytes += size;
742 CountSourceBytes += size;
746 logerr("%-32s %s failed: %s\n",
747 (dpath ? dpath : spath), op, strerror(errno)
753 logerr("%-32s create (uid %d, euid %d) failed: %s\n",
754 (dpath ? dpath : spath), getuid(), geteuid(),
761 logerr("%-32s copy: open failed: %s\n",
762 (dpath ? dpath : spath),
770 if (!r && stat(dpath, &st2) == 0)
771 hln->dino = st2.st_ino;
775 } else if (S_ISLNK(st1.st_mode)) {
782 snprintf(path, sizeof(path), "%s.tmp", dpath);
783 n1 = readlink(spath, link1, sizeof(link1) - 1);
784 n2 = readlink(dpath, link2, sizeof(link2) - 1);
786 if (ForceOpt || n1 != n2 || bcmp(link1, link2, n1) != 0) {
790 if (symlink(link1, path) < 0) {
791 logerr("%-32s symlink (%s->%s) failed: %s\n",
792 (dpath ? dpath : spath), link1, path,
797 lchown(path, st1.st_uid, st1.st_gid);
799 * there is no lchmod() or lchflags(), we
800 * cannot chmod or chflags a softlink.
802 if (xrename(path, dpath, st2.st_flags) != 0) {
803 logerr("%-32s rename softlink (%s->%s) failed: %s\n",
804 (dpath ? dpath : spath),
805 path, dpath, strerror(errno));
806 } else if (VerboseOpt) {
807 logstd("%-32s softlink-ok\n", (dpath ? dpath : spath));
810 CountWriteBytes += n1;
815 logstd("%-32s nochange\n", (dpath ? dpath : spath));
817 CountSourceBytes += n1;
818 CountReadBytes += n1;
819 if (n2 > 0) CountReadBytes += n2;
823 logerr("%-32s softlink-failed\n", (dpath ? dpath : spath));
825 } else if (S_ISCHR(st1.st_mode) || S_ISBLK(st1.st_mode)) {
830 st1.st_mode != st2.st_mode ||
831 st1.st_rdev != st2.st_rdev ||
832 st1.st_uid != st2.st_uid ||
833 st1.st_gid != st2.st_gid
835 snprintf(path, sizeof(path), "%s.tmp", dpath);
838 if (mknod(path, st1.st_mode, st1.st_rdev) == 0) {
839 chmod(path, st1.st_mode);
840 chown(path, st1.st_uid, st1.st_gid);
842 if (xrename(path, dpath, st2.st_flags) != 0) {
843 logerr("%-32s dev-rename-after-create failed: %s\n",
844 (dpath ? dpath : spath),
847 } else if (VerboseOpt) {
848 logstd("%-32s dev-ok\n", (dpath ? dpath : spath));
853 logerr("%-32s dev failed: %s\n",
854 (dpath ? dpath : spath), strerror(errno)
859 logstd("%-32s nochange\n", (dpath ? dpath : spath));
872 RemoveRecur(const char *dpath, dev_t devNo)
876 if (lstat(dpath, &st) == 0) {
879 if (st.st_dev == devNo) {
880 if (S_ISDIR(st.st_mode)) {
883 if ((dir = opendir(dpath)) != NULL) {
885 while ((den = readdir(dir)) != NULL) {
888 if (strcmp(den->d_name, ".") == 0)
890 if (strcmp(den->d_name, "..") == 0)
892 ndpath = mprintf("%s/%s", dpath, den->d_name);
893 RemoveRecur(ndpath, devNo);
898 if (AskConfirmation && NoRemoveOpt == 0) {
900 if (rmdir(dpath) < 0) {
901 logerr("%-32s rmdir failed: %s\n",
902 dpath, strerror(errno)
910 logstd("%-32s not-removed\n", dpath);
911 } else if (rmdir(dpath) == 0) {
913 logstd("%-32s rmdir-ok\n", dpath);
916 logerr("%-32s rmdir failed: %s\n",
917 dpath, strerror(errno)
922 if (AskConfirmation && NoRemoveOpt == 0) {
924 if (remove(dpath) < 0) {
925 logerr("%-32s remove failed: %s\n",
926 dpath, strerror(errno)
934 logstd("%-32s not-removed\n", dpath);
935 } else if (remove(dpath) == 0) {
937 logstd("%-32s remove-ok\n", dpath);
940 logerr("%-32s remove failed: %s\n",
941 dpath, strerror(errno)
953 bzero(list, sizeof(List));
954 list->li_Node.no_Next = &list->li_Node;
958 ResetList(List *list)
962 while ((node = list->li_Node.no_Next) != &list->li_Node) {
963 list->li_Node.no_Next = node->no_Next;
970 AddList(List *list, const char *name, int n)
978 * Scan against wildcards. Only a node value of 1 can be a wildcard
979 * ( usually scanned from .cpignore )
982 for (node = list->li_Hash[0]; node; node = node->no_HNext) {
983 if (strcmp(name, node->no_Name) == 0 ||
984 (n != 1 && node->no_Value == 1 && WildCmp(node->no_Name, name) == 0)
986 return(node->no_Value);
991 * Look for exact match
994 for (node = list->li_Hash[hv]; node; node = node->no_HNext) {
995 if (strcmp(name, node->no_Name) == 0) {
996 return(node->no_Value);
999 node = malloc(sizeof(Node) + strlen(name) + 1);
1001 fprintf(stderr, "out of memory\n");
1005 node->no_Next = list->li_Node.no_Next;
1006 list->li_Node.no_Next = node;
1008 node->no_HNext = list->li_Hash[hv];
1009 list->li_Hash[hv] = node;
1011 strcpy(node->no_Name, name);
1018 shash(const char *s)
1025 if (*s == '*' || *s == '?' ||
1026 *s == '{' || *s == '}' ||
1027 *s == '[' || *s == ']' ||
1032 hv = (hv << 5) ^ *s ^ (hv >> 23);
1035 return(((hv >> 16) ^ hv) & HMASK);
1039 * WildCmp() - compare wild string to sane string
1041 * Return 0 on success, -1 on failure.
1045 WildCmp(const char *w, const char *s)
1048 * skip fixed portion
1054 if (w[1] == 0) /* optimize wild* case */
1060 for (i = 0; i <= l; ++i) {
1061 if (WildCmp(w + 1, s + i) == 0)
1075 if (*w == 0) /* terminator */
1087 YesNo(const char *path)
1091 fprintf(stderr, "remove %s (Yes/No) [No]? ", path);
1094 first = ch = getchar();
1095 while (ch != '\n' && ch != EOF)
1097 return ((first == 'y' || first == 'Y'));
1101 * xrename() - rename with override
1103 * If the rename fails, attempt to override st_flags on the
1104 * destination and rename again. If that fails too, try to
1105 * set the flags back the way they were and give up.
1109 xrename(const char *src, const char *dst, u_long flags)
1115 if ((r = rename(src, dst)) < 0) {
1117 if ((r = rename(src, dst)) < 0)
1118 chflags(dst, flags);
1124 xlink(const char *src, const char *dst, u_long flags)
1130 if ((r = link(src, dst)) < 0) {
1134 chflags(src, flags);