From d5fdcd004fae4280ec3972beba47f0ed605fd873 Mon Sep 17 00:00:00 2001 From: Matthew Dillon Date: Sat, 22 Mar 2008 18:09:16 +0000 Subject: [PATCH] Miscellaneous features and adjustments to cpdup. * Add the -V option to cpdup. If specified cpdup will verify the contents of regular files which otherwise appear to be the same. * Only MD5 regular files. * Separate out target bytes read from source bytes read, and do not count the stat structure as bytes read. * Track the number of hard-links created and report in -I output. --- bin/cpdup/cpdup.1 | 22 ++++++-- bin/cpdup/cpdup.c | 126 ++++++++++++++++++++++++++++++---------------- bin/cpdup/cpdup.h | 5 +- bin/cpdup/fsmid.c | 4 +- bin/cpdup/md5.c | 25 ++++----- bin/cpdup/misc.c | 6 ++- 6 files changed, 125 insertions(+), 63 deletions(-) diff --git a/bin/cpdup/cpdup.1 b/bin/cpdup/cpdup.1 index bdf3fbd1f5..e3a81a8bd4 100644 --- a/bin/cpdup/cpdup.1 +++ b/bin/cpdup/cpdup.1 @@ -3,7 +3,7 @@ .\" USE WITH EXTREME CAUTION. .\" .\" -.\" $DragonFly: src/bin/cpdup/cpdup.1,v 1.25 2007/12/23 15:31:28 swildner Exp $ +.\" $DragonFly: src/bin/cpdup/cpdup.1,v 1.26 2008/03/22 18:09:16 dillon Exp $ .Dd October 28, 1999 .Dt CPDUP 1 .Os @@ -30,6 +30,7 @@ .Fl M .Ar file .Oc +.Op Fl V .Op Fl S .Op Fl k .Oo @@ -121,8 +122,8 @@ to the target instead of copying the source to the target if the file found via .Ar path is identical to the source. -Note that a remote host specification should not be used in this option, -but the path will be relative to the target machine. +Note that a remote host specification should not be used for this option's +path, but the path will be relative to the target machine. .Pp This allows one to use .Nm @@ -145,10 +146,25 @@ WARNING: If this option is used .Nm must record the paths for all files it encounters while it operates and it is possible that you may run the process out of memory. 
+.Pp +The file found via the hardlink path will be byte-by-byte compared with the +source if the +.Fl V +or +.Fl f +option is also used, otherwise only the stat info is checked to determine +whether it matches the source. .It Fl M Ar file Works the same as .Fl m but allows you to specify the name of the MD5 checkfile. +.It Fl V +This forces the contents of regular files to be verified, even if the +files appear to be the same. Whereas the +.Fl f +(force) option forces a copy regardless, this option will avoid rewriting +the target if everything matches and the contents are verified to be the +same. .It Fl S This places .Nm diff --git a/bin/cpdup/cpdup.c b/bin/cpdup/cpdup.c index b9f7f53a54..7b60484a4e 100644 --- a/bin/cpdup/cpdup.c +++ b/bin/cpdup/cpdup.c @@ -45,7 +45,7 @@ * - Is able to do incremental mirroring/backups via hardlinks from * the 'previous' version (supplied with -H path). * - * $DragonFly: src/bin/cpdup/cpdup.c,v 1.18 2006/09/21 04:09:28 dillon Exp $ + * $DragonFly: src/bin/cpdup/cpdup.c,v 1.19 2008/03/22 18:09:16 dillon Exp $ */ /*- @@ -98,6 +98,7 @@ int AddList(List *list, const char *name, int n); static struct hlink *hltlookup(struct stat *); static struct hlink *hltadd(struct stat *, const char *); static char *checkHLPath(struct stat *st, const char *spath, const char *dpath); +static int validate_check(const char *spath, const char *dpath); static int shash(const char *s); static void hltdelete(struct hlink *); int YesNo(const char *path); @@ -119,6 +120,7 @@ int SummaryOpt; int SlaveOpt; int EnableDirectoryRetries; int DstBaseLen; +int ValidateOpt; char IOBuf1[65536]; char IOBuf2[65536]; const char *UseCpFile; @@ -129,9 +131,11 @@ const char *FSMIDCacheFile; int64_t CountSourceBytes; int64_t CountSourceItems; int64_t CountCopiedItems; -int64_t CountReadBytes; +int64_t CountSourceReadBytes; +int64_t CountTargetReadBytes; int64_t CountWriteBytes; int64_t CountRemovedItems; +int64_t CountLinkedItems; struct HostConf SrcHost; struct HostConf 
DstHost; @@ -178,6 +182,9 @@ main(int ac, char **av) if (*ptr >= '0' && *ptr <= '9') VerboseOpt = strtol(ptr, NULL, 0); break; + case 'V': + ValidateOpt = v; + break; case 'I': SummaryOpt = v; break; @@ -298,28 +305,35 @@ main(int ac, char **av) struct timeval end; gettimeofday(&end, NULL); +#if 0 + /* don't count stat's in our byte statistics */ CountSourceBytes += sizeof(struct stat) * CountSourceItems; - CountReadBytes += sizeof(struct stat) * CountSourceItems; + CountSourceReadBytes += sizeof(struct stat) * CountSourceItems; CountWriteBytes += sizeof(struct stat) * CountCopiedItems; CountWriteBytes += sizeof(struct stat) * CountRemovedItems; +#endif duration = end.tv_sec - start.tv_sec; duration *= 1000000; duration += end.tv_usec - start.tv_usec; if (duration == 0) duration = 1; logstd("cpdup completed successfully\n"); - logstd("%lld bytes source %lld bytes read %lld bytes written (%.1fX speedup)\n", + logstd("%lld bytes source, %lld src bytes read, %lld tgt bytes read\n" + "%lld bytes written (%.1fX speedup)\n", (long long)CountSourceBytes, - (long long)CountReadBytes, + (long long)CountSourceReadBytes, + (long long)CountTargetReadBytes, (long long)CountWriteBytes, - ((double)CountSourceBytes * 2.0) / ((double)(CountReadBytes + CountWriteBytes))); - logstd("%lld source items %lld items copied %lld things deleted\n", + ((double)CountSourceBytes * 2.0) / ((double)(CountSourceReadBytes + CountTargetReadBytes + CountWriteBytes))); + logstd("%lld source items, %lld items copied, %lld items linked, " + "%lld things deleted\n", (long long)CountSourceItems, (long long)CountCopiedItems, + (long long)CountLinkedItems, (long long)CountRemovedItems); logstd("%.1f seconds %5d Kbytes/sec synced %5d Kbytes/sec scanned\n", (float)duration / (float)1000000, - (long)((long)1000000 * (CountReadBytes + CountWriteBytes) / duration / 1024.0), + (long)((long)1000000 * (CountSourceReadBytes + CountTargetReadBytes + CountWriteBytes) / duration / 1024.0), (long)((long)1000000 * 
CountSourceBytes / duration / 1024.0)); } exit((i == 0) ? 0 : 1); @@ -395,9 +409,7 @@ checkHLPath(struct stat *st1, const char *spath, const char *dpath) { struct stat sthl; char *hpath; - int fd1; - int fd2; - int good; + int error; asprintf(&hpath, "%s%s", UseHLPath, dpath + DstBaseLen); @@ -415,30 +427,11 @@ checkHLPath(struct stat *st1, const char *spath, const char *dpath) } /* - * If ForceOpt is set we have to compare the files + * If ForceOpt or ValidateOpt is set we have to compare the files */ - if (ForceOpt) { - fd1 = hc_open(&SrcHost, spath, O_RDONLY, 0); - fd2 = hc_open(&DstHost, hpath, O_RDONLY, 0); - good = 0; - - if (fd1 >= 0 && fd2 >= 0) { - int n; - - while ((n = hc_read(&SrcHost, fd1, IOBuf1, sizeof(IOBuf1))) > 0) { - if (hc_read(&DstHost, fd2, IOBuf2, sizeof(IOBuf2)) != n) - break; - if (bcmp(IOBuf1, IOBuf2, n) != 0) - break; - } - if (n == 0) - good = 1; - } - if (fd1 >= 0) - hc_close(&SrcHost, fd1); - if (fd2 >= 0) - hc_close(&DstHost, fd2); - if (good == 0) { + if (ForceOpt || ValidateOpt) { + error = validate_check(spath, hpath); + if (error) { free(hpath); hpath = NULL; } @@ -446,6 +439,45 @@ checkHLPath(struct stat *st1, const char *spath, const char *dpath) return(hpath); } +/* + * Return 0 if the contents of the file <spath> matches the contents of + * the file <dpath>. 
+ */ +static int +validate_check(const char *spath, const char *dpath) +{ + int error; + int fd1; + int fd2; + + fd1 = hc_open(&SrcHost, spath, O_RDONLY, 0); + fd2 = hc_open(&DstHost, dpath, O_RDONLY, 0); + error = -1; + + if (fd1 >= 0 && fd2 >= 0) { + int n; + int x; + + while ((n = hc_read(&SrcHost, fd1, IOBuf1, sizeof(IOBuf1))) > 0) { + CountSourceReadBytes += n; + x = hc_read(&DstHost, fd2, IOBuf2, sizeof(IOBuf2)); + if (x > 0) + CountTargetReadBytes += x; + if (x != n) + break; + if (bcmp(IOBuf1, IOBuf2, n) != 0) + break; + } + if (n == 0) + error = 0; + } + if (fd1 >= 0) + hc_close(&SrcHost, fd1); + if (fd2 >= 0) + hc_close(&DstHost, fd2); + return (error); +} + int DoCopy(const char *spath, const char *dpath, dev_t sdevNo, dev_t ddevNo) { @@ -469,9 +501,7 @@ DoCopy(const char *spath, const char *dpath, dev_t sdevNo, dev_t ddevNo) st2Valid = 1; if (S_ISREG(st1.st_mode)) { - size = st1.st_blocks * 512; - if (st1.st_size % 512) - size += st1.st_size % 512 - 512; + size = st1.st_size; } /* @@ -584,11 +614,15 @@ relink: st1.st_gid == st2.st_gid && st1.st_mtime == st2.st_mtime #ifndef NOMD5 - && (UseMD5Opt == 0 || (mres = md5_check(spath, dpath)) == 0) + && (UseMD5Opt == 0 || !S_ISREG(st1.st_mode) || + (mres = md5_check(spath, dpath)) == 0) #endif #ifdef _ST_FSMID_PRESENT_ - && (UseFSMIDOpt == 0 || (fres = fsmid_check(st1.st_fsmid, dpath)) == 0) + && (UseFSMIDOpt == 0 || + (fres = fsmid_check(st1.st_fsmid, dpath)) == 0) #endif + && (ValidateOpt == 0 || !S_ISREG(st1.st_mode) || + validate_check(spath, dpath) == 0) ) { if (hln) hln->dino = st2.st_ino; @@ -600,6 +634,8 @@ relink: #endif if (UseFSMIDOpt) logstd("%-32s fsmid-nochange\n", (dpath ? dpath : spath)); + else if (ValidateOpt) + logstd("%-32s nochange (contents validated)\n", (dpath ? dpath : spath)); else logstd("%-32s nochange\n", (dpath ? 
dpath : spath)); } @@ -701,7 +737,7 @@ relink: if ((fi = fopen(fpath, "r")) != NULL) { while (fgets(buf, sizeof(buf), fi) != NULL) { int l = strlen(buf); - CountReadBytes += l; + CountSourceReadBytes += l; if (l && buf[l-1] == '\n') buf[--l] = 0; if (buf[0] && buf[0] != '#') @@ -866,6 +902,7 @@ relink: if (st2Valid) hc_remove(&DstHost, dpath); if (hc_link(&DstHost, hpath, dpath) == 0) { + ++CountLinkedItems; if (VerboseOpt) { logstd("%-32s hardlinked(-H)\n", (dpath ? dpath : spath)); @@ -930,7 +967,7 @@ relink: hc_chflags(&DstHost, dpath, st1.st_flags); #endif } - CountReadBytes += size; + CountSourceReadBytes += size; CountWriteBytes += size; CountSourceBytes += size; CountSourceItems++; @@ -1009,8 +1046,9 @@ skip_copy: logstd("%-32s nochange\n", (dpath ? dpath : spath)); } CountSourceBytes += n1; - CountReadBytes += n1; - if (n2 > 0) CountReadBytes += n2; + CountSourceReadBytes += n1; + if (n2 > 0) + CountTargetReadBytes += n2; CountSourceItems++; } else { r = 1; @@ -1335,6 +1373,8 @@ xlink(const char *src, const char *dst, u_long flags) errno = e; #endif } + if (r == 0) + ++CountLinkedItems; return(r); } diff --git a/bin/cpdup/cpdup.h b/bin/cpdup/cpdup.h index 2734d8b2dc..cdb4282022 100644 --- a/bin/cpdup/cpdup.h +++ b/bin/cpdup/cpdup.h @@ -1,7 +1,7 @@ /* * CPDUP.H * - * $DragonFly: src/bin/cpdup/cpdup.h,v 1.5 2006/08/18 01:13:51 dillon Exp $ + * $DragonFly: src/bin/cpdup/cpdup.h,v 1.6 2008/03/22 18:09:16 dillon Exp $ */ #include @@ -47,7 +47,8 @@ extern int SummaryOpt; extern int64_t CountSourceBytes; extern int64_t CountSourceItems; extern int64_t CountCopiedItems; -extern int64_t CountReadBytes; +extern int64_t CountSourceReadBytes; +extern int64_t CountTargetReadBytes; extern int64_t CountWriteBytes; extern int64_t CountRemovedItems; diff --git a/bin/cpdup/fsmid.c b/bin/cpdup/fsmid.c index 8493525544..40071cc6ec 100644 --- a/bin/cpdup/fsmid.c +++ b/bin/cpdup/fsmid.c @@ -4,7 +4,7 @@ * (c) Copyright 1997-1999,2006 by Matthew Dillon. 
Permission to * use and distribute based on the FreeBSD copyright. * - * $DragonFly: src/bin/cpdup/fsmid.c,v 1.1 2006/04/25 21:30:45 dillon Exp $ + * $DragonFly: src/bin/cpdup/fsmid.c,v 1.2 2008/03/22 18:09:16 dillon Exp $ */ #include "cpdup.h" @@ -122,7 +122,7 @@ fsmid_cache(const char *dpath, int ddirlen) * extracting fid_Name - name may contain embedded control * characters. */ - CountReadBytes += nlen+1; + CountSourceReadBytes += nlen+1; node->fid_Name = fextract(fi, nlen, &c, EOF); if (c != '\n') { fprintf(stderr, "Error parsing FSMID Cache: %s (%c)\n", FSMIDDCache, c); diff --git a/bin/cpdup/md5.c b/bin/cpdup/md5.c index 607b7a3306..3dbfc65943 100644 --- a/bin/cpdup/md5.c +++ b/bin/cpdup/md5.c @@ -5,7 +5,7 @@ * use and distribute based on the FreeBSD copyright. Supplied as-is, * USE WITH EXTREME CAUTION. * - * $DragonFly: src/bin/cpdup/md5.c,v 1.1 2006/04/25 21:30:45 dillon Exp $ + * $DragonFly: src/bin/cpdup/md5.c,v 1.2 2008/03/22 18:09:16 dillon Exp $ */ #include "cpdup.h" @@ -19,7 +19,7 @@ typedef struct MD5Node { static MD5Node *md5_lookup(const char *sfile); static void md5_cache(const char *spath, int sdirlen); -static char *doMD5File(const char *filename, char *buf); +static char *doMD5File(const char *filename, char *buf, int is_target); static char *MD5SCache; /* cache source directory name */ static MD5Node *MD5Base; @@ -126,7 +126,7 @@ md5_cache(const char *spath, int sdirlen) * extracting md_Name - name may contain embedded control * characters. 
*/ - CountReadBytes += nlen+1; + CountSourceReadBytes += nlen+1; node->md_Name = fextract(fi, nlen, &c, EOF); if (c != '\n') { fprintf(stderr, "Error parsing MD5 Cache: %s (%c)\n", MD5SCache, c); @@ -205,7 +205,7 @@ md5_check(const char *spath, const char *dpath) */ if (dpath == NULL) { - char *scode = doMD5File(spath, NULL); + char *scode = doMD5File(spath, NULL, 0); r = 0; if (node->md_Code == NULL) { @@ -228,16 +228,16 @@ md5_check(const char *spath, const char *dpath) */ if (node->md_Code == NULL) { - node->md_Code = doMD5File(spath, NULL); + node->md_Code = doMD5File(spath, NULL, 0); MD5SCacheDirty = 1; } - dcode = doMD5File(dpath, NULL); + dcode = doMD5File(dpath, NULL, 1); if (dcode) { if (strcmp(node->md_Code, dcode) == 0) { r = 0; } else { - char *scode = doMD5File(spath, NULL); + char *scode = doMD5File(spath, NULL, 0); if (strcmp(node->md_Code, scode) == 0) { free(scode); @@ -255,15 +255,16 @@ md5_check(const char *spath, const char *dpath) } char * -doMD5File(const char *filename, char *buf) +doMD5File(const char *filename, char *buf, int is_target) { if (SummaryOpt) { struct stat st; if (stat(filename, &st) == 0) { - u_int64_t size = st.st_blocks * 512; - if (st.st_size % 512) - size += st.st_size % 512 - 512; - CountReadBytes += size; + u_int64_t size = st.st_size; + if (is_target) + CountTargetReadBytes += size; + else + CountSourceReadBytes += size; } } return MD5File(filename, buf); diff --git a/bin/cpdup/misc.c b/bin/cpdup/misc.c index 54c50ebc34..bf449d03b3 100644 --- a/bin/cpdup/misc.c +++ b/bin/cpdup/misc.c @@ -1,7 +1,7 @@ /* * MISC.C * - * $DragonFly: src/bin/cpdup/misc.c,v 1.8 2006/09/16 18:18:05 dillon Exp $ + * $DragonFly: src/bin/cpdup/misc.c,v 1.9 2008/03/22 18:09:16 dillon Exp $ */ #include "cpdup.h" @@ -107,6 +107,10 @@ fatal(const char *ctl, ...) 
" copying if the compare fails\n" " -M file -m+specify MD5 checkfile, else .MD5_CHECKSUMS\n" " copy if md5 check fails\n" + " -H path hardlink from path to target instead of copying\n" + " source to target, if source matches path.\n" + " -V verify file contents even if they appear\n" + " to be the same.\n" #endif " -x use .cpignore as exclusion file\n" " -X file specify exclusion file\n" -- 2.41.0