1 /* cmp - compare two files byte by byte
3 Copyright (C) 1990, 1991, 1992, 1993, 1994, 1995, 1996, 1998, 2001,
4 2002 Free Software Foundation, Inc.
6 This program is free software; you can redistribute it and/or modify
7 it under the terms of the GNU General Public License as published by
8 the Free Software Foundation; either version 2, or (at your option)
any later version.
11 This program is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
14 See the GNU General Public License for more details.
16 You should have received a copy of the GNU General Public License
17 along with this program; see the file COPYING.
18 If not, write to the Free Software Foundation,
19 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */
30 #include <hard-locale.h>
36 #if defined LC_MESSAGES && ENABLE_NLS
37 # define hard_locale_LC_MESSAGES hard_locale (LC_MESSAGES)
39 # define hard_locale_LC_MESSAGES 0
42 static char const authorship_msgid[] =
43 N_("Written by Torbjorn Granlund and David MacKenzie.");
45 static char const copyright_string[] =
46 "Copyright (C) 2002 Free Software Foundation, Inc.";
48 extern char const version_string[];
50 static int cmp (void);
51 static off_t file_position (int);
52 static size_t block_compare (word const *, word const *);
53 static size_t block_compare_and_count (word const *, word const *, off_t *);
54 static void sprintc (char *, unsigned char);
56 /* Name under which this program was invoked. */
59 /* Filenames of the compared files. */
60 static char const *file[2];
62 /* File descriptors of the files. */
63 static int file_desc[2];
65 /* Status of the files. */
66 static struct stat stat_buf[2];
68 /* Read buffers for the files. */
69 static word *buffer[2];
71 /* Optimal block size for the files. */
72 static size_t buf_size;
74 /* Initial prefix to ignore for each file. */
75 static off_t ignore_initial[2];
77 /* Number of bytes to compare. */
/* The initial value UINTMAX_MAX acts as "no limit": cmp copies this
   value into its remaining-bytes counter and only clamps the size of
   each read when that counter differs from UINTMAX_MAX.  */
78 static uintmax_t bytes = UINTMAX_MAX;
81 static enum comparison_type
83 type_first_diff, /* Print the first difference. */
84 type_all_diffs, /* Print all differences. */
85 type_status /* Exit status only. */
88 /* If nonzero, print values of bytes quoted like cat -t does. */
89 static bool opt_print_bytes;
91 /* Values for long options that do not have single-letter equivalents. */
94 HELP_OPTION = CHAR_MAX + 1
/* Long option table handed to getopt_long in main.  Most entries map a
   long name onto the short option letter that main also accepts;
   --help uses HELP_OPTION because it has no single-letter form.
   --silent and --quiet are synonyms (both yield 's').  */
97 static struct option const long_options[] =
99 {"print-bytes", 0, 0, 'b'},
100 {"print-chars", 0, 0, 'c'}, /* obsolescent as of diffutils 2.7.3 */
101 {"ignore-initial", 1, 0, 'i'},
102 {"verbose", 0, 0, 'l'},
103 {"bytes", 1, 0, 'n'},
104 {"silent", 0, 0, 's'},
105 {"quiet", 0, 0, 's'},
106 {"version", 0, 0, 'v'},
107 {"help", 0, 0, HELP_OPTION},
/* Report a command-line usage error and terminate.
   REASON_MSGID is an untranslated printf-style message; it is passed
   through _() and formatted with OPERAND (callers pass 0 when the
   message has no %s).  A hint to run PROGRAM_NAME with --help follows.
   The prototype marks the function noreturn.  */
111 static void try_help (char const *, char const *) __attribute__((noreturn));
113 try_help (char const *reason_msgid, char const *operand)
/* Status 0: print the reason only (relies on error() returning when
   its status argument is zero).  */
116 error (0, 0, _(reason_msgid), operand);
/* Status EXIT_TROUBLE: print the hint and exit with that status.  */
117 error (EXIT_TROUBLE, 0,
118 _("Try `%s --help' for more information."), program_name);
/* Suffix characters passed to xstrtoumax when parsing the
   --ignore-initial operands and the --bytes value.  */
122 static char const valid_suffixes[] = "kKMGTPEZY0";
124 /* Parse an operand *ARGPTR of --ignore-initial, updating *ARGPTR to
125 point after the operand. If DELIMITER is nonzero, the operand may
126 be followed by DELIMITER; otherwise it must be null-terminated. */
/* The number is read by xstrtoumax with base 0 and the suffixes in
   valid_suffixes.  Any rejected operand is reported through try_help,
   which does not return.  */
128 parse_ignore_initial (char **argptr, char delimiter)
/* Keep the start of the operand for the diagnostic; xstrtoumax
   advances *ARGPTR.  */
132 char const *arg = *argptr;
133 strtol_error e = xstrtoumax (arg, argptr, 0, &val, valid_suffixes);
/* Accept a clean parse, or one that stopped at DELIMITER (which
   xstrtoumax reports as an invalid suffix character).  Reject a value
   that is negative or changed after assignment to O, and reject
   UINTMAX_MAX itself.  */
134 if (! (e == LONGINT_OK
135 || (e == LONGINT_INVALID_SUFFIX_CHAR && **argptr == delimiter))
136 || (o = val) < 0 || o != val || val == UINTMAX_MAX)
137 try_help ("invalid --ignore-initial value `%s'", arg);
141 /* Specify the output format. */
/* T is the requested comparison type; main calls this with
   type_all_diffs and with type_status.  The try_help call below
   reports that the -l and -s options cannot be combined and does not
   return.  */
143 specify_comparison_type (enum comparison_type t)
146 try_help ("options -l and -s are incompatible", 0);
154 error (EXIT_TROUBLE, 0, "%s", _("write failed"));
155 else if (fclose (stdout) != 0)
156 error (EXIT_TROUBLE, errno, "%s", _("standard output"));
/* Help text for the options, one untranslated message per output line.
   Entries are marked with N_ here and translated with _() when they
   are printed; the printing loop stops at the first null entry.  */
159 static char const * const option_help_msgid[] = {
160 N_("-b --print-bytes Print differing bytes."),
161 N_("-i SKIP --ignore-initial=SKIP Skip the first SKIP bytes of input."),
162 N_("-i SKIP1:SKIP2 --ignore-initial=SKIP1:SKIP2"),
163 N_(" Skip the first SKIP1 bytes of FILE1 and the first SKIP2 bytes of FILE2."),
164 N_("-l --verbose Output byte numbers and values of all differing bytes."),
165 N_("-n LIMIT --bytes=LIMIT Compare at most LIMIT bytes."),
166 N_("-s --quiet --silent Output nothing; yield exit status only."),
167 N_("-v --version Output version info."),
168 N_("--help Output this help."),
/* Print the help text on standard output: the synopsis line, a
   one-line description, every entry of option_help_msgid, and closing
   notes on SKIP suffixes, standard input and bug reports.  */
175 char const * const *p;
/* Synopsis; the %s is a program name argument.  */
177 printf (_("Usage: %s [OPTION]... FILE1 [FILE2 [SKIP1 [SKIP2]]]\n"),
179 printf ("%s\n\n", _("Compare two files byte by byte."));
/* One translated line per option; the table is scanned up to its
   first null entry.  */
180 for (p = option_help_msgid; *p; p++)
181 printf (" %s\n", _(*p));
/* Trailing notes.  Each paragraph is translated separately.  */
182 printf ("\n%s\n%s\n\n%s\n\n%s\n",
183 _("SKIP1 and SKIP2 are the number of bytes to skip in each file."),
184 _("SKIP values may be followed by the following multiplicative suffixes:\n\
185 kB 1000, K 1024, MB 1,000,000, M 1,048,576,\n\
186 GB 1,000,000,000, G 1,073,741,824, and so on for T, P, E, Z, Y."),
187 _("If a FILE is `-' or missing, read standard input."),
188 _("Report bugs to <bug-gnu-utils@gnu.org>."));
/* Program entry point.  Sets up the locale and message catalog, parses
   options and operands, opens and stats both inputs, applies the
   shortcuts described in the comments below, allocates the two read
   buffers, and runs cmp.  */
192 main (int argc, char **argv)
194 int c, f, exit_status;
195 size_t words_per_buffer;
/* NOTE(review): exit_failure is declared elsewhere; presumably it is
   the exit status library routines use on fatal errors -- confirm.  */
197 exit_failure = EXIT_TROUBLE;
198 initialize_main (&argc, &argv);
199 program_name = argv[0];
200 setlocale (LC_ALL, "");
201 bindtextdomain (PACKAGE, LOCALEDIR);
202 textdomain (PACKAGE);
203 c_stack_action (c_stack_die);
205 /* Parse command line options. */
207 while ((c = getopt_long (argc, argv, "bci:ln:sv", long_options, 0))
212 case 'c': /* 'c' is obsolescent as of diffutils 2.7.3 */
/* --ignore-initial operand: SKIP or SKIP1:SKIP2.  When no second value
   follows a ':', the first value is used for both files.  */
217 ignore_initial[0] = parse_ignore_initial (&optarg, ':');
218 ignore_initial[1] = (*optarg++ == ':'
219 ? parse_ignore_initial (&optarg, 0)
220 : ignore_initial[0]);
224 specify_comparison_type (type_all_diffs);
/* --bytes value: parsed with the same suffix set as the SKIP values.  */
230 if (xstrtoumax (optarg, 0, 0, &n, valid_suffixes) != LONGINT_OK)
231 try_help ("invalid --bytes value `%s'", optarg);
238 specify_comparison_type (type_status);
/* Version output: version, copyright, licence note, authorship.  */
242 printf ("cmp %s\n%s\n\n%s\n\n%s\n",
243 version_string, copyright_string,
244 _(free_software_msgid), _(authorship_msgid));
258 try_help ("missing operand after `%s'", argv[argc - 1]);
/* FILE1 is required; FILE2 defaults to "-" when it is omitted.  */
260 file[0] = argv[optind++];
261 file[1] = optind < argc ? argv[optind++] : "-";
/* Up to two further operands are SKIP1 and SKIP2; they replace any
   values stored in ignore_initial by the option parsing above.  */
263 for (f = 0; f < 2 && optind < argc; f++)
265 char *arg = argv[optind++];
266 ignore_initial[f] = parse_ignore_initial (&arg, 0);
270 try_help ("extra operand `%s'", argv[optind]);
/* Open and stat both inputs.  */
272 for (f = 0; f < 2; f++)
274 /* If file[1] is "-", treat it first; this avoids a misdiagnostic if
275 stdin is closed and opening file[0] yields file descriptor 0. */
276 int f1 = f ^ (strcmp (file[1], "-") == 0);
278 /* Two files with the same name are identical.
279 But wait until we open the file once, for proper diagnostics. */
280 if (f && file_name_cmp (file[0], file[1]) == 0)
/* A name of "-" is handled specially (standard input, per the usage
   text); any other name is opened read-only.  */
283 file_desc[f1] = (strcmp (file[f1], "-") == 0
285 : open (file[f1], O_RDONLY, 0));
/* Both a failed open and a failed fstat land here.  */
286 if (file_desc[f1] < 0 || fstat (file_desc[f1], stat_buf + f1) != 0)
288 if (file_desc[f1] < 0 && comparison_type == type_status)
291 error (EXIT_TROUBLE, errno, "%s", file[f1]);
294 set_binary_mode (file_desc[f1], 1);
297 /* If the files are links to the same inode and have the same file position,
298 they are identical. */
300 if (0 < same_file (&stat_buf[0], &stat_buf[1])
301 && same_file_attributes (&stat_buf[0], &stat_buf[1])
302 && file_position (0) == file_position (1))
305 /* If output is redirected to the null device, we may assume `-s'. */
307 if (comparison_type != type_status)
309 struct stat outstat, nullstat;
311 if (fstat (STDOUT_FILENO, &outstat) == 0
312 && stat (NULL_DEVICE, &nullstat) == 0
313 && 0 < same_file (&outstat, &nullstat))
314 comparison_type = type_status;
317 /* If only a return code is needed,
318 and if both input descriptors are associated with plain files,
319 conclude that the files differ if they have different sizes
320 and if more bytes will be compared than are in the smaller file. */
322 if (comparison_type == type_status
323 && S_ISREG (stat_buf[0].st_mode)
324 && S_ISREG (stat_buf[1].st_mode))
/* Sizes are measured from the current position, so skipped prefixes
   are not counted.  */
326 off_t s0 = stat_buf[0].st_size - file_position (0);
327 off_t s1 = stat_buf[1].st_size - file_position (1);
332 if (s0 != s1 && MIN (s0, s1) < bytes)
336 /* Get the optimal block size of the files. */
338 buf_size = buffer_lcm (STAT_BLOCKSIZE (stat_buf[0]),
339 STAT_BLOCKSIZE (stat_buf[1]),
340 PTRDIFF_MAX - sizeof (word));
342 /* Allocate word-aligned buffers, with space for sentinels at the end. */
/* words_per_buffer is buf_size rounded up to whole words plus one
   extra word.  Both buffers come from a single allocation; buffer[1]
   starts words_per_buffer words after buffer[0].  */
344 words_per_buffer = (buf_size + 2 * sizeof (word) - 1) / sizeof (word);
345 buffer[0] = xmalloc (2 * sizeof (word) * words_per_buffer);
346 buffer[1] = buffer[0] + words_per_buffer;
348 exit_status = cmp ();
/* Close both inputs; a failed close is a fatal error.  */
350 for (f = 0; f < 2; f++)
351 if (close (file_desc[f]) != 0)
352 error (EXIT_TROUBLE, errno, "%s", file[f]);
353 if (exit_status != 0 && comparison_type != type_status)
359 /* Compare the two files already open on `file_desc[0]' and `file_desc[1]',
360 using `buffer[0]' and `buffer[1]'.
361 Return EXIT_SUCCESS if identical, EXIT_FAILURE if different.
Read errors are reported with error (EXIT_TROUBLE, ...) instead of
being returned. */
367 off_t line_number = 1; /* Line number (1...) of difference. */
368 off_t byte_number = 1; /* Byte number (1...) of difference. */
369 uintmax_t remaining = bytes; /* Remaining number of bytes to compare. */
370 size_t read0, read1; /* Number of bytes read from each file. */
371 size_t first_diff; /* Offset (0...) in buffers of 1st diff. */
372 size_t smaller; /* The lesser of `read0' and `read1'. */
373 word *buffer0 = buffer[0];
374 word *buffer1 = buffer[1];
375 char *buf0 = (char *) buffer0;
376 char *buf1 = (char *) buffer1;
377 int ret = EXIT_SUCCESS;
381 if (comparison_type == type_all_diffs)
383 off_t byte_number_max = MIN (bytes, TYPE_MAXIMUM (off_t));
385 for (f = 0; f < 2; f++)
386 if (S_ISREG (stat_buf[f].st_mode))
388 off_t file_bytes = stat_buf[f].st_size - file_position (f);
389 if (file_bytes < byte_number_max)
390 byte_number_max = file_bytes;
393 for (offset_width = 1; (byte_number_max /= 10) != 0; offset_width++)
397 for (f = 0; f < 2; f++)
399 off_t ig = ignore_initial[f];
400 if (ig && file_position (f) == -1)
402 /* lseek failed; read and discard the ignored initial prefix. */
405 size_t bytes_to_read = MIN (ig, buf_size);
406 size_t r = block_read (file_desc[f], buf0, bytes_to_read);
407 if (r != bytes_to_read)
410 error (EXIT_TROUBLE, errno, "%s", file[f]);
421 size_t bytes_to_read = buf_size;
423 if (remaining != UINTMAX_MAX)
425 if (remaining < bytes_to_read)
426 bytes_to_read = remaining;
427 remaining -= bytes_to_read;
430 read0 = block_read (file_desc[0], buf0, bytes_to_read);
431 if (read0 == SIZE_MAX)
432 error (EXIT_TROUBLE, errno, "%s", file[0]);
433 read1 = block_read (file_desc[1], buf1, bytes_to_read);
434 if (read1 == SIZE_MAX)
435 error (EXIT_TROUBLE, errno, "%s", file[1]);
437 /* Insert sentinels for the block compare. */
439 buf0[read0] = ~buf1[read0];
440 buf1[read1] = ~buf0[read1];
442 /* If the line number should be written for differing files,
443 compare the blocks and count the number of newlines
simultaneously. */
445 first_diff = (comparison_type == type_first_diff
446 ? block_compare_and_count (buffer0, buffer1, &line_number)
447 : block_compare (buffer0, buffer1));
449 byte_number += first_diff;
450 smaller = MIN (read0, read1);
452 if (first_diff < smaller)
454 switch (comparison_type)
456 case type_first_diff:
458 char byte_buf[INT_BUFSIZE_BOUND (off_t)];
459 char line_buf[INT_BUFSIZE_BOUND (off_t)];
460 char const *byte_num = offtostr (byte_number, byte_buf);
461 char const *line_num = offtostr (line_number, line_buf);
462 if (!opt_print_bytes)
464 /* See POSIX 1003.1-2001 for this format. This
465 message is used only in the POSIX locale, so it
466 need not be translated. */
467 static char const char_message[] =
468 "%s %s differ: char %s, line %s\n";
470 /* The POSIX rationale recommends using the word
471 "byte" outside the POSIX locale. Some gettext
472 implementations translate even in the POSIX
473 locale if certain other environment variables
474 are set, so use "byte" if a translation is
475 available, or if outside the POSIX locale. */
476 static char const byte_msgid[] =
477 N_("%s %s differ: byte %s, line %s\n");
478 char const *byte_message = _(byte_msgid);
479 bool use_byte_message = (byte_message != byte_msgid
480 || hard_locale_LC_MESSAGES);
482 printf ((use_byte_message
484 : "%s %s differ: char %s, line %s\n"),
485 file[0], file[1], byte_num, line_num);
489 unsigned char c0 = buf0[first_diff];
490 unsigned char c1 = buf1[first_diff];
495 printf (_("%s %s differ: byte %s, line %s is %3o %s %3o %s\n"),
496 file[0], file[1], byte_num, line_num,
507 unsigned char c0 = buf0[first_diff];
508 unsigned char c1 = buf1[first_diff];
511 char byte_buf[INT_BUFSIZE_BOUND (off_t)];
512 char const *byte_num = offtostr (byte_number, byte_buf);
513 if (!opt_print_bytes)
515 /* See POSIX 1003.1-2001 for this format. */
516 printf ("%*s %3o %3o\n",
517 offset_width, byte_num, c0, c1);
525 printf ("%*s %3o %-4s %3o %s\n",
526 offset_width, byte_num, c0, s0, c1, s1);
532 while (first_diff < smaller);
540 if (comparison_type != type_status)
542 /* See POSIX 1003.1-2001 for this format. */
543 fprintf (stderr, _("cmp: EOF on %s\n"), file[read1 < read0]);
549 while (read0 == buf_size);
554 /* Compare two blocks of memory P0 and P1 until they differ,
555 and count the number of '\n' occurrences in the common
part of P0 and P1.
557 If the blocks are not guaranteed to be different, put sentinels at the ends
558 of the blocks before calling this function.
560 Return the offset of the first byte that differs.
561 Increment *COUNT by the count of '\n' occurrences. */
564 block_compare_and_count (word const *p0, word const *p1, off_t *count)
566 word l; /* One word from first buffer. */
567 word const *l0, *l1; /* Pointers into each buffer. */
568 char const *c0, *c1; /* Pointers for finding exact address. */
569 size_t cnt = 0; /* Number of '\n' occurrences. */
570 word nnnn; /* Newline, sizeof (word) times. */
574 for (i = 0; i < sizeof nnnn; i++)
575 nnnn = (nnnn << CHAR_BIT) | '\n';
577 /* Find the rough position of the first difference by reading words,
not bytes. */
580 for (l0 = p0, l1 = p1; (l = *l0) == *l1; l0++, l1++)
583 for (i = 0; i < sizeof l; i++)
585 cnt += ! (unsigned char) l;
590 /* Find the exact differing position (endianness independent). */
592 for (c0 = (char const *) l0, c1 = (char const *) l1;
598 return c0 - (char const *) p0;
601 /* Compare two blocks of memory P0 and P1 until they differ.
602 If the blocks are not guaranteed to be different, put sentinels at the ends
603 of the blocks before calling this function.
605 Return the offset of the first byte that differs. */
608 block_compare (word const *p0, word const *p1)
613 /* Find the rough position of the first difference by reading words,
not bytes. */
616 for (l0 = p0, l1 = p1; *l0 == *l1; l0++, l1++)
619 /* Find the exact differing position (endianness independent). */
621 for (c0 = (char const *) l0, c1 = (char const *) l1;
626 return c0 - (char const *) p0;
629 /* Put into BUF the unsigned char C, making unprintable bytes
630 visible by quoting like cat -t does. */
633 sprintc (char *buf, unsigned char c)
659 /* Position file F to ignore_initial[F] bytes from its initial position,
660 and yield its new position. Don't try more than once. */
/* F indexes file_desc and ignore_initial (0 or 1).  The static arrays
   below keep per-file state across calls; position[F] receives the
   result of the lseek call.  cmp treats a result of -1 as an lseek
   failure and falls back to reading and discarding the prefix.  */
663 file_position (int f)
665 static bool positioned[2];
666 static off_t position[2];
/* Seek relative to the current offset, not the start of the file.  */
671 position[f] = lseek (file_desc[f], ignore_initial[f], SEEK_CUR);