1 /* GNU cmp - compare two files byte by byte
3 Copyright (C) 1990-1996, 1998, 2001-2002, 2004, 2006-2007, 2009-2013,
4 2015-2018 Free Software Foundation, Inc.
6 This program is free software: you can redistribute it and/or modify
7 it under the terms of the GNU General Public License as published by
8 the Free Software Foundation, either version 3 of the License, or
9 (at your option) any later version.
11 This program is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 GNU General Public License for more details.
16 You should have received a copy of the GNU General Public License
17 along with this program. If not, see <http://www.gnu.org/licenses/>. */
29 #include <file-type.h>
31 #include <hard-locale.h>
34 #include <unlocked-io.h>
35 #include <version-etc.h>
37 #include <binary-io.h>
40 /* The official name of this program (e.g., no 'g' prefix). */
41 #define PROGRAM_NAME "cmp"
/* Author names: the first is supplied both as an ASCII spelling and as a
   UTF-8 string, the second as ASCII only.
   NOTE(review): the #define line that opens this macro is not visible
   here -- presumably AUTHORS, which main passes to version_etc; confirm.  */
44 proper_name_utf8 ("Torbjorn Granlund", "Torbj\303\266rn Granlund"), \
45 proper_name ("David MacKenzie")
/* hard_locale_LC_MESSAGES: when LC_MESSAGES is defined and ENABLE_NLS is
   nonzero it expands to hard_locale (LC_MESSAGES); the alternative
   definition is the constant 0.  */
47 #if defined LC_MESSAGES && ENABLE_NLS
48 # define hard_locale_LC_MESSAGES hard_locale (LC_MESSAGES)
50 # define hard_locale_LC_MESSAGES 0
/* Forward declarations of file-local helpers.  */
53 static int cmp (void);
54 static off_t file_position (int);
55 static size_t block_compare (word const *, word const *) _GL_ATTRIBUTE_PURE;
56 static size_t count_newlines (char *, size_t);
57 static void sprintc (char *, unsigned char);
59 /* Names of the two compared files, taken from the command-line
   operands; a missing second operand is recorded as "-".  */
60 static char const *file[2];
62 /* File descriptors the two inputs are read from; STDIN_FILENO is used
   for an input named "-".  */
63 static int file_desc[2];
65 /* fstat results for the two open inputs.  */
66 static struct stat stat_buf[2];
68 /* Read buffers for the files.  main carves both out of one allocation,
   with room for sentinels after the data.  */
69 static word *buffer[2];
71 /* Block size used for reads, computed in main from both files'
   STAT_BLOCKSIZE values.  */
72 static size_t buf_size;
74 /* Number of leading bytes to skip in each file.  */
75 static off_t ignore_initial[2];
77 /* Maximum number of bytes to compare; UINTMAX_MAX means no limit.  */
78 static uintmax_t bytes = UINTMAX_MAX;
/* Output format.  The order of the enumerators matters: other code tests
   the current value for nonzero and compares it with '<' against
   type_no_stdout.  */
81 static enum comparison_type
83 type_first_diff, /* Print the first difference. */
84 type_all_diffs, /* Print all differences. */
85 type_no_stdout, /* Do not output to stdout; only stderr. */
86 type_status /* Exit status only. */
89 /* If true, print values of bytes quoted like cat -t does.  Set by the
   'c' option case in main.  */
90 static bool opt_print_bytes;
92 /* Values for long options that do not have single-letter equivalents.
   HELP_OPTION is CHAR_MAX + 1, so it cannot collide with any
   single-character option code.  */
95 HELP_OPTION = CHAR_MAX + 1
/* Long options handed to getopt_long.  In each entry the second field is
   1 when the option takes an argument, and the last field is the code
   getopt_long returns for it: the matching short option letter, or
   HELP_OPTION for --help.  "silent" and "quiet" both map to 's'.  */
98 static struct option const long_options[] =
100 {"print-bytes", 0, 0, 'b'},
101 {"print-chars", 0, 0, 'c'}, /* obsolescent as of diffutils 2.7.3 */
102 {"ignore-initial", 1, 0, 'i'},
103 {"verbose", 0, 0, 'l'},
104 {"bytes", 1, 0, 'n'},
105 {"silent", 0, 0, 's'},
106 {"quiet", 0, 0, 's'},
107 {"version", 0, 0, 'v'},
108 {"help", 0, 0, HELP_OPTION},
112 static void try_help (char const *, char const *) __attribute__((noreturn));
114 try_help (char const *reason_msgid, char const *operand)
117 error (0, 0, _(reason_msgid), operand);
118 die (EXIT_TROUBLE, 0,
119 _("Try '%s --help' for more information."), program_name);
/* Suffix characters passed to xstrtoumax when parsing --ignore-initial
   and --bytes values.  */
122 static char const valid_suffixes[] = "kKMGTPEZY0";
124 /* Update ignore_initial[F] according to the result of parsing an
125 *operand ARGPTR of --ignore-initial, updating *ARGPTR to point
126 *after the operand. If DELIMITER is nonzero, the operand may be
127 *followed by DELIMITER; otherwise it must be null-terminated. */
129 specify_ignore_initial (int f, char **argptr, char delimiter)
132 char const *arg = *argptr;
133 strtol_error e = xstrtoumax (arg, argptr, 0, &val, valid_suffixes);
134 if (! (e == LONGINT_OK
135 || (e == LONGINT_INVALID_SUFFIX_CHAR && **argptr == delimiter))
136 || TYPE_MAXIMUM (off_t) < val)
137 try_help ("invalid --ignore-initial value '%s'", arg);
138 if (ignore_initial[f] < val)
139 ignore_initial[f] = val;
142 /* Specify the output format. */
144 specify_comparison_type (enum comparison_type t)
146 if (comparison_type && comparison_type != t)
147 try_help ("options -l and -s are incompatible", 0);
/* Standard-output check.  The first die call reports "write failed"
   without an errno value; otherwise stdout is closed, and a failed close
   is reported with errno under the label "standard output".
   NOTE(review): the function header and the condition guarding the
   first die call are not visible here; confirm against the full file.  */
155 die (EXIT_TROUBLE, 0, "%s", _("write failed"));
156 else if (fclose (stdout) != 0)
157 die (EXIT_TROUBLE, errno, "%s", _("standard output"));
/* Untranslated (N_-marked) help text, one entry per option form.  The
   help output walks this array until it reaches a null entry and prints
   each entry through _().
   NOTE(review): the terminating null entry is not visible here.  */
160 static char const * const option_help_msgid[] = {
161 N_("-b, --print-bytes print differing bytes"),
162 N_("-i, --ignore-initial=SKIP skip first SKIP bytes of both inputs"),
163 N_("-i, --ignore-initial=SKIP1:SKIP2 skip first SKIP1 bytes of FILE1 and\n"
164 " first SKIP2 bytes of FILE2"),
165 N_("-l, --verbose output byte numbers and differing byte values"),
166 N_("-n, --bytes=LIMIT compare at most LIMIT bytes"),
167 N_("-s, --quiet, --silent suppress all normal output"),
168 N_(" --help display this help and exit"),
169 N_("-v, --version output version information and exit"),
/* Help output.  Prints, in order: the usage synopsis, a one-line
   description, the explanation of SKIP1 and SKIP2, the note about
   mandatory arguments, every entry of option_help_msgid (translated),
   the notes on SKIP suffixes, standard input and exit status, and
   finally the bug-reporting address.
   NOTE(review): the function header is not visible here.  */
176 char const * const *p;
178 printf (_("Usage: %s [OPTION]... FILE1 [FILE2 [SKIP1 [SKIP2]]]\n"),
180 printf ("%s\n", _("Compare two files byte by byte."));
182 _("The optional SKIP1 and SKIP2 specify the number of bytes to skip\n"
183 "at the beginning of each file (zero by default)."));
186 Mandatory arguments to long options are mandatory for short options too.\n\
188 for (p = option_help_msgid; *p; p++)
189 printf (" %s\n", _(*p));
190 printf ("\n%s\n\n%s\n%s\n",
191 _("SKIP values may be followed by the following multiplicative suffixes:\n\
192 kB 1000, K 1024, MB 1,000,000, M 1,048,576,\n\
193 GB 1,000,000,000, G 1,073,741,824, and so on for T, P, E, Z, Y."),
194 _("If a FILE is '-' or missing, read standard input."),
195 _("Exit status is 0 if inputs are the same, 1 if different, 2 if trouble."));
196 emit_bug_reporting_address ();
/* Program entry point: parse options and operands, open and fstat both
   inputs, allocate the read buffers, run cmp (), and close the inputs.  */
200 main (int argc, char **argv)
202 int c, f, exit_status;
203 size_t words_per_buffer;
205 exit_failure = EXIT_TROUBLE;
206 initialize_main (&argc, &argv);
207 set_program_name (argv[0]);
208 setlocale (LC_ALL, "");
209 bindtextdomain (PACKAGE, LOCALEDIR);
210 textdomain (PACKAGE);
213 /* Parse command line options. */
215 while ((c = getopt_long (argc, argv, "bci:ln:sv", long_options, 0))
220 case 'c': /* 'c' is obsolescent as of diffutils 2.7.3 */
221 opt_print_bytes = true;
/* --ignore-initial=SKIP1[:SKIP2]: parse the value for file 0, allowing
   ':' to end it.  If a ':' follows, the rest is parsed for file 1;
   otherwise file 1's skip is raised to at least file 0's.  */
225 specify_ignore_initial (0, &optarg, ':');
226 if (*optarg++ == ':')
227 specify_ignore_initial (1, &optarg, 0);
228 else if (ignore_initial[1] < ignore_initial[0])
229 ignore_initial[1] = ignore_initial[0];
233 specify_comparison_type (type_all_diffs);
/* --bytes=LIMIT: the whole value, suffix included, must parse.  */
239 if (xstrtoumax (optarg, 0, 0, &n, valid_suffixes) != LONGINT_OK)
240 try_help ("invalid --bytes value '%s'", optarg);
247 specify_comparison_type (type_status);
251 version_etc (stdout, PROGRAM_NAME, PACKAGE_NAME, Version,
252 AUTHORS, (char *) NULL);
/* NOTE(review): the test that detects a missing FILE1 operand is not
   visible here.  */
266 try_help ("missing operand after '%s'", argv[argc - 1]);
/* FILE2 defaults to "-" when it is omitted.  */
268 file[0] = argv[optind++];
269 file[1] = optind < argc ? argv[optind++] : "-";
/* Up to two further operands are the SKIP1 and SKIP2 values.  */
271 for (f = 0; f < 2 && optind < argc; f++)
273 char *arg = argv[optind++];
274 specify_ignore_initial (f, &arg, 0);
278 try_help ("extra operand '%s'", argv[optind]);
/* Open and fstat each input.  */
280 for (f = 0; f < 2; f++)
282 /* If file[1] is "-", treat it first; this avoids a misdiagnostic if
283 stdin is closed and opening file[0] yields file descriptor 0. */
284 int f1 = f ^ (STREQ (file[1], "-"));
286 /* Two files with the same name and offset are identical.
287 But wait until we open the file once, for proper diagnostics. */
288 if (f && ignore_initial[0] == ignore_initial[1]
289 && file_name_cmp (file[0], file[1]) == 0)
/* "-" names standard input, which is switched to binary mode where
   O_BINARY is nonzero and stdin is not a terminal.  */
292 if (STREQ (file[f1], "-"))
294 file_desc[f1] = STDIN_FILENO;
295 if (O_BINARY && ! isatty (STDIN_FILENO))
296 set_binary_mode (STDIN_FILENO, O_BINARY);
299 file_desc[f1] = open (file[f1], O_RDONLY | O_BINARY, 0);
/* A failed open or fstat is fatal.  An open failure in exit-status-only
   mode is singled out first.
   NOTE(review): the statement that test controls is not visible here.  */
301 if (file_desc[f1] < 0 || fstat (file_desc[f1], stat_buf + f1) != 0)
303 if (file_desc[f1] < 0 && comparison_type == type_status)
306 die (EXIT_TROUBLE, errno, "%s", file[f1]);
310 /* If the files are links to the same inode and have the same file position,
311 they are identical. */
313 if (0 < same_file (&stat_buf[0], &stat_buf[1])
314 && same_file_attributes (&stat_buf[0], &stat_buf[1])
315 && file_position (0) == file_position (1))
318 /* If output is redirected to the null device, we can avoid some of
321 if (comparison_type != type_status)
323 struct stat outstat, nullstat;
325 if (fstat (STDOUT_FILENO, &outstat) == 0
326 && stat (NULL_DEVICE, &nullstat) == 0
327 && 0 < same_file (&outstat, &nullstat))
328 comparison_type = type_no_stdout;
331 /* If only a return code is needed,
332 and if both input descriptors are associated with plain files,
333 conclude that the files differ if they have different sizes
334 and if more bytes will be compared than are in the smaller file. */
336 if (comparison_type == type_status
337 && S_ISREG (stat_buf[0].st_mode)
338 && S_ISREG (stat_buf[1].st_mode))
/* Bytes left in each file after its ignored prefix.  */
340 off_t s0 = stat_buf[0].st_size - file_position (0);
341 off_t s1 = stat_buf[1].st_size - file_position (1);
346 if (s0 != s1 && MIN (s0, s1) < bytes)
350 /* Get the optimal block size of the files. */
352 buf_size = buffer_lcm (STAT_BLOCKSIZE (stat_buf[0]),
353 STAT_BLOCKSIZE (stat_buf[1]),
354 PTRDIFF_MAX - sizeof (word));
356 /* Allocate word-aligned buffers, with space for sentinels at the end. */
/* words_per_buffer covers buf_size bytes plus one extra word, rounded up
   to whole words.  A single allocation holds both buffers back to back,
   so buffer[1] starts words_per_buffer words after buffer[0].  */
358 words_per_buffer = (buf_size + 2 * sizeof (word) - 1) / sizeof (word);
359 buffer[0] = xmalloc (2 * sizeof (word) * words_per_buffer);
360 buffer[1] = buffer[0] + words_per_buffer;
362 exit_status = cmp ();
/* A close failure on either input is fatal.  */
364 for (f = 0; f < 2; f++)
365 if (close (file_desc[f]) != 0)
366 die (EXIT_TROUBLE, errno, "%s", file[f]);
/* NOTE(review): the statement controlled by the next test is not visible
   here.  */
367 if (exit_status != EXIT_SUCCESS && comparison_type < type_no_stdout)
/* Comparison driver.  Reads both inputs in blocks of at most buf_size
   bytes, locates differences within each pair of blocks, and reports
   them according to comparison_type.  */
373 /* Compare the two files already open on 'file_desc[0]' and 'file_desc[1]',
374 using 'buffer[0]' and 'buffer[1]'.
375 Return EXIT_SUCCESS if identical, EXIT_FAILURE if different,
381 bool at_line_start = true;
382 off_t line_number = 1; /* Line number (1...) of difference. */
383 off_t byte_number = 1; /* Byte number (1...) of difference. */
384 uintmax_t remaining = bytes; /* Remaining number of bytes to compare. */
385 size_t read0, read1; /* Number of bytes read from each file. */
386 size_t first_diff; /* Offset (0...) in buffers of 1st diff. */
387 size_t smaller; /* The lesser of 'read0' and 'read1'. */
388 word *buffer0 = buffer[0];
389 word *buffer1 = buffer[1];
390 char *buf0 = (char *) buffer0;
391 char *buf1 = (char *) buffer1;
394 int offset_width IF_LINT (= 0);
/* For type_all_diffs output, offset_width becomes the number of decimal
   digits in the largest byte number that could be printed: the byte
   limit capped at the maximum off_t, lowered to the remaining size of
   any input that is a regular file.  */
396 if (comparison_type == type_all_diffs)
398 off_t byte_number_max = MIN (bytes, TYPE_MAXIMUM (off_t));
400 for (f = 0; f < 2; f++)
401 if (S_ISREG (stat_buf[f].st_mode))
403 off_t file_bytes = stat_buf[f].st_size - file_position (f);
404 if (file_bytes < byte_number_max)
405 byte_number_max = file_bytes;
408 for (offset_width = 1; (byte_number_max /= 10) != 0; offset_width++)
/* Skip each file's ignored prefix.  When file_position reports -1 for a
   file with a nonzero skip, the prefix is consumed by reading it into
   buf0 in pieces of at most buf_size bytes.  */
412 for (f = 0; f < 2; f++)
414 off_t ig = ignore_initial[f];
415 if (ig && file_position (f) == -1)
417 /* lseek failed; read and discard the ignored initial prefix. */
420 size_t bytes_to_read = MIN (ig, buf_size);
421 size_t r = block_read (file_desc[f], buf0, bytes_to_read);
422 if (r != bytes_to_read)
425 die (EXIT_TROUBLE, errno, "%s", file[f]);
/* Read the next block from each file.  When a byte limit is in force
   (remaining != UINTMAX_MAX) it caps the request and is reduced by the
   amount requested.  */
436 size_t bytes_to_read = buf_size;
438 if (remaining != UINTMAX_MAX)
440 if (remaining < bytes_to_read)
441 bytes_to_read = remaining;
442 remaining -= bytes_to_read;
/* A block_read result of SIZE_MAX is treated as a read error.  */
445 read0 = block_read (file_desc[0], buf0, bytes_to_read);
446 if (read0 == SIZE_MAX)
447 die (EXIT_TROUBLE, errno, "%s", file[0]);
448 read1 = block_read (file_desc[1], buf1, bytes_to_read);
449 if (read1 == SIZE_MAX)
450 die (EXIT_TROUBLE, errno, "%s", file[1]);
452 smaller = MIN (read0, read1);
454 /* Optimize the common case where the buffers are the same. */
455 if (memcmp (buf0, buf1, smaller) == 0)
456 first_diff = smaller;
459 /* Insert sentinels for the block compare. */
/* Each sentinel is the bitwise complement of the other buffer's byte at
   the same index, so the two buffers differ at index read0 and at index
   read1 and block_compare is certain to stop.  */
460 buf0[read0] = ~buf1[read0];
461 buf1[read1] = ~buf0[read1];
463 first_diff = block_compare (buffer0, buffer1);
/* first_diff bytes matched, so advance the 1-based byte number past
   them.  Line tracking is maintained only for the first-difference
   report; at_line_start records whether the last matching byte was a
   newline.  */
466 byte_number += first_diff;
467 if (comparison_type == type_first_diff && first_diff != 0)
469 line_number += count_newlines (buf0, first_diff);
470 at_line_start = buf0[first_diff - 1] == '\n';
/* A difference lies inside the data read from both files; report it
   according to the output format.  */
473 if (first_diff < smaller)
475 switch (comparison_type)
477 case type_first_diff:
479 char byte_buf[INT_BUFSIZE_BOUND (off_t)];
480 char line_buf[INT_BUFSIZE_BOUND (off_t)];
481 char const *byte_num = offtostr (byte_number, byte_buf);
482 char const *line_num = offtostr (line_number, line_buf);
483 if (!opt_print_bytes)
485 /* See POSIX for this format. This message is
486 used only in the POSIX locale, so it need not
488 static char const char_message[] =
489 "%s %s differ: char %s, line %s\n";
491 /* The POSIX rationale recommends using the word
492 "byte" outside the POSIX locale. Some gettext
493 implementations translate even in the POSIX
494 locale if certain other environment variables
495 are set, so use "byte" if a translation is
496 available, or if outside the POSIX locale. */
497 static char const byte_msgid[] =
498 N_("%s %s differ: byte %s, line %s\n");
499 char const *byte_message = _(byte_msgid);
500 bool use_byte_message = (byte_message != byte_msgid
501 || hard_locale_LC_MESSAGES);
503 printf (use_byte_message ? byte_message : char_message,
504 file[0], file[1], byte_num, line_num);
/* With opt_print_bytes set, the message also shows each differing byte
   in octal followed by a string form.
   NOTE(review): the trailing arguments of this printf are not visible
   here.  */
508 unsigned char c0 = buf0[first_diff];
509 unsigned char c1 = buf1[first_diff];
514 printf (_("%s %s differ: byte %s, line %s is %3o %s %3o %s\n"),
515 file[0], file[1], byte_num, line_num,
/* One output line per differing byte, with the byte number right-aligned
   in offset_width columns.
   NOTE(review): presumably the type_all_diffs case; its case label and
   loop header are not visible here.  */
526 unsigned char c0 = buf0[first_diff];
527 unsigned char c1 = buf1[first_diff];
530 char byte_buf[INT_BUFSIZE_BOUND (off_t)];
531 char const *byte_num = offtostr (byte_number, byte_buf);
532 if (!opt_print_bytes)
534 /* See POSIX for this format. */
535 printf ("%*s %3o %3o\n",
536 offset_width, byte_num, c0, c1);
544 printf ("%*s %3o %-4s %3o %s\n",
545 offset_width, byte_num, c0, s0, c1, s1);
551 while (first_diff < smaller);
/* End-of-file diagnostic on stderr, suppressed in exit-status-only mode.
   shorter_file is file[1] when file 1 supplied fewer bytes than file 0,
   and file[0] otherwise.
   NOTE(review): the enclosing condition that detects unequal read
   counts is not visible here.  */
563 if (differing <= 0 && comparison_type != type_status)
565 char const *shorter_file = file[read1 < read0];
567 /* POSIX says that each of these format strings must be
568 "cmp: EOF on %s", optionally followed by a blank and
569 extra text sans newline, then terminated by "\n". */
570 if (byte_number == 1)
571 fprintf (stderr, _("cmp: EOF on %s which is empty\n"),
575 char byte_buf[INT_BUFSIZE_BOUND (off_t)];
576 char const *byte_num = offtostr (byte_number - 1, byte_buf);
578 if (comparison_type == type_first_diff)
580 char line_buf[INT_BUFSIZE_BOUND (off_t)];
582 = offtostr (line_number - at_line_start, line_buf);
585 ? _("cmp: EOF on %s after byte %s, line %s\n")
586 : _("cmp: EOF on %s after byte %s,"
588 shorter_file, byte_num, line_num);
592 _("cmp: EOF on %s after byte %s\n"),
593 shorter_file, byte_num);
/* Keep going while no difference has been recorded (differing <= 0) and
   file 0 filled the whole buffer.  */
600 while (differing <= 0 && read0 == buf_size);
/* Only a value of exactly 0 in differing yields EXIT_SUCCESS.  */
602 return differing == 0 ? EXIT_SUCCESS : EXIT_FAILURE;
605 /* Compare two blocks of memory P0 and P1 until they differ.
606 If the blocks are not guaranteed to be different, put sentinels at the ends
607 of the blocks before calling this function.
609 Return the offset of the first byte that differs. */
612 block_compare (word const *p0, word const *p1)
617 /* Find the rough position of the first difference by reading words,
620 for (l0 = p0, l1 = p1; *l0 == *l1; l0++, l1++)
623 /* Find the exact differing position (endianness independent). */
625 for (c0 = (char const *) l0, c1 = (char const *) l1;
630 return c0 - (char const *) p0;
/* Return the number of newline bytes among the first BUFSIZE bytes of
   BUF.  BUF is only read, and no byte at or beyond BUF[BUFSIZE] is
   touched, so the caller need not reserve a sentinel slot.  A BUFSIZE
   of zero yields zero.  */
static size_t
count_newlines (char *buf, size_t bufsize)
{
  size_t count = 0;
  char const *p = buf;
  char const *lim = buf + bufsize;

  /* Each successful search lands on a newline; resume just past it.
     The length passed to memchr shrinks as P advances, so the search
     never looks beyond LIM.  */
  while ((p = memchr (p, '\n', lim - p)) != NULL)
    {
      count++;
      p++;
    }

  return count;
}
650 /* Put into BUF the unsigned char C, making unprintable bytes
   visible by quoting like cat -t does.
   NOTE(review): the body is not visible here; how much space BUF must
   provide and the exact quoting rules should be confirmed against it.  */
654 sprintc (char *buf, unsigned char c)
680 /* Position file F to ignore_initial[F] bytes from its initial position,
681 and yield its new position. Don't try more than once. */
684 file_position (int f)
686 static bool positioned[2];
687 static off_t position[2];
691 positioned[f] = true;
692 position[f] = lseek (file_desc[f], ignore_initial[f], SEEK_CUR);