1 /* GNU cmp - compare two files byte by byte
3 Copyright (C) 1990-1996, 1998, 2001-2002, 2004, 2006-2007, 2009-2013,
4 2015-2021 Free Software Foundation, Inc.
6 This program is free software: you can redistribute it and/or modify
7 it under the terms of the GNU General Public License as published by
8 the Free Software Foundation, either version 3 of the License, or
9 (at your option) any later version.
11 This program is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 GNU General Public License for more details.
16 You should have received a copy of the GNU General Public License
17 along with this program. If not, see <http://www.gnu.org/licenses/>. */
29 #include <file-type.h>
31 #include <hard-locale.h>
34 #include <unlocked-io.h>
35 #include <version-etc.h>
37 #include <binary-io.h>
41 /* The official name of this program (e.g., no 'g' prefix). */
42 #define PROGRAM_NAME "cmp"
/* Author names, wrapped in proper_name_utf8 / proper_name.
   NOTE(review): this looks like the body of the AUTHORS macro that main
   passes to version_etc; the #define line itself is not visible in this
   excerpt -- confirm.  */
45 proper_name_utf8 ("Torbjorn Granlund", "Torbj\303\266rn Granlund"), \
46 proper_name ("David MacKenzie")
/* hard_locale_LC_MESSAGES is hard_locale (LC_MESSAGES) when LC_MESSAGES
   is defined and ENABLE_NLS is nonzero; 0 is the alternative definition.
   cmp consults it when choosing between the "char" and "byte" wording
   of the first-difference message.  */
48 #if defined LC_MESSAGES && ENABLE_NLS
49 # define hard_locale_LC_MESSAGES hard_locale (LC_MESSAGES)
51 # define hard_locale_LC_MESSAGES 0
/* Forward declarations of static helpers that are defined later in this
   file but used before their definitions.  */
54 static int cmp (void);
55 static off_t file_position (int);
56 static size_t block_compare (word const *, word const *) _GL_ATTRIBUTE_PURE;
57 static size_t count_newlines (char *, size_t);
58 static void sprintc (char *, unsigned char);
60 /* Names of the two files being compared; "-" stands for standard input. */
61 static char const *file[2];
63 /* File descriptors of the files, set up by main (STDIN_FILENO for "-"). */
64 static int file_desc[2];
66 /* Status of the files, as filled in by fstat in main. */
67 static struct stat stat_buf[2];
69 /* Read buffers for the files.  main allocates both with one xmalloc call;
   buffer[1] starts words_per_buffer words after buffer[0]. */
70 static word *buffer[2];
72 /* Optimal block size for the files, computed by buffer_lcm in main. */
73 static size_t buf_size;
75 /* Initial prefix to ignore for each file, in bytes.  Set through
   specify_ignore_initial, which never lowers an existing value. */
76 static off_t ignore_initial[2];
78 /* Number of bytes to compare, or -1 if there is no limit. */
79 static intmax_t bytes = -1;
/* Output mode of the program.  The order of the enumerators matters:
   type_first_diff comes first, so a zero comparison_type means that no
   other mode has been requested (see specify_comparison_type), and main
   tests `comparison_type < type_no_stdout' to pick out the two modes
   that write differences to standard output.  main switches to
   type_no_stdout by itself when standard output is the null device.  */
82 static enum comparison_type
84 type_first_diff, /* Print the first difference. */
85 type_all_diffs, /* Print all differences. */
86 type_no_stdout, /* Do not output to stdout; only stderr. */
87 type_status /* Exit status only. */
90 /* If true, print values of bytes quoted like cat -t does. */
91 static bool opt_print_bytes;
93 /* Values for long options that do not have single-letter equivalents.
   Starting at CHAR_MAX + 1 keeps them distinct from every option
   character that getopt_long can return. */
96 HELP_OPTION = CHAR_MAX + 1
/* Long option table handed to getopt_long in main.  Each entry gives the
   option name, whether it takes an argument (1) or not (0), a null flag
   pointer, and the code getopt_long returns for it.  "silent" and
   "quiet" are aliases that share the code 's'.  */
99 static struct option const long_options[] =
101 {"print-bytes", 0, 0, 'b'},
102 {"print-chars", 0, 0, 'c'}, /* obsolescent as of diffutils 2.7.3 */
103 {"ignore-initial", 1, 0, 'i'},
104 {"verbose", 0, 0, 'l'},
105 {"bytes", 1, 0, 'n'},
106 {"silent", 0, 0, 's'},
107 {"quiet", 0, 0, 's'},
108 {"version", 0, 0, 'v'},
109 {"help", 0, 0, HELP_OPTION},
/* Report a command-line usage error and exit with status EXIT_TROUBLE.
   REASON_MSGID is an untranslated printf-style message; it is passed
   through _() and given to error () together with OPERAND.  die () then
   prints a hint to run the program with --help.  This function does not
   return, as the noreturn attribute on its prototype states.  */
113 static void try_help (char const *, char const *) __attribute__((noreturn));
115 try_help (char const *reason_msgid, char const *operand)
118 error (0, 0, _(reason_msgid), operand);
119 die (EXIT_TROUBLE, 0,
120 _("Try '%s --help' for more information."), program_name);
/* Suffix characters that xstrtoimax is allowed to accept when parsing
   the --ignore-initial and --bytes values.  */
123 static char const valid_suffixes[] = "kKMGTPEZY0";
125 /* Update ignore_initial[F] according to the result of parsing an
   operand *ARGPTR of --ignore-initial, updating *ARGPTR to point
   after the operand.  If DELIMITER is nonzero, the operand may be
   followed by DELIMITER; otherwise it must be null-terminated.

   The parsed value must lie in the range 0 .. TYPE_MAXIMUM (off_t);
   anything else is reported through try_help, which does not return.
   An existing ignore_initial[F] that is already larger is kept.  */
130 specify_ignore_initial (int f, char **argptr, char delimiter)
133 char const *arg = *argptr;
134 strtol_error e = xstrtoimax (arg, argptr, 0, &val, valid_suffixes);
/* Accept a clean parse, or a parse that stopped with an invalid-suffix
   error exactly at DELIMITER.  */
135 if (! ((e == LONGINT_OK
136 || (e == LONGINT_INVALID_SUFFIX_CHAR && **argptr == delimiter))
137 && 0 <= val && val <= TYPE_MAXIMUM (off_t)))
138 try_help ("invalid --ignore-initial value '%s'", arg);
/* Only ever raise the skip count; repeated specifications keep the
   maximum.  */
139 if (ignore_initial[f] < val)
140 ignore_initial[f] = val;
143 /* Specify the output format T.  If a different, nonzero format has
   already been chosen the two requests conflict (-l versus -s); that is
   reported through try_help, which does not return.  Asking for the
   same format again is not an error. */
145 specify_comparison_type (enum comparison_type t)
147 if (comparison_type && comparison_type != t)
148 try_help ("options -l and -s are incompatible", 0);
/* NOTE(review): fragment -- the enclosing function header and the
   condition of the leading `if' are not visible in this excerpt.
   The visible part exits with EXIT_TROUBLE and "write failed" in the
   first branch, and otherwise exits with EXIT_TROUBLE and errno when
   closing stdout fails.  */
156 die (EXIT_TROUBLE, 0, "%s", _("write failed"));
157 else if (fclose (stdout) != 0)
158 die (EXIT_TROUBLE, errno, "%s", _("standard output"));
/* Help text for the options, one entry per option form.  The strings are
   only marked for translation here (N_); the help printer translates
   each one with _() when it prints it, and walks the array until it
   reaches a null pointer.  */
161 static char const * const option_help_msgid[] = {
162 N_("-b, --print-bytes print differing bytes"),
163 N_("-i, --ignore-initial=SKIP skip first SKIP bytes of both inputs"),
164 N_("-i, --ignore-initial=SKIP1:SKIP2 skip first SKIP1 bytes of FILE1 and\n"
165 " first SKIP2 bytes of FILE2"),
166 N_("-l, --verbose output byte numbers and differing byte values"),
167 N_("-n, --bytes=LIMIT compare at most LIMIT bytes"),
168 N_("-s, --quiet, --silent suppress all normal output"),
169 N_(" --help display this help and exit"),
170 N_("-v, --version output version information and exit"),
/* Print the help text with printf: the usage line, a one-line summary,
   the description of the SKIP operands, one line per entry of
   option_help_msgid, the table of multiplicative suffixes, the note on
   '-' operands and the exit-status summary; then call
   emit_bug_reporting_address.
   NOTE(review): the function header is not visible in this excerpt.  */
177 char const * const *p;
179 printf (_("Usage: %s [OPTION]... FILE1 [FILE2 [SKIP1 [SKIP2]]]\n"),
181 printf ("%s\n", _("Compare two files byte by byte."));
183 _("The optional SKIP1 and SKIP2 specify the number of bytes to skip\n"
184 "at the beginning of each file (zero by default)."));
187 Mandatory arguments to long options are mandatory for short options too.\n\
189 for (p = option_help_msgid; *p; p++)
190 printf (" %s\n", _(*p));
191 printf ("\n%s\n\n%s\n%s\n",
192 _("SKIP values may be followed by the following multiplicative suffixes:\n\
193 kB 1000, K 1024, MB 1,000,000, M 1,048,576,\n\
194 GB 1,000,000,000, G 1,073,741,824, and so on for T, P, E, Z, Y."),
195 _("If a FILE is '-' or missing, read standard input."),
196 _("Exit status is 0 if inputs are the same, 1 if different, 2 if trouble."));
197 emit_bug_reporting_address ();
/* Program entry point.  In order: set up the failure status, program
   name and locale; parse options and operands; open both inputs and
   fstat them; take the shortcuts that avoid reading any data (same
   name and offset, same inode and position, sizes that must differ
   when only the status is wanted); choose the block size and allocate
   the two read buffers; run cmp (); and close the inputs.  */
201 main (int argc, char **argv)
204 size_t words_per_buffer;
206 exit_failure = EXIT_TROUBLE;
207 initialize_main (&argc, &argv);
208 set_program_name (argv[0]);
209 setlocale (LC_ALL, "");
210 bindtextdomain (PACKAGE, LOCALEDIR);
211 textdomain (PACKAGE);
215 /* Parse command line options. */
217 while ((c = getopt_long (argc, argv, "bci:ln:sv", long_options, 0))
222 case 'c': /* 'c' is obsolescent as of diffutils 2.7.3 */
223 opt_print_bytes = true;
/* --ignore-initial=SKIP or SKIP1:SKIP2.  The first number applies to
   file 0.  If a ':' follows, the second number applies to file 1;
   otherwise file 1's skip count is raised to at least file 0's.  */
227 specify_ignore_initial (0, &optarg, ':');
228 if (*optarg++ == ':')
229 specify_ignore_initial (1, &optarg, 0);
230 else if (ignore_initial[1] < ignore_initial[0])
231 ignore_initial[1] = ignore_initial[0];
235 specify_comparison_type (type_all_diffs);
/* --bytes=LIMIT.  A value that does not parse is a usage error.  The
   test on `bytes' below is false only when an earlier, smaller limit
   is already in force.
   NOTE(review): the statement it guards is not visible in this
   excerpt -- presumably it stores the new limit.  */
241 if (xstrtoimax (optarg, 0, 0, &n, valid_suffixes) != LONGINT_OK
243 try_help ("invalid --bytes value '%s'", optarg);
244 if (! (0 <= bytes && bytes < n))
250 specify_comparison_type (type_status);
254 version_etc (stdout, PROGRAM_NAME, PACKAGE_NAME, Version,
255 AUTHORS, (char *) NULL);
/* Operands: FILE1 is taken first; FILE2 defaults to "-".  */
269 try_help ("missing operand after '%s'", argv[argc - 1]);
271 file[0] = argv[optind++];
272 file[1] = optind < argc ? argv[optind++] : "-";
/* Up to two further operands are skip counts for file 0 and file 1;
   they go through the same parser as --ignore-initial, with no
   delimiter allowed.  */
274 for (int f = 0; f < 2 && optind < argc; f++)
276 char *arg = argv[optind++];
277 specify_ignore_initial (f, &arg, 0);
281 try_help ("extra operand '%s'", argv[optind]);
283 for (int f = 0; f < 2; f++)
285 /* Two files with the same name and offset are identical.
286 But wait until we open the file once, for proper diagnostics. */
287 if (f && ignore_initial[0] == ignore_initial[1]
288 && file_name_cmp (file[0], file[1]) == 0)
/* "-" selects standard input, switched to binary mode where O_BINARY
   is nonzero and the descriptor is not a terminal; any other name is
   opened read-only.  */
291 if (STREQ (file[f], "-"))
293 file_desc[f] = STDIN_FILENO;
294 if (O_BINARY && ! isatty (STDIN_FILENO))
295 set_binary_mode (STDIN_FILENO, O_BINARY);
298 file_desc[f] = open (file[f], O_RDONLY | O_BINARY, 0);
300 if (file_desc[f] < 0 || fstat (file_desc[f], stat_buf + f) != 0)
/* NOTE(review): an open failure in status-only mode is singled out
   here, but the statement it guards is not visible in this excerpt.
   Otherwise the failure is reported with the file name and errno.  */
302 if (file_desc[f] < 0 && comparison_type == type_status)
305 die (EXIT_TROUBLE, errno, "%s", file[f]);
309 /* If the files are links to the same inode and have the same file position,
310 they are identical. */
312 if (0 < same_file (&stat_buf[0], &stat_buf[1])
313 && same_file_attributes (&stat_buf[0], &stat_buf[1])
314 && file_position (0) == file_position (1))
/* Unless only the exit status is wanted, check whether standard output
   and NULL_DEVICE are the same file, and if so switch to
   type_no_stdout.  */
317 /* If output is redirected to the null device, we can avoid some of
320 if (comparison_type != type_status)
322 struct stat outstat, nullstat;
324 if (fstat (STDOUT_FILENO, &outstat) == 0
325 && stat (NULL_DEVICE, &nullstat) == 0
326 && 0 < same_file (&outstat, &nullstat))
327 comparison_type = type_no_stdout;
330 /* If only a return code is needed,
331 and if both input descriptors are associated with plain files,
332 conclude that the files differ if they have different sizes
333 and if more bytes will be compared than are in the smaller file. */
335 if (comparison_type == type_status
336 && S_ISREG (stat_buf[0].st_mode)
337 && S_ISREG (stat_buf[1].st_mode))
339 off_t s0 = stat_buf[0].st_size - file_position (0);
340 off_t s1 = stat_buf[1].st_size - file_position (1);
345 if (s0 != s1 && (bytes < 0 || MIN (s0, s1) < bytes))
349 /* Get the optimal block size of the files. */
351 buf_size = buffer_lcm (STAT_BLOCKSIZE (stat_buf[0]),
352 STAT_BLOCKSIZE (stat_buf[1]),
353 PTRDIFF_MAX - sizeof (word));
355 /* Allocate word-aligned buffers, with space for sentinels at the end. */
/* The division rounds buf_size up to whole words and adds one more
   word, so each buffer has room past buf_size bytes.  Both buffers
   come from a single allocation.  */
357 words_per_buffer = (buf_size + 2 * sizeof (word) - 1) / sizeof (word);
358 buffer[0] = xmalloc (2 * sizeof (word) * words_per_buffer);
359 buffer[1] = buffer[0] + words_per_buffer;
361 exit_status = cmp ();
/* A failure to close either input is fatal.  */
363 for (int f = 0; f < 2; f++)
364 if (close (file_desc[f]) != 0)
365 die (EXIT_TROUBLE, errno, "%s", file[f]);
/* Only type_first_diff and type_all_diffs satisfy the second test.
   NOTE(review): the statement guarded by this condition is not
   visible in this excerpt.  */
366 if (exit_status != EXIT_SUCCESS && comparison_type < type_no_stdout)
/* Summary of cmp (the original header comment that follows is cut off
   in this excerpt): compare the two inputs block by block.  The value
   returned is EXIT_SUCCESS when `differing' ends up 0 and EXIT_FAILURE
   otherwise.  Read errors do not return; they exit through die with
   EXIT_TROUBLE.  */
372 /* Compare the two files already open on 'file_desc[0]' and 'file_desc[1]',
373 using 'buffer[0]' and 'buffer[1]'.
374 Return EXIT_SUCCESS if identical, EXIT_FAILURE if different,
380 bool at_line_start = true;
381 off_t line_number = 1; /* Line number (1...) of difference. */
382 off_t byte_number = 1; /* Byte number (1...) of difference. */
383 intmax_t remaining = bytes; /* Remaining bytes to compare, or -1. */
384 size_t read0, read1; /* Number of bytes read from each file. */
385 size_t first_diff; /* Offset (0...) in buffers of 1st diff. */
386 size_t smaller; /* The lesser of 'read0' and 'read1'. */
387 word *buffer0 = buffer[0];
388 word *buffer1 = buffer[1];
389 char *buf0 = (char *) buffer0;
390 char *buf1 = (char *) buffer1;
393 int offset_width IF_LINT (= 0);
/* When all differences are listed, compute offset_width, the number of
   decimal digits of the largest byte number that can occur.  That
   number is bounded by the byte limit (if any) and by the remaining
   size of each input that is a regular file.  offset_width is used as
   the field width of the byte-number column below.  */
395 if (comparison_type == type_all_diffs)
397 off_t byte_number_max = (0 <= bytes && bytes <= TYPE_MAXIMUM (off_t)
398 ? bytes : TYPE_MAXIMUM (off_t));
400 for (f = 0; f < 2; f++)
401 if (S_ISREG (stat_buf[f].st_mode))
403 off_t file_bytes = stat_buf[f].st_size - file_position (f);
404 if (file_bytes < byte_number_max)
405 byte_number_max = file_bytes;
408 for (offset_width = 1; (byte_number_max /= 10) != 0; offset_width++)
/* Skip the ignored prefix of each file.  file_position attempts the
   skip with lseek; when it reports -1 the prefix is instead read into
   buf0, at most buf_size bytes at a time, and discarded.  */
412 for (f = 0; f < 2; f++)
414 off_t ig = ignore_initial[f];
415 if (ig && file_position (f) == -1)
417 /* lseek failed; read and discard the ignored initial prefix. */
420 size_t bytes_to_read = MIN (ig, buf_size);
421 size_t r = block_read (file_desc[f], buf0, bytes_to_read);
422 if (r != bytes_to_read)
425 die (EXIT_TROUBLE, errno, "%s", file[f]);
/* Main loop: read the next block from each file (buf_size bytes, or
   fewer when `remaining' is smaller) and find the offset of the first
   difference within it.  block_read signals an error by returning
   SIZE_MAX.  */
436 size_t bytes_to_read = buf_size;
440 if (remaining < bytes_to_read)
441 bytes_to_read = remaining;
442 remaining -= bytes_to_read;
445 read0 = block_read (file_desc[0], buf0, bytes_to_read);
446 if (read0 == SIZE_MAX)
447 die (EXIT_TROUBLE, errno, "%s", file[0]);
448 read1 = block_read (file_desc[1], buf1, bytes_to_read);
449 if (read1 == SIZE_MAX)
450 die (EXIT_TROUBLE, errno, "%s", file[1]);
452 smaller = MIN (read0, read1);
454 /* Optimize the common case where the buffers are the same. */
455 if (memcmp (buf0, buf1, smaller) == 0)
456 first_diff = smaller;
459 /* Insert sentinels for the block compare. */
/* block_compare has no length argument, so the bytes just past the
   data are forced to differ between the two buffers; that is what
   stops its scan.  */
461 buf1[read0] = 0x55; /* arbitrary */
463 buf0[read1] = 0x79; /* arbitrary and distinct from the above */
464 buf0[read0] = ~buf1[read0];
465 buf1[read1] = ~buf0[read1];
466 /* Ensure all bytes of a final word-read are initialized. */
467 memset (buf0 + read0 + 1, 0,
468 sizeof (word) - read0 % sizeof (word) - 1);
469 memset (buf1 + read1 + 1, 0,
470 sizeof (word) - read1 % sizeof (word) - 1);
472 first_diff = block_compare (buffer0, buffer1);
/* Advance the running byte number.  In first-difference mode also keep
   the line number up to date, and remember whether the last byte
   before the difference was a newline.  */
475 byte_number += first_diff;
476 if (comparison_type == type_first_diff && first_diff != 0)
478 line_number += count_newlines (buf0, first_diff);
479 at_line_start = buf0[first_diff - 1] == '\n';
482 if (first_diff < smaller)
484 switch (comparison_type)
486 case type_first_diff:
488 char byte_buf[INT_BUFSIZE_BOUND (off_t)];
489 char line_buf[INT_BUFSIZE_BOUND (off_t)];
490 char const *byte_num = offtostr (byte_number, byte_buf);
491 char const *line_num = offtostr (line_number, line_buf);
492 if (!opt_print_bytes)
494 /* See POSIX for this format. This message is
495 used only in the POSIX locale, so it need not
497 static char const char_message[] =
498 "%s %s differ: char %s, line %s\n";
500 /* The POSIX rationale recommends using the word
501 "byte" outside the POSIX locale. Some gettext
502 implementations translate even in the POSIX
503 locale if certain other environment variables
504 are set, so use "byte" if a translation is
505 available, or if outside the POSIX locale. */
506 static char const byte_msgid[] =
507 N_("%s %s differ: byte %s, line %s\n");
508 char const *byte_message = _(byte_msgid);
509 bool use_byte_message = (byte_message != byte_msgid
510 || hard_locale_LC_MESSAGES);
512 printf (use_byte_message ? byte_message : char_message,
513 file[0], file[1], byte_num, line_num);
517 unsigned char c0 = buf0[first_diff];
518 unsigned char c1 = buf1[first_diff];
523 printf (_("%s %s differ: byte %s, line %s is %3o %s %3o %s\n"),
524 file[0], file[1], byte_num, line_num,
535 unsigned char c0 = buf0[first_diff];
536 unsigned char c1 = buf1[first_diff];
539 char byte_buf[INT_BUFSIZE_BOUND (off_t)];
540 char const *byte_num = offtostr (byte_number, byte_buf);
541 if (!opt_print_bytes)
543 /* See POSIX for this format. */
544 printf ("%*s %3o %3o\n",
545 offset_width, byte_num, c0, c1);
553 printf ("%*s %3o %-4s %3o %s\n",
554 offset_width, byte_num, c0, s0, c1, s1);
560 while (first_diff < smaller);
/* End-of-file report on standard error, made when `differing' is not
   positive and more than the exit status is wanted.  The shorter input
   is file[1] when read1 < read0, and file[0] otherwise.  */
572 if (differing <= 0 && comparison_type != type_status)
574 char const *shorter_file = file[read1 < read0];
576 /* POSIX says that each of these format strings must be
577 "cmp: EOF on %s", optionally followed by a blank and
578 extra text sans newline, then terminated by "\n". */
579 if (byte_number == 1)
580 fprintf (stderr, _("cmp: EOF on %s which is empty\n"),
584 char byte_buf[INT_BUFSIZE_BOUND (off_t)];
585 char const *byte_num = offtostr (byte_number - 1, byte_buf);
587 if (comparison_type == type_first_diff)
589 char line_buf[INT_BUFSIZE_BOUND (off_t)];
591 = offtostr (line_number - at_line_start, line_buf);
594 ? _("cmp: EOF on %s after byte %s, line %s\n")
595 : _("cmp: EOF on %s after byte %s,"
597 shorter_file, byte_num, line_num);
601 _("cmp: EOF on %s after byte %s\n"),
602 shorter_file, byte_num);
/* Continue only while `differing' is not positive and the last read
   from file 0 filled a whole buffer.  */
609 while (differing <= 0 && read0 == buf_size);
611 return differing == 0 ? EXIT_SUCCESS : EXIT_FAILURE;
614 /* Compare two blocks of memory P0 and P1 until they differ.
   The scan takes no length: it first steps through both blocks a word
   at a time until the words differ, then narrows the position down to
   a byte.  If the blocks are not guaranteed to be different, put
   sentinels at the ends of the blocks before calling this function.

   Return the offset, in bytes from P0, of the first byte that differs. */
621 block_compare (word const *p0, word const *p1)
626 /* Find the rough position of the first difference by reading words,
629 for (l0 = p0, l1 = p1; *l0 == *l1; l0++, l1++)
632 /* Find the exact differing position (endianness independent). */
634 for (c0 = (char const *) l0, c1 = (char const *) l1;
/* The result is a byte offset relative to the start of P0.  */
639 return c0 - (char const *) p0;
/* Return the number of newline bytes among the BUFSIZE bytes that start
   at BUF.

   Only BUF[0] through BUF[BUFSIZE - 1] are read and nothing is written:
   the search is bounded with memchr, so no sentinel has to be planted
   at BUF[BUFSIZE] and that byte need not be accessible.  A BUFSIZE of
   zero yields 0.  */

static size_t
count_newlines (char *buf, size_t bufsize)
{
  size_t count = 0;
  char *lim = buf + bufsize;

  /* Each search covers only the bytes that remain before LIM, and the
     next one resumes just past the newline that was found.  */
  for (char *p = buf; (p = memchr (p, '\n', lim - p)) != NULL; p++)
    count++;

  return count;
}
659 /* Put into BUF the unsigned char C, making unprintable bytes
   visible by quoting like cat -t does.
   NOTE(review): the function body is not visible in this excerpt, so
   the exact notation produced and the size BUF must have cannot be
   confirmed from here. */
663 sprintc (char *buf, unsigned char c)
689 /* Position file F to ignore_initial[F] bytes from its initial position,
   and yield its new position. Don't try more than once.
   The lseek result is stored in a static array, together with a flag
   saying that the attempt has been made.  Callers in this file compare
   the result with -1 to detect that lseek failed. */
693 file_position (int f)
/* Per-file record of whether lseek has been attempted, and of the
   value it returned.  */
695 static bool positioned[2];
696 static off_t position[2];
/* Mark the attempt before making it; the seek is relative to the
   descriptor's current offset (SEEK_CUR).  */
700 positioned[f] = true;
701 position[f] = lseek (file_desc[f], ignore_initial[f], SEEK_CUR);