1 /* uniq -- remove duplicate lines from a sorted file
2 Copyright (C) 86, 91, 1995-1998, 1999 Free Software Foundation, Inc.
4 This program is free software; you can redistribute it and/or modify
5 it under the terms of the GNU General Public License as published by
6 the Free Software Foundation; either version 2, or (at your option)
any later version.
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU General Public License for more details.
14 You should have received a copy of the GNU General Public License
15 along with this program; if not, write to the Free Software Foundation,
16 Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */
18 /* Written by Richard Stallman and David MacKenzie. */
24 #include <sys/types.h>
27 #include "linebuffer.h"
28 #include "long-options.h"
31 #include "memcasecmp.h"
33 /* The official name of this program (e.g., no `g' prefix). */
34 #define PROGRAM_NAME "uniq"
36 /* Undefine, to avoid warning about redefinition on some systems. */
/* NOTE(review): no #undef of min is visible before the definition below in
   this excerpt; confirm that it is present in the full file.  */
/* Yield the smaller of X and Y.  Whichever argument is selected is
   evaluated twice, so callers must not pass expressions with side
   effects.  */
38 #define min(x, y) ((x) < (y) ? (x) : (y))
40 /* The name this program was run with. */
/* NOTE(review): the declaration this comment describes is not visible in
   this excerpt; main assigns argv[0] to program_name.  */
43 /* Number of fields to skip on each line when doing comparisons. */
/* Accumulated digit by digit from the numeric options and assigned by the
   f option in main; read by find_field.  */
44 static int skip_fields;
46 /* Number of chars to skip after skipping any fields. */
/* Assigned by the s option and by a leading plus-number operand in main;
   read by find_field after the field skipping is done.  */
47 static int skip_chars;
49 /* Number of chars to compare; if 0, compare the whole lines. */
/* Assigned in main after the option argument has been validated; used by
   different as an upper bound on both lengths.  */
50 static int check_chars;
/* Enumerators selecting whether output lines carry an occurrence count.
   NOTE(review): the opening of the enum countmode definition and its
   closing brace are not visible in this excerpt; only the enumerators
   appear.  */
54 count_occurrences, /* -c Print count before output lines. */
55 count_none /* Default. Do not print counts. */
58 /* Whether and how to precede the output lines with a count of the number of
59 times they occurred in the input. */
/* main initialises this to count_none and switches it to count_occurrences
   while parsing options; writeline prints the count prefix only for
   count_occurrences.  */
60 static enum countmode countmode;
/* Enumerators selecting which input lines reach the output.
   NOTE(review): the opening of the enum output_mode definition and its
   closing brace are not visible in this excerpt; only the enumerators
   appear.  */
64 output_repeated, /* -d Only lines that are repeated. */
65 output_all_repeated, /* -D All lines that are repeated. */
66 output_unique, /* -u Only lines that are not repeated. */
67 output_all /* Default. Print first copy of each line. */
70 /* Which lines to output. */
/* writeline consults this together with its match count: output_unique
   suppresses a line whose count is nonzero, while output_repeated and
   output_all_repeated suppress a line whose count is zero.  check_file
   also writes the previous line on every iteration, matching or not, when
   this is output_all_repeated.  */
71 static enum output_mode mode;
73 /* If nonzero, ignore case when comparing. */
/* NOTE(review): the statement that sets this flag is not visible in this
   excerpt; longopts maps the ignore-case long option to the i option
   character.  */
74 static int ignore_case;
/* Long option table handed to getopt_long by main.  Every entry has a NULL
   flag field, so getopt_long reports a long option by returning the short
   option character in the last field.  The skip-fields, skip-chars and
   check-chars entries require an argument; the others take none.
   NOTE(review): the braces around the initializer and any terminating
   all-zero entry are not visible in this excerpt.  */
76 static struct option const longopts[] =
78 {"count", no_argument, NULL, 'c'},
79 {"repeated", no_argument, NULL, 'd'},
80 {"all-repeated", no_argument, NULL, 'D'},
81 {"ignore-case", no_argument, NULL, 'i'},
82 {"unique", no_argument, NULL, 'u'},
83 {"skip-fields", required_argument, NULL, 's' - 's' + 'f'},
84 {"skip-chars", required_argument, NULL, 's'},
85 {"check-chars", required_argument, NULL, 'w'},
/* Fragment of the usage routine that main passes to parse_long_options.
   The visible code prints a hint to stderr pointing at the help option,
   carries the text of the option summary, prints the bug-report address
   with puts, and finally exits with EXIT_SUCCESS when status is 0 and
   EXIT_FAILURE otherwise.
   NOTE(review): the function header, the test that chooses between the
   hint and the full help text, and the call that opens and closes the help
   string are not visible in this excerpt.  */
93 fprintf (stderr, _("Try `%s --help' for more information.\n"),
98 Usage: %s [OPTION]... [INPUT [OUTPUT]]\n\
102 Discard all but one of successive identical lines from INPUT (or\n\
103 standard input), writing to OUTPUT (or standard output).\n\
105 -c, --count prefix lines by the number of occurrences\n\
106 -d, --repeated only print duplicate lines\n\
107 -D, --all-repeated print all duplicate lines\n\
108 -f, --skip-fields=N avoid comparing the first N fields\n\
109 -i, --ignore-case ignore differences in case when comparing\n\
110 -s, --skip-chars=N avoid comparing the first N characters\n\
111 -u, --unique only print unique lines\n\
112 -w, --check-chars=N compare no more than N characters in lines\n\
115 --help display this help and exit\n\
116 --version output version information and exit\n\
118 A field is a run of whitespace, then non-whitespace characters.\n\
119 Fields are skipped before chars.\n\
121 puts (_("\nReport bugs to <bug-textutils@gnu.org>."));
123 exit (status == 0 ? EXIT_SUCCESS : EXIT_FAILURE);
126 /* Given a linebuffer LINE,
127 return a pointer to the beginning of the line's field to be compared. */
/* Skipping happens in two stages, both bounded by the length of LINE:
   first up to skip_fields fields, then up to skip_chars further
   characters.
   NOTE(review): the return type, the declarations of count and i, the
   statements inside the loops and the return statement are not visible in
   this excerpt.  */
130 find_field (const struct linebuffer *line)
133 register char *lp = line->buffer;
134 register int size = line->length;
/* Stage one: step over skip_fields fields, stopping early if the end of
   the line is reached.  */
137 for (count = 0; count < skip_fields && i < size; count++)
/* A field is a run of blanks ...  */
139 while (i < size && ISBLANK (lp[i]))
/* ... followed by a run of non-blanks.  */
141 while (i < size && !ISBLANK (lp[i]))
/* Stage two: step over up to skip_chars more characters.  */
145 for (count = 0; count < skip_chars && i < size; count++)
151 /* Return zero if two strings OLD and NEW match, nonzero if not.
152 OLD and NEW point not to the beginnings of the lines
153 but rather to the beginnings of the fields to compare.
154 OLDLEN and NEWLEN are their lengths. */
157 different (const char *old, const char *new, int oldlen, int newlen)
163 if (oldlen > check_chars)
164 oldlen = check_chars;
165 if (newlen > check_chars)
166 newlen = check_chars;
169 /* Use an if-statement here rather than a function variable to
170 avoid portability hassles of getting a non-conflicting declaration
173 order = memcasecmp (old, new, min (oldlen, newlen));
175 order = memcmp (old, new, min (oldlen, newlen));
178 return oldlen - newlen;
182 /* Output the line in linebuffer LINE to stream STREAM
183 provided that the switches say it should be output.
184 If requested, print the number of times it occurred, as well;
185 LINECOUNT + 1 is the number of times that the line occurred. */
/* NOTE(review): the return type, the braces and the statement controlled
   by the first condition below are not visible in this excerpt.  */
188 writeline (const struct linebuffer *line, FILE *stream, int linecount)
/* The line is excluded when the mode asks for unique lines and this one
   was repeated, or when the mode asks for repeated lines (either flavour)
   and this one was not.  */
190 if ((mode == output_unique && linecount != 0)
191 || (mode == output_repeated && linecount == 0)
192 || (mode == output_all_repeated && linecount == 0))
/* Optional count prefix: the occurrence total in a field at least seven
   wide, followed by a tab.  */
195 if (countmode == count_occurrences)
196 fprintf (stream, "%7d\t", linecount + 1);
/* The line itself is written as line->length bytes taken from
   line->buffer.  */
198 fwrite (line->buffer, sizeof (char), line->length, stream);
202 /* Process input file INFILE with output to OUTFILE.
203 If either is "-", use the standard I/O stream for it instead. */
/* The visible code reads the input one line at a time, compares the
   selected field of each line with that of the previous line, and hands
   the previous line to writeline together with a match count.
   NOTE(review): the return type, the declarations of istream, ostream,
   match and match_count, the assignments that point thisline and prevline
   at lb1 and lb2, the buffer exchange through exch and the updates of
   match_count are not visible in this excerpt.  */
206 check_file (const char *infile, const char *outfile)
210 struct linebuffer lb1, lb2;
211 struct linebuffer *thisline, *prevline, *exch;
212 char *prevfield, *thisfield;
213 int prevlen, thislen;
/* Open the input, or treat a lone dash specially; a failed fopen is
   reported through error with EXIT_FAILURE and errno.  */
216 if (STREQ (infile, "-"))
219 istream = fopen (infile, "r");
221 error (EXIT_FAILURE, errno, "%s", infile);
/* Same for the output, opened for writing.  */
223 if (STREQ (outfile, "-"))
226 ostream = fopen (outfile, "w");
228 error (EXIT_FAILURE, errno, "%s", outfile);
233 initbuffer (thisline);
234 initbuffer (prevline);
/* Prime the comparison with the first input line.
   NOTE(review): the statement taken when readline returns 0 is not
   visible in this excerpt.  */
236 if (readline (prevline, istream) == 0)
/* The field length is what remains of the line after the skipped part.  */
238 prevfield = find_field (prevline);
239 prevlen = prevline->length - (prevfield - prevline->buffer);
241 while (!feof (istream))
/* NOTE(review): the statement taken when readline returns 0 is not
   visible in this excerpt.  */
244 if (readline (thisline, istream) == 0)
246 thisfield = find_field (thisline);
247 thislen = thisline->length - (thisfield - thisline->buffer);
/* match is nonzero when the two fields compare equal.  */
248 match = !different (thisfield, prevfield, thislen, prevlen);
/* The previous line is written when a new group starts, and on every
   iteration in output_all_repeated mode.  */
253 if (!match || mode == output_all_repeated)
255 writeline (prevline, ostream, match_count);
/* The current field becomes the reference for the next iteration.  */
259 prevfield = thisfield;
/* Write the line still held in prevline once the loop has ended.  */
266 writeline (prevline, ostream, match_count);
/* A stream error or a failed fclose on either stream is reported through
   error with EXIT_FAILURE and errno.  */
269 if (ferror (istream) || fclose (istream) == EOF)
270 error (EXIT_FAILURE, errno, _("error reading %s"), infile);
272 if (ferror (ostream) || fclose (ostream) == EOF)
273 error (EXIT_FAILURE, errno, _("error writing %s"), outfile);
/* Program entry point: set up the locale, parse options and operands into
   the file-level settings, then run check_file on the chosen input and
   output names, both of which default to a lone dash.
   NOTE(review): the return type, the declarations of optc and tmp_long,
   the switch statement with most of its case labels, the initial value of
   mode and the final exit are not visible in this excerpt.  */
280 main (int argc, char **argv)
283 char *infile = "-", *outfile = "-";
285 program_name = argv[0];
286 setlocale (LC_ALL, "");
287 bindtextdomain (PACKAGE, LOCALEDIR);
288 textdomain (PACKAGE);
/* NOTE(review): presumably this handles the help and version long options
   using the usage routine passed in; confirm against long-options.h.  */
290 parse_long_options (argc, argv, "uniq", GNU_PACKAGE, VERSION,
291 "Richard Stallman and David MacKenzie", usage);
297 countmode = count_none;
/* The option string accepts the digits 0-9 as options in their own right,
   plus c, d, D, i and u without arguments and f, s and w with one.  */
299 while ((optc = getopt_long (argc, argv, "0123456789cdDf:is:uw:", longopts,
/* Digit options build up skip_fields one decimal digit at a time.
   NOTE(review): no range check is visible on this accumulation, unlike the
   INT_MAX check applied to the f option argument; a long digit string
   could overflow the int.  */
317 skip_fields = skip_fields * 10 + optc - '0';
321 countmode = count_occurrences;
325 mode = output_repeated;
329 mode = output_all_repeated;
/* The argument must parse as a decimal number in the range 1 to INT_MAX;
   zero is rejected.  */
332 case 'f': /* Like '-#'. */
335 if (xstrtol (optarg, NULL, 10, &tmp_long, "") != LONGINT_OK
336 || tmp_long <= 0 || tmp_long > INT_MAX)
337 error (EXIT_FAILURE, 0,
338 _("invalid number of fields to skip: `%s'"),
340 skip_fields = (int) tmp_long;
/* Same validation as for the f option, stored in skip_chars.  */
348 case 's': /* Like '+#'. */
351 if (xstrtol (optarg, NULL, 10, &tmp_long, "") != LONGINT_OK
352 || tmp_long <= 0 || tmp_long > INT_MAX)
353 error (EXIT_FAILURE, 0,
354 _("invalid number of bytes to skip: `%s'"),
356 skip_chars = (int) tmp_long;
361 mode = output_unique;
/* Same validation again, stored in check_chars.
   NOTE(review): the case label for this branch is not visible in this
   excerpt.  */
367 if (xstrtol (optarg, NULL, 10, &tmp_long, "") != LONGINT_OK
368 || tmp_long <= 0 || tmp_long > INT_MAX)
369 error (EXIT_FAILURE, 0,
370 _("invalid number of bytes to compare: `%s'"),
372 check_chars = (int) tmp_long;
/* The operand scan below runs only when the argument just before optind is
   not a double dash.  */
381 if (optind >= 2 && !STREQ (argv[optind - 1], "--"))
383 /* Interpret non-option arguments with leading `+' only
384 if we haven't seen `--'. */
385 while (optind < argc && argv[optind][0] == '+')
387 char *opt_str = argv[optind++];
/* NOTE(review): the value parsed here is stored in skip_chars, yet the
   message below speaks of bytes to compare, whereas the s option uses the
   wording bytes to skip; the message looks wrong and should be
   confirmed.  */
389 if (xstrtol (opt_str, NULL, 10, &tmp_long, "") != LONGINT_OK
390 || tmp_long <= 0 || tmp_long > INT_MAX)
391 error (EXIT_FAILURE, 0,
392 _("invalid number of bytes to compare: `%s'"),
394 skip_chars = (int) tmp_long;
/* Remaining operands name the input and then the output.
   NOTE(review): the conditions guarding these assignments and the
   too-many-arguments diagnostic are not visible in this excerpt.  */
399 infile = argv[optind++];
402 outfile = argv[optind++];
406 error (0, 0, _("too many arguments"));
/* Counting combined with all-repeated output is diagnosed here.  */
410 if (countmode == count_occurrences && mode == output_all_repeated)
413 _("printing all duplicated lines and repeat counts is meaningless"));
417 check_file (infile, outfile);