1 /* uniq -- remove duplicate lines from a sorted file
2 Copyright (C) 86, 91, 95, 1996 Free Software Foundation, Inc.
4 This program is free software; you can redistribute it and/or modify
5 it under the terms of the GNU General Public License as published by
6 the Free Software Foundation; either version 2, or (at your option)
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU General Public License for more details.
14 You should have received a copy of the GNU General Public License
15 along with this program; if not, write to the Free Software Foundation,
16 Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */
18 /* Written by Richard Stallman and David MacKenzie. */
22 /* Get isblank from GNU libc. */
27 #include <sys/types.h>
34 # define UINT_MAX ((unsigned int) ~(unsigned int) 0)
38 # define INT_MAX ((int) (UINT_MAX >> 1))
42 #include "linebuffer.h"
45 #include "memcasecmp.h"
47 /* Undefine, to avoid warning about redefinition on some systems. */
/* Yield the smaller of X and Y. Each argument is expanded twice, so
   callers should pass expressions without side effects. */
49 #define min(x, y) ((x) < (y) ? (x) : (y))
51 /* The name this program was run with. */
54 /* Number of fields to skip on each line when doing comparisons.
   Assigned in main from -f/--skip-fields or from the digit options,
   and read by find_field. */
55 static int skip_fields;
57 /* Number of chars to skip after skipping any fields.
   Assigned in main from -s/--skip-chars or from a leading +N operand,
   and read by find_field. */
58 static int skip_chars;
60 /* Number of chars to compare; if 0, compare the whole lines.
   Assigned in main from -w/--check-chars; different clamps both field
   lengths to this value. */
61 static int check_chars;
65 count_occurrences, /* -c Print count before output lines. */
66 count_none /* Default. Do not print counts. */
69 /* Whether and how to precede the output lines with a count of the number of
70 times they occurred in the input. Consulted by writeline. */
71 static enum countmode countmode;
75 output_repeated, /* -d Only lines that are repeated. */
76 output_unique, /* -u Only lines that are not repeated. */
77 output_all /* Default. Print first copy of each line. */
80 /* Which lines to output. Consulted by writeline to decide whether a
   line is suppressed. */
81 static enum output_mode mode;
83 /* If nonzero, ignore case when comparing. */
84 static int ignore_case;
86 /* If nonzero, display usage information and exit. */
89 /* If nonzero, print the version on standard output then exit.
   Stored through the --version entry of longopts and tested in main. */
90 static int show_version;
/* Long options handed to getopt_long in main. Entries whose flag field
   is NULL carry the matching short-option character as their value, so
   the long and short spellings share one case in the option switch.
   The --help and --version entries instead name an int to receive the
   value 1 (show_help and show_version). */
92 static struct option const longopts[] =
94 {"count", no_argument, NULL, 'c'},
95 {"repeated", no_argument, NULL, 'd'},
96 {"ignore-case", no_argument, NULL, 'i'},
97 {"unique", no_argument, NULL, 'u'},
/* The three options below take a mandatory numeric argument. */
98 {"skip-fields", required_argument, NULL, 'f'},
99 {"skip-chars", required_argument, NULL, 's'},
100 {"check-chars", required_argument, NULL, 'w'},
101 {"help", no_argument, &show_help, 1},
102 {"version", no_argument, &show_version, 1},
/* usage: emit either the one-line --help hint on stderr or the full help
   text, followed by the bug-report address, then exit.  The exit code is
   EXIT_SUCCESS when STATUS is 0 and EXIT_FAILURE otherwise.
   The help text is a translatable message; the field description below
   reads "then" (it previously said "than"), so message catalogs keyed on
   the old wording need refreshing. */
110 fprintf (stderr, _("Try `%s --help' for more information.\n"),
115 Usage: %s [OPTION]... [INPUT [OUTPUT]]\n\
119 Discard all but one of successive identical lines from INPUT (or\n\
120 standard input), writing to OUTPUT (or standard output).\n\
122 -c, --count prefix lines by the number of occurrences\n\
123 -d, --repeated only print duplicate lines\n\
124 -f, --skip-fields=N avoid comparing the first N fields\n\
125 -i, --ignore-case ignore differences in case when comparing\n\
126 -s, --skip-chars=N avoid comparing the first N characters\n\
127 -u, --unique only print unique lines\n\
128 -w, --check-chars=N compare no more than N characters in lines\n\
131 --help display this help and exit\n\
132 --version output version information and exit\n\
134 A field is a run of whitespace, then non-whitespace characters.\n\
135 Fields are skipped before chars.\n\
137 puts (_("\nReport bugs to textutils-bugs@gnu.ai.mit.edu"));
139 exit (status == 0 ? EXIT_SUCCESS : EXIT_FAILURE);
142 /* Given a linebuffer LINE,
143 return a pointer to the beginning of the line's field to be compared.
   Up to skip_fields fields are passed over first, a field being a run of
   blanks followed by a run of non-blanks; after that, up to skip_chars
   further characters are passed over.  Every scan is bounded by
   LINE->length, so a short line simply ends the scan early. */
146 find_field (const struct linebuffer *line)
149 register char *lp = line->buffer;
150 register int size = line->length;
/* One iteration per field to skip; stops early if the line runs out. */
153 for (count = 0; count < skip_fields && i < size; count++)
/* Leading blanks belong to the field that follows them.
   NOTE(review): lp[i] is a plain char; if ISBLANK expands to a ctype
   call, a negative value would be out of its domain -- confirm against
   the macro definition. */
155 while (i < size && ISBLANK (lp[i]))
/* Then the non-blank body of the field. */
157 while (i < size && !ISBLANK (lp[i]))
/* Character skipping happens only after field skipping. */
161 for (count = 0; count < skip_chars && i < size; count++)
167 /* Return zero if two strings OLD and NEW match, nonzero if not.
168 OLD and NEW point not to the beginnings of the lines
169 but rather to the beginnings of the fields to compare.
170 OLDLEN and NEWLEN are their lengths.
   Neither string needs to be NUL-terminated: only the first
   min (OLDLEN, NEWLEN) bytes are handed to memcasecmp or memcmp, and a
   difference in the (possibly clamped) lengths is reported through the
   OLDLEN - NEWLEN return.
   NOTE(review): check_file passes the current line as OLD and the
   previous line as NEW; it only tests the result against zero, so the
   swapped order affects nothing but the sign. */
173 different (const char *old, const char *new, int oldlen, int newlen)
/* Clamp both lengths to the -w limit so that only the leading
   check_chars bytes of each field take part in the comparison. */
179 if (oldlen > check_chars)
180 oldlen = check_chars;
181 if (newlen > check_chars)
182 newlen = check_chars;
185 /* Use an if-statement here rather than a function variable to
186 avoid portability hassles of getting a non-conflicting declaration
189 order = memcasecmp (old, new, min (oldlen, newlen));
191 order = memcmp (old, new, min (oldlen, newlen));
194 return oldlen - newlen;
198 /* Output the line in linebuffer LINE to stream STREAM
199 provided that the switches say it should be output.
200 If requested, print the number of times it occurred, as well;
201 LINECOUNT + 1 is the number of times that the line occurred.
   LINECOUNT is therefore the number of extra matches, 0 for a line that
   was seen only once. */
204 writeline (const struct linebuffer *line, FILE *stream, int linecount)
/* This test selects the lines to suppress: under -u any line that had a
   repeat, under -d any line that had none. */
206 if ((mode == output_unique && linecount != 0)
207 || (mode == output_repeated && linecount == 0))
210 if (countmode == count_occurrences)
/* The count is right-justified in seven columns and followed by a tab. */
211 fprintf (stream, "%7d\t", linecount + 1);
/* The text is written by length, not as a C string.  Neither this call
   nor the fprintf above is checked here; check_file tests ferror on the
   output stream once all lines have been written. */
213 fwrite (line->buffer, sizeof (char), line->length, stream);
217 /* Process input file INFILE with output to OUTFILE.
218 If either is "-", use the standard I/O stream for it instead.
   Each line read is compared with the line currently held, using only
   the part located by find_field; writeline is called on the held line
   together with match_count.  Failures to open, read or write are
   reported through error with status EXIT_FAILURE. */
221 check_file (const char *infile, const char *outfile)
/* Two buffers are used alternately for the held line and the line just
   read. */
225 struct linebuffer lb1, lb2;
226 struct linebuffer *thisline, *prevline, *exch;
227 char *prevfield, *thisfield;
228 int prevlen, thislen;
231 if (!strcmp (infile, "-"))
234 istream = fopen (infile, "r");
236 error (EXIT_FAILURE, errno, "%s", infile);
238 if (!strcmp (outfile, "-"))
/* Mode "w": an existing OUTFILE is truncated as soon as it is opened. */
241 ostream = fopen (outfile, "w");
243 error (EXIT_FAILURE, errno, "%s", outfile);
248 initbuffer (thisline);
249 initbuffer (prevline);
/* Prime the comparison with the first line; a zero result from readline
   means there was nothing to read. */
251 if (readline (prevline, istream) == 0)
253 prevfield = find_field (prevline);
/* Comparable length: the bytes from the field start to the end of the
   line. */
254 prevlen = prevline->length - (prevfield - prevline->buffer);
256 while (!feof (istream))
258 if (readline (thisline, istream) == 0)
260 thisfield = find_field (thisline);
261 thislen = thisline->length - (thisfield - thisline->buffer);
/* The current line goes in as OLD and the held line as NEW; only the
   zero / nonzero outcome is used. */
262 if (!different (thisfield, prevfield, thislen, prevlen))
/* The lines differ: emit the held line with its accumulated count. */
266 writeline (prevline, ostream, match_count);
272 prevfield = thisfield;
/* Emit the line still held when the input ends. */
277 writeline (prevline, ostream, match_count);
/* ferror is tested before fclose so that an earlier stream error is
   reported even when the close itself succeeds.
   NOTE(review): errno is passed to error, but after a ferror hit it may
   no longer describe the original failure -- confirm. */
280 if (ferror (istream) || fclose (istream) == EOF)
281 error (EXIT_FAILURE, errno, _("error reading %s"), infile);
283 if (ferror (ostream) || fclose (ostream) == EOF)
284 error (EXIT_FAILURE, errno, _("error writing %s"), outfile);
/* Entry point.  Parse the command line into the file-scope settings
   (skip_fields, skip_chars, check_chars, mode, countmode), handle
   --version, then run check_file on the INPUT and OUTPUT operands, each
   of which defaults to "-".
   Numeric arguments must lie in 1 .. INT_MAX; anything else is a fatal
   error reported through error with status EXIT_FAILURE. */
291 main (int argc, char **argv)
294 char *infile = "-", *outfile = "-";
296 program_name = argv[0];
/* Locale and message-catalog setup for the translatable _() strings. */
297 setlocale (LC_ALL, "");
298 bindtextdomain (PACKAGE, LOCALEDIR);
299 textdomain (PACKAGE);
305 countmode = count_none;
307 while ((optc = getopt_long (argc, argv, "0123456789cdf:is:uw:", longopts,
/* Digit options: each one appends a decimal digit to skip_fields.
   NOTE(review): nothing bounds the accumulated value, so a long run of
   digits can overflow int -- confirm whether a range check is wanted. */
325 skip_fields = skip_fields * 10 + optc - '0';
329 countmode = count_occurrences;
333 mode = output_repeated;
336 case 'f': /* Like '-#'. */
339 if (xstrtol (optarg, NULL, 10, &tmp_long, "") != LONGINT_OK
340 || tmp_long <= 0 || tmp_long > INT_MAX)
341 error (EXIT_FAILURE, 0,
342 _("invalid number of fields to skip: `%s'"),
344 skip_fields = (int) tmp_long;
352 case 's': /* Like '+#'. */
355 if (xstrtol (optarg, NULL, 10, &tmp_long, "") != LONGINT_OK
356 || tmp_long <= 0 || tmp_long > INT_MAX)
357 error (EXIT_FAILURE, 0,
358 _("invalid number of bytes to skip: `%s'"),
360 skip_chars = (int) tmp_long;
365 mode = output_unique;
371 if (xstrtol (optarg, NULL, 10, &tmp_long, "") != LONGINT_OK
372 || tmp_long <= 0 || tmp_long > INT_MAX)
373 error (EXIT_FAILURE, 0,
374 _("invalid number of bytes to compare: `%s'"),
376 check_chars = (int) tmp_long;
387 printf ("uniq (%s) %s\n", GNU_PACKAGE, VERSION);
/* NOTE(review): when no option was consumed optind is presumably still 1
   here, so this guard is false and a leading +N operand would be taken
   as INPUT instead -- confirm whether that is intended. */
394 if (optind >= 2 && strcmp (argv[optind - 1], "--") != 0)
396 /* Interpret non-option arguments with leading `+' only
397 if we haven't seen `--'. */
398 while (optind < argc && argv[optind][0] == '+')
400 char *opt_str = argv[optind++];
/* A +N operand sets skip_chars, exactly like -s, so it shares the -s
   diagnostic (and its existing translations) rather than the -w one. */
402 if (xstrtol (opt_str, NULL, 10, &tmp_long, "") != LONGINT_OK
403 || tmp_long <= 0 || tmp_long > INT_MAX)
404 error (EXIT_FAILURE, 0,
405 _("invalid number of bytes to skip: `%s'"),
407 skip_chars = (int) tmp_long;
/* Remaining operands: INPUT first, then OUTPUT; anything further is a
   usage error. */
412 infile = argv[optind++];
415 outfile = argv[optind++];
418 usage (1); /* Extra arguments. */
420 check_file (infile, outfile);