+2007-05-12 James Youngman <jay@gnu.org>
+
+ Add -z option to uniq. Originally proposed by Egmont Koblinger.
+ * NEWS: Mention uniq's new option: --zero-terminated (-z).
+ * src/uniq.c: Add new option, --zero-terminated (-z), to make
+ uniq use the NUL byte as separator/delimiter rather than newline.
+ (check_file): Add a parameter: delimiter. Update caller.
+ Use readlinebuffer_delim in place of readlinebuffer everywhere.
+ (main): Handle the new option.
+ (usage): Describe new option the same way sort does.
+ * doc/coreutils.texi (uniq invocation): Describe the new option.
+
2007-05-07 Jim Meyering <jim@meyering.net>
* NEWS: Mention that last week's tr bug dates back to 1992.
@item prepend
Output a newline before each group of repeated lines.
+With @option{--zero-terminated} (@option{-z}), use
+an @acronym{ASCII} @sc{nul} (zero) byte instead of a newline.
@item separate
Separate groups of repeated lines with a single newline.
+With @option{--zero-terminated} (@option{-z}), use
+an @acronym{ASCII} @sc{nul} (zero) byte instead of a newline.
This is the same as using @samp{prepend}, except that
-there is no newline before the first group, and hence
+no delimiter is inserted before the first group, and hence
may be better suited for output direct to users.
@end table
fields and characters). By default the entire rest of the lines are
compared.
+@item -z
+@itemx --zero-terminated
+@opindex -z
+@opindex --zero-terminated
+@cindex uniq zero-terminated lines
+Treat the input as a set of lines, each terminated by a null character
+(@acronym{ASCII} @sc{nul}) instead of a line feed
+(@acronym{ASCII} @sc{lf}).
+This option can be useful in conjunction with @samp{sort -z}, @samp{perl -0},
+@samp{find -print0}, or @samp{xargs -0}, which do the same in order to
+reliably handle arbitrary file names (even those containing blanks
+or other special characters).
+
@end table
@exitstatus
/* uniq -- remove duplicate lines from a sorted file
- Copyright (C) 86, 91, 1995-2006 Free Software Foundation, Inc.
+ Copyright (C) 86, 91, 1995-2007 Free Software Foundation, Inc.
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
{"skip-fields", required_argument, NULL, 'f'},
{"skip-chars", required_argument, NULL, 's'},
{"check-chars", required_argument, NULL, 'w'},
+ {"zero-terminated", no_argument, NULL, 'z'},
{GETOPT_HELP_OPTION_DECL},
{GETOPT_VERSION_OPTION_DECL},
{NULL, 0, NULL, 0}
-i, --ignore-case ignore differences in case when comparing\n\
-s, --skip-chars=N avoid comparing the first N characters\n\
-u, --unique only print unique lines\n\
+ -z, --zero-terminated end lines with 0 byte, not newline\n\
"), stdout);
fputs (_("\
-w, --check-chars=N compare no more than N characters in lines\n\
If either is "-", use the standard I/O stream for it instead. */
static void
-check_file (const char *infile, const char *outfile)
+check_file (const char *infile, const char *outfile, char delimiter)
{
struct linebuffer lb1, lb2;
struct linebuffer *thisline, *prevline;
{
char *thisfield;
size_t thislen;
- if (readlinebuffer (thisline, stdin) == 0)
+ if (readlinebuffer_delim (thisline, stdin, delimiter) == 0)
break;
thisfield = find_field (thisline);
thislen = thisline->length - 1 - (thisfield - thisline->buffer);
uintmax_t match_count = 0;
bool first_delimiter = true;
- if (readlinebuffer (prevline, stdin) == 0)
+ if (readlinebuffer_delim (prevline, stdin, delimiter) == 0)
goto closefiles;
prevfield = find_field (prevline);
prevlen = prevline->length - 1 - (prevfield - prevline->buffer);
bool match;
char *thisfield;
size_t thislen;
- if (readlinebuffer (thisline, stdin) == 0)
+ if (readlinebuffer_delim (thisline, stdin, delimiter) == 0)
{
if (ferror (stdin))
goto closefiles;
if ((delimit_groups == DM_PREPEND)
|| (delimit_groups == DM_SEPARATE
&& !first_delimiter))
- putchar ('\n');
+ putchar (delimiter);
}
}
enum Skip_field_option_type skip_field_option_type = SFO_NONE;
int nfiles = 0;
char const *file[2];
+ char delimiter = '\n'; /* change with --zero-terminated, -z */
file[0] = file[1] = "-";
initialize_main (&argc, &argv);
if (optc == -1
|| (posixly_correct && nfiles != 0)
|| ((optc = getopt_long (argc, argv,
- "-0123456789Dcdf:is:uw:", longopts, NULL))
+ "-0123456789Dcdf:is:uw:z", longopts, NULL))
== -1))
{
if (argc <= optind)
N_("invalid number of bytes to compare"));
break;
+ case 'z':
+ delimiter = '\0';
+ break;
+
case_GETOPT_HELP_CHAR;
case_GETOPT_VERSION_CHAR (PROGRAM_NAME, AUTHORS);
usage (EXIT_FAILURE);
}
- check_file (file[0], file[1]);
+ check_file (file[0], file[1], delimiter);
exit (EXIT_SUCCESS);
}