/* cut - remove parts of lines of files
- Copyright (C) 1997-2004 Free Software Foundation, Inc.
+ Copyright (C) 1997-2005 Free Software Foundation, Inc.
Copyright (C) 1984 David M. Ihnat
This program is free software; you can redistribute it and/or modify
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software Foundation,
- Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */
+ Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. */
/* Written by David Ihnat. */
with field mode. */
static bool suppress_non_delimited;
+/* If true, print all bytes, characters, or fields _except_
+ those that were specified. */
+static bool complement;
+
/* The delimiter character for field mode. */
static unsigned char delim;
non-character as a pseudo short option, starting with CHAR_MAX + 1. */
enum
{
- OUTPUT_DELIMITER_OPTION = CHAR_MAX + 1
+ OUTPUT_DELIMITER_OPTION = CHAR_MAX + 1,
+ COMPLEMENT_OPTION
};
static struct option const longopts[] =
{
- {"bytes", required_argument, 0, 'b'},
- {"characters", required_argument, 0, 'c'},
- {"fields", required_argument, 0, 'f'},
- {"delimiter", required_argument, 0, 'd'},
- {"only-delimited", no_argument, 0, 's'},
- {"output-delimiter", required_argument, 0, OUTPUT_DELIMITER_OPTION},
+ {"bytes", required_argument, NULL, 'b'},
+ {"characters", required_argument, NULL, 'c'},
+ {"fields", required_argument, NULL, 'f'},
+ {"delimiter", required_argument, NULL, 'd'},
+ {"only-delimited", no_argument, NULL, 's'},
+ {"output-delimiter", required_argument, NULL, OUTPUT_DELIMITER_OPTION},
+ {"complement", no_argument, NULL, COMPLEMENT_OPTION},
{GETOPT_HELP_OPTION_DECL},
{GETOPT_VERSION_OPTION_DECL},
- {0, 0, 0, 0}
+ {NULL, 0, NULL, 0}
};
void
Mandatory arguments to long options are mandatory for short options too.\n\
"), stdout);
fputs (_("\
- -b, --bytes=LIST output only these bytes\n\
- -c, --characters=LIST output only these characters\n\
+ -b, --bytes=LIST select only these bytes\n\
+ -c, --characters=LIST select only these characters\n\
-d, --delimiter=DELIM use DELIM instead of TAB for field delimiter\n\
"), stdout);
fputs (_("\
- -f, --fields=LIST output only these fields; also print any line\n\
+ -f, --fields=LIST select only these fields; also print any line\n\
that contains no delimiter character, unless\n\
the -s option is specified\n\
-n (ignored)\n\
"), stdout);
fputs (_("\
+ --complement complement the set of selected bytes, characters\n\
+ or fields.\n\
+"), stdout);
+ fputs (_("\
-s, --only-delimited do not print lines not containing delimiters\n\
--output-delimiter=STRING use STRING as the output delimiter\n\
the default is to use the input delimiter\n\
fputs (_("\
\n\
Use one, and only one of -b, -c or -f. Each LIST is made up of one\n\
-range, or many ranges separated by commas. Each range is one of:\n\
+range, or many ranges separated by commas. Selected input is written\n\
+in the same order that it is read, and is written exactly once.\n\
+Each range is one of:\n\
\n\
N N'th byte, character or field, counted from 1\n\
N- from N'th byte, character or field, to end of line\n\
}
static inline void
+mark_range_start (size_t i)
+{
+ /* Record the fact that `i' is a range-start index.  The index itself,
+    cast to a pointer, is the value inserted into range_start_ht, and
+    hash_insert is expected to hand that same value back (checked by
+    the assert below).
+    NOTE(review): an index of 0 would be inserted as a null pointer and
+    would be indistinguishable from the failure return tested below;
+    callers presumably never pass 0 -- confirm. */
+ void *ent_from_table = hash_insert (range_start_ht, (void*) i);
+ if (ent_from_table == NULL)
+ {
+ /* Insertion failed due to lack of memory. */
+ xalloc_die ();
+ }
+ assert ((size_t) ent_from_table == i);
+}
+
+static inline void
mark_printable_field (size_t i)
{
size_t n = i / CHAR_BIT;
static size_t
hash_int (const void *x, size_t tablesize)
{
+#ifdef UINTPTR_MAX
uintptr_t y = (uintptr_t) x;
+#else /* UINTPTR_MAX is not defined: convert the pointer via size_t instead. */
+ size_t y = (size_t) x;
+#endif /* UINTPTR_MAX */
return y % tablesize;
}
static bool
print_kth (size_t k, bool *range_start)
{
- if ((0 < eol_range_start && eol_range_start <= k)
- || (k <= max_range_endpoint && is_printable_field (k)))
- {
- if (range_start)
- *range_start = is_range_start_index (k);
- return true;
- }
+ bool k_selected
+ = ((0 < eol_range_start && eol_range_start <= k)
+ || (k <= max_range_endpoint && is_printable_field (k)));
- return false;
+ bool is_selected = k_selected ^ complement;
+ if (range_start && is_selected)
+ *range_start = is_range_start_index (k);
+
+ return is_selected;
+}
+
+/* Comparison function for qsort to order the list of
+   struct range_pairs by ascending start index (`lo').
+   A and B each point to a struct range_pair.  Return a negative
+   value, zero, or a positive value when A's start is respectively
+   less than, equal to, or greater than B's.
+   The start indices are compared as size_t, the type they are used
+   with elsewhere in this file; narrowing them to int could truncate
+   or wrap large values and produce an inconsistent ordering. */
+static int
+compare_ranges (const void *a, const void *b)
+{
+  size_t a_start = ((const struct range_pair *) a)->lo;
+  size_t b_start = ((const struct range_pair *) b)->lo;
+  return a_start < b_start ? -1 : a_start > b_start;
}
/* Given the list of field or byte range specifications FIELDSTR, set
}
else if (ISDIGIT (*fieldstr))
{
- size_t new_v;
/* Record beginning of digit string, in case we have to
complain about it. */
static char const *num_start;
in_digits = true;
/* Detect overflow. */
- new_v = 10 * value + *fieldstr - '0';
- if (SIZE_MAX / 10 < value || new_v < value * 10)
+ if (!DECIMAL_DIGIT_ACCUMULATE (value, *fieldstr - '0', SIZE_MAX))
{
- /* In case the user specified -c4294967296-22,
+ /* In case the user specified -c4294967296,22,
complain only about the first number. */
/* Determine the length of the offending number. */
size_t len = strspn (num_start, "0123456789");
free (bad_num);
exit (EXIT_FAILURE);
}
- value = new_v;
fieldstr++;
}
printable_field = xzalloc (max_range_endpoint / CHAR_BIT + 1);
+ qsort (rp, n_rp, sizeof (rp[0]), compare_ranges);
+
/* Set the array entries corresponding to integers in the ranges of RP. */
for (i = 0; i < n_rp; i++)
{
size_t j;
- for (j = rp[i].lo; j <= rp[i].hi; j++)
- {
- mark_printable_field (j);
- }
- }
+ size_t rsi_candidate;
- if (output_delimiter_specified)
- {
/* Record the range-start indices, i.e., record each start
index that is not part of any other (lo..hi] range. */
- for (i = 0; i <= n_rp; i++)
- {
- size_t j;
- size_t rsi = (i < n_rp ? rp[i].lo : eol_range_start);
+ rsi_candidate = complement ? rp[i].hi + 1 : rp[i].lo;
+ if (output_delimiter_specified
+ && !is_printable_field (rsi_candidate))
+ mark_range_start (rsi_candidate);
- for (j = 0; j < n_rp; j++)
- {
- if (rp[j].lo < rsi && rsi <= rp[j].hi)
- {
- rsi = 0;
- break;
- }
- }
-
- if (eol_range_start && eol_range_start < rsi)
- rsi = 0;
-
- if (rsi)
- {
- /* Record the fact that `rsi' is a range-start index. */
- void *ent_from_table = hash_insert (range_start_ht, (void*) rsi);
- if (ent_from_table == NULL)
- {
- /* Insertion failed due to lack of memory. */
- xalloc_die ();
- }
- assert ((size_t) ent_from_table == rsi);
- }
- }
+ for (j = rp[i].lo; j <= rp[i].hi; j++)
+ mark_printable_field (j);
}
+ if (output_delimiter_specified
+ && !complement
+ && eol_range_start && !is_printable_field (eol_range_start))
+ mark_range_start (eol_range_start);
+
free (rp);
return field_found;
print_delimiter = false;
while (1)
{
- register int c; /* Each character from the file. */
+ int c; /* Each character from the file. */
c = getc (stream);
/* If the first field extends to the end of line (it is not
delimited) and we are printing all non-delimited lines,
print this one. */
- if ((unsigned char) field_1_buffer[n_bytes - 1] != delim)
+ if (to_uchar (field_1_buffer[n_bytes - 1]) != delim)
{
if (suppress_non_delimited)
{
}
/* Process file FILE to standard output.
- Return 0 if successful, 1 if not. */
+ Return true if successful. */
-static int
+static bool
cut_file (char *file)
{
FILE *stream;
if (stream == NULL)
{
error (0, errno, "%s", file);
- return 1;
+ return false;
}
}
if (ferror (stream))
{
error (0, errno, "%s", file);
- return 1;
+ return false;
}
if (STREQ (file, "-"))
clearerr (stream); /* Also clear EOF. */
else if (fclose (stream) == EOF)
{
error (0, errno, "%s", file);
- return 1;
+ return false;
}
- return 0;
+ return true;
}
int
main (int argc, char **argv)
{
- int optc, exit_status = 0;
+ int optc;
+ bool ok;
bool delim_specified = false;
char *spec_list_string IF_LINT(= NULL);
{
switch (optc)
{
- case 0:
- break;
-
case 'b':
case 'c':
/* Build the byte list. */
suppress_non_delimited = true;
break;
+ case COMPLEMENT_OPTION:
+ complement = true;
+ break;
+
case_GETOPT_HELP_CHAR;
case_GETOPT_VERSION_CHAR (PROGRAM_NAME, AUTHORS);
}
if (optind == argc)
- exit_status |= cut_file ("-");
+ ok = cut_file ("-");
else
- for (; optind < argc; optind++)
- exit_status |= cut_file (argv[optind]);
+ for (ok = true; optind < argc; optind++)
+ ok &= cut_file (argv[optind]);
if (range_start_ht)
hash_free (range_start_ht);
if (have_read_stdin && fclose (stdin) == EOF)
{
error (0, errno, "-");
- exit_status = 1;
+ ok = false;
}
- exit (exit_status == 0 ? EXIT_SUCCESS : EXIT_FAILURE);
+ exit (ok ? EXIT_SUCCESS : EXIT_FAILURE);
}