From 7480e2db2935663b9c0c4d66418ce2801005792d Mon Sep 17 00:00:00 2001 From: =?utf8?q?J=C3=A9r=C3=A9my=20Compostella?= Date: Thu, 1 Mar 2012 20:37:41 +0100 Subject: [PATCH] split: support an arbitrary number of split files by default * src/split.c (next_file_name): If `suffix_auto' is true and the first suffix character is 'z', generate a new file name by adding `z' to the prefix and increasing the suffix length by one. (set_suffix_length): Disable auto suffix width in various cases. * tests/split/suffix-auto-length: Test it. * doc/coreutils.texi (split invocation): Mention it. * NEWS (Improvements): Likewise. --- NEWS | 2 ++ doc/coreutils.texi | 21 ++++++++++++--- src/split.c | 60 +++++++++++++++++++++++++++++++++++++----- tests/Makefile.am | 1 + tests/split/suffix-auto-length | 42 +++++++++++++++++++++++++++++ 5 files changed, 116 insertions(+), 10 deletions(-) create mode 100755 tests/split/suffix-auto-length diff --git a/NEWS b/NEWS index 7f36dc6..87ef7bd 100644 --- a/NEWS +++ b/NEWS @@ -60,6 +60,8 @@ GNU coreutils NEWS -*- outline -*- systems for which getfilecon-, ACL-check- and XATTR-check-induced syscalls fail with ENOTSUP or similar. + split now supports an unlimited number of split files as default behavior. + * Noteworthy changes in release 8.15 (2012-01-06) [stable] diff --git a/doc/coreutils.texi b/doc/coreutils.texi index cce9432..f9207e7 100644 --- a/doc/coreutils.texi +++ b/doc/coreutils.texi @@ -2990,9 +2990,15 @@ The output files' names consist of @var{prefix} (@samp{x} by default) followed by a group of characters (@samp{aa}, @samp{ab}, @dots{} by default), such that concatenating the output files in traditional sorted order by file name produces the original input file (except -@option{-nr/@var{n}}). If the output file names are exhausted, -@command{split} reports an error without deleting the output files -that it did create. +@option{-nr/@var{n}}). 
By default @command{split} will initially create files +with two generated suffix characters, and will increase this width by two +when the next most significant position reaches the last character +(@samp{yz}, @samp{zaaa}, @samp{zaab}, @dots{}). In this way an arbitrary +number of output files is supported, which sort as described above, +even in the presence of an @option{--additional-suffix} option. +If the @option{-a} option is specified and the output file names are +exhausted, @command{split} reports an error without deleting the +output files that it did create. The program accepts the following options. Also see @ref{Common options}. @@ -3082,7 +3088,11 @@ and so can be a pipe for example. @itemx --suffix-length=@var{length} @opindex -a @opindex --suffix-length -Use suffixes of length @var{length}. The default @var{length} is 2. +Use suffixes of length @var{length}. If a @var{length} of 0 is specified, +this is the same as if (any previous) @option{-a} was not specified, and +thus enables the default behavior, which starts the suffix length at 2, +and, unless @option{-n} or @option{--numeric-suffixes=@var{from}} is +specified, automatically increases the length by 2 as required. @item -d @itemx --numeric-suffixes[=@var{from}] @@ -3090,6 +3100,9 @@ Use suffixes of length @var{length}. The default @var{length} is 2. @opindex --numeric-suffixes Use digits in suffixes rather than lower-case letters. The numerical suffix counts from @var{from} if specified, 0 otherwise. +Note that specifying a @var{from} value also disables the default +auto suffix length expansion described above, and so you may also +want to specify @option{-a} to allow suffixes beyond @samp{99}. @itemx --additional-suffix=@var{suffix} @opindex --additional-suffix diff --git a/src/split.c b/src/split.c index 68c9a34..99f6390 100644 --- a/src/split.c +++ b/src/split.c @@ -74,6 +74,9 @@ static char *outfile; Suffixes are inserted here. 
*/ static char *outfile_mid; +/* Generate new suffix when suffixes are exhausted. */ +static bool suffix_auto = true; + /* Length of OUTFILE's suffix. */ static size_t suffix_length; @@ -155,6 +158,12 @@ set_suffix_length (uintmax_t n_units, enum Split_type split_type) size_t suffix_needed = 0; + /* The suffix auto length feature is incompatible with + a user specified start value as the generated suffixes + are not all consecutive. */ + if (numeric_suffix_start) + suffix_auto = false; + /* Auto-calculate the suffix length if the number of files is given. */ if (split_type == type_chunk_bytes || split_type == type_chunk_lines || split_type == type_rr) @@ -164,6 +173,7 @@ set_suffix_length (uintmax_t n_units, enum Split_type split_type) while (n_units /= alphabet_len) suffix_needed++; suffix_needed += alphabet_slop; + suffix_auto = false; } if (suffix_length) /* set by user */ @@ -174,6 +184,7 @@ set_suffix_length (uintmax_t n_units, enum Split_type split_type) _("the suffix length needs to be at least %zu"), suffix_needed); } + suffix_auto = false; return; } else @@ -242,27 +253,62 @@ next_file_name (void) { /* Index in suffix_alphabet of each character in the suffix. */ static size_t *sufindex; + static size_t outbase_length; + static size_t outfile_length; + static size_t addsuf_length; if (! outfile) { - /* Allocate and initialize the first file name. */ + bool widen; + +new_name: + widen = !! outfile_length; + + if (! widen) + { + /* Allocate and initialize the first file name. */ + + outbase_length = strlen (outbase); + addsuf_length = additional_suffix ? strlen (additional_suffix) : 0; + outfile_length = outbase_length + suffix_length + addsuf_length; + } + else + { + /* Reallocate and initialize a new wider file name. + We do this by subsuming the unchanging part of + the generated suffix into the prefix (base), and + reinitializing the now one longer suffix. 
*/ + + outfile_length += 2; + suffix_length++; + } - size_t outbase_length = strlen (outbase); - size_t addsuf_length = additional_suffix ? strlen (additional_suffix) : 0; - size_t outfile_length = outbase_length + suffix_length + addsuf_length; if (outfile_length + 1 < outbase_length) xalloc_die (); - outfile = xmalloc (outfile_length + 1); + outfile = xrealloc (outfile, outfile_length + 1); + + if (! widen) + memcpy (outfile, outbase, outbase_length); + else + { + /* Append the last alphabet character to the file name prefix. */ + outfile[outbase_length] = suffix_alphabet[sufindex[0]]; + outbase_length++; + } + outfile_mid = outfile + outbase_length; - memcpy (outfile, outbase, outbase_length); memset (outfile_mid, suffix_alphabet[0], suffix_length); if (additional_suffix) memcpy (outfile_mid + suffix_length, additional_suffix, addsuf_length); outfile[outfile_length] = 0; + + free (sufindex); sufindex = xcalloc (suffix_length, sizeof *sufindex); if (numeric_suffix_start) { + assert (! widen); + /* Update the output file name. */ size_t i = strlen (numeric_suffix_start); memcpy (outfile_mid + suffix_length - i, numeric_suffix_start, i); @@ -295,6 +341,8 @@ next_file_name (void) while (i-- != 0) { sufindex[i]++; + if (suffix_auto && i == 0 && ! 
suffix_alphabet[sufindex[0] + 1]) + goto new_name; outfile_mid[i] = suffix_alphabet[sufindex[i]]; if (outfile_mid[i]) return; diff --git a/tests/Makefile.am b/tests/Makefile.am index 5e184ac..d7a1837 100644 --- a/tests/Makefile.am +++ b/tests/Makefile.am @@ -253,6 +253,7 @@ TESTS = \ misc/sort-version \ misc/sort-NaN-infloop \ split/filter \ + split/suffix-auto-length \ split/suffix-length \ split/additional-suffix \ split/b-chunk \ diff --git a/tests/split/suffix-auto-length b/tests/split/suffix-auto-length new file mode 100755 index 0000000..dacc951 --- /dev/null +++ b/tests/split/suffix-auto-length @@ -0,0 +1,42 @@ +#!/bin/sh +# Test the suffix auto widening functionality + +# Copyright (C) 2012 Free Software Foundation, Inc. + +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. + +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. + +# You should have received a copy of the GNU General Public License +# along with this program. If not, see <http://www.gnu.org/licenses/>. + +. 
"${srcdir=.}/init.sh"; path_prepend_ ../src +print_ver_ split + + +# ensure this feature is off when start number specified +truncate -s12 file.in +split file.in -b1 --numeric=89 && fail=1 +test "$(ls -1 x* | wc -l)" = 11 || fail=1 +rm -f x* + +# ensure this feature works when no start num specified +truncate -s91 file.in +for prefix in 'x' 'xx' ''; do + for add_suffix in '.txt' ''; do + split file.in "$prefix" -b1 --numeric --additional-suffix="$add_suffix" \ + || fail=1 + test "$(ls -1 $prefix*[0-9]*$add_suffix | wc -l)" = 91 || fail=1 + test -e "${prefix}89$add_suffix" || fail=1 + test -e "${prefix}9000$add_suffix" || fail=1 + rm -f $prefix*[0-9]*$add_suffix + done +done + +Exit $fail -- 2.7.4