1 /* split.c -- split a file into pieces.
2 Copyright (C) 1988-2013 Free Software Foundation, Inc.
4 This program is free software: you can redistribute it and/or modify
5 it under the terms of the GNU General Public License as published by
6 the Free Software Foundation, either version 3 of the License, or
7 (at your option) any later version.
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU General Public License for more details.
14 You should have received a copy of the GNU General Public License
15 along with this program. If not, see <http://www.gnu.org/licenses/>. */
17 /* By tege@sics.se, with rms.
20 * Implement -t CHAR or -t REGEX to specify break characters other
than newline. */
29 #include <sys/types.h>
34 #include "fd-reopen.h"
36 #include "full-read.h"
37 #include "full-write.h"
38 #include "ioblksize.h"
40 #include "safe-read.h"
45 /* The official name of this program (e.g., no 'g' prefix). */
46 #define PROGRAM_NAME "split"
49 proper_name_utf8 ("Torbjorn Granlund", "Torbj\303\266rn Granlund"), \
50 proper_name ("Richard M. Stallman")
/* File-scope state shared by option parsing and the split routines.
   How the rest of this file uses it, as far as is visible here:
   - FILTER_COMMAND is assigned from optarg during option parsing and
     is the guard in the "err == EPIPE" ignorable-error test.
   - FILTER_PID receives the forked child's pid in create.
   - OPEN_PIPES collects the pipe write ends opened by create; the
     forked child closes them and closeout removes an entry by fd.
   - SUFFIX_ALPHABET is replaced by "0123456789" during option parsing.
   - OUTPUT_DESC starts at -1 and is reassigned from create in cwrite.
   NOTE(review): the diagnostic comment below says "standard error",
   but the visible fprintf calls in create ("creating file %s",
   "executing with FILE=%s") write to stdout -- confirm and reconcile.
   NOTE(review): some declarations described by the comments below
   (output file name, input file name, verbose flag) are not present
   in this listing.  */
52 /* Shell command to filter through, instead of creating files. */
53 static char const *filter_command;
55 /* Process ID of the filter. */
56 static int filter_pid;
58 /* Array of open pipes. */
59 static int *open_pipes;
60 static size_t open_pipes_alloc;
61 static size_t n_open_pipes;
63 /* Blocked signals. */
64 static sigset_t oldblocked;
65 static sigset_t newblocked;
67 /* Base name of output files. */
68 static char const *outbase;
70 /* Name of output files. */
73 /* Pointer to the end of the prefix in OUTFILE.
74 Suffixes are inserted here. */
75 static char *outfile_mid;
77 /* Generate new suffix when suffixes are exhausted. */
78 static bool suffix_auto = true;
80 /* Length of OUTFILE's suffix. */
81 static size_t suffix_length;
83 /* Alphabet of characters to use in suffix. */
84 static char const *suffix_alphabet = "abcdefghijklmnopqrstuvwxyz";
86 /* Numerical suffix start value. */
87 static const char *numeric_suffix_start;
89 /* Additional suffix to append to output file names. */
90 static char const *additional_suffix;
92 /* Name of input file. May be "-". */
95 /* stat buf for input file. */
96 static struct stat in_stat_buf;
98 /* Descriptor on which output file is open. */
99 static int output_desc = -1;
101 /* If true, print a diagnostic on standard error just before each
102 output file is opened. */
105 /* If true, don't generate zero length output files. */
106 static bool elide_empty_files;
108 /* If true, in round robin mode, immediately copy
109 input to output, which is much slower, so disabled by default. */
110 static bool unbuffered;
112 /* The split mode to use. */
115 type_undef, type_bytes, type_byteslines, type_lines, type_digits,
116 type_chunk_bytes, type_chunk_lines, type_rr
119 /* For long options that have no equivalent short option, use a
120 non-character as a pseudo short option, starting with CHAR_MAX + 1. */
123 VERBOSE_OPTION = CHAR_MAX + 1,
126 ADDITIONAL_SUFFIX_OPTION
/* Long-option table.  Entries that have a short equivalent carry that
   character ('b', 'l', 'C', 'n', 'e', 'u', 'a', 'd'); "additional-suffix",
   "filter", "verbose" and "-io-blksize" carry pseudo-option constants
   instead.  "numeric-suffixes" is the only entry with an optional
   argument.  "-io-blksize" is marked as intentionally undocumented.
   NOTE(review): the array's braces and terminating entry are not
   present in this listing.  */
129 static struct option const longopts[] =
131 {"bytes", required_argument, NULL, 'b'},
132 {"lines", required_argument, NULL, 'l'},
133 {"line-bytes", required_argument, NULL, 'C'},
134 {"number", required_argument, NULL, 'n'},
135 {"elide-empty-files", no_argument, NULL, 'e'},
136 {"unbuffered", no_argument, NULL, 'u'},
137 {"suffix-length", required_argument, NULL, 'a'},
138 {"additional-suffix", required_argument, NULL,
139 ADDITIONAL_SUFFIX_OPTION},
140 {"numeric-suffixes", optional_argument, NULL, 'd'},
141 {"filter", required_argument, NULL, FILTER_OPTION},
142 {"verbose", no_argument, NULL, VERBOSE_OPTION},
143 {"-io-blksize", required_argument, NULL,
144 IO_BLKSIZE_OPTION}, /* do not document */
145 {GETOPT_HELP_OPTION_DECL},
146 {GETOPT_VERSION_OPTION_DECL},
150 /* Return true if the errno value, ERR, is ignorable. */
154 return filter_command && err == EPIPE;
/* Settle SUFFIX_LENGTH for N_UNITS output units in mode SPLIT_TYPE.
   For the chunk and round-robin modes (type_chunk_bytes,
   type_chunk_lines, type_rr) the number of suffix characters needed is
   derived from N_UNITS and strlen (SUFFIX_ALPHABET); ALPHABET_SLOP adds
   one when N_UNITS is not an exact multiple of the alphabet size.
   A user-supplied SUFFIX_LENGTH that is smaller than the computed need
   is a fatal error ("the suffix length needs to be at least ...").
   The visible fallback assignment is
   MAX (DEFAULT_SUFFIX_LENGTH, suffix_needed).
   NOTE(review): braces and several statements (for example the body of
   the numeric_suffix_start test and of the while loop) are missing
   from this listing; only the visible steps are described.  */
158 set_suffix_length (uintmax_t n_units, enum Split_type split_type)
160 #define DEFAULT_SUFFIX_LENGTH 2
162 size_t suffix_needed = 0;
164 /* The suffix auto length feature is incompatible with
165 a user specified start value as the generated suffixes
166 are not all consecutive. */
167 if (numeric_suffix_start)
170 /* Auto-calculate the suffix length if the number of files is given. */
171 if (split_type == type_chunk_bytes || split_type == type_chunk_lines
172 || split_type == type_rr)
174 size_t alphabet_len = strlen (suffix_alphabet);
175 bool alphabet_slop = (n_units % alphabet_len) != 0;
176 while (n_units /= alphabet_len)
178 suffix_needed += alphabet_slop;
182 if (suffix_length) /* set by user */
184 if (suffix_length < suffix_needed)
186 error (EXIT_FAILURE, 0,
187 _("the suffix length needs to be at least %zu"),
194 suffix_length = MAX (DEFAULT_SUFFIX_LENGTH, suffix_needed);
200 if (status != EXIT_SUCCESS)
205 Usage: %s [OPTION]... [INPUT [PREFIX]]\n\
209 Output fixed-size pieces of INPUT to PREFIXaa, PREFIXab, ...; default\n\
210 size is 1000 lines, and default PREFIX is 'x'. With no INPUT, or when INPUT\n\
211 is -, read standard input.\n\
215 Mandatory arguments to long options are mandatory for short options too.\n\
217 fprintf (stdout, _("\
218 -a, --suffix-length=N generate suffixes of length N (default %d)\n\
219 --additional-suffix=SUFFIX append an additional SUFFIX to file names.\n\
220 -b, --bytes=SIZE put SIZE bytes per output file\n\
221 -C, --line-bytes=SIZE put at most SIZE bytes of lines per output file\n\
222 -d, --numeric-suffixes[=FROM] use numeric suffixes instead of alphabetic.\n\
223 FROM changes the start value (default 0).\n\
224 -e, --elide-empty-files do not generate empty output files with '-n'\n\
225 --filter=COMMAND write to shell COMMAND; file name is $FILE\n\
226 -l, --lines=NUMBER put NUMBER lines per output file\n\
227 -n, --number=CHUNKS generate CHUNKS output files. See below\n\
228 -u, --unbuffered immediately copy input to output with '-n r/...'\n\
229 "), DEFAULT_SUFFIX_LENGTH);
231 --verbose print a diagnostic just before each\n\
232 output file is opened\n\
234 fputs (HELP_OPTION_DESCRIPTION, stdout);
235 fputs (VERSION_OPTION_DESCRIPTION, stdout);
239 N split into N files based on size of input\n\
240 K/N output Kth of N to stdout\n\
241 l/N split into N files without splitting lines\n\
242 l/K/N output Kth of N to stdout without splitting lines\n\
243 r/N like 'l' but use round robin distribution\n\
244 r/K/N likewise but only output Kth of N to stdout\n\
246 emit_ancillary_info ();
/* next_file_name: build or advance the output file name.
   Visible behaviour:
   - WIDEN is true once OUTFILE_LENGTH is nonzero, i.e. a name has
     already been sized.
   - First-time sizing: OUTFILE_LENGTH is strlen (OUTBASE) +
     SUFFIX_LENGTH + the length of ADDITIONAL_SUFFIX (0 if unset).
   - Widening: the name is reallocated with xrealloc and the alphabet
     character selected by sufindex[0] is appended to the prefix, so
     the old suffix's leading character becomes part of the base.
   - The suffix area starting at OUTFILE_MID is filled with the first
     alphabet character, ADDITIONAL_SUFFIX is copied after it, and the
     string is NUL-terminated; SUFINDEX is (re)allocated zeroed with
     one index per suffix character.
   - With NUMERIC_SUFFIX_START, its digits are copied right-aligned
     into the suffix and the matching indexes stored in SUFINDEX.
   - Where _POSIX_NO_TRUNC is not in effect and pathconf is available,
     a last component longer than _PC_NAME_MAX is fatal with
     ENAMETOOLONG.
   - Otherwise the existing suffix is incremented in place; running
     out is fatal ("output file suffixes exhausted").
   NOTE(review): the opening comment below is unterminated in this
   listing and many control-flow lines are missing; the ordering above
   follows the visible statements only.  */
251 /* Compute the next sequential output file name and store it into the
255 next_file_name (void)
257 /* Index in suffix_alphabet of each character in the suffix. */
258 static size_t *sufindex;
259 static size_t outbase_length;
260 static size_t outfile_length;
261 static size_t addsuf_length;
268 widen = !! outfile_length;
272 /* Allocate and initialize the first file name. */
274 outbase_length = strlen (outbase);
275 addsuf_length = additional_suffix ? strlen (additional_suffix) : 0;
276 outfile_length = outbase_length + suffix_length + addsuf_length;
280 /* Reallocate and initialize a new wider file name.
281 We do this by subsuming the unchanging part of
282 the generated suffix into the prefix (base), and
283 reinitializing the now one longer suffix. */
289 if (outfile_length + 1 < outbase_length)
291 outfile = xrealloc (outfile, outfile_length + 1);
294 memcpy (outfile, outbase, outbase_length);
297 /* Append the last alphabet character to the file name prefix. */
298 outfile[outbase_length] = suffix_alphabet[sufindex[0]];
302 outfile_mid = outfile + outbase_length;
303 memset (outfile_mid, suffix_alphabet[0], suffix_length);
304 if (additional_suffix)
305 memcpy (outfile_mid + suffix_length, additional_suffix, addsuf_length);
306 outfile[outfile_length] = 0;
309 sufindex = xcalloc (suffix_length, sizeof *sufindex);
311 if (numeric_suffix_start)
315 /* Update the output file name. */
316 size_t i = strlen (numeric_suffix_start);
317 memcpy (outfile_mid + suffix_length - i, numeric_suffix_start, i);
319 /* Update the suffix index. */
320 size_t *sufindex_end = sufindex + suffix_length;
322 *--sufindex_end = numeric_suffix_start[i] - '0';
325 #if ! _POSIX_NO_TRUNC && HAVE_PATHCONF && defined _PC_NAME_MAX
326 /* POSIX requires that if the output file name is too long for
327 its directory, 'split' must fail without creating any files.
328 This must be checked for explicitly on operating systems that
329 silently truncate file names. */
331 char *dir = dir_name (outfile);
332 long name_max = pathconf (dir, _PC_NAME_MAX);
333 if (0 <= name_max && name_max < base_len (last_component (outfile)))
334 error (EXIT_FAILURE, ENAMETOOLONG, "%s", outfile);
341 /* Increment the suffix in place, if possible. */
343 size_t i = suffix_length;
347 if (suffix_auto && i == 0 && ! suffix_alphabet[sufindex[0] + 1])
349 outfile_mid[i] = suffix_alphabet[sufindex[i]];
353 outfile_mid[i] = suffix_alphabet[sufindex[i]];
355 error (EXIT_FAILURE, 0, _("output file suffixes exhausted"));
/* create: open the output destination called NAME.
   Two paths are visible.
   Plain file: NAME is opened O_WRONLY | O_CREAT | O_BINARY (note: no
   O_TRUNC).  The new descriptor is fstat'ed and compared with
   IN_STAT_BUF; if they are the same inode the program aborts with
   "would overwrite input".  Only after that check is the file emptied
   with ftruncate, so the input is never truncated by accident.
   Filter: the shell is taken from $SHELL (default "/bin/sh"), NAME is
   exported as $FILE, and a pipe is created.  The child closes every
   descriptor recorded in OPEN_PIPES plus the new write end, moves the
   read end onto STDIN_FILENO, restores the OLDBLOCKED signal mask and
   execs "SHELL -c FILTER_COMMAND".  The parent closes the read end,
   stores the child's pid in FILTER_PID and appends the write end to
   OPEN_PIPES (grown with x2nrealloc when full).
   The caller in cwrite treats a negative result as failure.
   NOTE(review): the lines selecting between the two paths, the fork
   call and the return statements are missing from this listing.  */
359 /* Create or truncate a file. */
362 create (const char *name)
367 fprintf (stdout, _("creating file %s\n"), quote (name));
369 int fd = open (name, O_WRONLY | O_CREAT | O_BINARY, MODE_RW_UGO);
372 struct stat out_stat_buf;
373 if (fstat (fd, &out_stat_buf) != 0)
374 error (EXIT_FAILURE, errno, _("failed to stat %s"), quote (name));
375 if (SAME_INODE (in_stat_buf, out_stat_buf))
376 error (EXIT_FAILURE, 0, _("%s would overwrite input; aborting"),
378 if (ftruncate (fd, 0) != 0)
379 error (EXIT_FAILURE, errno, _("%s: error truncating"), quote (name));
387 char const *shell_prog = getenv ("SHELL");
388 if (shell_prog == NULL)
389 shell_prog = "/bin/sh";
390 if (setenv ("FILE", name, 1) != 0)
391 error (EXIT_FAILURE, errno,
392 _("failed to set FILE environment variable"));
394 fprintf (stdout, _("executing with FILE=%s\n"), quote (name));
395 if (pipe (fd_pair) != 0)
396 error (EXIT_FAILURE, errno, _("failed to create pipe"));
400 /* This is the child process. If an error occurs here, the
401 parent will eventually learn about it after doing a wait,
402 at which time it will emit its own error message. */
404 /* We have to close any pipes that were opened during an
405 earlier call, otherwise this process will be holding a
406 write-pipe that will prevent the earlier process from
407 reading an EOF on the corresponding read-pipe. */
408 for (j = 0; j < n_open_pipes; ++j)
409 if (close (open_pipes[j]) != 0)
410 error (EXIT_FAILURE, errno, _("closing prior pipe"));
411 if (close (fd_pair[1]))
412 error (EXIT_FAILURE, errno, _("closing output pipe"));
413 if (fd_pair[0] != STDIN_FILENO)
415 if (dup2 (fd_pair[0], STDIN_FILENO) != STDIN_FILENO)
416 error (EXIT_FAILURE, errno, _("moving input pipe"));
417 if (close (fd_pair[0]) != 0)
418 error (EXIT_FAILURE, errno, _("closing input pipe"));
420 sigprocmask (SIG_SETMASK, &oldblocked, NULL);
421 execl (shell_prog, last_component (shell_prog), "-c",
422 filter_command, (char *) NULL);
423 error (EXIT_FAILURE, errno, _("failed to run command: \"%s -c %s\""),
424 shell_prog, filter_command);
427 error (EXIT_FAILURE, errno, _("fork system call failed"));
428 if (close (fd_pair[0]) != 0)
429 error (EXIT_FAILURE, errno, _("failed to close input pipe"));
430 filter_pid = child_pid;
431 if (n_open_pipes == open_pipes_alloc)
432 open_pipes = x2nrealloc (open_pipes, &open_pipes_alloc,
434 open_pipes[n_open_pipes++] = fd_pair[1];
/* closeout (FP, FD, PID, NAME): finish with one output destination.
   Visible steps:
   - If FP is non-null it is fclose'd; a failure is fatal unless the
     errno is one the ignorable test accepts.
   - If FP is null, FD is closed directly with close; failure is fatal.
   - FD is removed from OPEN_PIPES by overwriting its slot with the
     last entry and shrinking N_OPEN_PIPES.
   - The child PID is reaped with waitpid; ECHILD is tolerated, any
     other waitpid error is fatal.
   - A child killed by a signal, or one exiting with a status, is
     reported with messages naming FILE=NAME and FILTER_COMMAND; the
     exit-status report uses the child's own status as the error
     status.  Any other wait status is reported as unknown.
   NOTE(review): the conditions guarding each step (for instance which
   signals or exit codes are reported) are missing from this
   listing.  */
439 /* Close the output file, and do any associated cleanup.
440 If FP and FD are both specified, they refer to the same open file;
441 in this case FP is closed, but FD is still used in cleanup. */
443 closeout (FILE *fp, int fd, pid_t pid, char const *name)
445 if (fp != NULL && fclose (fp) != 0 && ! ignorable (errno))
446 error (EXIT_FAILURE, errno, "%s", name);
449 if (fp == NULL && close (fd) < 0)
450 error (EXIT_FAILURE, errno, "%s", name);
452 for (j = 0; j < n_open_pipes; ++j)
454 if (open_pipes[j] == fd)
456 open_pipes[j] = open_pipes[--n_open_pipes];
464 if (waitpid (pid, &wstatus, 0) == -1 && errno != ECHILD)
465 error (EXIT_FAILURE, errno, _("waiting for child process"));
466 if (WIFSIGNALED (wstatus))
468 int sig = WTERMSIG (wstatus);
471 char signame[MAX (SIG2STR_MAX, INT_BUFSIZE_BOUND (int))];
472 if (sig2str (sig, signame) != 0)
473 sprintf (signame, "%d", sig);
475 _("with FILE=%s, signal %s from command: %s"),
476 name, signame, filter_command);
479 else if (WIFEXITED (wstatus))
481 int ex = WEXITSTATUS (wstatus);
483 error (ex, 0, _("with FILE=%s, exit %d from command: %s"),
484 name, ex, filter_command);
488 /* shouldn't happen. */
489 error (EXIT_FAILURE, 0,
490 _("unknown status from command (0x%X)"), wstatus);
/* cwrite (NEW_FILE_FLAG, BP, BYTES): the single write path used by
   the splitting routines.
   Visible steps: a null BP with BYTES == 0 is tested together with
   ELIDE_EMPTY_FILES (callers pass exactly that to request an empty
   output file); the current output is finished via closeout using
   OUTPUT_DESC, FILTER_PID and OUTFILE; the next destination is opened
   with create, a negative result being fatal; finally BYTES bytes are
   written with full_write, a short write being fatal unless the errno
   is accepted by the ignorable test.
   NOTE(review): the statement guarded by the first test and the
   test of NEW_FILE_FLAG itself are missing from this listing.  */
495 /* Write BYTES bytes at BP to an output file.
496 If NEW_FILE_FLAG is true, open the next output file.
497 Otherwise add to the same output file already in use. */
500 cwrite (bool new_file_flag, const char *bp, size_t bytes)
504 if (!bp && bytes == 0 && elide_empty_files)
506 closeout (NULL, output_desc, filter_pid, outfile);
508 if ((output_desc = create (outfile)) < 0)
509 error (EXIT_FAILURE, errno, "%s", outfile);
511 if (full_write (output_desc, bp, bytes) != bytes && ! ignorable (errno))
512 error (EXIT_FAILURE, errno, "%s", outfile);
/* bytes_split (N_BYTES, BUF, BUFSIZE, MAX_FILES): byte-count splitter.
   Reads standard input in BUFSIZE blocks with full_read (a short read
   with errno set is fatal) until a read returns fewer than BUFSIZE
   bytes.  TO_WRITE tracks how many bytes the current output still
   needs.  When the buffered remainder is smaller than that, it is
   written (never as a zero-byte write) and the same file stays open;
   otherwise the file is completed and NEW_FILE_FLAG is recomputed.
   OPENED counts files started.  MAX_FILES == 0 means no limit; with a
   limit, once OPENED reaches it no further file is started, and if
   the errno is then an ignorable one the routine stops reading.
   After the loop, empty files are created until OPENED reaches
   MAX_FILES.  In this file main passes MAX_FILES = 0 for byte mode
   and the chunk count for chunk mode.
   NOTE(review): loop headers and several assignments are missing
   from this listing.  */
515 /* Split into pieces of exactly N_BYTES bytes.
516 Use buffer BUF, whose size is BUFSIZE. */
519 bytes_split (uintmax_t n_bytes, char *buf, size_t bufsize, uintmax_t max_files)
522 bool new_file_flag = true;
524 uintmax_t to_write = n_bytes;
526 uintmax_t opened = 0;
530 n_read = full_read (STDIN_FILENO, buf, bufsize);
531 if (n_read < bufsize && errno)
532 error (EXIT_FAILURE, errno, "%s", infile);
537 if (to_read < to_write)
539 if (to_read) /* do not write 0 bytes! */
541 cwrite (new_file_flag, bp_out, to_read);
542 opened += new_file_flag;
544 new_file_flag = false;
551 cwrite (new_file_flag, bp_out, w);
552 opened += new_file_flag;
553 new_file_flag = !max_files || (opened < max_files);
554 if (!new_file_flag && ignorable (errno))
556 /* If filter no longer accepting input, stop reading. */
566 while (n_read == bufsize);
568 /* Ensure NUMBER files are created, which truncates
569 any existing files or notifies any consumers on fifos.
570 FIXME: Should we do this before EXIT_FAILURE? */
571 while (opened++ < max_files)
572 cwrite (true, NULL, 0);
/* lines_split (N_LINES, BUF, BUFSIZE): line-count splitter.
   Reads standard input in BUFSIZE blocks with full_read (a short read
   with errno set is fatal) until a read returns fewer than BUFSIZE
   bytes.  Within a block it searches for '\n' with memchr; text that
   reaches the end of the block without completing the current output
   is written and the same file is kept open (never as a zero-byte
   write), while the other visible write is followed by
   NEW_FILE_FLAG = true so the next write starts a new file.
   NOTE(review): the memchr length is "eob - bp + 1", i.e. it looks one
   byte past EOB; main allocates one byte more than the block size,
   presumably for a sentinel newline stored at EOB -- that store and
   the line counting are not visible in this listing; confirm.  */
575 /* Split into pieces of exactly N_LINES lines.
576 Use buffer BUF, whose size is BUFSIZE. */
579 lines_split (uintmax_t n_lines, char *buf, size_t bufsize)
582 char *bp, *bp_out, *eob;
583 bool new_file_flag = true;
588 n_read = full_read (STDIN_FILENO, buf, bufsize);
589 if (n_read < bufsize && errno)
590 error (EXIT_FAILURE, errno, "%s", infile);
596 bp = memchr (bp, '\n', eob - bp + 1);
599 if (eob != bp_out) /* do not write 0 bytes! */
601 size_t len = eob - bp_out;
602 cwrite (new_file_flag, bp_out, len);
603 new_file_flag = false;
611 cwrite (new_file_flag, bp_out, bp - bp_out);
613 new_file_flag = true;
618 while (n_read == bufsize);
/* line_bytes_split (N_BYTES): size-limited, line-aligned splitter.
   Allocates a private buffer of N_BYTES bytes with xmalloc and keeps
   N_BUFFERED bytes of pending input in it.  Each round tops the
   buffer up from standard input with full_read (a short read with
   errno set is fatal).  When the buffer is completely full, the end
   of the piece is moved back to just after the last '\n'; if there is
   no newline the whole buffer is used.  The piece is written as a new
   output file with cwrite (true, ...), and the unwritten tail is
   moved to the front of the buffer with memmove (the regions may
   overlap) to begin the next piece.
   NOTE(review): loop structure and end-of-input handling are missing
   from this listing.  */
621 /* Split into pieces that are as large as possible while still not more
622 than N_BYTES bytes, and are split on line boundaries except
623 where lines longer than N_BYTES bytes occur.
624 FIXME: Allow N_BYTES to be any uintmax_t value, and don't require a
625 buffer of size N_BYTES, in case N_BYTES is very large. */
628 line_bytes_split (size_t n_bytes)
632 size_t n_buffered = 0;
633 char *buf = xmalloc (n_bytes);
637 /* Fill up the full buffer size from the input file. */
639 size_t to_read = n_bytes - n_buffered;
640 size_t n_read = full_read (STDIN_FILENO, buf + n_buffered, to_read);
641 if (n_read < to_read && errno)
642 error (EXIT_FAILURE, errno, "%s", infile);
644 n_buffered += n_read;
645 if (n_buffered != n_bytes)
652 /* Find where to end this chunk. */
653 bp = buf + n_buffered;
654 if (n_buffered == n_bytes)
656 while (bp > buf && bp[-1] != '\n')
660 /* If chunk has no newlines, use all the chunk. */
662 bp = buf + n_buffered;
664 /* Output the chars as one output file. */
665 cwrite (true, buf, bp - buf);
667 /* Discard the chars we just output; move rest of chunk
668 down to be the start of the next chunk. Source and
669 destination probably overlap. */
670 n_buffered -= bp - buf;
672 memmove (buf, bp, n_buffered);
/* lines_chunk_split (K, N, BUF, BUFSIZE, ...): line-preserving chunker.
   Requires N != 0, K <= N and N <= file_size (asserted).  The input is
   divided into N partitions of CHUNK_SIZE = file_size / N bytes;
   CHUNK_END is the offset of the last byte of the current partition
   and CHUNK_NO its 1-based number.
   Visible behaviour:
   - For extraction of a later chunk, the input is positioned with
     lseek one byte before the start of chunk K, relative to the
     current offset.
   - The main loop reads BUFSIZE blocks with full_read until
     N_WRITTEN reaches file_size, clamping each read to the bytes
     still expected.
   - Within a block the search for '\n' starts at the last byte of
     the current partition (SKIP), so a line that starts inside a
     partition is written whole.
   - Output goes straight to STDOUT_FILENO with full_write in one
     path and through cwrite in the other.
   - After a line that crosses partition boundaries, CHUNK_END is
     advanced partition by partition (the last one ends at
     file_size - 1) and an empty file is created for each partition
     that was skipped entirely.
   - With K == 0, empty files are created at the end until N files
     exist.
   NOTE(review): the final parameter line, most branch conditions and
   the statements updating CHUNK_NO are missing from this listing.  */
678 /* -n l/[K/]N: Write lines to files of approximately file size / N.
679 The file is partitioned into file size / N sized portions, with the
680 last assigned any excess. If a line _starts_ within a partition
681 it is written completely to the corresponding file. Since lines
682 are not split even if they overlap a partition, the files written
683 can be larger or smaller than the partition size, and even empty
684 if a line is so long as to completely overlap the partition. */
687 lines_chunk_split (uintmax_t k, uintmax_t n, char *buf, size_t bufsize,
690 assert (n && k <= n && n <= file_size);
692 const off_t chunk_size = file_size / n;
693 uintmax_t chunk_no = 1;
694 off_t chunk_end = chunk_size - 1;
696 bool new_file_flag = true;
697 bool chunk_truncated = false;
701 /* Start reading 1 byte before kth chunk of file. */
702 off_t start = (k - 1) * chunk_size - 1;
703 if (lseek (STDIN_FILENO, start, SEEK_CUR) < 0)
704 error (EXIT_FAILURE, errno, "%s", infile);
707 chunk_end = chunk_no * chunk_size - 1;
710 while (n_written < file_size)
712 char *bp = buf, *eob;
713 size_t n_read = full_read (STDIN_FILENO, buf, bufsize);
714 if (n_read < bufsize && errno)
715 error (EXIT_FAILURE, errno, "%s", infile);
716 else if (n_read == 0)
718 n_read = MIN (n_read, file_size - n_written);
719 chunk_truncated = false;
727 /* Begin looking for '\n' at last byte of chunk. */
728 off_t skip = MIN (n_read, MAX (0, chunk_end - n_written));
729 char *bp_out = memchr (bp + skip, '\n', n_read - skip);
734 to_write = bp_out - bp;
738 /* We don't use the stdout buffer here since we're writing
739 large chunks from an existing file, so it's more efficient
740 to write out directly. */
741 if (full_write (STDOUT_FILENO, bp, to_write) != to_write)
742 error (EXIT_FAILURE, errno, "%s", _("write error"));
745 cwrite (new_file_flag, bp, to_write);
746 n_written += to_write;
749 new_file_flag = next;
751 /* A line could have been so long that it skipped
752 entire chunks. So create empty files in that case. */
753 while (next || chunk_end <= n_written - 1)
755 if (!next && bp == eob)
757 /* replenish buf, before going to next chunk. */
758 chunk_truncated = true;
762 if (k && chunk_no > k)
765 chunk_end = file_size - 1; /* >= chunk_size. */
767 chunk_end += chunk_size;
768 if (chunk_end <= n_written - 1)
771 cwrite (true, NULL, 0);
782 /* Ensure NUMBER files are created, which truncates
783 any existing files or notifies any consumers on fifos.
784 FIXME: Should we do this before EXIT_FAILURE? */
785 while (!k && chunk_no++ <= n)
786 cwrite (true, NULL, 0);
/* bytes_chunk_extract (K, N, BUF, BUFSIZE, ...): copy one byte chunk
   to standard output.
   Requires K != 0, N != 0, K <= N and N <= file_size (asserted).
   The chunk covers offsets [START, END) with
   START = (K - 1) * (file_size / N) and END = K * (file_size / N),
   except that the last chunk (K == N) runs to file_size and so
   absorbs the remainder.  The input is advanced by START bytes from
   its current offset with lseek, then read in BUFSIZE blocks with
   full_read; each block is clamped to END - START and written to
   STDOUT_FILENO with full_write.  Read errors are fatal; a short
   write is fatal unless the errno is accepted by the ignorable test.
   NOTE(review): the final parameter line, the loop header and the
   statement advancing START are missing from this listing.  */
789 /* -n K/N: Extract Kth of N chunks. */
792 bytes_chunk_extract (uintmax_t k, uintmax_t n, char *buf, size_t bufsize,
798 assert (k && n && k <= n && n <= file_size);
800 start = (k - 1) * (file_size / n);
801 end = (k == n) ? file_size : k * (file_size / n);
803 if (lseek (STDIN_FILENO, start, SEEK_CUR) < 0)
804 error (EXIT_FAILURE, errno, "%s", infile);
808 size_t n_read = full_read (STDIN_FILENO, buf, bufsize);
809 if (n_read < bufsize && errno)
810 error (EXIT_FAILURE, errno, "%s", infile);
811 else if (n_read == 0)
813 n_read = MIN (n_read, end - start);
814 if (full_write (STDOUT_FILENO, buf, n_read) != n_read
815 && ! ignorable (errno))
816 error (EXIT_FAILURE, errno, "%s", quote ("-"));
821 typedef struct of_info
/* ofile_open (FILES, I_CHECK, NFILES): make sure FILES[I_CHECK] has an
   open descriptor and stream.
   Work is only done when the entry's OFD is <= OFD_NEW.  An OFD_NEW
   entry is opened with create; any other such entry (OFD_APPEND) is
   reopened with O_WRONLY | O_BINARY | O_APPEND | O_NONBLOCK.
   If the open fails with EMFILE or ENFILE, the table is searched
   backwards and circularly, starting just before I_CHECK, for an
   entry with a non-negative OFD; that stream is fclose'd and marked
   OFD_APPEND so it can be reopened later, and the open is retried.
   Any other open error is fatal, as is coming back round to I_CHECK
   without finding a file to close.
   On success the descriptor is stored in OFD, wrapped with
   fdopen (fd, "a") into OFILE, and OPID is set from FILTER_PID.
   FILE_LIMIT starts false; per the comment below it reports whether
   file-descriptor pressure was encountered.
   NOTE(review): the retry loop header, the statement setting
   FILE_LIMIT and the return are missing from this listing.  */
835 /* Rotate file descriptors when we're writing to more output files than we
836 have available file descriptors.
837 Return whether we came under file resource pressure.
838 If so, it's probably best to close each file when finished with it. */
841 ofile_open (of_t *files, size_t i_check, size_t nfiles)
843 bool file_limit = false;
845 if (files[i_check].ofd <= OFD_NEW)
848 size_t i_reopen = i_check ? i_check - 1 : nfiles - 1;
850 /* Another process could have opened a file in between the calls to
851 close and open, so we should keep trying until open succeeds or
852 we've closed all of our files. */
855 if (files[i_check].ofd == OFD_NEW)
856 fd = create (files[i_check].of_name);
857 else /* OFD_APPEND */
859 /* Attempt to append to previously opened file.
860 We use O_NONBLOCK to support writing to fifos,
861 where the other end has closed because of our
862 previous close. In that case we'll immediately
863 get an error, rather than waiting indefinitely.
864 In specialised cases the consumer can keep reading
865 from the fifo, terminating on conditions in the data
866 itself, or perhaps never in the case of 'tail -f'.
867 I.E. for fifos it is valid to attempt this reopen.
869 We don't handle the filter_command case here, as create()
870 will exit if there are not enough files in that case.
871 I.E. we don't support restarting filters, as that would
872 put too much burden on users specifying --filter commands. */
873 fd = open (files[i_check].of_name,
874 O_WRONLY | O_BINARY | O_APPEND | O_NONBLOCK);
880 if (!(errno == EMFILE || errno == ENFILE))
881 error (EXIT_FAILURE, errno, "%s", files[i_check].of_name);
885 /* Search backwards for an open file to close. */
886 while (files[i_reopen].ofd < 0)
888 i_reopen = i_reopen ? i_reopen - 1 : nfiles - 1;
889 /* No more open files to close, exit with E[NM]FILE. */
890 if (i_reopen == i_check)
891 error (EXIT_FAILURE, errno, "%s", files[i_check].of_name);
894 if (fclose (files[i_reopen].ofile) != 0)
895 error (EXIT_FAILURE, errno, "%s", files[i_reopen].of_name);
896 files[i_reopen].ofile = NULL;
897 files[i_reopen].ofd = OFD_APPEND;
900 files[i_check].ofd = fd;
901 if (!(files[i_check].ofile = fdopen (fd, "a")))
902 error (EXIT_FAILURE, errno, "%s", files[i_check].of_name);
903 files[i_check].opid = filter_pid;
/* lines_rr (K, N, BUF, BUFSIZE): round-robin line distribution.
   Setup: an array of N of_t entries is allocated with xnmalloc (a
   preceding check reports "memory exhausted"); each entry gets a copy
   of the current OUTFILE name, OFD_NEW, a null stream and pid 0.
   Input is read with safe_read rather than full_read so that
   available data is processed immediately; SAFE_READ_ERROR is fatal.
   Each line (found with memchr up to EOB) is handled in one of two
   ways:
   - Extraction: LINE_NO cycles 1..N, and only lines with
     LINE_NO == K are written to standard output -- directly with
     full_write when UNBUFFERED, otherwise with fwrite on stdout
     (clearing the stream error before exiting on failure).
   - Distribution: the target entry is opened on demand through
     ofile_open, whose result accumulates in FILE_LIMIT.  One visible
     path writes to the entry's descriptor with full_write, another to
     its stream with fwrite; failures are fatal unless the errno is
     accepted by the ignorable test.  A visible sequence also fclose's
     the stream after the write and marks the entry OFD_APPEND.
     I_FILE advances after each complete line and the wrap at N is
     tested.
   Final pass over all N entries: entries at or beyond CEILING (N if
   the index wrapped, else the current index) are opened unless
   ELIDE_EMPTY_FILES is set, every entry with a non-negative OFD is
   finished with closeout, and each entry is marked OFD_APPEND.
   NOTE(review): CEILING is declared int while N is uintmax_t, so a
   very large N would be truncated in that assignment -- confirm
   whether N is bounded earlier.
   NOTE(review): the lines choosing between the paths above, and the
   declarations of several locals, are missing from this listing.  */
910 /* -n r/[K/]N: Divide file into N chunks in round robin fashion.
911 When K == 0, we try to keep the files open in parallel.
912 If we run out of file resources, then we revert
913 to opening and closing each file for each line. */
916 lines_rr (uintmax_t k, uintmax_t n, char *buf, size_t bufsize)
918 bool wrapped = false;
922 of_t *files IF_LINT (= NULL);
930 error (exit_failure, 0, "%s", _("memory exhausted"));
931 files = xnmalloc (n, sizeof *files);
933 /* Generate output file names. */
934 for (i_file = 0; i_file < n; i_file++)
937 files[i_file].of_name = xstrdup (outfile);
938 files[i_file].ofd = OFD_NEW;
939 files[i_file].ofile = NULL;
940 files[i_file].opid = 0;
948 char *bp = buf, *eob;
949 /* Use safe_read() rather than full_read() here
950 so that we process available data immediately. */
951 size_t n_read = safe_read (STDIN_FILENO, buf, bufsize);
952 if (n_read == SAFE_READ_ERROR)
953 error (EXIT_FAILURE, errno, "%s", infile);
954 else if (n_read == 0)
963 /* Find end of line. */
964 char *bp_out = memchr (bp, '\n', eob - bp);
972 to_write = bp_out - bp;
976 if (line_no == k && unbuffered)
978 if (full_write (STDOUT_FILENO, bp, to_write) != to_write)
979 error (EXIT_FAILURE, errno, "%s", _("write error"));
981 else if (line_no == k && fwrite (bp, to_write, 1, stdout) != 1)
983 clearerr (stdout); /* To silence close_stdout(). */
984 error (EXIT_FAILURE, errno, "%s", _("write error"));
987 line_no = (line_no == n) ? 1 : line_no + 1;
991 /* Secure file descriptor. */
992 file_limit |= ofile_open (files, i_file, n);
995 /* Note writing to fd, rather than flushing the FILE gives
996 an 8% performance benefit, due to reduced data copying. */
997 if (full_write (files[i_file].ofd, bp, to_write) != to_write
998 && ! ignorable (errno))
999 error (EXIT_FAILURE, errno, "%s", files[i_file].of_name);
1001 else if (fwrite (bp, to_write, 1, files[i_file].ofile) != 1
1002 && ! ignorable (errno))
1003 error (EXIT_FAILURE, errno, "%s", files[i_file].of_name);
1004 if (! ignorable (errno))
1009 if (fclose (files[i_file].ofile) != 0)
1010 error (EXIT_FAILURE, errno, "%s", files[i_file].of_name);
1011 files[i_file].ofile = NULL;
1012 files[i_file].ofd = OFD_APPEND;
1014 if (next && ++i_file == n)
1017 /* If no filters are accepting input, stop reading. */
1030 /* Ensure all files created, so that any existing files are truncated,
1031 and to signal any waiting fifo consumers.
1032 Also, close any open file descriptors.
1033 FIXME: Should we do this before EXIT_FAILURE? */
1036 int ceiling = (wrapped ? n : i_file);
1037 for (i_file = 0; i_file < n; i_file++)
1039 if (i_file >= ceiling && !elide_empty_files)
1040 file_limit |= ofile_open (files, i_file, n);
1041 if (files[i_file].ofd >= 0)
1042 closeout (files[i_file].ofile, files[i_file].ofd,
1043 files[i_file].opid, files[i_file].of_name);
1044 files[i_file].ofd = OFD_APPEND;
1047 IF_LINT (free (files));
/* FAIL_ONLY_ONE_WAY: used by the option parser when a second split
   mode is requested.  It prints "cannot split in more than one way"
   as a non-fatal diagnostic (status 0) and then calls
   usage (EXIT_FAILURE).
   NOTE(review): the macro's wrapper lines are missing from this
   listing, and the last visible line still ends in a continuation
   backslash.  */
1050 #define FAIL_ONLY_ONE_WAY() \
1053 error (0, 0, _("cannot split in more than one way")); \
1054 usage (EXIT_FAILURE); \
/* parse_chunk (K_UNITS, N_UNITS, SLASH): parse the "K/N" or "/N" part
   of a chunk specification held in the global optarg.
   SLASH points at the '/' inside optarg.  The text after it is parsed
   as a base-10 unsigned number into *N_UNITS with no multiplier
   suffixes allowed; failure is fatal ("invalid number of chunks").
   If SLASH is not the first character of optarg, a leading chunk
   number is present: it is parsed the same way into *K_UNITS and
   must be nonzero and no greater than *N_UNITS, otherwise the error
   is "invalid chunk number".  When there is no leading number
   *K_UNITS is left untouched.
   NOTE(review): the second xstrtoumax call parses optarg up to its
   terminator, so the '/' is presumably overwritten with NUL first;
   that statement and part of the first condition are missing from
   this listing -- confirm.  */
1058 /* Parse K/N syntax of chunk options. */
1061 parse_chunk (uintmax_t *k_units, uintmax_t *n_units, char *slash)
1064 if (xstrtoumax (slash + 1, NULL, 10, n_units, "") != LONGINT_OK
1066 error (EXIT_FAILURE, 0, _("%s: invalid number of chunks"), slash + 1);
1067 if (slash != optarg /* a leading number is specified. */
1068 && (xstrtoumax (optarg, NULL, 10, k_units, "") != LONGINT_OK
1069 || *k_units == 0 || *n_units < *k_units))
1070 error (EXIT_FAILURE, 0, _("%s: invalid chunk number"), optarg);
/* main: parse options, open the input, and dispatch to a splitter.
   Visible flow:
   1. Program setup: name, locale, message domain, and close_stdout
      registered with atexit.  Defaults are input "-" and prefix "x".
   2. Option parsing with getopt_long using the short-option string
      "0123456789C:a:b:del:n:u".  Visible handling:
      - suffix length: parsed with xstrtoul, rejected when larger than
        SIZE_MAX / sizeof (size_t);
      - additional suffix: rejected when it contains a directory
        separator (last_component (optarg) != optarg);
      - byte counts: parsed with the MULTIPLIERS suffix set; values
        above OFF_T_MAX fail with EFBIG, and the line-bytes form also
        rejects 0 and values above SIZE_MAX;
      - line counts: parsed with no multiplier suffixes;
      - chunk spec: leading whitespace skipped, optional "r/" or "l/"
        prefix selects type_rr or type_chunk_lines, otherwise
        type_chunk_bytes; a '/' hands off to parse_chunk;
      - bare digits: accumulated into N_UNITS as a line count,
        restarting when the digits come from a different argv slot,
        with overflow reported as "is too large";
      - numeric suffixes: SUFFIX_ALPHABET becomes "0123456789" and an
        optional start value must consist only of those digits,
        leading zeros being skipped;
      - internal block-size option: must be nonzero and at most
        SIZE_MAX - page_size.
      Every mode option first checks that no other mode was chosen
      and otherwise invokes FAIL_ONLY_ONE_WAY.
   3. Post-parse checks: extracting chunk K (K_UNITS != 0) cannot be
      combined with a filter command; with no mode given the default
      is type_lines; set_suffix_length is called; INPUT and PREFIX are
      taken from the remaining operands and an extra operand is an
      error; a numeric start value longer than SUFFIX_LENGTH is an
      error.
   4. Input: unless it is "-", the input file is reopened on
      STDIN_FILENO; IN_STAT_BUF is filled with fstat; the block size
      defaults to io_blksize (in_stat_buf).
   5. For the two chunk modes the file size is taken from st_size when
      usable, else found by seeking to the end and back; the current
      offset is subtracted, a chunk count above OFF_T_MAX fails with
      EFBIG, and FILE_SIZE is raised to at least N_UNITS so that
      empty files are still created.
   6. A buffer of in_blk_size + 1 bytes is allocated with room to
      align it to the page size.
   7. SIGPIPE is added to the blocked set unless it is already
      ignored, and the previous mask is saved in OLDBLOCKED.
   8. Dispatch on the split type to lines_split, bytes_split,
      line_bytes_split, bytes_chunk_extract, lines_chunk_split or
      lines_rr; chunk-bytes mode without extraction calls bytes_split
      with file_size / n_units and a file limit of n_units.
   9. Standard input is closed, the last output is finished with
      closeout, and the program exits with EXIT_SUCCESS.
   NOTE(review): the switch/case labels, most braces and a number of
   condition lines are missing from this listing, so the association
   of each fragment with its option letter follows the diagnostics'
   wording rather than visible case labels.  */
1075 main (int argc, char **argv)
1077 enum Split_type split_type = type_undef;
1078 size_t in_blk_size = 0; /* optimal block size of input file device */
1079 size_t page_size = getpagesize ();
1080 uintmax_t k_units = 0;
1083 static char const multipliers[] = "bEGKkMmPTYZ0";
1085 int digits_optind = 0;
1086 off_t file_size IF_LINT (= 0);
1088 initialize_main (&argc, &argv);
1089 set_program_name (argv[0]);
1090 setlocale (LC_ALL, "");
1091 bindtextdomain (PACKAGE, LOCALEDIR);
1092 textdomain (PACKAGE);
1094 atexit (close_stdout);
1096 /* Parse command line options. */
1098 infile = bad_cast ("-");
1099 outbase = bad_cast ("x");
1103 /* This is the argv-index of the option we will read next. */
1104 int this_optind = optind ? optind : 1;
1107 c = getopt_long (argc, argv, "0123456789C:a:b:del:n:u",
1117 if (xstrtoul (optarg, NULL, 10, &tmp, "") != LONGINT_OK
1118 || SIZE_MAX / sizeof (size_t) < tmp)
1120 error (0, 0, _("%s: invalid suffix length"), optarg);
1121 usage (EXIT_FAILURE);
1123 suffix_length = tmp;
1127 case ADDITIONAL_SUFFIX_OPTION:
1128 if (last_component (optarg) != optarg)
1131 _("invalid suffix %s, contains directory separator"),
1133 usage (EXIT_FAILURE);
1135 additional_suffix = optarg;
1139 if (split_type != type_undef)
1140 FAIL_ONLY_ONE_WAY ();
1141 split_type = type_bytes;
1142 if (xstrtoumax (optarg, NULL, 10, &n_units, multipliers) != LONGINT_OK
1145 error (0, 0, _("%s: invalid number of bytes"), optarg);
1146 usage (EXIT_FAILURE);
1148 /* If input is a pipe, we could get more data than is possible
1149 to write to a single file, so indicate that immediately
1150 rather than having possibly future invocations fail. */
1151 if (OFF_T_MAX < n_units)
1152 error (EXIT_FAILURE, EFBIG,
1153 _("%s: invalid number of bytes"), optarg);
1158 if (split_type != type_undef)
1159 FAIL_ONLY_ONE_WAY ();
1160 split_type = type_lines;
1161 if (xstrtoumax (optarg, NULL, 10, &n_units, "") != LONGINT_OK
1164 error (0, 0, _("%s: invalid number of lines"), optarg);
1165 usage (EXIT_FAILURE);
1170 if (split_type != type_undef)
1171 FAIL_ONLY_ONE_WAY ();
1172 split_type = type_byteslines;
1173 if (xstrtoumax (optarg, NULL, 10, &n_units, multipliers) != LONGINT_OK
1174 || n_units == 0 || SIZE_MAX < n_units)
1176 error (0, 0, _("%s: invalid number of bytes"), optarg);
1177 usage (EXIT_FAILURE);
1179 if (OFF_T_MAX < n_units)
1180 error (EXIT_FAILURE, EFBIG,
1181 _("%s: invalid number of bytes"), optarg);
1185 if (split_type != type_undef)
1186 FAIL_ONLY_ONE_WAY ();
1187 /* skip any whitespace */
1188 while (isspace (to_uchar (*optarg)))
1190 if (STRNCMP_LIT (optarg, "r/") == 0)
1192 split_type = type_rr;
1195 else if (STRNCMP_LIT (optarg, "l/") == 0)
1197 split_type = type_chunk_lines;
1201 split_type = type_chunk_bytes;
1202 if ((slash = strchr (optarg, '/')))
1203 parse_chunk (&k_units, &n_units, slash);
1204 else if (xstrtoumax (optarg, NULL, 10, &n_units, "") != LONGINT_OK
1206 error (EXIT_FAILURE, 0, _("%s: invalid number of chunks"), optarg);
1223 if (split_type == type_undef)
1225 split_type = type_digits;
1228 if (split_type != type_undef && split_type != type_digits)
1229 FAIL_ONLY_ONE_WAY ();
1230 if (digits_optind != 0 && digits_optind != this_optind)
1231 n_units = 0; /* More than one number given; ignore other. */
1232 digits_optind = this_optind;
1233 if (!DECIMAL_DIGIT_ACCUMULATE (n_units, c - '0', uintmax_t))
1235 char buffer[INT_BUFSIZE_BOUND (uintmax_t)];
1236 error (EXIT_FAILURE, 0,
1237 _("line count option -%s%c... is too large"),
1238 umaxtostr (n_units, buffer), c);
1243 suffix_alphabet = "0123456789";
1246 if (strlen (optarg) != strspn (optarg, suffix_alphabet))
1249 _("%s: invalid start value for numerical suffix"),
1251 usage (EXIT_FAILURE);
1255 /* Skip any leading zero. */
1256 while (*optarg == '0' && *(optarg + 1) != '\0')
1258 numeric_suffix_start = optarg;
1264 elide_empty_files = true;
1268 filter_command = optarg;
1271 case IO_BLKSIZE_OPTION:
1273 uintmax_t tmp_blk_size;
1274 if (xstrtoumax (optarg, NULL, 10, &tmp_blk_size,
1275 multipliers) != LONGINT_OK
1276 || tmp_blk_size == 0 || SIZE_MAX - page_size < tmp_blk_size)
1277 error (0, 0, _("%s: invalid IO block size"), optarg);
1279 in_blk_size = tmp_blk_size;
1283 case VERBOSE_OPTION:
1287 case_GETOPT_HELP_CHAR;
1289 case_GETOPT_VERSION_CHAR (PROGRAM_NAME, AUTHORS);
1292 usage (EXIT_FAILURE);
1296 if (k_units != 0 && filter_command)
1298 error (0, 0, _("--filter does not process a chunk extracted to stdout"));
1299 usage (EXIT_FAILURE);
1302 /* Handle default case. */
1303 if (split_type == type_undef)
1305 split_type = type_lines;
1311 error (0, 0, _("%s: invalid number of lines"), "0");
1312 usage (EXIT_FAILURE);
1315 set_suffix_length (n_units, split_type);
1317 /* Get out the filename arguments. */
1320 infile = argv[optind++];
1323 outbase = argv[optind++];
1327 error (0, 0, _("extra operand %s"), quote (argv[optind]));
1328 usage (EXIT_FAILURE);
1331 /* Check that the suffix length is large enough for the numerical
1332 suffix start value. */
1333 if (numeric_suffix_start && strlen (numeric_suffix_start) > suffix_length)
1335 error (0, 0, _("numerical suffix start value is too large "
1336 "for the suffix length"));
1337 usage (EXIT_FAILURE);
1340 /* Open the input file. */
1341 if (! STREQ (infile, "-")
1342 && fd_reopen (STDIN_FILENO, infile, O_RDONLY, 0) < 0)
1343 error (EXIT_FAILURE, errno, _("cannot open %s for reading"),
1346 /* Binary I/O is safer when byte counts are used. */
1347 if (O_BINARY && ! isatty (STDIN_FILENO))
1348 xfreopen (NULL, "rb", stdin);
1350 /* Get the optimal block size of input device and make a buffer. */
1352 if (fstat (STDIN_FILENO, &in_stat_buf) != 0)
1353 error (EXIT_FAILURE, errno, "%s", infile);
1354 if (in_blk_size == 0)
1355 in_blk_size = io_blksize (in_stat_buf);
1357 if (split_type == type_chunk_bytes || split_type == type_chunk_lines)
1359 off_t input_offset = lseek (STDIN_FILENO, 0, SEEK_CUR);
1360 if (usable_st_size (&in_stat_buf))
1361 file_size = in_stat_buf.st_size;
1362 else if (0 <= input_offset)
1364 file_size = lseek (STDIN_FILENO, 0, SEEK_END);
1365 input_offset = (file_size < 0
1367 : lseek (STDIN_FILENO, input_offset, SEEK_SET));
1369 if (input_offset < 0)
1370 error (EXIT_FAILURE, 0, _("%s: cannot determine file size"),
1372 file_size -= input_offset;
1373 /* Overflow, and sanity checking. */
1374 if (OFF_T_MAX < n_units)
1376 char buffer[INT_BUFSIZE_BOUND (uintmax_t)];
1377 error (EXIT_FAILURE, EFBIG, _("%s: invalid number of chunks"),
1378 umaxtostr (n_units, buffer));
1380 /* increase file_size to n_units here, so that we still process
1381 any input data, and create empty files for the rest. */
1382 file_size = MAX (file_size, n_units);
1385 void *b = xmalloc (in_blk_size + 1 + page_size - 1);
1386 char *buf = ptr_align (b, page_size);
1388 /* When filtering, closure of one pipe must not terminate the process,
1389 as there may still be other streams expecting input from us. */
1392 struct sigaction act;
1393 sigemptyset (&newblocked);
1394 sigaction (SIGPIPE, NULL, &act);
1395 if (act.sa_handler != SIG_IGN)
1396 sigaddset (&newblocked, SIGPIPE);
1397 sigprocmask (SIG_BLOCK, &newblocked, &oldblocked);
1404 lines_split (n_units, buf, in_blk_size);
1408 bytes_split (n_units, buf, in_blk_size, 0);
1411 case type_byteslines:
1412 line_bytes_split (n_units);
1415 case type_chunk_bytes:
1417 bytes_split (file_size / n_units, buf, in_blk_size, n_units);
1419 bytes_chunk_extract (k_units, n_units, buf, in_blk_size, file_size);
1422 case type_chunk_lines:
1423 lines_chunk_split (k_units, n_units, buf, in_blk_size, file_size);
1427 /* Note, this is like 'sed -n ${k}~${n}p' when k > 0,
1428 but the functionality is provided for symmetry. */
1429 lines_rr (k_units, n_units, buf, in_blk_size);
1438 if (close (STDIN_FILENO) != 0)
1439 error (EXIT_FAILURE, errno, "%s", infile);
1440 closeout (NULL, output_desc, filter_pid, outfile);
1442 exit (EXIT_SUCCESS);