1 /* split.c -- split a file into pieces.
2 Copyright (C) 1988-2013 Free Software Foundation, Inc.
4 This program is free software: you can redistribute it and/or modify
5 it under the terms of the GNU General Public License as published by
6 the Free Software Foundation, either version 3 of the License, or
7 (at your option) any later version.
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU General Public License for more details.
14 You should have received a copy of the GNU General Public License
15 along with this program. If not, see <http://www.gnu.org/licenses/>. */
17 /* By tege@sics.se, with rms.
20 * TODO: Implement -t CHAR or -t REGEX to specify break characters other than newline. */
29 #include <sys/types.h>
34 #include "fd-reopen.h"
36 #include "full-read.h"
37 #include "full-write.h"
38 #include "ioblksize.h"
40 #include "safe-read.h"
45 /* The official name of this program (e.g., no 'g' prefix). */
46 #define PROGRAM_NAME "split"
/* Author credits; the first name is given in an ASCII and a UTF-8 spelling.
   NOTE(review): these two lines look like the body of the AUTHORS macro that
   main passes to case_GETOPT_VERSION_CHAR together with PROGRAM_NAME; the
   #define line itself is not visible here -- confirm. */
49 proper_name_utf8 ("Torbjorn Granlund", "Torbj\303\266rn Granlund"), \
50 proper_name ("Richard M. Stallman")
52 /* Shell command to filter through, instead of creating files.
   Assigned from the option argument in main; null when no filter is used. */
53 static char const *filter_command;
55 /* Process ID of the filter. Declared as pid_t, the type closeout takes
   for it, instead of relying on pid_t and int being the same type. */
56 static pid_t filter_pid;
58 /* Array of open pipes. create stores the write end of each filter pipe
   here, and closeout removes an entry when its descriptor is closed. */
59 static int *open_pipes;
/* Number of slots allocated in OPEN_PIPES, and number of slots in use. */
60 static size_t open_pipes_alloc;
61 static size_t n_open_pipes;
63 /* Blocked signals. NEWBLOCKED is the set main blocks (SIGPIPE, unless it
   is already ignored); OLDBLOCKED is the mask saved at that point, which
   create restores in the filter child before exec. */
64 static sigset_t oldblocked;
65 static sigset_t newblocked;
67 /* Base name of output files. Defaults to x; main replaces it with the
   second operand when one is given. */
68 static char const *outbase;
70 /* Name of output files. */
73 /* Pointer to the end of the prefix in OUTFILE.
74 Suffixes are inserted here. */
75 static char *outfile_mid;
77 /* Generate new suffix when suffixes are exhausted. */
78 static bool suffix_auto = true;
80 /* Length of OUTFILE's suffix. Zero until set by the user or by
   set_suffix_length. */
81 static size_t suffix_length;
83 /* Alphabet of characters to use in suffix. main switches this to the
   decimal digits for numeric suffixes. */
84 static char const *suffix_alphabet = "abcdefghijklmnopqrstuvwxyz";
86 /* Numerical suffix start value, as a digit string with redundant
   leading zeros skipped; null when no start value was given. */
87 static const char *numeric_suffix_start;
89 /* Additional suffix to append to output file names. */
90 static char const *additional_suffix;
92 /* Name of input file. May be "-". */
95 /* stat buf for input file. Filled in by main; create compares it with
   each output file to avoid overwriting the input. */
96 static struct stat in_stat_buf;
98 /* Descriptor on which output file is open. */
99 static int output_desc = -1;
101 /* If true, print a diagnostic just before each
102 output file is opened. The diagnostics visible in create are written
   to stdout. */
105 /* If true, don't generate zero length output files. */
106 static bool elide_empty_files;
108 /* If true, in round robin mode, immediately copy
109 input to output, which is much slower, so disabled by default. */
110 static bool unbuffered;
112 /* The split mode to use. */
/* As assigned in main: type_bytes for a byte count, type_byteslines for
   a line-bytes count, type_lines for a line count, type_digits for a bare
   digit-string line count, and for the chunk option type_chunk_bytes,
   type_chunk_lines (l/ prefix) and type_rr (r/ prefix). */
115 type_undef, type_bytes, type_byteslines, type_lines, type_digits,
116 type_chunk_bytes, type_chunk_lines, type_rr
119 /* For long options that have no equivalent short option, use a
120 non-character as a pseudo short option, starting with CHAR_MAX + 1. */
123 VERBOSE_OPTION = CHAR_MAX + 1,
126 ADDITIONAL_SUFFIX_OPTION
/* Long option table handed to getopt_long. The fourth field is the value
   getopt_long returns: the short option letter where one exists, otherwise
   one of the pseudo short option constants. */
129 static struct option const longopts[] =
131 {"bytes", required_argument, NULL, 'b'},
132 {"lines", required_argument, NULL, 'l'},
133 {"line-bytes", required_argument, NULL, 'C'},
134 {"number", required_argument, NULL, 'n'},
135 {"elide-empty-files", no_argument, NULL, 'e'},
136 {"unbuffered", no_argument, NULL, 'u'},
137 {"suffix-length", required_argument, NULL, 'a'},
138 {"additional-suffix", required_argument, NULL,
139 ADDITIONAL_SUFFIX_OPTION},
/* The start value is optional, so -d alone keeps the default start. */
140 {"numeric-suffixes", optional_argument, NULL, 'd'},
141 {"filter", required_argument, NULL, FILTER_OPTION},
142 {"verbose", no_argument, NULL, VERBOSE_OPTION},
/* The name itself begins with a dash, so on the command line this
   option is spelled with three leading dashes. */
143 {"-io-blksize", required_argument, NULL,
144 IO_BLKSIZE_OPTION}, /* do not document */
145 {GETOPT_HELP_OPTION_DECL},
146 {GETOPT_VERSION_OPTION_DECL},
150 /* Return true if the errno value, ERR, is ignorable. The only such
   case is EPIPE while a filter command is in use, i.e. the filter stopped
   reading its input. */
154 return filter_command && err == EPIPE;
/* Settle the value of suffix_length.
   N_UNITS is the count given with the split option and SPLIT_TYPE the
   selected mode. For the chunk modes (type_chunk_bytes, type_chunk_lines,
   type_rr) N_UNITS is the number of output files, and a minimum suffix
   length is derived from it and from the size of suffix_alphabet.
   A user-supplied suffix_length smaller than that minimum is a fatal
   error; with no user value, suffix_length becomes the larger of
   DEFAULT_SUFFIX_LENGTH and the computed minimum. */
158 set_suffix_length (uintmax_t n_units, enum Split_type split_type)
160 #define DEFAULT_SUFFIX_LENGTH 2
/* Minimum suffix length required; stays 0 outside the chunk modes. */
162 size_t suffix_needed = 0;
164 /* The suffix auto length feature is incompatible with
165 a user specified start value as the generated suffixes
166 are not all consecutive. */
167 if (numeric_suffix_start)
170 /* Auto-calculate the suffix length if the number of files is given. */
171 if (split_type == type_chunk_bytes || split_type == type_chunk_lines
172 || split_type == type_rr)
174 size_t alphabet_len = strlen (suffix_alphabet);
/* True when N_UNITS is not an exact multiple of the alphabet size;
   it is then added to the count as one extra position. */
175 bool alphabet_slop = (n_units % alphabet_len) != 0;
/* Repeated division by the alphabet size, until the quotient is 0. */
176 while (n_units /= alphabet_len)
178 suffix_needed += alphabet_slop;
182 if (suffix_length) /* set by user */
184 if (suffix_length < suffix_needed)
186 error (EXIT_FAILURE, 0,
187 _("the suffix length needs to be at least %zu"),
194 suffix_length = MAX (DEFAULT_SUFFIX_LENGTH, suffix_needed);
/* NOTE(review): judging by the calls usage (EXIT_FAILURE) elsewhere in
   this file, these lines are the body of usage (STATUS) -- confirm.
   The first test separates a failure STATUS from EXIT_SUCCESS. The
   visible statements write the option table and the standard help and
   version descriptions to stdout; the remaining lines are the text of the
   synopsis and of the CHUNKS description. */
200 if (status != EXIT_SUCCESS)
205 Usage: %s [OPTION]... [INPUT [PREFIX]]\n\
209 Output fixed-size pieces of INPUT to PREFIXaa, PREFIXab, ...; default\n\
210 size is 1000 lines, and default PREFIX is 'x'. With no INPUT, or when INPUT\n\
211 is -, read standard input.\n\
214 emit_mandatory_arg_note ();
216 fprintf (stdout, _("\
217 -a, --suffix-length=N generate suffixes of length N (default %d)\n\
218 --additional-suffix=SUFFIX append an additional SUFFIX to file names.\n\
219 -b, --bytes=SIZE put SIZE bytes per output file\n\
220 -C, --line-bytes=SIZE put at most SIZE bytes of lines per output file\n\
221 -d, --numeric-suffixes[=FROM] use numeric suffixes instead of alphabetic.\n\
222 FROM changes the start value (default 0).\n\
223 -e, --elide-empty-files do not generate empty output files with '-n'\n\
224 --filter=COMMAND write to shell COMMAND; file name is $FILE\n\
225 -l, --lines=NUMBER put NUMBER lines per output file\n\
226 -n, --number=CHUNKS generate CHUNKS output files. See below\n\
227 -u, --unbuffered immediately copy input to output with '-n r/...'\n\
228 "), DEFAULT_SUFFIX_LENGTH);
230 --verbose print a diagnostic just before each\n\
231 output file is opened\n\
233 fputs (HELP_OPTION_DESCRIPTION, stdout);
234 fputs (VERSION_OPTION_DESCRIPTION, stdout);
238 N split into N files based on size of input\n\
239 K/N output Kth of N to stdout\n\
240 l/N split into N files without splitting lines\n\
241 l/K/N output Kth of N to stdout without splitting lines\n\
242 r/N like 'l' but use round robin distribution\n\
243 r/K/N likewise but only output Kth of N to stdout\n\
245 emit_ancillary_info ();
250 /* Compute the next sequential output file name and store it into OUTFILE. */
/* The first name is OUTBASE, then SUFFIX_LENGTH copies of the first
   alphabet character, then ADDITIONAL_SUFFIX if any. Later names are
   produced by advancing the suffix, whose per-position alphabet indexes
   are kept in SUFINDEX. With suffix_auto set, reaching the last alphabet
   character in the leading position leads to a wider name instead (see
   the reallocation comment below). Exhausting the suffixes is fatal. */
254 next_file_name (void)
256 /* Index in suffix_alphabet of each character in the suffix. */
257 static size_t *sufindex;
/* Lengths cached across calls; all are zero before the first name is
   built. */
258 static size_t outbase_length;
259 static size_t outfile_length;
260 static size_t addsuf_length;
/* A non-zero OUTFILE_LENGTH means a name was built by an earlier call,
   so this pass widens it instead of creating the first one. */
267 widen = !! outfile_length;
271 /* Allocate and initialize the first file name. */
273 outbase_length = strlen (outbase);
274 addsuf_length = additional_suffix ? strlen (additional_suffix) : 0;
275 outfile_length = outbase_length + suffix_length + addsuf_length;
279 /* Reallocate and initialize a new wider file name.
280 We do this by subsuming the unchanging part of
281 the generated suffix into the prefix (base), and
282 reinitializing the now one longer suffix. */
/* NOTE(review): presumably a guard against size_t wraparound in the
   length arithmetic; the action taken is not visible here -- confirm. */
288 if (outfile_length + 1 < outbase_length)
/* One extra byte for the terminating NUL stored below. */
290 outfile = xrealloc (outfile, outfile_length + 1);
293 memcpy (outfile, outbase, outbase_length);
296 /* Append the last alphabet character to the file name prefix. */
297 outfile[outbase_length] = suffix_alphabet[sufindex[0]];
/* Lay out the suffix area after the prefix: fill it with the first
   alphabet character, then append the additional suffix and the NUL. */
301 outfile_mid = outfile + outbase_length;
302 memset (outfile_mid, suffix_alphabet[0], suffix_length);
303 if (additional_suffix)
304 memcpy (outfile_mid + suffix_length, additional_suffix, addsuf_length);
305 outfile[outfile_length] = 0;
/* Zero-filled, i.e. every position starts at the first alphabet
   character, matching the memset above. */
308 sufindex = xcalloc (suffix_length, sizeof *sufindex);
310 if (numeric_suffix_start)
314 /* Update the output file name. */
/* The start value is copied right-aligned into the suffix area. */
315 size_t i = strlen (numeric_suffix_start);
316 memcpy (outfile_mid + suffix_length - i, numeric_suffix_start, i);
318 /* Update the suffix index. */
319 size_t *sufindex_end = sufindex + suffix_length;
/* Convert each digit character to its index, filling from the end. */
321 *--sufindex_end = numeric_suffix_start[i] - '0';
324 #if ! _POSIX_NO_TRUNC && HAVE_PATHCONF && defined _PC_NAME_MAX
325 /* POSIX requires that if the output file name is too long for
326 its directory, 'split' must fail without creating any files.
327 This must be checked for explicitly on operating systems that
328 silently truncate file names. */
330 char *dir = dir_name (outfile);
331 long name_max = pathconf (dir, _PC_NAME_MAX);
/* A negative NAME_MAX skips the check. */
332 if (0 <= name_max && name_max < base_len (last_component (outfile)))
333 error (EXIT_FAILURE, ENAMETOOLONG, "%s", outfile);
340 /* Increment the suffix in place, if possible. */
342 size_t i = suffix_length;
/* True when auto-widening is enabled, the leading suffix position is
   being examined, and its index is that of the last alphabet character
   (the next entry is the terminating NUL). */
346 if (suffix_auto && i == 0 && ! suffix_alphabet[sufindex[0] + 1])
348 outfile_mid[i] = suffix_alphabet[sufindex[i]];
352 outfile_mid[i] = suffix_alphabet[sufindex[i]];
354 error (EXIT_FAILURE, 0, _("output file suffixes exhausted"));
358 /* Create or truncate a file. */
/* NAME is the output file name. Two paths are visible below.
   File path: NAME is opened without O_TRUNC, the program fails if the
   result is the same inode as the input (in_stat_buf), and only then is
   the file truncated, so the input is not clobbered before the check.
   Filter path: NAME is exported as the FILE environment variable, a pipe
   is created, and a child runs the shell on filter_command with the read
   end of the pipe as its standard input; the parent records the child in
   filter_pid and the write end of the pipe in open_pipes.
   NOTE(review): the conditions selecting each path and the return
   statements are not visible here; cwrite and ofile_open treat the result
   as a file descriptor that is negative on failure -- confirm. */
361 create (const char *name)
366 fprintf (stdout, _("creating file %s\n"), quote (name));
368 int fd = open (name, O_WRONLY | O_CREAT | O_BINARY, MODE_RW_UGO);
371 struct stat out_stat_buf;
372 if (fstat (fd, &out_stat_buf) != 0)
373 error (EXIT_FAILURE, errno, _("failed to stat %s"), quote (name));
374 if (SAME_INODE (in_stat_buf, out_stat_buf))
375 error (EXIT_FAILURE, 0, _("%s would overwrite input; aborting"),
377 if (ftruncate (fd, 0) != 0)
378 error (EXIT_FAILURE, errno, _("%s: error truncating"), quote (name));
/* The filter runs under the shell named by SHELL, or /bin/sh when that
   variable is unset. */
386 char const *shell_prog = getenv ("SHELL");
387 if (shell_prog == NULL)
388 shell_prog = "/bin/sh";
389 if (setenv ("FILE", name, 1) != 0)
390 error (EXIT_FAILURE, errno,
391 _("failed to set FILE environment variable"));
393 fprintf (stdout, _("executing with FILE=%s\n"), quote (name));
394 if (pipe (fd_pair) != 0)
395 error (EXIT_FAILURE, errno, _("failed to create pipe"));
399 /* This is the child process. If an error occurs here, the
400 parent will eventually learn about it after doing a wait,
401 at which time it will emit its own error message. */
403 /* We have to close any pipes that were opened during an
404 earlier call, otherwise this process will be holding a
405 write-pipe that will prevent the earlier process from
406 reading an EOF on the corresponding read-pipe. */
407 for (j = 0; j < n_open_pipes; ++j)
408 if (close (open_pipes[j]) != 0)
409 error (EXIT_FAILURE, errno, _("closing prior pipe"));
/* The child only reads, so it drops the write end of the new pipe. */
410 if (close (fd_pair[1]))
411 error (EXIT_FAILURE, errno, _("closing output pipe"));
/* Make the read end the standard input of the child. */
412 if (fd_pair[0] != STDIN_FILENO)
414 if (dup2 (fd_pair[0], STDIN_FILENO) != STDIN_FILENO)
415 error (EXIT_FAILURE, errno, _("moving input pipe"));
416 if (close (fd_pair[0]) != 0)
417 error (EXIT_FAILURE, errno, _("closing input pipe"));
/* Restore the signal mask that was in effect before main blocked
   SIGPIPE, so the filter runs with the original mask. */
419 sigprocmask (SIG_SETMASK, &oldblocked, NULL);
420 execl (shell_prog, last_component (shell_prog), "-c",
421 filter_command, (char *) NULL);
/* Reached only when execl failed. */
422 error (EXIT_FAILURE, errno, _("failed to run command: \"%s -c %s\""),
423 shell_prog, filter_command);
426 error (EXIT_FAILURE, errno, _("fork system call failed"));
/* Parent: the read end belongs to the child only. */
427 if (close (fd_pair[0]) != 0)
428 error (EXIT_FAILURE, errno, _("failed to close input pipe"));
429 filter_pid = child_pid;
/* Grow the table when it is full, then remember the write end so that
   later children can close it. */
430 if (n_open_pipes == open_pipes_alloc)
431 open_pipes = x2nrealloc (open_pipes, &open_pipes_alloc,
433 open_pipes[n_open_pipes++] = fd_pair[1];
438 /* Close the output file, and do any associated cleanup.
439 If FP and FD are both specified, they refer to the same open file;
440 in this case FP is closed, but FD is still used in cleanup. */
/* PID is the process ID of the filter attached to the output, and NAME
   the file name used in diagnostics. The visible cleanup removes FD from
   open_pipes and waits for PID, turning abnormal termination of the
   filter into a diagnostic. */
442 closeout (FILE *fp, int fd, pid_t pid, char const *name)
/* A close failure that ignorable accepts (EPIPE from a filter) is not
   treated as an error. */
444 if (fp != NULL && fclose (fp) != 0 && ! ignorable (errno))
445 error (EXIT_FAILURE, errno, "%s", name);
/* Without a stream, close the descriptor directly. */
448 if (fp == NULL && close (fd) < 0)
449 error (EXIT_FAILURE, errno, "%s", name);
451 for (j = 0; j < n_open_pipes; ++j)
453 if (open_pipes[j] == fd)
/* Remove the entry by moving the last one into its slot. */
455 open_pipes[j] = open_pipes[--n_open_pipes];
/* ECHILD is tolerated; any other waitpid failure is fatal. */
463 if (waitpid (pid, &wstatus, 0) == -1 && errno != ECHILD)
464 error (EXIT_FAILURE, errno, _("waiting for child process"));
465 if (WIFSIGNALED (wstatus))
467 int sig = WTERMSIG (wstatus);
/* Large enough for a signal name or for the decimal signal number
   used as a fallback. */
470 char signame[MAX (SIG2STR_MAX, INT_BUFSIZE_BOUND (int))];
471 if (sig2str (sig, signame) != 0)
472 sprintf (signame, "%d", sig);
474 _("with FILE=%s, signal %s from command: %s"),
475 name, signame, filter_command);
478 else if (WIFEXITED (wstatus))
480 int ex = WEXITSTATUS (wstatus);
/* EX is passed as the status argument of error, so the exit status of
   the filter is used as the exit status of this program. */
482 error (ex, 0, _("with FILE=%s, exit %d from command: %s"),
483 name, ex, filter_command);
487 /* shouldn't happen. */
488 error (EXIT_FAILURE, 0,
489 _("unknown status from command (0x%X)"), wstatus);
494 /* Write BYTES bytes at BP to an output file.
495 If NEW_FILE_FLAG is true, open the next output file.
496 Otherwise add to the same output file already in use. */
/* The output file is tracked in the globals output_desc and outfile.
   Open and write failures are fatal, except write errors that ignorable
   accepts. */
499 cwrite (bool new_file_flag, const char *bp, size_t bytes)
/* A null BP with zero BYTES is how callers request an empty output
   file; this test singles that request out when empty files are elided. */
503 if (!bp && bytes == 0 && elide_empty_files)
/* Finish the previous output (and its filter, if any) before the next
   one is created. */
505 closeout (NULL, output_desc, filter_pid, outfile);
507 if ((output_desc = create (outfile)) < 0)
508 error (EXIT_FAILURE, errno, "%s", outfile);
510 if (full_write (output_desc, bp, bytes) != bytes && ! ignorable (errno))
511 error (EXIT_FAILURE, errno, "%s", outfile);
514 /* Split into pieces of exactly N_BYTES bytes.
515 Use buffer BUF, whose size is BUFSIZE. */
/* MAX_FILES, when non-zero, limits how many output files are started;
   once the limit is reached, further data goes to the last file. main
   passes 0 for a plain byte split and the chunk count for a chunk split.
   Input is read from standard input. */
518 bytes_split (uintmax_t n_bytes, char *buf, size_t bufsize, uintmax_t max_files)
521 bool new_file_flag = true;
/* Bytes still to be written to the current output file. */
523 uintmax_t to_write = n_bytes;
/* Number of output files started so far. */
525 uintmax_t opened = 0;
529 n_read = full_read (STDIN_FILENO, buf, bufsize);
/* A short read with errno set is a read error. */
530 if (n_read < bufsize && errno)
531 error (EXIT_FAILURE, errno, "%s", infile);
/* The rest of the buffer does not complete the current file. */
536 if (to_read < to_write)
538 if (to_read) /* do not write 0 bytes! */
540 cwrite (new_file_flag, bp_out, to_read);
541 opened += new_file_flag;
543 new_file_flag = false;
550 cwrite (new_file_flag, bp_out, w);
551 opened += new_file_flag;
/* Start another file next time unless MAX_FILES has been reached. */
552 new_file_flag = !max_files || (opened < max_files);
553 if (!new_file_flag && ignorable (errno))
555 /* If filter no longer accepting input, stop reading. */
/* A short read ends the loop. */
565 while (n_read == bufsize);
567 /* Ensure NUMBER files are created, which truncates
568 any existing files or notifies any consumers on fifos.
569 FIXME: Should we do this before EXIT_FAILURE? */
570 while (opened++ < max_files)
571 cwrite (true, NULL, 0);
574 /* Split into pieces of exactly N_LINES lines.
575 Use buffer BUF, whose size is BUFSIZE. */
/* Input is read from standard input in blocks of BUFSIZE bytes; a
   piece may therefore be written by several cwrite calls, only the first
   of which starts a new file. */
578 lines_split (uintmax_t n_lines, char *buf, size_t bufsize)
581 char *bp, *bp_out, *eob;
582 bool new_file_flag = true;
587 n_read = full_read (STDIN_FILENO, buf, bufsize);
588 if (n_read < bufsize && errno)
589 error (EXIT_FAILURE, errno, "%s", infile);
/* The search length includes the byte at EOB.
   NOTE(review): presumably a sentinel newline is stored at EOB, which
   would explain the extra byte main allocates beyond in_blk_size --
   confirm. */
595 bp = memchr (bp, '\n', eob - bp + 1);
598 if (eob != bp_out) /* do not write 0 bytes! */
/* Flush the part of the buffer that does not yet complete a piece. */
600 size_t len = eob - bp_out;
601 cwrite (new_file_flag, bp_out, len);
602 new_file_flag = false;
/* A piece is complete: write it and start a new file next time. */
610 cwrite (new_file_flag, bp_out, bp - bp_out);
612 new_file_flag = true;
617 while (n_read == bufsize);
620 /* Split into pieces that are as large as possible while still not more
621 than N_BYTES bytes, and are split on line boundaries except
622 where lines longer than N_BYTES bytes occur.
623 FIXME: Allow N_BYTES to be any uintmax_t value, and don't require a
624 buffer of size N_BYTES, in case N_BYTES is very large. */
/* Input is read from standard input into a private buffer of N_BYTES
   bytes; every chunk found is written as a separate output file. */
627 line_bytes_split (size_t n_bytes)
/* Number of bytes currently held in BUF. */
631 size_t n_buffered = 0;
632 char *buf = xmalloc (n_bytes);
636 /* Fill up the full buffer size from the input file. */
638 size_t to_read = n_bytes - n_buffered;
639 size_t n_read = full_read (STDIN_FILENO, buf + n_buffered, to_read);
640 if (n_read < to_read && errno)
641 error (EXIT_FAILURE, errno, "%s", infile);
643 n_buffered += n_read;
/* The buffer could not be filled. */
644 if (n_buffered != n_bytes)
651 /* Find where to end this chunk. */
652 bp = buf + n_buffered;
/* With a full buffer, search backwards for the last newline. */
653 if (n_buffered == n_bytes)
655 while (bp > buf && bp[-1] != '\n')
659 /* If chunk has no newlines, use all the chunk. */
661 bp = buf + n_buffered;
663 /* Output the chars as one output file. */
664 cwrite (true, buf, bp - buf);
666 /* Discard the chars we just output; move rest of chunk
667 down to be the start of the next chunk. Source and
668 destination probably overlap. */
669 n_buffered -= bp - buf;
671 memmove (buf, bp, n_buffered);
677 /* -n l/[K/]N: Write lines to files of approximately file size / N.
678 The file is partitioned into file size / N sized portions, with the
679 last assigned any excess. If a line _starts_ within a partition
680 it is written completely to the corresponding file. Since lines
681 are not split even if they overlap a partition, the files written
682 can be larger or smaller than the partition size, and even empty
683 if a line is so long as to completely overlap the partition. */
/* K selects a single chunk to extract, or is 0 to write all N chunks
   to output files; BUF is an I/O buffer of BUFSIZE bytes. The size of the
   input is used as file_size (its parameter declaration is not visible
   here). */
686 lines_chunk_split (uintmax_t k, uintmax_t n, char *buf, size_t bufsize,
689 assert (n && k <= n && n <= file_size);
691 const off_t chunk_size = file_size / n;
/* One-based number of the chunk being filled, and the offset of its
   last byte. */
692 uintmax_t chunk_no = 1;
693 off_t chunk_end = chunk_size - 1;
695 bool new_file_flag = true;
696 bool chunk_truncated = false;
700 /* Start reading 1 byte before kth chunk of file. */
701 off_t start = (k - 1) * chunk_size - 1;
702 if (lseek (STDIN_FILENO, start, SEEK_CUR) < 0)
703 error (EXIT_FAILURE, errno, "%s", infile);
706 chunk_end = chunk_no * chunk_size - 1;
709 while (n_written < file_size)
711 char *bp = buf, *eob;
712 size_t n_read = full_read (STDIN_FILENO, buf, bufsize);
713 if (n_read < bufsize && errno)
714 error (EXIT_FAILURE, errno, "%s", infile);
715 else if (n_read == 0)
/* Never process more than FILE_SIZE bytes in total. */
717 n_read = MIN (n_read, file_size - n_written);
718 chunk_truncated = false;
726 /* Begin looking for '\n' at last byte of chunk. */
727 off_t skip = MIN (n_read, MAX (0, chunk_end - n_written));
728 char *bp_out = memchr (bp + skip, '\n', n_read - skip);
733 to_write = bp_out - bp;
737 /* We don't use the stdout buffer here since we're writing
738 large chunks from an existing file, so it's more efficient
739 to write out directly. */
740 if (full_write (STDOUT_FILENO, bp, to_write) != to_write)
741 error (EXIT_FAILURE, errno, "%s", _("write error"));
744 cwrite (new_file_flag, bp, to_write);
745 n_written += to_write;
748 new_file_flag = next;
750 /* A line could have been so long that it skipped
751 entire chunks. So create empty files in that case. */
752 while (next || chunk_end <= n_written - 1)
754 if (!next && bp == eob)
756 /* replenish buf, before going to next chunk. */
757 chunk_truncated = true;
/* When extracting, nothing past chunk K is of interest. */
761 if (k && chunk_no > k)
764 chunk_end = file_size - 1; /* >= chunk_size. */
766 chunk_end += chunk_size;
767 if (chunk_end <= n_written - 1)
770 cwrite (true, NULL, 0);
781 /* Ensure NUMBER files are created, which truncates
782 any existing files or notifies any consumers on fifos.
783 FIXME: Should we do this before EXIT_FAILURE? */
784 while (!k && chunk_no++ <= n)
785 cwrite (true, NULL, 0);
788 /* -n K/N: Extract Kth of N chunks. */
/* The chunk is copied from standard input to standard output using
   BUF, a buffer of BUFSIZE bytes. Chunks are file_size / N bytes long,
   except that the last chunk extends to the end of the input. */
791 bytes_chunk_extract (uintmax_t k, uintmax_t n, char *buf, size_t bufsize,
797 assert (k && n && k <= n && n <= file_size);
/* Offsets of the chunk: START is inclusive, END is exclusive. */
799 start = (k - 1) * (file_size / n);
800 end = (k == n) ? file_size : k * (file_size / n);
/* Seek relative to the current input offset. */
802 if (lseek (STDIN_FILENO, start, SEEK_CUR) < 0)
803 error (EXIT_FAILURE, errno, "%s", infile);
807 size_t n_read = full_read (STDIN_FILENO, buf, bufsize);
808 if (n_read < bufsize && errno)
809 error (EXIT_FAILURE, errno, "%s", infile);
810 else if (n_read == 0)
/* Do not copy past the end of the chunk. */
812 n_read = MIN (n_read, end - start);
813 if (full_write (STDOUT_FILENO, buf, n_read) != n_read
814 && ! ignorable (errno))
815 error (EXIT_FAILURE, errno, "%s", quote ("-"));
/* State kept for each output file in round robin mode. The members
   used by ofile_open and lines_rr are of_name, ofd, ofile and opid;
   their declarations are not visible here. */
820 typedef struct of_info
834 /* Rotate file descriptors when we're writing to more output files than we
835 have available file descriptors.
836 Return whether we came under file resource pressure.
837 If so, it's probably best to close each file when finished with it. */
/* FILES is the table of NFILES outputs and I_CHECK the index of the
   entry that must be open on return. An entry whose ofd is OFD_NEW is
   created; one marked OFD_APPEND was closed earlier and is reopened for
   appending. */
840 ofile_open (of_t *files, size_t i_check, size_t nfiles)
842 bool file_limit = false;
/* Only entries without an open descriptor need any work. */
844 if (files[i_check].ofd <= OFD_NEW)
/* Candidates for closing are searched backwards, with wraparound,
   starting just before I_CHECK. */
847 size_t i_reopen = i_check ? i_check - 1 : nfiles - 1;
849 /* Another process could have opened a file in between the calls to
850 close and open, so we should keep trying until open succeeds or
851 we've closed all of our files. */
854 if (files[i_check].ofd == OFD_NEW)
855 fd = create (files[i_check].of_name);
856 else /* OFD_APPEND */
858 /* Attempt to append to previously opened file.
859 We use O_NONBLOCK to support writing to fifos,
860 where the other end has closed because of our
861 previous close. In that case we'll immediately
862 get an error, rather than waiting indefinitely.
863 In specialised cases the consumer can keep reading
864 from the fifo, terminating on conditions in the data
865 itself, or perhaps never in the case of 'tail -f'.
866 I.E. for fifos it is valid to attempt this reopen.
868 We don't handle the filter_command case here, as create()
869 will exit if there are not enough files in that case.
870 I.E. we don't support restarting filters, as that would
871 put too much burden on users specifying --filter commands. */
872 fd = open (files[i_check].of_name,
873 O_WRONLY | O_BINARY | O_APPEND | O_NONBLOCK);
/* Any failure other than running out of descriptors is fatal. */
879 if (!(errno == EMFILE || errno == ENFILE))
880 error (EXIT_FAILURE, errno, "%s", files[i_check].of_name);
884 /* Search backwards for an open file to close. */
885 while (files[i_reopen].ofd < 0)
887 i_reopen = i_reopen ? i_reopen - 1 : nfiles - 1;
888 /* No more open files to close, exit with E[NM]FILE. */
889 if (i_reopen == i_check)
890 error (EXIT_FAILURE, errno, "%s", files[i_check].of_name);
/* Close the victim and mark it for reopening in append mode. */
893 if (fclose (files[i_reopen].ofile) != 0)
894 error (EXIT_FAILURE, errno, "%s", files[i_reopen].of_name);
895 files[i_reopen].ofile = NULL;
896 files[i_reopen].ofd = OFD_APPEND;
/* Record the new descriptor, a stream on top of it, and the filter
   process most recently started by create. */
899 files[i_check].ofd = fd;
900 if (!(files[i_check].ofile = fdopen (fd, "a")))
901 error (EXIT_FAILURE, errno, "%s", files[i_check].of_name);
902 files[i_check].opid = filter_pid;
909 /* -n r/[K/]N: Divide file into N chunks in round robin fashion.
910 When K == 0, we try to keep the files open in parallel.
911 If we run out of file resources, then we revert
912 to opening and closing each file for each line. */
/* With K non-zero only every Kth of N lines is written, to standard
   output. BUF is an I/O buffer of BUFSIZE bytes; input is read from
   standard input. */
915 lines_rr (uintmax_t k, uintmax_t n, char *buf, size_t bufsize)
/* Set once the round robin has gone past the last file. */
917 bool wrapped = false;
921 of_t *files IF_LINT (= NULL);
929 error (exit_failure, 0, "%s", _("memory exhausted"));
930 files = xnmalloc (n, sizeof *files);
932 /* Generate output file names. */
933 for (i_file = 0; i_file < n; i_file++)
936 files[i_file].of_name = xstrdup (outfile);
937 files[i_file].ofd = OFD_NEW;
938 files[i_file].ofile = NULL;
939 files[i_file].opid = 0;
947 char *bp = buf, *eob;
948 /* Use safe_read() rather than full_read() here
949 so that we process available data immediately. */
950 size_t n_read = safe_read (STDIN_FILENO, buf, bufsize);
951 if (n_read == SAFE_READ_ERROR)
952 error (EXIT_FAILURE, errno, "%s", infile);
953 else if (n_read == 0)
962 /* Find end of line. */
963 char *bp_out = memchr (bp, '\n', eob - bp);
971 to_write = bp_out - bp;
/* Extraction: lines whose number in the current round equals K go to
   standard output, unbuffered on request. */
975 if (line_no == k && unbuffered)
977 if (full_write (STDOUT_FILENO, bp, to_write) != to_write)
978 error (EXIT_FAILURE, errno, "%s", _("write error"));
980 else if (line_no == k && fwrite (bp, to_write, 1, stdout) != 1)
982 clearerr (stdout); /* To silence close_stdout(). */
983 error (EXIT_FAILURE, errno, "%s", _("write error"));
/* Line numbers cycle through 1..N. */
986 line_no = (line_no == n) ? 1 : line_no + 1;
990 /* Secure file descriptor. */
991 file_limit |= ofile_open (files, i_file, n);
994 /* Note writing to fd, rather than flushing the FILE gives
995 an 8% performance benefit, due to reduced data copying. */
996 if (full_write (files[i_file].ofd, bp, to_write) != to_write
997 && ! ignorable (errno))
998 error (EXIT_FAILURE, errno, "%s", files[i_file].of_name);
1000 else if (fwrite (bp, to_write, 1, files[i_file].ofile) != 1
1001 && ! ignorable (errno))
1002 error (EXIT_FAILURE, errno, "%s", files[i_file].of_name);
1003 if (! ignorable (errno))
/* Close the stream again and mark the file for reopening in append
   mode. */
1008 if (fclose (files[i_file].ofile) != 0)
1009 error (EXIT_FAILURE, errno, "%s", files[i_file].of_name);
1010 files[i_file].ofile = NULL;
1011 files[i_file].ofd = OFD_APPEND;
1013 if (next && ++i_file == n)
1016 /* If no filters are accepting input, stop reading. */
1029 /* Ensure all files created, so that any existing files are truncated,
1030 and to signal any waiting fifo consumers.
1031 Also, close any open file descriptors.
1032 FIXME: Should we do this before EXIT_FAILURE? */
/* CEILING is the number of files that have already received a line.
   It has the type of N: storing N in an int would narrow chunk counts
   above INT_MAX and corrupt the comparison with I_FILE below. */
1035 uintmax_t ceiling = (wrapped ? n : i_file);
1036 for (i_file = 0; i_file < n; i_file++)
/* Files that never received a line are created here, unless empty
   files are elided. */
1038 if (i_file >= ceiling && !elide_empty_files)
1039 file_limit |= ofile_open (files, i_file, n);
1040 if (files[i_file].ofd >= 0)
1041 closeout (files[i_file].ofile, files[i_file].ofd,
1042 files[i_file].opid, files[i_file].of_name);
1043 files[i_file].ofd = OFD_APPEND;
1046 IF_LINT (free (files));
/* Used by main when a second split option is given: report the
   conflict and call usage with EXIT_FAILURE. */
1049 #define FAIL_ONLY_ONE_WAY() \
1052 error (0, 0, _("cannot split in more than one way")); \
1053 usage (EXIT_FAILURE); \
1057 /* Parse K/N syntax of chunk options. */
/* SLASH points at the slash inside optarg. The number after it is
   stored in *N_UNITS. A number before it, if present, is stored in
   *K_UNITS and must be between 1 and *N_UNITS. Invalid input is fatal. */
1060 parse_chunk (uintmax_t *k_units, uintmax_t *n_units, char *slash)
1063 if (xstrtoumax (slash + 1, NULL, 10, n_units, "") != LONGINT_OK
1065 error (EXIT_FAILURE, 0, _("%s: invalid number of chunks"), slash + 1);
1066 if (slash != optarg /* a leading number is specified. */
1067 && (xstrtoumax (optarg, NULL, 10, k_units, "") != LONGINT_OK
1068 || *k_units == 0 || *n_units < *k_units))
1069 error (EXIT_FAILURE, 0, _("%s: invalid chunk number"), optarg);
/* Program entry point: parse the options, settle the suffix length and
   the operands, open the input, allocate the I/O buffer, and dispatch to
   the splitting routine for the selected mode. */
1074 main (int argc, char **argv)
1076 enum Split_type split_type = type_undef;
1077 size_t in_blk_size = 0; /* optimal block size of input file device */
1078 size_t page_size = getpagesize ();
/* K of a K/N chunk specification; 0 when no single chunk is selected. */
1079 uintmax_t k_units = 0;
/* Size suffixes accepted by the byte-count options. */
1082 static char const multipliers[] = "bEGKkMmPTYZ0";
/* Index of the argument that held the digits option seen last. */
1084 int digits_optind = 0;
1085 off_t file_size IF_LINT (= 0);
1087 initialize_main (&argc, &argv);
1088 set_program_name (argv[0]);
1089 setlocale (LC_ALL, "");
1090 bindtextdomain (PACKAGE, LOCALEDIR);
1091 textdomain (PACKAGE);
1093 atexit (close_stdout);
1095 /* Parse command line options. */
/* Defaults: read standard input, and use x as the output prefix. */
1097 infile = bad_cast ("-");
1098 outbase = bad_cast ("x");
1102 /* This is the argv-index of the option we will read next. */
1103 int this_optind = optind ? optind : 1;
1106 c = getopt_long (argc, argv, "0123456789C:a:b:del:n:u",
/* Suffix length. The upper bound keeps the size_t index array that
   next_file_name allocates for the suffix within SIZE_MAX bytes. */
1116 if (xstrtoul (optarg, NULL, 10, &tmp, "") != LONGINT_OK
1117 || SIZE_MAX / sizeof (size_t) < tmp)
1119 error (0, 0, _("%s: invalid suffix length"), optarg);
1120 usage (EXIT_FAILURE);
1122 suffix_length = tmp;
1126 case ADDITIONAL_SUFFIX_OPTION:
/* A suffix whose last component differs from the whole string contains
   a directory separator. */
1127 if (last_component (optarg) != optarg)
1130 _("invalid suffix %s, contains directory separator"),
1132 usage (EXIT_FAILURE);
1134 additional_suffix = optarg;
/* Split by byte count. */
1138 if (split_type != type_undef)
1139 FAIL_ONLY_ONE_WAY ();
1140 split_type = type_bytes;
1141 if (xstrtoumax (optarg, NULL, 10, &n_units, multipliers) != LONGINT_OK
1144 error (0, 0, _("%s: invalid number of bytes"), optarg);
1145 usage (EXIT_FAILURE);
1147 /* If input is a pipe, we could get more data than is possible
1148 to write to a single file, so indicate that immediately
1149 rather than having possibly future invocations fail. */
1150 if (OFF_T_MAX < n_units)
1151 error (EXIT_FAILURE, EFBIG,
1152 _("%s: invalid number of bytes"), optarg);
/* Split by line count. */
1157 if (split_type != type_undef)
1158 FAIL_ONLY_ONE_WAY ();
1159 split_type = type_lines;
1160 if (xstrtoumax (optarg, NULL, 10, &n_units, "") != LONGINT_OK
1163 error (0, 0, _("%s: invalid number of lines"), optarg);
1164 usage (EXIT_FAILURE);
/* Split by bytes of whole lines. The count must fit in size_t because
   line_bytes_split allocates a buffer of that many bytes. */
1169 if (split_type != type_undef)
1170 FAIL_ONLY_ONE_WAY ();
1171 split_type = type_byteslines;
1172 if (xstrtoumax (optarg, NULL, 10, &n_units, multipliers) != LONGINT_OK
1173 || n_units == 0 || SIZE_MAX < n_units)
1175 error (0, 0, _("%s: invalid number of bytes"), optarg);
1176 usage (EXIT_FAILURE);
1178 if (OFF_T_MAX < n_units)
1179 error (EXIT_FAILURE, EFBIG,
1180 _("%s: invalid number of bytes"), optarg);
/* Split into a number of chunks; an r/ or l/ prefix selects round
   robin or line-preserving chunks. */
1184 if (split_type != type_undef)
1185 FAIL_ONLY_ONE_WAY ();
1186 /* skip any whitespace */
1187 while (isspace (to_uchar (*optarg)))
1189 if (STRNCMP_LIT (optarg, "r/") == 0)
1191 split_type = type_rr;
1194 else if (STRNCMP_LIT (optarg, "l/") == 0)
1196 split_type = type_chunk_lines;
1200 split_type = type_chunk_bytes;
/* With a slash the argument has the K/N form; otherwise it is the
   number of chunks alone. */
1201 if ((slash = strchr (optarg, '/')))
1202 parse_chunk (&k_units, &n_units, slash);
1203 else if (xstrtoumax (optarg, NULL, 10, &n_units, "") != LONGINT_OK
1205 error (EXIT_FAILURE, 0, _("%s: invalid number of chunks"), optarg);
/* A line count given as bare digit options, accumulated one digit at
   a time. */
1222 if (split_type == type_undef)
1224 split_type = type_digits;
1227 if (split_type != type_undef && split_type != type_digits)
1228 FAIL_ONLY_ONE_WAY ();
1229 if (digits_optind != 0 && digits_optind != this_optind)
1230 n_units = 0; /* More than one number given; ignore other. */
1231 digits_optind = this_optind;
1232 if (!DECIMAL_DIGIT_ACCUMULATE (n_units, c - '0', uintmax_t))
1234 char buffer[INT_BUFSIZE_BOUND (uintmax_t)];
1235 error (EXIT_FAILURE, 0,
1236 _("line count option -%s%c... is too large"),
1237 umaxtostr (n_units, buffer), c);
/* Numeric suffixes: switch the alphabet to the decimal digits and
   validate a start value against it. */
1242 suffix_alphabet = "0123456789";
1245 if (strlen (optarg) != strspn (optarg, suffix_alphabet))
1248 _("%s: invalid start value for numerical suffix"),
1250 usage (EXIT_FAILURE);
1254 /* Skip any leading zero. */
1255 while (*optarg == '0' && *(optarg + 1) != '\0')
1257 numeric_suffix_start = optarg;
1263 elide_empty_files = true;
1267 filter_command = optarg;
1270 case IO_BLKSIZE_OPTION:
1272 uintmax_t tmp_blk_size;
/* The upper bound leaves room for the page alignment padding added
   when the buffer is allocated below. An invalid value is reported with
   a zero status, so the program continues. */
1273 if (xstrtoumax (optarg, NULL, 10, &tmp_blk_size,
1274 multipliers) != LONGINT_OK
1275 || tmp_blk_size == 0 || SIZE_MAX - page_size < tmp_blk_size)
1276 error (0, 0, _("%s: invalid IO block size"), optarg);
1278 in_blk_size = tmp_blk_size;
1282 case VERBOSE_OPTION:
1286 case_GETOPT_HELP_CHAR;
1288 case_GETOPT_VERSION_CHAR (PROGRAM_NAME, AUTHORS);
1291 usage (EXIT_FAILURE);
/* A chunk extracted with K/N goes to standard output, which a filter
   does not process. */
1295 if (k_units != 0 && filter_command)
1297 error (0, 0, _("--filter does not process a chunk extracted to stdout"));
1298 usage (EXIT_FAILURE);
1301 /* Handle default case. */
1302 if (split_type == type_undef)
1304 split_type = type_lines;
1310 error (0, 0, _("%s: invalid number of lines"), "0");
1311 usage (EXIT_FAILURE);
1314 set_suffix_length (n_units, split_type);
1316 /* Get out the filename arguments. */
1319 infile = argv[optind++];
1322 outbase = argv[optind++];
1326 error (0, 0, _("extra operand %s"), quote (argv[optind]));
1327 usage (EXIT_FAILURE);
1330 /* Check that the suffix length is large enough for the numerical
1331 suffix start value. */
1332 if (numeric_suffix_start && strlen (numeric_suffix_start) > suffix_length)
1334 error (0, 0, _("numerical suffix start value is too large "
1335 "for the suffix length"));
1336 usage (EXIT_FAILURE);
1339 /* Open the input file. */
/* The input is reopened onto STDIN_FILENO, which is the descriptor the
   splitting routines read from. */
1340 if (! STREQ (infile, "-")
1341 && fd_reopen (STDIN_FILENO, infile, O_RDONLY, 0) < 0)
1342 error (EXIT_FAILURE, errno, _("cannot open %s for reading"),
1345 /* Binary I/O is safer when byte counts are used. */
1346 if (O_BINARY && ! isatty (STDIN_FILENO))
1347 xfreopen (NULL, "rb", stdin);
1349 /* Get the optimal block size of input device and make a buffer. */
1351 if (fstat (STDIN_FILENO, &in_stat_buf) != 0)
1352 error (EXIT_FAILURE, errno, "%s", infile);
1353 if (in_blk_size == 0)
1354 in_blk_size = io_blksize (in_stat_buf);
/* The chunk modes need the amount of input that follows the current
   offset. */
1356 if (split_type == type_chunk_bytes || split_type == type_chunk_lines)
1358 off_t input_offset = lseek (STDIN_FILENO, 0, SEEK_CUR);
1359 if (usable_st_size (&in_stat_buf))
1360 file_size = in_stat_buf.st_size;
1361 else if (0 <= input_offset)
/* Measure the size by seeking to the end, then seek back. */
1363 file_size = lseek (STDIN_FILENO, 0, SEEK_END);
1364 input_offset = (file_size < 0
1366 : lseek (STDIN_FILENO, input_offset, SEEK_SET));
1368 if (input_offset < 0)
1369 error (EXIT_FAILURE, 0, _("%s: cannot determine file size"),
1371 file_size -= input_offset;
1372 /* Overflow, and sanity checking. */
1373 if (OFF_T_MAX < n_units)
1375 char buffer[INT_BUFSIZE_BOUND (uintmax_t)];
1376 error (EXIT_FAILURE, EFBIG, _("%s: invalid number of chunks"),
1377 umaxtostr (n_units, buffer));
1379 /* increase file_size to n_units here, so that we still process
1380 any input data, and create empty files for the rest. */
1381 file_size = MAX (file_size, n_units);
/* One byte more than the block size, plus padding so that the buffer
   can be aligned to a page boundary. */
1384 void *b = xmalloc (in_blk_size + 1 + page_size - 1);
1385 char *buf = ptr_align (b, page_size);
1387 /* When filtering, closure of one pipe must not terminate the process,
1388 as there may still be other streams expecting input from us. */
1391 struct sigaction act;
1392 sigemptyset (&newblocked);
/* Query the current SIGPIPE disposition; the signal is added to the
   blocked set only when it is not already ignored. */
1393 sigaction (SIGPIPE, NULL, &act);
1394 if (act.sa_handler != SIG_IGN)
1395 sigaddset (&newblocked, SIGPIPE);
1396 sigprocmask (SIG_BLOCK, &newblocked, &oldblocked);
/* Dispatch on the split mode. */
1403 lines_split (n_units, buf, in_blk_size);
1407 bytes_split (n_units, buf, in_blk_size, 0);
1410 case type_byteslines:
1411 line_bytes_split (n_units);
1414 case type_chunk_bytes:
1416 bytes_split (file_size / n_units, buf, in_blk_size, n_units);
1418 bytes_chunk_extract (k_units, n_units, buf, in_blk_size, file_size);
1421 case type_chunk_lines:
1422 lines_chunk_split (k_units, n_units, buf, in_blk_size, file_size);
1426 /* Note, this is like 'sed -n ${k}~${n}p' when k > 0,
1427 but the functionality is provided for symmetry. */
1428 lines_rr (k_units, n_units, buf, in_blk_size);
1437 if (close (STDIN_FILENO) != 0)
1438 error (EXIT_FAILURE, errno, "%s", infile);
/* Close the last output file and wait for its filter, if any. */
1439 closeout (NULL, output_desc, filter_pid, outfile);
1441 exit (EXIT_SUCCESS);