1 /* split.c -- split a file into pieces.
2 Copyright (C) 1988, 1991, 1995-2011 Free Software Foundation, Inc.
4 This program is free software: you can redistribute it and/or modify
5 it under the terms of the GNU General Public License as published by
6 the Free Software Foundation, either version 3 of the License, or
7 (at your option) any later version.
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU General Public License for more details.
14 You should have received a copy of the GNU General Public License
15 along with this program. If not, see <http://www.gnu.org/licenses/>. */
17 /* By tege@sics.se, with rms.
20 * Implement -t CHAR or -t REGEX to specify break characters other
29 #include <sys/types.h>
34 #include "fd-reopen.h"
36 #include "full-read.h"
37 #include "full-write.h"
39 #include "safe-read.h"
44 /* The official name of this program (e.g., no `g' prefix). */
45 #define PROGRAM_NAME "split"
48 proper_name_utf8 ("Torbjorn Granlund", "Torbj\303\266rn Granlund"), \
49 proper_name ("Richard M. Stallman")
51 /* Shell command to filter through, instead of creating files.
When non-null, EPIPE write errors are treated as ignorable
(see ignorable). */
52 static char const *filter_command;
54 /* Process ID of the filter most recently started by create. */
55 static int filter_pid;
57 /* Array of open pipes: the write ends of the pipes that create has
opened to filter children and that closeout has not yet removed.
OPEN_PIPES_ALLOC is the allocated capacity and N_OPEN_PIPES the
number of entries in use. */
58 static int *open_pipes;
59 static size_t open_pipes_alloc;
60 static size_t n_open_pipes;
62 /* Blocked signals.  NEWBLOCKED is the set that main adds to the
signal mask (SIGPIPE, unless it is already ignored); OLDBLOCKED
receives the previous mask, which create restores in the filter
child before running the command. */
63 static sigset_t oldblocked;
64 static sigset_t newblocked;
66 /* Base name (prefix) of output files.  main initializes it to x. */
67 static char const *outbase;
69 /* Name of output files. */
72 /* Pointer to the end of the prefix in OUTFILE.
73 Suffixes are inserted here. */
74 static char *outfile_mid;
76 /* Length of OUTFILE's suffix.  Zero means the user did not set it;
set_suffix_length then chooses a value. */
77 static size_t suffix_length;
79 /* Alphabet of characters to use in suffix.  main may replace it
with the decimal digits. */
80 static char const *suffix_alphabet = "abcdefghijklmnopqrstuvwxyz";
82 /* Name of input file. May be "-". */
85 /* Descriptor on which output file is open.  It is -1 until an
output file has been created. */
86 static int output_desc = -1;
88 /* If true, print a diagnostic just before each output file is
89 opened.  NOTE(review): the diagnostics visible in create are written
to stdout, not to standard error as this comment used to say. */
92 /* If true, don't generate zero length output files. */
93 static bool elide_empty_files;
95 /* If true, in round robin mode, immediately copy
96 input to output, which is much slower, so disabled by default. */
97 static bool unbuffered;
99 /* The split mode to use.  type_undef means that no mode option has
been seen yet; main then falls back to type_lines.  type_digits is set
when a bare digit option is parsed, and type_rr / type_chunk_lines
when the chunk argument starts with r/ or l/ respectively.  The
dispatch in main handles type_byteslines with line_bytes_split,
type_chunk_bytes with bytes_split or bytes_chunk_extract, and
type_chunk_lines with lines_chunk_split. */
102 type_undef, type_bytes, type_byteslines, type_lines, type_digits,
103 type_chunk_bytes, type_chunk_lines, type_rr
106 /* For long options that have no equivalent short option, use a
107 non-character as a pseudo short option, starting with CHAR_MAX + 1. */
110 VERBOSE_OPTION = CHAR_MAX + 1,
/* Long option table.  The last field of each entry is the value that
identifies the option: the equivalent short option character, or one
of the pseudo values above.  The io-blksize entry carries a leading
dash in its name, so it needs three dashes on the command line; it is
deliberately left out of the help text. */
115 static struct option const longopts[] =
117 {"bytes", required_argument, NULL, 'b'},
118 {"lines", required_argument, NULL, 'l'},
119 {"line-bytes", required_argument, NULL, 'C'},
120 {"number", required_argument, NULL, 'n'},
121 {"elide-empty-files", no_argument, NULL, 'e'},
122 {"unbuffered", no_argument, NULL, 'u'},
123 {"suffix-length", required_argument, NULL, 'a'},
124 {"numeric-suffixes", no_argument, NULL, 'd'},
125 {"filter", required_argument, NULL, FILTER_OPTION},
126 {"verbose", no_argument, NULL, VERBOSE_OPTION},
127 {"-io-blksize", required_argument, NULL,
128 IO_BLKSIZE_OPTION}, /* do not document */
129 {GETOPT_HELP_OPTION_DECL},
130 {GETOPT_VERSION_OPTION_DECL},
134 /* Return true if the errno value, ERR, is ignorable: that is, ERR
is EPIPE and a filter command is in use (filter_command is non-null).
A filter that stops reading its input must not by itself make split
fail, since other filters may still be expecting data. */
138 return filter_command && err == EPIPE;
/* Set the global SUFFIX_LENGTH from the unit count and the mode.
N_UNITS is the count given by the user; for the chunk and round robin
modes it is the number of output files.  SPLIT_TYPE is the selected
mode.  A user-supplied length that is shorter than the computed
requirement is a fatal error.
NOTE(review): this listing omits some lines of the function (for
example the body of the division loop and the branch structure around
the final assignment); the comments cover only what is visible. */
142 set_suffix_length (uintmax_t n_units, enum Split_type split_type)
144 #define DEFAULT_SUFFIX_LENGTH 2
146 size_t suffix_needed = 0;
148 /* Auto-calculate the suffix length if the number of files is given. */
149 if (split_type == type_chunk_bytes || split_type == type_chunk_lines
150 || split_type == type_rr)
152 size_t alphabet_len = strlen (suffix_alphabet);
/* ALPHABET_SLOP records whether N_UNITS is not an exact multiple of
the alphabet size; if so, one more suffix position is added below. */
153 bool alphabet_slop = (n_units % alphabet_len) != 0;
/* Divide N_UNITS by the alphabet size until the quotient is zero
(the loop body is not shown in this listing). */
154 while (n_units /= alphabet_len)
156 suffix_needed += alphabet_slop;
159 if (suffix_length) /* set by user */
161 if (suffix_length < suffix_needed)
163 error (EXIT_FAILURE, 0,
164 _("the suffix length needs to be at least %zu"),
/* Use the larger of the default and the computed requirement. */
170 suffix_length = MAX (DEFAULT_SUFFIX_LENGTH, suffix_needed);
/* Print usage information.  When STATUS is not EXIT_SUCCESS, a hint
to try --help is written to stderr.  The remaining visible lines are
the help text: synopsis, option list (the default suffix length is
substituted from DEFAULT_SUFFIX_LENGTH), the CHUNKS syntax and the
ancillary information; the visible output calls write to stdout.
NOTE(review): the function header, the branch structure and several
output calls are not part of this listing, so nothing is said here
about how the function ends. */
176 if (status != EXIT_SUCCESS)
177 fprintf (stderr, _("Try `%s --help' for more information.\n"),
182 Usage: %s [OPTION]... [INPUT [PREFIX]]\n\
186 Output fixed-size pieces of INPUT to PREFIXaa, PREFIXab, ...; default\n\
187 size is 1000 lines, and default PREFIX is `x'. With no INPUT, or when INPUT\n\
188 is -, read standard input.\n\
192 Mandatory arguments to long options are mandatory for short options too.\n\
194 fprintf (stdout, _("\
195 -a, --suffix-length=N use suffixes of length N (default %d)\n\
196 -b, --bytes=SIZE put SIZE bytes per output file\n\
197 -C, --line-bytes=SIZE put at most SIZE bytes of lines per output file\n\
198 -d, --numeric-suffixes use numeric suffixes instead of alphabetic\n\
199 -e, --elide-empty-files do not generate empty output files with `-n'\n\
200 --filter=COMMAND write to shell COMMAND; file name is $FILE\n\
201 -l, --lines=NUMBER put NUMBER lines per output file\n\
202 -n, --number=CHUNKS generate CHUNKS output files. See below\n\
203 -u, --unbuffered immediately copy input to output with `-n r/...'\n\
204 "), DEFAULT_SUFFIX_LENGTH);
206 --verbose print a diagnostic just before each\n\
207 output file is opened\n\
209 fputs (HELP_OPTION_DESCRIPTION, stdout);
210 fputs (VERSION_OPTION_DESCRIPTION, stdout);
214 N split into N files based on size of input\n\
215 K/N output Kth of N to stdout\n\
216 l/N split into N files without splitting lines\n\
217 l/K/N output Kth of N to stdout without splitting lines\n\
218 r/N like `l' but use round robin distribution\n\
219 r/K/N likewise but only output Kth of N to stdout\n\
221 emit_ancillary_info ();
/* next_file_name: produce the next output file name in OUTFILE.
The first group of statements builds the initial name: OUTBASE
followed by SUFFIX_LENGTH copies of the first alphabet character,
with SUFINDEX allocated zero-filled so that every suffix position
starts at alphabet index 0.  On systems that may silently truncate
names, the name is checked against the directory's name limit and
ENAMETOOLONG is reported.  The last group of statements advances the
suffix in place and fails with a diagnostic once the suffixes are
exhausted.
NOTE(review): the test that separates the first call from later
calls, and the loop that steps through the suffix positions, are not
visible in this listing. */
226 /* Compute the next sequential output file name and store it into the
230 next_file_name (void)
232 /* Index in suffix_alphabet of each character in the suffix. */
233 static size_t *sufindex;
237 /* Allocate and initialize the first file name. */
239 size_t outbase_length = strlen (outbase);
240 size_t outfile_length = outbase_length + suffix_length;
/* Detect size_t wraparound when the two lengths and the terminating
NUL are added (the failure action is not shown in this listing). */
241 if (outfile_length + 1 < outbase_length)
243 outfile = xmalloc (outfile_length + 1);
244 outfile_mid = outfile + outbase_length;
245 memcpy (outfile, outbase, outbase_length);
246 memset (outfile_mid, suffix_alphabet[0], suffix_length);
247 outfile[outfile_length] = 0;
248 sufindex = xcalloc (suffix_length, sizeof *sufindex);
250 #if ! _POSIX_NO_TRUNC && HAVE_PATHCONF && defined _PC_NAME_MAX
251 /* POSIX requires that if the output file name is too long for
252 its directory, `split' must fail without creating any files.
253 This must be checked for explicitly on operating systems that
254 silently truncate file names. */
256 char *dir = dir_name (outfile);
257 long name_max = pathconf (dir, _PC_NAME_MAX);
258 if (0 <= name_max && name_max < base_len (last_component (outfile)))
259 error (EXIT_FAILURE, ENAMETOOLONG, "%s", outfile);
266 /* Increment the suffix in place, if possible.  Each suffix
character is rewritten from its index in SUFINDEX. */
268 size_t i = suffix_length;
272 outfile_mid[i] = suffix_alphabet[sufindex[i]];
276 outfile_mid[i] = suffix_alphabet[sufindex[i]];
278 error (EXIT_FAILURE, 0, _("output file suffixes exhausted"));
282 /* Create or truncate a file.
NAME is the output file name.  The first visible return opens NAME
write-only, creating or truncating it, and requests read and write
permission for user, group and other.
The remaining statements send the output through a filter command
instead: FILE=NAME is placed in the environment, a pipe is created,
and a child process runs the shell with -c FILTER_COMMAND with the
read end of the pipe as its standard input.  The parent closes the
read end, records the child in FILTER_PID and appends the write end
to OPEN_PIPES.
NOTE(review): the test on filter_command, the fork call and the
return statement of the filter path are not visible in this
listing. */
285 create (const char *name)
290 fprintf (stdout, _("creating file %s\n"), quote (name));
291 return open (name, O_WRONLY | O_CREAT | O_TRUNC | O_BINARY,
292 (S_IRUSR | S_IWUSR | S_IRGRP | S_IWGRP | S_IROTH | S_IWOTH));
/* Run the filter with the shell named by the SHELL environment
variable, falling back to /bin/sh when it is unset. */
298 char const *shell_prog = getenv ("SHELL");
299 if (shell_prog == NULL)
300 shell_prog = "/bin/sh";
/* The filter learns the output name through the FILE variable. */
301 if (setenv ("FILE", name, 1) != 0)
302 error (EXIT_FAILURE, errno,
303 _("failed to set FILE environment variable"));
305 fprintf (stdout, _("executing with FILE=%s\n"), quote (name));
306 if (pipe (fd_pair) != 0)
307 error (EXIT_FAILURE, errno, _("failed to create pipe"));
311 /* This is the child process. If an error occurs here, the
312 parent will eventually learn about it after doing a wait,
313 at which time it will emit its own error message. */
315 /* We have to close any pipes that were opened during an
316 earlier call, otherwise this process will be holding a
317 write-pipe that will prevent the earlier process from
318 reading an EOF on the corresponding read-pipe. */
319 for (j = 0; j < n_open_pipes; ++j)
320 if (close (open_pipes[j]) != 0)
321 error (EXIT_FAILURE, errno, _("closing prior pipe"));
/* The child only reads: drop its copy of the write end and make the
read end its standard input. */
322 if (close (fd_pair[1]))
323 error (EXIT_FAILURE, errno, _("closing output pipe"));
324 if (fd_pair[0] != STDIN_FILENO)
326 if (dup2 (fd_pair[0], STDIN_FILENO) != STDIN_FILENO)
327 error (EXIT_FAILURE, errno, _("moving input pipe"));
328 if (close (fd_pair[0]) != 0)
329 error (EXIT_FAILURE, errno, _("closing input pipe"));
/* Restore the signal mask saved in OLDBLOCKED so that the filter
does not run with the signals split has blocked. */
331 sigprocmask (SIG_SETMASK, &oldblocked, NULL);
332 execl (shell_prog, last_component (shell_prog), "-c",
333 filter_command, (char *) NULL);
/* execl returns only on failure. */
334 error (EXIT_FAILURE, errno, _("failed to run command: \"%s -c %s\""),
335 shell_prog, filter_command);
338 error (EXIT_FAILURE, errno, _("fork system call failed"));
/* Parent side: keep only the write end of the pipe. */
339 if (close (fd_pair[0]) != 0)
340 error (EXIT_FAILURE, errno, _("failed to close input pipe"));
341 filter_pid = child_pid;
/* Grow OPEN_PIPES when it is full, then remember the write end so
that later children can close it. */
342 if (n_open_pipes == open_pipes_alloc)
343 open_pipes = x2nrealloc (open_pipes, &open_pipes_alloc,
345 open_pipes[n_open_pipes++] = fd_pair[1];
350 /* Close the output file, and do any associated cleanup.
351 If FP and FD are both specified, they refer to the same open file;
352 in this case FP is closed, but FD is still used in cleanup.
PID is passed to waitpid to reap the filter child that was reading
the file, and NAME is used in diagnostics.  A child that was killed
by a signal, or whose exit status is reported, produces a diagnostic
naming the FILE value and the filter command.
NOTE(review): the guards around these statements (on FD and PID) and
the declaration of WSTATUS are not visible in this listing. */
354 closeout (FILE *fp, int fd, pid_t pid, char const *name)
/* An EPIPE from a filter that stopped reading is tolerated. */
356 if (fp != NULL && fclose (fp) != 0 && ! ignorable (errno))
357 error (EXIT_FAILURE, errno, "%s", name);
/* Without a stream the descriptor itself has to be closed. */
360 if (fp == NULL && close (fd) < 0)
361 error (EXIT_FAILURE, errno, "%s", name);
/* Drop FD from OPEN_PIPES by overwriting its slot with the last
entry in use. */
363 for (j = 0; j < n_open_pipes; ++j)
365 if (open_pipes[j] == fd)
367 open_pipes[j] = open_pipes[--n_open_pipes];
/* A waitpid failure with ECHILD is tolerated.  NOTE(review): WSTATUS
is examined below even in that case; confirm that it is initialized
where it is declared. */
375 if (waitpid (pid, &wstatus, 0) == -1 && errno != ECHILD)
376 error (EXIT_FAILURE, errno, _("waiting for child process"));
377 if (WIFSIGNALED (wstatus))
379 int sig = WTERMSIG (wstatus);
/* Prefer the symbolic signal name; fall back to the number when
sig2str does not know the signal. */
382 char signame[MAX (SIG2STR_MAX, INT_BUFSIZE_BOUND (int))];
383 if (sig2str (sig, signame) != 0)
384 sprintf (signame, "%d", sig);
386 _("with FILE=%s, signal %s (%s) from command: %s"),
387 name, signame, strsignal (sig), filter_command);
390 else if (WIFEXITED (wstatus))
392 int ex = WEXITSTATUS (wstatus);
/* The filter's exit status EX is passed to error as the status
argument, so a nonzero EX becomes the exit status of split. */
394 error (ex, 0, _("with FILE=%s, exit %d from command: %s"),
395 name, ex, filter_command);
399 /* shouldn't happen. */
400 error (EXIT_FAILURE, 0,
401 _("unknown status from command (0x%X)"), wstatus);
406 /* Write BYTES bytes at BP to an output file.
407 If NEW_FILE_FLAG is true, open the next output file.
408 Otherwise add to the same output file already in use.
Callers pass a null BP with BYTES == 0 to request that a file merely
be created; the first test below recognizes that request when
elide_empty_files is set (its action is not shown in this listing).
A short write is fatal unless the error is ignorable, that is an
EPIPE from a filter.
NOTE(review): the test of NEW_FILE_FLAG around the closeout and
create calls is not visible in this listing. */
411 cwrite (bool new_file_flag, const char *bp, size_t bytes)
415 if (!bp && bytes == 0 && elide_empty_files)
/* Finish the current output, including its filter if any, before
the next one is opened. */
417 closeout (NULL, output_desc, filter_pid, outfile);
419 if ((output_desc = create (outfile)) < 0)
420 error (EXIT_FAILURE, errno, "%s", outfile);
422 if (full_write (output_desc, bp, bytes) != bytes && ! ignorable (errno))
423 error (EXIT_FAILURE, errno, "%s", outfile);
426 /* Split into pieces of exactly N_BYTES bytes.
427 Use buffer BUF, whose size is BUFSIZE.
MAX_FILES limits the number of output files: zero means no limit,
otherwise no new file is started once MAX_FILES have been opened, and
after the input is exhausted empty files are created until MAX_FILES
exist.  main passes 0 for a plain byte split and the chunk count for
a split into N chunks.
NOTE(review): the enclosing read loop header and the inner loop over
the buffer are only partly visible in this listing. */
430 bytes_split (uintmax_t n_bytes, char *buf, size_t bufsize, uintmax_t max_files)
433 bool new_file_flag = true;
435 uintmax_t to_write = n_bytes;
437 uintmax_t opened = 0;
/* A short read with errno set is treated as a read error. */
441 n_read = full_read (STDIN_FILENO, buf, bufsize);
442 if (n_read < bufsize && errno)
443 error (EXIT_FAILURE, errno, "%s", infile);
/* Fewer bytes are buffered than the current file still needs: write
what there is and continue with the same file. */
448 if (to_read < to_write)
450 if (to_read) /* do not write 0 bytes! */
452 cwrite (new_file_flag, bp_out, to_read);
453 opened += new_file_flag;
455 new_file_flag = false;
462 cwrite (new_file_flag, bp_out, w);
463 opened += new_file_flag;
/* Start another file only while the limit, if any, allows it. */
464 new_file_flag = !max_files || (opened < max_files);
465 if (!new_file_flag && ignorable (errno))
467 /* If filter no longer accepting input, stop reading. */
/* A full buffer means that more input may follow. */
477 while (n_read == bufsize);
479 /* Ensure NUMBER files are created, which truncates
480 any existing files or notifies any consumers on fifos.
481 FIXME: Should we do this before EXIT_FAILURE? */
482 while (opened++ < max_files)
483 cwrite (true, NULL, 0);
486 /* Split into pieces of exactly N_LINES lines.
487 Use buffer BUF, whose size is BUFSIZE.
Input is read from standard input one buffer at a time; a partial
line at the end of a buffer is written to the current file and is
continued from the next buffer.
NOTE(review): the line counter and the loops around these statements
are not visible in this listing. */
490 lines_split (uintmax_t n_lines, char *buf, size_t bufsize)
493 char *bp, *bp_out, *eob;
494 bool new_file_flag = true;
/* A short read with errno set is treated as a read error. */
499 n_read = full_read (STDIN_FILENO, buf, bufsize);
500 if (n_read < bufsize && errno)
501 error (EXIT_FAILURE, errno, "%s", infile);
/* The search length includes the byte at EOB itself, so BUF needs
room for one byte beyond BUFSIZE; main allocates that extra byte.
NOTE(review): presumably a newline sentinel is stored at EOB on a
line that is not shown here; confirm. */
507 bp = memchr (bp, '\n', eob - bp + 1);
510 if (eob != bp_out) /* do not write 0 bytes! */
512 size_t len = eob - bp_out;
513 cwrite (new_file_flag, bp_out, len);
514 new_file_flag = false;
/* Write up to BP and let the next write start a new file. */
522 cwrite (new_file_flag, bp_out, bp - bp_out);
524 new_file_flag = true;
/* A full buffer means that more input may follow. */
529 while (n_read == bufsize);
532 /* Split into pieces that are as large as possible while still not more
533 than N_BYTES bytes, and are split on line boundaries except
534 where lines longer than N_BYTES bytes occur.
535 FIXME: Allow N_BYTES to be any uintmax_t value, and don't require a
536 buffer of size N_BYTES, in case N_BYTES is very large.
The function allocates its own buffer of N_BYTES bytes; main rejects
values above SIZE_MAX before calling it.  Every chunk is written with
a true new-file flag, so each chunk goes to its own output file.
NOTE(review): the loop structure and some branches are not visible
in this listing. */
539 line_bytes_split (size_t n_bytes)
543 size_t n_buffered = 0;
544 char *buf = xmalloc (n_bytes);
548 /* Fill up the full buffer size from the input file. */
550 size_t to_read = n_bytes - n_buffered;
551 size_t n_read = full_read (STDIN_FILENO, buf + n_buffered, to_read);
552 if (n_read < to_read && errno)
553 error (EXIT_FAILURE, errno, "%s", infile);
555 n_buffered += n_read;
556 if (n_buffered != n_bytes)
563 /* Find where to end this chunk. */
564 bp = buf + n_buffered;
/* With a full buffer, scan backwards from the end to the byte after
the last newline. */
565 if (n_buffered == n_bytes)
567 while (bp > buf && bp[-1] != '\n')
571 /* If chunk has no newlines, use all the chunk. */
573 bp = buf + n_buffered;
575 /* Output the chars as one output file. */
576 cwrite (true, buf, bp - buf);
578 /* Discard the chars we just output; move rest of chunk
579 down to be the start of the next chunk. Source and
580 destination probably overlap. */
581 n_buffered -= bp - buf;
583 memmove (buf, bp, n_buffered);
589 /* -n l/[K/]N: Write lines to files of approximately file size / N.
590 The file is partitioned into file size / N sized portions, with the
591 last assigned any excess. If a line _starts_ within a partition
592 it is written completely to the corresponding file. Since lines
593 are not split even if they overlap a partition, the files written
594 can be larger or smaller than the partition size, and even empty
595 if a line is so long as to completely overlap the partition. */
/* K is the chunk to extract, or zero to write all N chunks to files
(the final loop, which creates any remaining files, runs only when K
is zero).  BUF and BUFSIZE describe the I/O buffer.  The assertion
requires N to be nonzero, K to be at most N and N to be at most the
file size.
NOTE(review): the end of the parameter list, several declarations
and much of the branch structure are not visible in this listing. */
598 lines_chunk_split (uintmax_t k, uintmax_t n, char *buf, size_t bufsize,
601 assert (n && k <= n && n <= file_size);
603 const off_t chunk_size = file_size / n;
604 uintmax_t chunk_no = 1;
/* CHUNK_END is the offset of the last byte of the current chunk. */
605 off_t chunk_end = chunk_size - 1;
607 bool new_file_flag = true;
608 bool chunk_truncated = false;
612 /* Start reading 1 byte before kth chunk of file. */
613 off_t start = (k - 1) * chunk_size - 1;
614 if (lseek (STDIN_FILENO, start, SEEK_CUR) < 0)
615 error (EXIT_FAILURE, errno, "%s", infile);
618 chunk_end = chunk_no * chunk_size - 1;
621 while (n_written < file_size)
623 char *bp = buf, *eob;
624 size_t n_read = full_read (STDIN_FILENO, buf, bufsize);
/* Never process more than FILE_SIZE bytes in total. */
625 n_read = MIN (n_read, file_size - n_written);
626 if (n_read < bufsize && errno)
627 error (EXIT_FAILURE, errno, "%s", infile);
628 else if (n_read == 0)
630 chunk_truncated = false;
638 /* Begin looking for '\n' at last byte of chunk. */
639 off_t skip = MIN (n_read, MAX (0, chunk_end - n_written));
640 char *bp_out = memchr (bp + skip, '\n', n_read - skip);
645 to_write = bp_out - bp;
649 /* We don't use the stdout buffer here since we're writing
650 large chunks from an existing file, so it's more efficient
651 to write out directly. */
652 if (full_write (STDOUT_FILENO, bp, to_write) != to_write
653 && ! ignorable (errno))
654 error (EXIT_FAILURE, errno, "%s", _("write error"));
657 cwrite (new_file_flag, bp, to_write);
658 n_written += to_write;
661 new_file_flag = next;
663 /* A line could have been so long that it skipped
664 entire chunks. So create empty files in that case. */
665 while (next || chunk_end <= n_written - 1)
667 if (!next && bp == eob)
669 /* replenish buf, before going to next chunk. */
670 chunk_truncated = true;
674 if (k && chunk_no > k)
677 chunk_end = file_size - 1; /* >= chunk_size. */
679 chunk_end += chunk_size;
680 if (chunk_end <= n_written - 1)
683 cwrite (true, NULL, 0);
694 /* Ensure NUMBER files are created, which truncates
695 any existing files or notifies any consumers on fifos.
696 FIXME: Should we do this before EXIT_FAILURE? */
697 while (!k && chunk_no++ <= n)
698 cwrite (true, NULL, 0);
701 /* -n K/N: Extract Kth of N chunks.
The chunk size is FILE_SIZE / N; chunk K starts at (K - 1) times that
size, and the last chunk extends to FILE_SIZE so that it absorbs the
remainder.  The chunk is copied from standard input to standard
output using BUF, whose size is BUFSIZE.  The assertion requires K
and N to be nonzero, K to be at most N and N to be at most FILE_SIZE.
NOTE(review): the end of the parameter list, the declarations of
START and END and the copy loop header are not visible in this
listing. */
704 bytes_chunk_extract (uintmax_t k, uintmax_t n, char *buf, size_t bufsize,
710 assert (k && n && k <= n && n <= file_size);
712 start = (k - 1) * (file_size / n);
713 end = (k == n) ? file_size : k * (file_size / n);
/* The seek is relative to the current input offset. */
715 if (lseek (STDIN_FILENO, start, SEEK_CUR) < 0)
716 error (EXIT_FAILURE, errno, "%s", infile);
720 size_t n_read = full_read (STDIN_FILENO, buf, bufsize);
/* Do not copy past the end of the chunk. */
721 n_read = MIN (n_read, end - start);
722 if (n_read < bufsize && errno)
723 error (EXIT_FAILURE, errno, "%s", infile);
724 else if (n_read == 0)
/* An EPIPE is tolerated only when a filter command is in use. */
726 if (full_write (STDOUT_FILENO, buf, n_read) != n_read
727 && ! ignorable (errno))
728 error (EXIT_FAILURE, errno, "%s", quote ("-"));
733 typedef struct of_info
747 /* Rotate file descriptors when we're writing to more output files than we
748 have available file descriptors.
749 Return whether we came under file resource pressure.
750 If so, it's probably best to close each file when finished with it. */
/* FILES has NFILES entries and I_CHECK is the entry that is needed
now.  Work is done only when that entry's OFD is OFD_NEW (never
opened: the file is created) or lower (OFD_APPEND: it was closed
earlier and is reopened for appending); a nonnegative OFD is an open
descriptor.  When the open fails with EMFILE or ENFILE, another open
entry, found by searching backwards from I_CHECK with wraparound, is
closed and marked OFD_APPEND, and the open is tried again.
NOTE(review): the retry loop header, the assignment that records
file pressure and the return statement are not visible in this
listing. */
753 ofile_open (of_t *files, size_t i_check, size_t nfiles)
755 bool file_limit = false;
757 if (files[i_check].ofd <= OFD_NEW)
/* Candidate to close: the entry before I_CHECK, wrapping around. */
760 size_t i_reopen = i_check ? i_check - 1 : nfiles - 1;
762 /* Another process could have opened a file in between the calls to
763 close and open, so we should keep trying until open succeeds or
764 we've closed all of our files. */
767 if (files[i_check].ofd == OFD_NEW)
768 fd = create (files[i_check].of_name);
769 else /* OFD_APPEND */
771 /* Attempt to append to previously opened file.
772 We use O_NONBLOCK to support writing to fifos,
773 where the other end has closed because of our
774 previous close. In that case we'll immediately
775 get an error, rather than waiting indefinitely.
776 In specialised cases the consumer can keep reading
777 from the fifo, terminating on conditions in the data
778 itself, or perhaps never in the case of `tail -f`.
779 I.E. for fifos it is valid to attempt this reopen. */
780 fd = open (files[i_check].of_name,
781 O_WRONLY | O_BINARY | O_APPEND | O_NONBLOCK);
/* Any failure other than running out of descriptors is fatal. */
787 if (!(errno == EMFILE || errno == ENFILE))
788 error (EXIT_FAILURE, errno, "%s", files[i_check].of_name);
792 /* Search backwards for an open file to close. */
793 while (files[i_reopen].ofd < 0)
795 i_reopen = i_reopen ? i_reopen - 1 : nfiles - 1;
796 /* No more open files to close, exit with E[NM]FILE. */
797 if (i_reopen == i_check)
798 error (EXIT_FAILURE, errno, "%s", files[i_check].of_name);
/* Close the victim and mark it so that it is reopened for appending
the next time it is needed. */
801 if (fclose (files[i_reopen].ofile) != 0 && ! ignorable (errno))
802 error (EXIT_FAILURE, errno, "%s", files[i_reopen].of_name);
803 files[i_reopen].ofile = NULL;
804 files[i_reopen].ofd = OFD_APPEND;
/* Record the descriptor, wrap it in an appending stream and remember
the filter process associated with this file. */
807 files[i_check].ofd = fd;
808 if (!(files[i_check].ofile = fdopen (fd, "a")))
809 error (EXIT_FAILURE, errno, "%s", files[i_check].of_name);
810 files[i_check].opid = filter_pid;
817 /* -n r/[K/]N: Divide file into N chunks in round robin fashion.
818 When K == 0, we try to keep the files open in parallel.
819 If we run out of file resources, then we revert
820 to opening and closing each file for each line. */
/* K selects a single chunk whose lines are written to standard
output; N is the number of chunks.  BUF and BUFSIZE describe the I/O
buffer.  One of_t entry is allocated per output file, each starting
as OFD_NEW with no stream and no filter process.
NOTE(review): many lines of this function (declarations, the test on
K, loop headers and braces) are not visible in this listing, so the
comments describe individual visible statements only. */
823 lines_rr (uintmax_t k, uintmax_t n, char *buf, size_t bufsize)
825 bool wrapped = false;
829 of_t *files IF_LINT (= NULL);
837 error (exit_failure, 0, "%s", _("memory exhausted"));
838 files = xnmalloc (n, sizeof *files);
840 /* Generate output file names. */
841 for (i_file = 0; i_file < n; i_file++)
844 files[i_file].of_name = xstrdup (outfile);
845 files[i_file].ofd = OFD_NEW;
846 files[i_file].ofile = NULL;
847 files[i_file].opid = 0;
855 char *bp = buf, *eob;
856 /* Use safe_read() rather than full_read() here
857 so that we process available data immediately. */
858 size_t n_read = safe_read (STDIN_FILENO, buf, bufsize);
859 if (n_read == SAFE_READ_ERROR)
860 error (EXIT_FAILURE, errno, "%s", infile);
861 else if (n_read == 0)
870 /* Find end of line. */
871 char *bp_out = memchr (bp, '\n', eob - bp);
879 to_write = bp_out - bp;
/* Lines whose round robin position equals K go to standard output:
directly with full_write when unbuffered, through stdio otherwise. */
883 if (line_no == k && unbuffered)
885 if (full_write (STDOUT_FILENO, bp, to_write) != to_write
886 && ! ignorable (errno))
887 error (EXIT_FAILURE, errno, "%s", _("write error"));
889 else if (line_no == k && fwrite (bp, to_write, 1, stdout) != 1
890 && ! ignorable (errno))
892 clearerr (stdout); /* To silence close_stdout(). */
893 error (EXIT_FAILURE, errno, "%s", _("write error"));
/* Advance the position, wrapping from N back to 1. */
896 line_no = (line_no == n) ? 1 : line_no + 1;
900 /* Secure file descriptor. */
901 file_limit |= ofile_open (files, i_file, n);
904 /* Note writing to fd, rather than flushing the FILE gives
905 an 8% performance benefit, due to reduced data copying. */
906 if (full_write (files[i_file].ofd, bp, to_write) != to_write
907 && ! ignorable (errno))
908 error (EXIT_FAILURE, errno, "%s", files[i_file].of_name);
910 else if (fwrite (bp, to_write, 1, files[i_file].ofile) != 1
911 && ! ignorable (errno))
912 error (EXIT_FAILURE, errno, "%s", files[i_file].of_name);
/* NOTE(review): errno is tested here without a visible reset before
the writes; confirm that a stale value cannot be observed. */
913 if (! ignorable (errno))
/* Close the file and mark it for reopening in append mode. */
918 if (fclose (files[i_file].ofile) != 0 && ! ignorable (errno))
919 error (EXIT_FAILURE, errno, "%s", files[i_file].of_name);
920 files[i_file].ofile = NULL;
921 files[i_file].ofd = OFD_APPEND;
923 if (next && ++i_file == n)
926 /* If no filters are accepting input, stop reading. */
939 /* Ensure all files created, so that any existing files are truncated,
940 and to signal any waiting fifo consumers.
941 Also, close any open file descriptors.
942 FIXME: Should we do this before EXIT_FAILURE? */
/* NOTE(review): N is a uintmax_t, so storing it in an int narrows the
value for very large chunk counts; consider an unsigned type as wide
as the index. */
945 int ceiling = (wrapped ? n : i_file);
946 for (i_file = 0; i_file < n; i_file++)
/* Entries at or beyond CEILING are opened here unless empty files
are elided. */
948 if (i_file >= ceiling && !elide_empty_files)
949 file_limit |= ofile_open (files, i_file, n);
950 if (files[i_file].ofd >= 0)
951 closeout (files[i_file].ofile, files[i_file].ofd,
952 files[i_file].opid, files[i_file].of_name);
953 files[i_file].ofd = OFD_APPEND;
/* Report that more than one split mode was requested and hand over
to usage with EXIT_FAILURE.  main invokes this whenever a mode option
is seen after split_type has already been set. */
958 #define FAIL_ONLY_ONE_WAY() \
961 error (0, 0, _("cannot split in more than one way")); \
962 usage (EXIT_FAILURE); \
966 /* Parse K/N syntax of chunk options.
SLASH points at the slash inside optarg.  The chunk count after the
slash is stored through N_UNITS.  When text precedes the slash it is
parsed as the chunk number and stored through K_UNITS; it must be
nonzero and must not exceed the chunk count.  Any violation is a
fatal error.
NOTE(review): the rest of the first condition is not visible in this
listing, and presumably the slash is overwritten on a line not shown
so that the chunk number can be parsed from optarg; confirm. */
969 parse_chunk (uintmax_t *k_units, uintmax_t *n_units, char *slash)
972 if (xstrtoumax (slash + 1, NULL, 10, n_units, "") != LONGINT_OK
974 error (EXIT_FAILURE, 0, _("%s: invalid number of chunks"), slash + 1);
975 if (slash != optarg /* a leading number is specified. */
976 && (xstrtoumax (optarg, NULL, 10, k_units, "") != LONGINT_OK
977 || *k_units == 0 || *n_units < *k_units))
978 error (EXIT_FAILURE, 0, _("%s: invalid chunk number"), optarg);
/* Program entry point.  Parse the options into a split mode
(SPLIT_TYPE), a unit count (N_UNITS) and, for chunk extraction, a
chunk number (K_UNITS); choose the suffix length; take the optional
INPUT and PREFIX operands; reopen standard input on the input file;
allocate a page-aligned I/O buffer; and dispatch to the splitting
function for the selected mode.  Finally close standard input and the
last output file and exit with EXIT_SUCCESS.
NOTE(review): the option loop, its case labels, several declarations
and most braces are not visible in this listing, so the comments
describe individual visible statements only. */
983 main (int argc, char **argv)
985 struct stat stat_buf;
986 enum Split_type split_type = type_undef;
987 size_t in_blk_size = 0; /* optimal block size of input file device */
988 char *buf; /* file i/o buffer */
989 size_t page_size = getpagesize ();
990 uintmax_t k_units = 0;
993 static char const multipliers[] = "bEGKkMmPTYZ0";
/* Index of the argument that held the most recent digit option. */
995 int digits_optind = 0;
998 initialize_main (&argc, &argv);
999 set_program_name (argv[0]);
1000 setlocale (LC_ALL, "");
1001 bindtextdomain (PACKAGE, LOCALEDIR);
1002 textdomain (PACKAGE);
1004 atexit (close_stdout);
1006 /* Parse command line options. */
/* Defaults: read standard input and use the prefix x. */
1008 infile = bad_cast ("-");
1009 outbase = bad_cast ("x");
1013 /* This is the argv-index of the option we will read next. */
1014 int this_optind = optind ? optind : 1;
1017 c = getopt_long (argc, argv, "0123456789C:a:b:del:n:u",
/* Suffix length: it must parse as an unsigned long and be small
enough that an array of that many size_t objects cannot overflow. */
1027 if (xstrtoul (optarg, NULL, 10, &tmp, "") != LONGINT_OK
1028 || SIZE_MAX / sizeof (size_t) < tmp)
1030 error (0, 0, _("%s: invalid suffix length"), optarg);
1031 usage (EXIT_FAILURE);
1033 suffix_length = tmp;
/* Byte count per file; size multipliers are accepted. */
1038 if (split_type != type_undef)
1039 FAIL_ONLY_ONE_WAY ();
1040 split_type = type_bytes;
1041 if (xstrtoumax (optarg, NULL, 10, &n_units, multipliers) != LONGINT_OK
1044 error (0, 0, _("%s: invalid number of bytes"), optarg);
1045 usage (EXIT_FAILURE);
1047 /* If input is a pipe, we could get more data than is possible
1048 to write to a single file, so indicate that immediately
1049 rather than having possibly future invocations fail. */
1050 if (OFF_T_MAX < n_units)
1051 error (EXIT_FAILURE, EFBIG,
1052 _("%s: invalid number of bytes"), optarg);
/* Line count per file; no multipliers are accepted. */
1057 if (split_type != type_undef)
1058 FAIL_ONLY_ONE_WAY ();
1059 split_type = type_lines;
1060 if (xstrtoumax (optarg, NULL, 10, &n_units, "") != LONGINT_OK
1063 error (0, 0, _("%s: invalid number of lines"), optarg);
1064 usage (EXIT_FAILURE);
/* Maximum bytes of whole lines per file.  The value must be nonzero
and fit in size_t because line_bytes_split allocates a buffer of that
size. */
1069 if (split_type != type_undef)
1070 FAIL_ONLY_ONE_WAY ();
1071 split_type = type_byteslines;
1072 if (xstrtoumax (optarg, NULL, 10, &n_units, multipliers) != LONGINT_OK
1073 || n_units == 0 || SIZE_MAX < n_units)
1075 error (0, 0, _("%s: invalid number of bytes"), optarg);
1076 usage (EXIT_FAILURE);
1078 if (OFF_T_MAX < n_units)
1079 error (EXIT_FAILURE, EFBIG,
1080 _("%s: invalid number of bytes"), optarg);
/* Chunk specification: an optional r/ or l/ prefix selects round
robin or line-preserving chunks, then either K/N or a plain N. */
1084 if (split_type != type_undef)
1085 FAIL_ONLY_ONE_WAY ();
1086 /* skip any whitespace */
1087 while (isspace (to_uchar (*optarg)))
1089 if (STRNCMP_LIT (optarg, "r/") == 0)
1091 split_type = type_rr;
1094 else if (STRNCMP_LIT (optarg, "l/") == 0)
1096 split_type = type_chunk_lines;
1100 split_type = type_chunk_bytes;
1101 if ((slash = strchr (optarg, '/')))
1102 parse_chunk (&k_units, &n_units, slash);
1103 else if (xstrtoumax (optarg, NULL, 10, &n_units, "") != LONGINT_OK
1105 error (EXIT_FAILURE, 0, _("%s: invalid number of chunks"), optarg);
/* Digit options accumulate a line count one digit at a time. */
1122 if (split_type == type_undef)
1124 split_type = type_digits;
1127 if (split_type != type_undef && split_type != type_digits)
1128 FAIL_ONLY_ONE_WAY ();
1129 if (digits_optind != 0 && digits_optind != this_optind)
1130 n_units = 0; /* More than one number given; ignore other. */
1131 digits_optind = this_optind;
1132 if (!DECIMAL_DIGIT_ACCUMULATE (n_units, c - '0', uintmax_t))
1134 char buffer[INT_BUFSIZE_BOUND (uintmax_t)];
1135 error (EXIT_FAILURE, 0,
1136 _("line count option -%s%c... is too large"),
1137 umaxtostr (n_units, buffer), c);
1142 suffix_alphabet = "0123456789";
1146 elide_empty_files = true;
1150 filter_command = optarg;
1153 case IO_BLKSIZE_OPTION:
1155 uintmax_t tmp_blk_size;
/* The upper bound keeps the later buffer size computation, which
adds the page size, from overflowing.  The diagnostic below uses
status 0, so it does not terminate the program. */
1156 if (xstrtoumax (optarg, NULL, 10, &tmp_blk_size,
1157 multipliers) != LONGINT_OK
1158 || tmp_blk_size == 0 || SIZE_MAX - page_size < tmp_blk_size)
1159 error (0, 0, _("%s: invalid IO block size"), optarg);
1161 in_blk_size = tmp_blk_size;
1165 case VERBOSE_OPTION:
1169 case_GETOPT_HELP_CHAR;
1171 case_GETOPT_VERSION_CHAR (PROGRAM_NAME, AUTHORS);
1174 usage (EXIT_FAILURE);
1178 /* Handle default case. */
1179 if (split_type == type_undef)
1181 split_type = type_lines;
1187 error (0, 0, _("%s: invalid number of lines"), "0");
1188 usage (EXIT_FAILURE);
1191 set_suffix_length (n_units, split_type);
1193 /* Get out the filename arguments. */
1196 infile = argv[optind++];
1199 outbase = argv[optind++];
1203 error (0, 0, _("extra operand %s"), quote (argv[optind]));
1204 usage (EXIT_FAILURE);
1207 /* Open the input file. */
1208 if (! STREQ (infile, "-")
1209 && fd_reopen (STDIN_FILENO, infile, O_RDONLY, 0) < 0)
1210 error (EXIT_FAILURE, errno, _("cannot open %s for reading"),
1213 /* Binary I/O is safer when byte counts are used. */
1214 if (O_BINARY && ! isatty (STDIN_FILENO))
1215 xfreopen (NULL, "rb", stdin);
1217 /* Get the optimal block size of input device and make a buffer. */
1219 if (fstat (STDIN_FILENO, &stat_buf) != 0)
1220 error (EXIT_FAILURE, errno, "%s", infile);
/* An explicit block size option takes precedence. */
1221 if (in_blk_size == 0)
1222 in_blk_size = io_blksize (stat_buf);
1223 file_size = stat_buf.st_size;
/* The chunk modes need the size of the input that remains after the
current offset. */
1225 if (split_type == type_chunk_bytes || split_type == type_chunk_lines)
1227 off_t input_offset = lseek (STDIN_FILENO, 0, SEEK_CUR);
1228 if (input_offset < 0)
1229 error (EXIT_FAILURE, 0, _("%s: cannot determine file size"),
1231 file_size -= input_offset;
1232 /* Overflow, and sanity checking. */
1233 if (OFF_T_MAX < n_units)
1235 char buffer[INT_BUFSIZE_BOUND (uintmax_t)];
1236 error (EXIT_FAILURE, EFBIG, _("%s: invalid number of chunks"),
1237 umaxtostr (n_units, buffer));
1239 /* increase file_size to n_units here, so that we still process
1240 any input data, and create empty files for the rest. */
1241 file_size = MAX (file_size, n_units);
/* Allocate one byte more than the block size plus enough slack to
align the buffer to a page boundary. */
1244 buf = ptr_align (xmalloc (in_blk_size + 1 + page_size - 1), page_size);
1246 /* When filtering, closure of one pipe must not terminate the process,
1247 as there may still be other streams expecting input from us. */
/* Block SIGPIPE unless it is already ignored, and keep the previous
mask in OLDBLOCKED. */
1250 struct sigaction act;
1251 sigemptyset (&newblocked);
1252 sigaction (SIGPIPE, NULL, &act);
1253 if (act.sa_handler != SIG_IGN)
1254 sigaddset (&newblocked, SIGPIPE);
1255 sigprocmask (SIG_BLOCK, &newblocked, &oldblocked);
/* Dispatch on the split mode. */
1262 lines_split (n_units, buf, in_blk_size);
1266 bytes_split (n_units, buf, in_blk_size, 0);
1269 case type_byteslines:
1270 line_bytes_split (n_units);
1273 case type_chunk_bytes:
1275 bytes_split (file_size / n_units, buf, in_blk_size, n_units);
1277 bytes_chunk_extract (k_units, n_units, buf, in_blk_size, file_size);
1280 case type_chunk_lines:
1281 lines_chunk_split (k_units, n_units, buf, in_blk_size, file_size);
1285 /* Note, this is like `sed -n ${k}~${n}p` when k > 0,
1286 but the functionality is provided for symmetry. */
1287 lines_rr (k_units, n_units, buf, in_blk_size);
1294 if (close (STDIN_FILENO) != 0)
1295 error (EXIT_FAILURE, errno, "%s", infile);
/* Close the last output file and reap its filter, if any. */
1296 closeout (NULL, output_desc, filter_pid, outfile);
1298 exit (EXIT_SUCCESS);