1 /* split.c -- split a file into pieces.
2 Copyright (C) 88, 91, 1995-2001 Free Software Foundation, Inc.
4 This program is free software; you can redistribute it and/or modify
5 it under the terms of the GNU General Public License as published by
6 the Free Software Foundation; either version 2, or (at your option)
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU General Public License for more details.
14 You should have received a copy of the GNU General Public License
15 along with this program; if not, write to the Free Software Foundation,
16 Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */
18 /* By tege@sics.se, with rms.
21 * Implement -t CHAR or -t REGEX to specify break characters other
28 #include <sys/types.h>
33 #include "full-write.h"
34 #include "safe-read.h"
37 /* The official name of this program (e.g., no `g' prefix). */
38 #define PROGRAM_NAME "split"
40 #define AUTHORS N_ ("Torbjorn Granlund and Richard M. Stallman")
42 /* The name this program was run with. */
45 /* Base name of output files. */
48 /* Pointer to the end of the prefix in OUTFILE.
49 Suffixes are inserted here. */
50 static char *outfile_mid;
52 /* Pointer to the end of OUTFILE. */
53 static char *outfile_end;
55 /* Name of input file. May be "-". */
58 /* Descriptor on which input file is open. */
59 static int input_desc;
61 /* Descriptor on which output file is open. */
62 static int output_desc;
64 /* If nonzero, print a diagnostic on standard error just before each
65 output file is opened. */
/* Long-option table for getopt_long.  --bytes, --lines and --line-bytes
   return the same characters as their short forms ('b', 'l', 'C') and
   each requires an argument.  --verbose takes no argument and returns
   the value 2 rather than a letter.  The help and version entries come
   from the GETOPT_*_OPTION_DECL macros, which are defined elsewhere.  */
68 static struct option const longopts[] =
70 {"bytes", required_argument, NULL, 'b'},
71 {"lines", required_argument, NULL, 'l'},
72 {"line-bytes", required_argument, NULL, 'C'},
73 {"verbose", no_argument, NULL, 2},
74 {GETOPT_HELP_OPTION_DECL},
75 {GETOPT_VERSION_OPTION_DECL},
83 fprintf (stderr, _("Try `%s --help' for more information.\n"),
88 Usage: %s [OPTION] [INPUT [PREFIX]]\n\
92 Output fixed-size pieces of INPUT to PREFIXaa, PREFIXab, ...; default\n\
93 PREFIX is `x'. With no INPUT, or when INPUT is -, read standard input.\n\
97 Mandatory arguments to long options are mandatory for short options too.\n\
100 -b, --bytes=SIZE put SIZE bytes per output file\n\
101 -C, --line-bytes=SIZE put at most SIZE bytes of lines per output file\n\
102 -l, --lines=NUMBER put NUMBER lines per output file\n\
103 -NUMBER same as -l NUMBER\n\
104 --verbose print a diagnostic to standard error just\n\
105 before each output file is opened\n\
108 --help display this help and exit\n\
109 --version output version information and exit\n\
113 SIZE may have a multiplier suffix: b for 512, k for 1K, m for 1 Meg.\n\
115 puts (_("\nReport bugs to <bug-textutils@gnu.org>."));
117 exit (status == 0 ? EXIT_SUCCESS : EXIT_FAILURE);
120 /* Compute the next sequential output file name suffix and store it
121 into the string `outfile' at the position pointed to by `outfile_mid'. */
/* Takes no arguments; works on the file-scope pointers outfile_mid and
   outfile_end, and advances outfile_end when the suffix grows.  */
124 next_file_name (void)
/* Count of suffix characters ("digits"); static, so the value persists
   from one call to the next.  Starts at 2.  */
126 static unsigned n_digits = 2;
129 /* Change any suffix of `z's to `a's. */
/* The scan starts at the last suffix character and moves left for as
   long as it keeps finding a 'z'.  */
130 for (p = outfile_end - 1; *p == 'z'; p--)
135 /* Increment the rightmost non-`z' character that was present before the
136 above z/a substitutions. There is guaranteed to be such a character. */
139 /* If the result of that increment operation yielded a `z' and there
140 are only `z's to the left of it, then append two more `a' characters
141 to the end and add 1 (-1 + 2) to the number of digits (we're taking
142 out this `z' and adding two `a's). */
/* The test p == outfile_mid is true only when P is the first suffix
   character, i.e. the one sitting right after the prefix.  */
143 if (*p == 'z' && p == outfile_mid)
/* Each store lengthens the name by one character and moves outfile_end
   past it.  */
147 *outfile_end++ = 'a';
148 *outfile_end++ = 'a';
152 /* Write BYTES bytes at BP to an output file.
153 If NEW_FILE_FLAG is nonzero, open the next output file.
154 Otherwise add to the same output file already in use. */
/* Every failure in this function is reported through error () with
   EXIT_FAILURE, errno and the current output file name.  */
157 cwrite (int new_file_flag, const char *bp, int bytes)
/* Close the output file that is currently open, if any; output_desc is
   negative when no file is open.  */
161 if (output_desc >= 0 && close (output_desc) < 0)
162 error (EXIT_FAILURE, errno, "%s", outfile);
/* Diagnostic naming the file about to be created.
   NOTE(review): presumably guarded by the verbose flag on a line not
   shown here -- confirm.  */
166 fprintf (stderr, _("creating file `%s'\n"), outfile);
/* Create the file, or truncate it if it already exists; write-only,
   requested mode 0666.  */
167 output_desc = open (outfile,
168 O_WRONLY | O_CREAT | O_TRUNC | O_BINARY, 0666);
170 error (EXIT_FAILURE, errno, "%s", outfile);
/* Any result from full_write other than BYTES counts as a failure.  */
172 if (full_write (output_desc, bp, bytes) != bytes)
173 error (EXIT_FAILURE, errno, "%s", outfile);
176 /* Read NCHARS bytes from the input file into BUF.
177 Return the number of bytes successfully read.
178 If this is less than NCHARS, do not call `stdread' again. */
/* Input comes from the file-scope descriptor input_desc.  */
181 stdread (char *buf, int nchars)
/* Bytes still wanted; reduced by each successful read.  */
184 int to_be_read = nchars;
188 n_read = safe_read (input_desc, buf, to_be_read);
193 to_be_read -= n_read;
/* Total obtained = amount requested minus amount still outstanding.  */
196 return nchars - to_be_read;
199 /* Split into pieces of exactly NCHARS bytes.
200 Use buffer BUF, whose size is BUFSIZE. */
/* Input is pulled in BUFSIZE-sized reads via stdread and handed to
   cwrite; a read error is fatal and reported against `infile'.  */
203 bytes_split (int nchars, char *buf, int bufsize)
/* Starts at 1 so that the first cwrite call opens an output file.  */
206 int new_file_flag = 1;
/* Bytes the current output file still needs; starts at a full piece.  */
208 int to_write = nchars;
213 n_read = stdread (buf, bufsize);
215 error (EXIT_FAILURE, errno, "%s", infile);
/* Fewer bytes are available than the current piece still needs: write
   whatever is there, unless that is nothing at all.  */
220 if (to_read < to_write)
222 if (to_read) /* do not write 0 bytes! */
224 cwrite (new_file_flag, bp_out, to_read);
/* Otherwise there is enough input to complete the current piece.  */
232 cwrite (new_file_flag, bp_out, to_write);
/* A read that did not fill the whole buffer ends the loop.  */
240 while (n_read == bufsize);
243 /* Split into pieces of exactly NLINES lines.
244 Use buffer BUF, whose size is BUFSIZE. */
/* Input is pulled in BUFSIZE-sized reads via stdread and handed to
   cwrite; a read error is fatal and reported against `infile'.  */
247 lines_split (int nlines, char *buf, int bufsize)
250 char *bp, *bp_out, *eob;
/* Starts at 1 so that the first cwrite call opens an output file.  */
251 int new_file_flag = 1;
256 n_read = stdread (buf, bufsize);
258 error (EXIT_FAILURE, errno, "%s", infile);
/* Advance BP to just past the next newline.  The loop has no bounds
   check of its own.
   NOTE(review): presumably a newline sentinel is stored at EOB on a
   line not shown here (main allocates one byte more than the block
   size) -- confirm before changing the buffer handling.  */
264 while (*bp++ != '\n')
265 ; /* this semicolon takes most of the time */
/* Write the bytes from BP_OUT up to EOB, if there are any.  */
268 if (eob != bp_out) /* do not write 0 bytes! */
270 cwrite (new_file_flag, bp_out, eob - bp_out);
/* Write the bytes from BP_OUT up to BP.  */
278 cwrite (new_file_flag, bp_out, bp - bp_out);
/* A read that did not fill the whole buffer ends the loop.  */
285 while (n_read == bufsize);
288 /* Split into pieces that are as large as possible while still not more
289 than NCHARS bytes, and are split on line boundaries except
290 where lines longer than NCHARS bytes occur. */
/* Unlike the other splitters this one allocates its own buffer instead
   of receiving one from the caller.  */
293 line_bytes_split (int nchars)
/* Working buffer holding exactly one maximum-size piece.  */
299 char *buf = (char *) xmalloc (nchars);
303 /* Fill up the full buffer size from the input file. */
/* New data lands after the N_BUFFERED bytes already in the buffer, and
   only the remaining free space is requested.  */
305 n_read = stdread (buf + n_buffered, nchars - n_buffered);
307 error (EXIT_FAILURE, errno, "%s", infile);
309 n_buffered += n_read;
310 if (n_buffered != nchars)
313 /* Find where to end this chunk. */
/* BP starts at the end of the buffered data.  It is only moved back to
   a line boundary when the buffer is completely full.  */
314 bp = buf + n_buffered;
315 if (n_buffered == nchars)
/* Scan backwards until BP sits just after a newline or reaches the
   start of the buffer.  */
317 while (bp > buf && bp[-1] != '\n')
321 /* If chunk has no newlines, use all the chunk. */
323 bp = buf + n_buffered;
325 /* Output the chars as one output file. */
/* The new-file flag is the constant 1: every chunk written here goes
   to a freshly opened output file.  */
326 cwrite (1, buf, bp - buf);
328 /* Discard the chars we just output; move rest of chunk
329 down to be the start of the next chunk. Source and
330 destination probably overlap. */
331 n_buffered -= bp - buf;
/* memmove, not memcpy, because the two regions may overlap.  */
333 memmove (buf, bp, n_buffered);
/* Program entry point: parse the options to pick one splitting mode
   and its numeric argument, take the optional INPUT and PREFIX
   operands, open the input, prepare the output-name buffer, run the
   selected splitter and close both descriptors.  */
340 main (int argc, char **argv)
342 struct stat stat_buf;
343 int num; /* numeric argument from command line */
/* Splitting mode requested so far; stays type_undef until an option
   selects one.  type_digits marks a count given as -NUMBER.  */
346 type_undef, type_bytes, type_byteslines, type_lines, type_digits
347 } split_type = type_undef;
348 int in_blk_size; /* optimal block size of input file device */
349 char *buf; /* file i/o buffer */
/* argv index of the word that supplied the most recent -NUMBER digit;
   0 while no digit has been seen.  */
353 int digits_optind = 0;
355 program_name = argv[0];
356 setlocale (LC_ALL, "");
357 bindtextdomain (PACKAGE, LOCALEDIR);
358 textdomain (PACKAGE);
360 atexit (close_stdout);
362 /* Parse command line options. */
369 /* This is the argv-index of the option we will read next. */
370 int this_optind = optind ? optind : 1;
/* The digits 0-9 are option characters, so a -NUMBER argument arrives
   here one digit at a time.  */
373 c = getopt_long (argc, argv, "0123456789vb:l:C:", longopts, (int *) 0);
/* Byte-count mode.  A second mode selection is rejected.  The argument
   may carry a b, k or m suffix and must parse to a value between 0 and
   INT_MAX.  */
383 if (split_type != type_undef)
385 error (0, 0, _("cannot split in more than one way"));
386 usage (EXIT_FAILURE);
388 split_type = type_bytes;
389 if (xstrtol (optarg, NULL, 10, &tmp_long, "bkm") != LONGINT_OK
390 || tmp_long < 0 || tmp_long > INT_MAX)
392 error (0, 0, _("%s: invalid number of bytes"), optarg);
393 usage (EXIT_FAILURE);
395 accum = (int) tmp_long;
/* Line-count mode.  Same checks as above, but the suffix list passed
   to xstrtol is empty.  */
399 if (split_type != type_undef)
401 error (0, 0, _("cannot split in more than one way"));
402 usage (EXIT_FAILURE);
404 split_type = type_lines;
405 if (xstrtol (optarg, NULL, 10, &tmp_long, "") != LONGINT_OK
406 || tmp_long < 0 || tmp_long > INT_MAX)
408 error (0, 0, _("%s: invalid number of lines"), optarg);
409 usage (EXIT_FAILURE);
411 accum = (int) tmp_long;
/* Line-bytes mode.  Parsed exactly like the byte count, including the
   b, k and m suffixes.  */
415 if (split_type != type_undef)
417 error (0, 0, _("cannot split in more than one way"));
418 usage (EXIT_FAILURE);
421 split_type = type_byteslines;
422 if (xstrtol (optarg, NULL, 10, &tmp_long, "bkm") != LONGINT_OK
423 || tmp_long < 0 || tmp_long > INT_MAX)
425 error (0, 0, _("%s: invalid number of bytes"), optarg);
426 usage (EXIT_FAILURE);
428 accum = (int) tmp_long;
/* A digit option.  Digits may follow earlier digits but may not be
   combined with any other mode.  */
441 if (split_type != type_undef && split_type != type_digits)
443 error (0, 0, _("cannot split in more than one way"));
444 usage (EXIT_FAILURE);
/* A digit that comes from a different argv word than the previous
   digits starts a new number.  */
446 if (digits_optind != 0 && digits_optind != this_optind)
447 accum = 0; /* More than one number given; ignore other. */
448 digits_optind = this_optind;
449 split_type = type_digits;
/* Append the digit in base 10.  No overflow check is visible here.  */
450 accum = accum * 10 + c - '0';
457 case_GETOPT_HELP_CHAR;
459 case_GETOPT_VERSION_CHAR (PROGRAM_NAME, AUTHORS);
462 usage (EXIT_FAILURE);
466 /* Handle default case. */
/* With no mode option given, the mode becomes type_lines.  */
467 if (split_type == type_undef)
469 split_type = type_lines;
/* NOTE(review): the condition guarding this diagnostic is not shown.
   The option checks above accept 0, so this is presumably where a
   zero count is rejected -- confirm.  */
475 error (0, 0, _("invalid number"));
476 usage (EXIT_FAILURE);
480 /* Get out the filename arguments. */
483 infile = argv[optind++];
486 outbase = argv[optind++];
490 error (0, 0, _("too many arguments"));
491 usage (EXIT_FAILURE);
494 /* Open the input file. */
/* The name "-" is handled separately from a real path.  */
495 if (STREQ (infile, "-"))
499 input_desc = open (infile, O_RDONLY);
501 error (EXIT_FAILURE, errno, "%s", infile);
503 /* Binary I/O is safer when bytecounts are used. */
504 SET_BINARY (input_desc);
506 /* No output file is open now. */
509 /* Copy the output file prefix so we can add suffixes to it.
510 26**29 is certainly enough output files! */
/* The buffer has room for the prefix plus 30 more bytes, and all 30
   are zeroed, so bytes beyond the suffix are NUL.  */
512 outfile = xmalloc (strlen (outbase) + 30);
513 strcpy (outfile, outbase);
514 outfile_mid = outfile + strlen (outfile);
515 outfile_end = outfile_mid + 2;
516 memset (outfile_mid, 0, 30);
517 outfile_mid[0] = 'a';
518 outfile_mid[1] = 'a' - 1; /* first call to next_file_name makes it an 'a' */
520 /* Get the optimal block size of input device and make a buffer. */
522 if (fstat (input_desc, &stat_buf) < 0)
523 error (EXIT_FAILURE, errno, "%s", infile);
524 in_blk_size = ST_BLKSIZE (stat_buf);
/* One byte more than the block size is allocated.
   NOTE(review): presumably room for a sentinel byte used by
   lines_split -- confirm.  */
526 buf = xmalloc (in_blk_size + 1);
/* Run the splitter for the chosen mode.  The line and byte splitters
   share BUF; line_bytes_split takes only the count.  */
532 lines_split (num, buf, in_blk_size);
536 bytes_split (num, buf, in_blk_size);
539 case type_byteslines:
540 line_bytes_split (num);
/* Close the input, then the last output file if one is open; a failure
   of either close is reported with EXIT_FAILURE.  */
547 if (close (input_desc) < 0)
548 error (EXIT_FAILURE, errno, "%s", infile);
549 if (output_desc >= 0 && close (output_desc) < 0)
550 error (EXIT_FAILURE, errno, "%s", outfile);