1 /* split.c -- split a file into pieces.
2 Copyright (C) 88, 91, 1995-1999 Free Software Foundation, Inc.
4 This program is free software; you can redistribute it and/or modify
5 it under the terms of the GNU General Public License as published by
6 the Free Software Foundation; either version 2, or (at your option)
any later version.
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU General Public License for more details.
14 You should have received a copy of the GNU General Public License
15 along with this program; if not, write to the Free Software Foundation,
16 Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */
18 /* By tege@sics.se, with rms.
To do:
21 * Implement -t CHAR or -t REGEX to specify break characters other
than newline. */
28 #include <sys/types.h>
32 #include "long-options.h"
33 #include "safe-read.h"
38 /* The name this program was run with.  main assigns argv[0] to
   program_name. */
41 /* Base name of output files. */
44 /* Pointer to the end of the prefix in OUTFILE.
45 Suffixes are inserted here.  main sets this to
   outfile + strlen (outfile) right after copying the prefix. */
46 static char *outfile_mid;
48 /* Pointer to the end of OUTFILE.  main starts it two characters past
   outfile_mid; next_file_name moves it forward when it appends `a'
   characters to lengthen the suffix. */
49 static char *outfile_end;
51 /* Name of input file. May be "-". */
54 /* Descriptor on which input file is open.  For a named input, main
   assigns it the result of open (infile, O_RDONLY). */
55 static int input_desc;
57 /* Descriptor on which output file is open.  It is only closed when it
   is >= 0, so a negative value stands for "no output file open". */
58 static int output_desc;
60 /* If nonzero, print a diagnostic on standard error just before each
61 output file is opened. */
/* Long option table handed to getopt_long in main.  Every entry has a
   NULL flag pointer, so the fourth field is the value getopt_long
   reports for that option: --bytes, --lines and --line-bytes reuse the
   character codes of -b, -l and -C, while --verbose is reported as the
   plain integer 2.
   NOTE(review): the terminating entry and the braces of the initializer
   are not visible in this listing. */
64 static struct option const longopts[] =
66 {"bytes", required_argument, NULL, 'b'},
67 {"lines", required_argument, NULL, 'l'},
68 {"line-bytes", required_argument, NULL, 'C'},
69 {"verbose", no_argument, NULL, 2},
/* Fragment of the usage routine; its header and the test that selects
   between the two messages are not visible in this listing.
   What the visible lines do:
   - print a one-line "Try --help" hint to standard error;
   - hold the full help text (synopsis, the -b/-C/-l/-NUMBER/--verbose/
     --help/--version options and the b/k/m SIZE suffixes);
   - print the bug-report address with puts;
   - exit with EXIT_SUCCESS when STATUS is 0 and EXIT_FAILURE otherwise.
   No comments are added further down because the following lines are
   the inside of a string literal. */
77 fprintf (stderr, _("Try `%s --help' for more information.\n"),
82 Usage: %s [OPTION] [INPUT [PREFIX]]\n\
86 Output fixed-size pieces of INPUT to PREFIXaa, PREFIXab, ...; default\n\
87 PREFIX is `x'. With no INPUT, or when INPUT is -, read standard input.\n\
89 -b, --bytes=SIZE put SIZE bytes per output file\n\
90 -C, --line-bytes=SIZE put at most SIZE bytes of lines per output file\n\
91 -l, --lines=NUMBER put NUMBER lines per output file\n\
92 -NUMBER same as -l NUMBER\n\
93 --verbose print a diagnostic to standard error just\n\
94 before each output file is opened\n\
95 --help display this help and exit\n\
96 --version output version information and exit\n\
98 SIZE may have a multiplier suffix: b for 512, k for 1K, m for 1 Meg.\n\
100 puts (_("\nReport bugs to <bug-textutils@gnu.org>."));
102 exit (status == 0 ? EXIT_SUCCESS : EXIT_FAILURE);
105 /* Compute the next sequential output file name suffix and store it
106 into the string `outfile' at the position pointed to by `outfile_mid'.
   Takes no arguments and returns nothing visible here: it works on the
   file-scope pointers outfile_mid and outfile_end.  The suffix is
   advanced like an odometer over lowercase letters: trailing `z's roll
   over to `a' and the character to their left is incremented. */
109 next_file_name (void)
/* Static, so the current suffix width survives between calls; the
   initial width is 2. */
111 static unsigned n_digits = 2;
114 /* Change any suffix of `z's to `a's. */
/* The scan starts at the last suffix character and walks left while it
   sees `z'. */
115 for (p = outfile_end - 1; *p == 'z'; p--)
120 /* Increment the rightmost non-`z' character that was present before the
121 above z/a substitutions. There is guaranteed to be such a character. */
124 /* If the result of that increment operation yielded a `z' and there
125 are only `z's to the left of it, then append two more `a' characters
126 to the end and add 1 (-1 + 2) to the number of digits (we're taking
127 out this `z' and adding two `a's). */
/* The test below fires only when the incremented character is the one
   at outfile_mid, i.e. the first character of the suffix.
   NOTE(review): only the two appends are visible in this listing; the
   rest of the guarded statements (including any update of n_digits)
   are omitted -- confirm against the full source. */
128 if (*p == 'z' && p == outfile_mid)
132 *outfile_end++ = 'a';
133 *outfile_end++ = 'a';
137 /* Write BYTES bytes at BP to an output file.
138 If NEW_FILE_FLAG is nonzero, open the next output file.
139 Otherwise add to the same output file already in use.
   The destination is the file-scope descriptor output_desc.  Failures
   of close, open and full_write are all reported through
   error (EXIT_FAILURE, errno, ...) with the current output file name. */
142 cwrite (int new_file_flag, const char *bp, int bytes)
/* Close the previous output file, if one is open (output_desc >= 0),
   before switching to a new one. */
146 if (output_desc >= 0 && close (output_desc) < 0)
147 error (EXIT_FAILURE, errno, "%s", outfile);
/* NOTE(review): the condition guarding this diagnostic is not visible
   in this listing. */
151 fprintf (stderr, _("creating file `%s'\n"), outfile);
/* Create the file if needed and truncate any existing contents; the
   requested permission bits are 0666. */
152 output_desc = open (outfile,
153 O_WRONLY | O_CREAT | O_TRUNC | O_BINARY, 0666);
155 error (EXIT_FAILURE, errno, "%s", outfile);
/* A negative result from full_write is treated as a write failure. */
157 if (full_write (output_desc, bp, bytes) < 0)
158 error (EXIT_FAILURE, errno, "%s", outfile);
161 /* Read NCHARS bytes from the input file into BUF.
162 Return the number of bytes successfully read.
163 If this is less than NCHARS, do not call `stdread' again.
   Input comes from the file-scope descriptor input_desc via safe_read.
   NOTE(review): the loop around safe_read and its error/end-of-file
   handling are not visible in this listing. */
166 stdread (char *buf, int nchars)
/* Number of bytes still wanted; counts down from NCHARS. */
169 int to_be_read = nchars;
173 n_read = safe_read (input_desc, buf, to_be_read);
178 to_be_read -= n_read;
/* Requested amount minus what is still outstanding = bytes obtained. */
181 return nchars - to_be_read;
184 /* Split into pieces of exactly NCHARS bytes.
185 Use buffer BUF, whose size is BUFSIZE.
   Input is pulled with stdread one buffer at a time and handed to
   cwrite; a read error is reported through
   error (EXIT_FAILURE, errno, ...) with the input file name.
   NOTE(review): several lines of the body (declarations, the loop
   header, the updates of the counters and of new_file_flag) are not
   visible in this listing. */
188 bytes_split (int nchars, char *buf, int bufsize)
/* Starts at 1, so the first cwrite call opens a new output file. */
191 int new_file_flag = 1;
/* Starts at the piece size; presumably the number of bytes still owed
   to the current output file -- confirm against the omitted lines. */
193 int to_write = nchars;
198 n_read = stdread (buf, bufsize);
200 error (EXIT_FAILURE, errno, "%s", infile);
/* Fewer bytes are available than the current piece still needs: write
   what there is (unless that is nothing). */
205 if (to_read < to_write)
207 if (to_read) /* do not write 0 bytes! */
209 cwrite (new_file_flag, bp_out, to_read);
/* Otherwise write exactly the amount the current piece still needs. */
217 cwrite (new_file_flag, bp_out, to_write);
/* Keep going only while stdread fills the whole buffer; a short read
   ends the loop. */
225 while (n_read == bufsize);
228 /* Split into pieces of exactly NLINES lines.
229 Use buffer BUF, whose size is BUFSIZE.
   Input is pulled with stdread one buffer at a time and handed to
   cwrite; a read error is reported through
   error (EXIT_FAILURE, errno, ...) with the input file name.
   NOTE(review): several lines of the body (the loop headers, the line
   counter and the assignments to bp, bp_out and eob) are not visible
   in this listing. */
232 lines_split (int nlines, char *buf, int bufsize)
235 char *bp, *bp_out, *eob;
/* Starts at 1, so the first cwrite call opens a new output file. */
236 int new_file_flag = 1;
241 n_read = stdread (buf, bufsize);
243 error (EXIT_FAILURE, errno, "%s", infile);
/* NOTE(review): this scan has no explicit bound, so it relies on a
   newline being found at or before the end of the data -- presumably a
   sentinel stored at eob in a line not shown.  main allocates
   in_blk_size + 1 bytes for BUF and passes in_blk_size as BUFSIZE,
   which leaves room for one such byte.  Confirm. */
249 while (*bp++ != '\n')
250 ; /* this semicolon takes most of the time */
/* Write out the data between bp_out and eob, unless there is none. */
253 if (eob != bp_out) /* do not write 0 bytes! */
255 cwrite (new_file_flag, bp_out, eob - bp_out);
/* Write out the data between bp_out and the current scan position. */
263 cwrite (new_file_flag, bp_out, bp - bp_out);
/* Keep going only while stdread fills the whole buffer; a short read
   ends the loop. */
270 while (n_read == bufsize);
273 /* Split into pieces that are as large as possible while still not more
274 than NCHARS bytes, and are split on line boundaries except
275 where lines longer than NCHARS bytes occur.
   Unlike the other splitters this one allocates its own buffer of
   NCHARS bytes.  Every chunk is passed to cwrite with a new-file flag
   of 1, so each chunk goes to its own output file.
   NOTE(review): the loop header, the end-of-input flag and some guards
   are not visible in this listing. */
278 line_bytes_split (int nchars)
284 char *buf = (char *) xmalloc (nchars);
288 /* Fill up the full buffer size from the input file. */
/* Data left over from the previous chunk occupies the first n_buffered
   bytes, so only the remainder of the buffer is requested. */
290 n_read = stdread (buf + n_buffered, nchars - n_buffered);
292 error (EXIT_FAILURE, errno, "%s", infile);
294 n_buffered += n_read;
/* The buffer could not be filled completely (statement guarded by this
   test is not visible in this listing). */
295 if (n_buffered != nchars)
298 /* Find where to end this chunk. */
299 bp = buf + n_buffered;
/* Only a completely full buffer is cut back to a line boundary: walk
   left until the byte just before bp is a newline or the start of the
   buffer is reached. */
300 if (n_buffered == nchars)
302 while (bp > buf && bp[-1] != '\n')
306 /* If chunk has no newlines, use all the chunk. */
308 bp = buf + n_buffered;
310 /* Output the chars as one output file. */
/* NOTE(review): no guard against a zero-length write is visible here.
   If none exists in the omitted lines, reaching end of input with an
   empty buffer would still open a new, empty output file -- confirm. */
311 cwrite (1, buf, bp - buf);
313 /* Discard the chars we just output; move rest of chunk
314 down to be the start of the next chunk. Source and
315 destination probably overlap. */
316 n_buffered -= bp - buf;
/* memmove is used because it is defined for overlapping regions. */
318 memmove (buf, bp, n_buffered);
/* Program entry point.  In order, the visible code:
   - records the program name and sets up locale / message catalogs;
   - parses options, allowing exactly one way of splitting (bytes,
     lines, line-bytes, or a bare -NUMBER given as digit options);
   - takes up to two operands, the input file and the output prefix;
   - opens the input ("-" is handled separately from a named file);
   - builds the output name buffer: prefix plus room for the suffix;
   - sizes the I/O buffer from the input's block size (fstat);
   - calls lines_split, bytes_split or line_bytes_split;
   - closes the input and, if one is open, the last output file.
   Failures are reported through error (); option misuse also calls
   usage (EXIT_FAILURE).
   NOTE(review): many lines (braces, the getopt loop and switch labels,
   some declarations and assignments) are not visible in this listing. */
325 main (int argc, char **argv)
327 struct stat stat_buf;
328 int num; /* numeric argument from command line */
331 type_undef, type_bytes, type_byteslines, type_lines, type_digits
332 } split_type = type_undef;
333 int in_blk_size; /* optimal block size of input file device */
334 char *buf; /* file i/o buffer */
/* argv index at which digit options were last seen; 0 = none yet. */
338 int digits_optind = 0;
340 program_name = argv[0];
341 setlocale (LC_ALL, "");
342 bindtextdomain (PACKAGE, LOCALEDIR);
343 textdomain (PACKAGE);
345 parse_long_options (argc, argv, "split", GNU_PACKAGE, VERSION,
346 "Torbjorn Granlund and Richard M. Stallman", usage);
348 /* Parse command line options. */
355 /* This is the argv-index of the option we will read next. */
356 int this_optind = optind ? optind : 1;
/* Each digit is its own option character, which is how -NUMBER is
   recognised.
   NOTE(review): the option string also accepts `v', while longopts
   reports --verbose as 2; no handler for `v' is visible in this
   listing -- confirm how -v is treated. */
359 c = getopt_long (argc, argv, "0123456789vb:l:C:", longopts, (int *) 0);
/* Bytes mode: a second splitting option is rejected. */
369 if (split_type != type_undef)
371 error (0, 0, _("cannot split in more than one way"));
372 usage (EXIT_FAILURE);
374 split_type = type_bytes;
/* The b, k and m suffixes are accepted; negative values and values
   above INT_MAX are rejected because the count is stored in an int. */
375 if (xstrtol (optarg, NULL, 10, &tmp_long, "bkm") != LONGINT_OK
376 || tmp_long < 0 || tmp_long > INT_MAX)
378 error (0, 0, _("%s: invalid number of bytes"), optarg);
379 usage (EXIT_FAILURE);
381 accum = (int) tmp_long;
/* Lines mode: same checks, but no multiplier suffix is accepted. */
385 if (split_type != type_undef)
387 error (0, 0, _("cannot split in more than one way"));
388 usage (EXIT_FAILURE);
390 split_type = type_lines;
391 if (xstrtol (optarg, NULL, 10, &tmp_long, "") != LONGINT_OK
392 || tmp_long < 0 || tmp_long > INT_MAX)
394 error (0, 0, _("%s: invalid number of lines"), optarg);
395 usage (EXIT_FAILURE);
397 accum = (int) tmp_long;
/* Line-bytes mode: same checks and suffixes as bytes mode. */
401 if (split_type != type_undef)
403 error (0, 0, _("cannot split in more than one way"));
404 usage (EXIT_FAILURE);
407 split_type = type_byteslines;
408 if (xstrtol (optarg, NULL, 10, &tmp_long, "bkm") != LONGINT_OK
409 || tmp_long < 0 || tmp_long > INT_MAX)
411 error (0, 0, _("%s: invalid number of bytes"), optarg);
412 usage (EXIT_FAILURE);
414 accum = (int) tmp_long;
/* Digit options: allowed only when no other mode was chosen.  Digits
   are accumulated one character at a time into a decimal number. */
427 if (split_type != type_undef && split_type != type_digits)
429 error (0, 0, _("cannot split in more than one way"));
430 usage (EXIT_FAILURE);
432 if (digits_optind != 0 && digits_optind != this_optind)
433 accum = 0; /* More than one number given; ignore other. */
434 digits_optind = this_optind;
435 split_type = type_digits;
436 accum = accum * 10 + c - '0';
444 usage (EXIT_FAILURE);
448 /* Handle default case. */
449 if (split_type == type_undef)
451 split_type = type_lines;
457 error (0, 0, _("invalid number"));
458 usage (EXIT_FAILURE);
462 /* Get out the filename arguments. */
465 infile = argv[optind++];
468 outbase = argv[optind++];
472 error (0, 0, _("too many arguments"));
473 usage (EXIT_FAILURE);
476 /* Open the input file. */
477 if (STREQ (infile, "-"))
481 input_desc = open (infile, O_RDONLY);
483 error (EXIT_FAILURE, errno, "%s", infile);
485 /* Binary I/O is safer when bytecounts are used. */
486 SET_BINARY (input_desc);
488 /* No output file is open now. */
491 /* Copy the output file prefix so we can add suffixes to it.
492 26**29 is certainly enough output files! */
/* The buffer holds the prefix plus 30 more bytes; those 30 bytes are
   zeroed, so the name stays NUL-terminated as the suffix grows. */
494 outfile = xmalloc (strlen (outbase) + 30);
495 strcpy (outfile, outbase);
496 outfile_mid = outfile + strlen (outfile);
497 outfile_end = outfile_mid + 2;
498 memset (outfile_mid, 0, 30);
499 outfile_mid[0] = 'a';
500 outfile_mid[1] = 'a' - 1; /* first call to next_file_name makes it an 'a' */
502 /* Get the optimal block size of input device and make a buffer. */
504 if (fstat (input_desc, &stat_buf) < 0)
505 error (EXIT_FAILURE, errno, "%s", infile);
506 in_blk_size = ST_BLKSIZE (stat_buf);
/* One byte more than the size passed to the splitters as BUFSIZE. */
508 buf = xmalloc (in_blk_size + 1);
514 lines_split (num, buf, in_blk_size);
518 bytes_split (num, buf, in_blk_size);
521 case type_byteslines:
/* line_bytes_split allocates its own buffer, so BUF is not passed. */
522 line_bytes_split (num);
529 if (close (input_desc) < 0)
530 error (EXIT_FAILURE, errno, "%s", infile);
/* The last output file is still open here unless nothing was written. */
531 if (output_desc >= 0 && close (output_desc) < 0)
532 error (EXIT_FAILURE, errno, "%s", outfile);