1 /* split.c -- split a file into pieces.
2 Copyright (C) 88, 91, 1995-1998, 1999 Free Software Foundation, Inc.
4 This program is free software; you can redistribute it and/or modify
5 it under the terms of the GNU General Public License as published by
6 the Free Software Foundation; either version 2, or (at your option)
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU General Public License for more details.
14 You should have received a copy of the GNU General Public License
15 along with this program; if not, write to the Free Software Foundation,
16 Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */
18 /* By tege@sics.se, with rms.
21 * Implement -t CHAR or -t REGEX to specify break characters other
28 #include <sys/types.h>
33 #include "safe-read.h"
37 /* The name this program was run with. */
40 /* Base name of output files. */
43 /* Pointer to the end of the prefix in OUTFILE.
44 Suffixes are inserted here. */
45 static char *outfile_mid;
47 /* Pointer to the end of OUTFILE. */
48 static char *outfile_end;
50 /* Name of input file. May be "-". */
53 /* Descriptor on which input file is open. */
54 static int input_desc;
56 /* Descriptor on which output file is open. */
57 static int output_desc;
59 /* If nonzero, display usage information and exit. */
62 /* If nonzero, print the version on standard output then exit. */
63 static int show_version;
65 /* If nonzero, print a diagnostic on standard error just before each
66 output file is opened. */
69 static struct option const longopts[] =
71 {"bytes", required_argument, NULL, 'b'},
72 {"lines", required_argument, NULL, 'l'},
73 {"line-bytes", required_argument, NULL, 'C'},
74 {"verbose", no_argument, NULL, 2},
75 {"help", no_argument, &show_help, 1},
76 {"version", no_argument, &show_version, 1},
84 fprintf (stderr, _("Try `%s --help' for more information.\n"),
89 Usage: %s [OPTION] [INPUT [PREFIX]]\n\
93 Output fixed-size pieces of INPUT to PREFIXaa, PREFIXab, ...; default\n\
94 PREFIX is `x'. With no INPUT, or when INPUT is -, read standard input.\n\
96 -b, --bytes=SIZE put SIZE bytes per output file\n\
97 -C, --line-bytes=SIZE put at most SIZE bytes of lines per output file\n\
98 -l, --lines=NUMBER put NUMBER lines per output file\n\
99 -NUMBER same as -l NUMBER\n\
100 --verbose print a diagnostic to standard error just\n\
101 before each output file is opened\n\
102 --help display this help and exit\n\
103 --version output version information and exit\n\
105 SIZE may have a multiplier suffix: b for 512, k for 1K, m for 1 Meg.\n\
107 puts (_("\nReport bugs to <bug-textutils@gnu.org>."));
109 exit (status == 0 ? EXIT_SUCCESS : EXIT_FAILURE);
112 /* Compute the next sequential output file name suffix and store it
113 into the string `outfile' at the position pointed to by `outfile_mid'. */
116 next_file_name (void)
118 static unsigned n_digits = 2;
121 /* Change any suffix of `z's to `a's. */
122 for (p = outfile_end - 1; *p == 'z'; p--)
127 /* Increment the rightmost non-`z' character that was present before the
128 above z/a substitutions. There is guaranteed to be such a character. */
131 /* If the result of that increment operation yielded a `z' and there
132 are only `z's to the left of it, then append two more `a' characters
133 to the end and add 1 (-1 + 2) to the number of digits (we're taking
134 out this `z' and adding two `a's). */
135 if (*p == 'z' && p == outfile_mid)
139 *outfile_end++ = 'a';
140 *outfile_end++ = 'a';
144 /* Write BYTES bytes at BP to an output file.
145 If NEW_FILE_FLAG is nonzero, open the next output file.
146 Otherwise add to the same output file already in use. */
149 cwrite (int new_file_flag, const char *bp, int bytes)
153 if (output_desc >= 0 && close (output_desc) < 0)
154 error (EXIT_FAILURE, errno, "%s", outfile);
158 fprintf (stderr, _("creating file `%s'\n"), outfile);
159 output_desc = open (outfile,
160 O_WRONLY | O_CREAT | O_TRUNC | O_BINARY, 0666);
162 error (EXIT_FAILURE, errno, "%s", outfile);
164 if (full_write (output_desc, bp, bytes) < 0)
165 error (EXIT_FAILURE, errno, "%s", outfile);
168 /* Read NCHARS bytes from the input file into BUF.
169 Return the number of bytes successfully read.
170 If this is less than NCHARS, do not call `stdread' again. */
173 stdread (char *buf, int nchars)
176 int to_be_read = nchars;
180 n_read = safe_read (input_desc, buf, to_be_read);
185 to_be_read -= n_read;
188 return nchars - to_be_read;
191 /* Split into pieces of NCHARS bytes each; the last piece holds whatever
192 input remains and may be shorter. Use buffer BUF, whose size is BUFSIZE. */
195 bytes_split (int nchars, char *buf, int bufsize)
198 int new_file_flag = 1;
200 int to_write = nchars;
205 n_read = stdread (buf, bufsize);
207 error (EXIT_FAILURE, errno, "%s", infile);
212 if (to_read < to_write)
214 if (to_read) /* do not write 0 bytes! */
216 cwrite (new_file_flag, bp_out, to_read);
224 cwrite (new_file_flag, bp_out, to_write);
232 while (n_read == bufsize);
235 /* Split into pieces of NLINES lines each; the last piece holds whatever
236 input remains and may have fewer lines. Use buffer BUF, whose size is BUFSIZE. */
239 lines_split (int nlines, char *buf, int bufsize)
242 char *bp, *bp_out, *eob;
243 int new_file_flag = 1;
248 n_read = stdread (buf, bufsize);
250 error (EXIT_FAILURE, errno, "%s", infile);
256 while (*bp++ != '\n')
257 ; /* this semicolon takes most of the time */
260 if (eob != bp_out) /* do not write 0 bytes! */
262 cwrite (new_file_flag, bp_out, eob - bp_out);
270 cwrite (new_file_flag, bp_out, bp - bp_out);
277 while (n_read == bufsize);
280 /* Split into pieces that are as large as possible while still not more
281 than NCHARS bytes, and are split on line boundaries except
282 where lines longer than NCHARS bytes occur. */
285 line_bytes_split (int nchars)
291 char *buf = (char *) xmalloc (nchars);
295 /* Fill up the full buffer size from the input file. */
297 n_read = stdread (buf + n_buffered, nchars - n_buffered);
299 error (EXIT_FAILURE, errno, "%s", infile);
301 n_buffered += n_read;
302 if (n_buffered != nchars)
305 /* Find where to end this chunk. */
306 bp = buf + n_buffered;
307 if (n_buffered == nchars)
309 while (bp > buf && bp[-1] != '\n')
313 /* If the chunk contains no newline, use the whole chunk. */
315 bp = buf + n_buffered;
317 /* Output the chars as one output file. */
318 cwrite (1, buf, bp - buf);
320 /* Discard the chars we just output; move rest of chunk
321 down to be the start of the next chunk. Source and
322 destination probably overlap. */
323 n_buffered -= bp - buf;
325 memmove (buf, bp, n_buffered);
332 main (int argc, char **argv)
334 struct stat stat_buf;
335 int num; /* numeric argument from command line */
338 type_undef, type_bytes, type_byteslines, type_lines, type_digits
339 } split_type = type_undef;
340 int in_blk_size; /* optimal block size of input file device */
341 char *buf; /* file i/o buffer */
345 int digits_optind = 0;
347 program_name = argv[0];
348 setlocale (LC_ALL, "");
349 bindtextdomain (PACKAGE, LOCALEDIR);
350 textdomain (PACKAGE);
352 /* Parse command line options. */
359 /* This is the argv-index of the option we will read next. */
360 int this_optind = optind ? optind : 1;
363 c = getopt_long (argc, argv, "0123456789vb:l:C:", longopts, (int *) 0);
373 if (split_type != type_undef)
375 error (0, 0, _("cannot split in more than one way"));
376 usage (EXIT_FAILURE);
378 split_type = type_bytes;
379 if (xstrtol (optarg, NULL, 10, &tmp_long, "bkm") != LONGINT_OK
380 || tmp_long < 0 || tmp_long > INT_MAX)
382 error (0, 0, _("%s: invalid number of bytes"), optarg);
383 usage (EXIT_FAILURE);
385 accum = (int) tmp_long;
389 if (split_type != type_undef)
391 error (0, 0, _("cannot split in more than one way"));
392 usage (EXIT_FAILURE);
394 split_type = type_lines;
395 if (xstrtol (optarg, NULL, 10, &tmp_long, "") != LONGINT_OK
396 || tmp_long < 0 || tmp_long > INT_MAX)
398 error (0, 0, _("%s: invalid number of lines"), optarg);
399 usage (EXIT_FAILURE);
401 accum = (int) tmp_long;
405 if (split_type != type_undef)
407 error (0, 0, _("cannot split in more than one way"));
408 usage (EXIT_FAILURE);
411 split_type = type_byteslines;
412 if (xstrtol (optarg, NULL, 10, &tmp_long, "bkm") != LONGINT_OK
413 || tmp_long < 0 || tmp_long > INT_MAX)
415 error (0, 0, _("%s: invalid number of bytes"), optarg);
416 usage (EXIT_FAILURE);
418 accum = (int) tmp_long;
431 if (split_type != type_undef && split_type != type_digits)
433 error (0, 0, _("cannot split in more than one way"));
434 usage (EXIT_FAILURE);
436 if (digits_optind != 0 && digits_optind != this_optind)
437 accum = 0; /* More than one number given; ignore other. */
438 digits_optind = this_optind;
439 split_type = type_digits;
440 accum = accum * 10 + c - '0';
448 usage (EXIT_FAILURE);
454 printf ("split (%s) %s\n", GNU_PACKAGE, VERSION);
461 /* Handle default case. */
462 if (split_type == type_undef)
464 split_type = type_lines;
470 error (0, 0, _("invalid number"));
471 usage (EXIT_FAILURE);
475 /* Get out the filename arguments. */
478 infile = argv[optind++];
481 outbase = argv[optind++];
485 error (0, 0, _("too many arguments"));
486 usage (EXIT_FAILURE);
489 /* Open the input file. */
490 if (STREQ (infile, "-"))
494 input_desc = open (infile, O_RDONLY);
496 error (EXIT_FAILURE, errno, "%s", infile);
498 /* Binary I/O is safer when bytecounts are used. */
499 SET_BINARY (input_desc);
501 /* No output file is open now. */
504 /* Copy the output file prefix so we can add suffixes to it.
505 26**29 is certainly enough output files! */
507 outfile = xmalloc (strlen (outbase) + 30);
508 strcpy (outfile, outbase);
509 outfile_mid = outfile + strlen (outfile);
510 outfile_end = outfile_mid + 2;
511 memset (outfile_mid, 0, 30);
512 outfile_mid[0] = 'a';
513 outfile_mid[1] = 'a' - 1; /* first call to next_file_name makes it an 'a' */
515 /* Get the optimal block size of input device and make a buffer. */
517 if (fstat (input_desc, &stat_buf) < 0)
518 error (EXIT_FAILURE, errno, "%s", infile);
519 in_blk_size = ST_BLKSIZE (stat_buf);
521 buf = xmalloc (in_blk_size + 1);
527 lines_split (num, buf, in_blk_size);
531 bytes_split (num, buf, in_blk_size);
534 case type_byteslines:
535 line_bytes_split (num);
542 if (close (input_desc) < 0)
543 error (EXIT_FAILURE, errno, "%s", infile);
544 if (output_desc >= 0 && close (output_desc) < 0)
545 error (EXIT_FAILURE, errno, "%s", outfile);