1 /* split.c -- split a file into pieces.
2 Copyright (C) 88, 91, 1995-2001 Free Software Foundation, Inc.
4 This program is free software; you can redistribute it and/or modify
5 it under the terms of the GNU General Public License as published by
6 the Free Software Foundation; either version 2, or (at your option)
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU General Public License for more details.
14 You should have received a copy of the GNU General Public License
15 along with this program; if not, write to the Free Software Foundation,
16 Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */
18 /* By tege@sics.se, with rms.
21 * Implement -t CHAR or -t REGEX to specify break characters other
28 #include <sys/types.h>
33 #include "full-write.h"
34 #include "safe-read.h"
37 /* The official name of this program (e.g., no `g' prefix). */
38 #define PROGRAM_NAME "split"
40 #define AUTHORS N_ ("Torbjorn Granlund and Richard M. Stallman")
42 /* The name this program was run with. */
45 /* Base name of output files. */
48 /* Pointer to the end of the prefix in OUTFILE.
49 Suffixes are inserted here. */
50 static char *outfile_mid;
52 /* Pointer to the end of OUTFILE. */
53 static char *outfile_end;
55 /* Name of input file. May be "-". */
58 /* Descriptor on which input file is open. */
59 static int input_desc;
61 /* Descriptor on which output file is open. */
62 static int output_desc;
64 /* If nonzero, print a diagnostic on standard error just before each
65 output file is opened. */
/* Long-option table handed to getopt_long in main.
   Each of --bytes, --lines and --line-bytes requires an argument and
   yields the same value as the matching short option (b, l, C).
   --verbose takes no argument and yields the integer 2 rather than a
   character code.  */
68 static struct option const longopts[] =
70 {"bytes", required_argument, NULL, 'b'},
71 {"lines", required_argument, NULL, 'l'},
72 {"line-bytes", required_argument, NULL, 'C'},
73 {"verbose", no_argument, NULL, 2},
74 {GETOPT_HELP_OPTION_DECL},
75 {GETOPT_VERSION_OPTION_DECL},
83 fprintf (stderr, _("Try `%s --help' for more information.\n"),
88 Usage: %s [OPTION] [INPUT [PREFIX]]\n\
92 Output fixed-size pieces of INPUT to PREFIXaa, PREFIXab, ...; default\n\
93 PREFIX is `x'. With no INPUT, or when INPUT is -, read standard input.\n\
95 Mandatory arguments to long options are mandatory for short options too.\n\
96 -b, --bytes=SIZE put SIZE bytes per output file\n\
97 -C, --line-bytes=SIZE put at most SIZE bytes of lines per output file\n\
98 -l, --lines=NUMBER put NUMBER lines per output file\n\
101 -NUMBER same as -l NUMBER\n\
102 --verbose print a diagnostic to standard error just\n\
103 before each output file is opened\n\
104 --help display this help and exit\n\
105 --version output version information and exit\n\
107 SIZE may have a multiplier suffix: b for 512, k for 1K, m for 1 Meg.\n\
109 puts (_("\nReport bugs to <bug-textutils@gnu.org>."));
111 exit (status == 0 ? EXIT_SUCCESS : EXIT_FAILURE);
114 /* Compute the next sequential output file name suffix and store it
115 into the string `outfile' at the position pointed to by `outfile_mid'. */
/* Takes no arguments.  Works on the file-scope pointers outfile_mid
   (start of the suffix) and outfile_end (end of the name); outfile_end
   is advanced when the suffix is lengthened.  */
118 next_file_name (void)
/* Static, so the value persists across calls; it starts at 2.  */
120 static unsigned n_digits = 2;
123 /* Change any suffix of `z's to `a's. */
/* The scan starts at the last character of the name and moves left
   for as long as the character under P is a z.  */
124 for (p = outfile_end - 1; *p == 'z'; p--)
129 /* Increment the rightmost non-`z' character that was present before the
130 above z/a substitutions. There is guaranteed to be such a character. */
133 /* If the result of that increment operation yielded a `z' and there
134 are only `z's to the left of it, then append two more `a' characters
135 to the end and add 1 (-1 + 2) to the number of digits (we're taking
136 out this `z' and adding two `a's). */
/* The test holds only when P sits on the first suffix position
   (outfile_mid) and that position now contains a z.  */
137 if (*p == 'z' && p == outfile_mid)
/* Each store places an a at the current end of the name and then
   advances outfile_end by one position.  */
141 *outfile_end++ = 'a';
142 *outfile_end++ = 'a';
146 /* Write BYTES bytes at BP to an output file.
147 If NEW_FILE_FLAG is nonzero, open the next output file.
148 Otherwise add to the same output file already in use. */
/* Uses the file-scope output_desc and outfile.  Every failure in the
   visible code is reported through error with EXIT_FAILURE as the
   status argument and the output file name as the message.  */
151 cwrite (int new_file_flag, const char *bp, int bytes)
/* Close the output descriptor only when one is open (output_desc >= 0);
   a failing close is treated as an error.  */
155 if (output_desc >= 0 && close (output_desc) < 0)
156 error (EXIT_FAILURE, errno, "%s", outfile);
/* Diagnostic on stderr naming the file about to be created.
   NOTE(review): the guarding condition is not visible here; presumably
   this is emitted only in verbose mode -- confirm.  */
160 fprintf (stderr, _("creating file `%s'\n"), outfile);
/* Open write-only, creating the file if needed and truncating any
   existing contents; requested mode is 0666.  */
161 output_desc = open (outfile,
162 O_WRONLY | O_CREAT | O_TRUNC | O_BINARY, 0666);
164 error (EXIT_FAILURE, errno, "%s", outfile);
/* Any result from full_write other than BYTES counts as a failure.  */
166 if (full_write (output_desc, bp, bytes) != bytes)
167 error (EXIT_FAILURE, errno, "%s", outfile);
170 /* Read NCHARS bytes from the input file into BUF.
171 Return the number of bytes successfully read.
172 If this is less than NCHARS, do not call `stdread' again. */
/* Reads from the file-scope descriptor input_desc via safe_read.
   NOTE(review): the loop structure and the handling of a failed or
   zero-length read are not visible here; callers compare the result
   against their buffer size to detect the end of input.  */
175 stdread (char *buf, int nchars)
/* Number of bytes still wanted; starts at the full request.  */
178 int to_be_read = nchars;
182 n_read = safe_read (input_desc, buf, to_be_read);
/* Account for the bytes that this read delivered.  */
187 to_be_read -= n_read;
/* Bytes obtained = bytes requested minus bytes still outstanding.  */
190 return nchars - to_be_read;
193 /* Split into pieces of exactly NCHARS bytes.
194 Use buffer BUF, whose size is BUFSIZE. */
/* Input arrives through stdread in blocks of up to BUFSIZE bytes and
   is handed to cwrite.  When the buffered data runs out before a piece
   is complete, only the available bytes are written, so a piece can be
   written in several calls and the final one may end up shorter than
   NCHARS.  */
197 bytes_split (int nchars, char *buf, int bufsize)
/* Starts at 1 so that the first cwrite call opens an output file.  */
200 int new_file_flag = 1;
/* Bytes still to be written to the current piece.  */
202 int to_write = nchars;
207 n_read = stdread (buf, bufsize);
/* NOTE(review): the condition guarding this error call is not visible
   here; presumably it fires when stdread reports a failure -- confirm.  */
209 error (EXIT_FAILURE, errno, "%s", infile);
/* Fewer bytes are buffered than the current piece still needs.  */
214 if (to_read < to_write)
216 if (to_read) /* do not write 0 bytes! */
218 cwrite (new_file_flag, bp_out, to_read);
/* Enough bytes are buffered: write the remainder of the piece.  */
226 cwrite (new_file_flag, bp_out, to_write);
/* A read that did not fill the whole buffer ends the loop.  */
234 while (n_read == bufsize);
237 /* Split into pieces of exactly NLINES lines.
238 Use buffer BUF, whose size is BUFSIZE. */
/* Input arrives through stdread in blocks of up to BUFSIZE bytes;
   lines are delimited by the newline character.  Output goes through
   cwrite, and zero-length writes are skipped.  */
241 lines_split (int nlines, char *buf, int bufsize)
/* BP is the scan position and BP_OUT the start of the data not yet
   written; EOB is used below as the end of that data.  */
244 char *bp, *bp_out, *eob;
/* Starts at 1 so that the first cwrite call opens an output file.  */
245 int new_file_flag = 1;
250 n_read = stdread (buf, bufsize);
/* NOTE(review): the condition guarding this error call is not visible
   here; presumably it fires when stdread reports a failure -- confirm.  */
252 error (EXIT_FAILURE, errno, "%s", infile);
/* Advance BP to just past the next newline.
   NOTE(review): this scan has no bound of its own.  It presumably
   relies on a newline sentinel stored just past the data (main
   allocates in_blk_size + 1 bytes for BUF) -- confirm that the
   sentinel store exists before changing the buffer size.  */
258 while (*bp++ != '\n')
259 ; /* this semicolon takes most of the time */
/* Write whatever lies between BP_OUT and EOB, unless it is empty.  */
262 if (eob != bp_out) /* do not write 0 bytes! */
264 cwrite (new_file_flag, bp_out, eob - bp_out);
/* Write everything from BP_OUT up to, not including, BP.  */
272 cwrite (new_file_flag, bp_out, bp - bp_out);
/* A read that did not fill the whole buffer ends the loop.  */
279 while (n_read == bufsize);
282 /* Split into pieces that are as large as possible while still not more
283 than NCHARS bytes, and are split on line boundaries except
284 where lines longer than NCHARS bytes occur. */
/* Unlike bytes_split and lines_split, this function allocates its own
   buffer.  Every chunk is passed to cwrite with a new-file flag of 1,
   so each chunk goes to a separate output file.  */
287 line_bytes_split (int nchars)
/* Work buffer of exactly NCHARS bytes, the maximum size of a piece.
   No matching free is visible in this listing.  */
293 char *buf = (char *) xmalloc (nchars);
297 /* Fill up the full buffer size from the input file. */
/* Read into the free space that follows the N_BUFFERED bytes carried
   over from the previous chunk.  */
299 n_read = stdread (buf + n_buffered, nchars - n_buffered);
/* NOTE(review): the condition guarding this error call is not visible
   here; presumably it fires when stdread reports a failure -- confirm.  */
301 error (EXIT_FAILURE, errno, "%s", infile);
303 n_buffered += n_read;
/* The buffer could not be filled completely.  */
304 if (n_buffered != nchars)
307 /* Find where to end this chunk. */
308 bp = buf + n_buffered;
/* Only a completely full buffer is searched for a line boundary.  */
309 if (n_buffered == nchars)
/* Walk BP backwards until it sits just after a newline or reaches the
   start of the buffer.  */
311 while (bp > buf && bp[-1] != '\n')
315 /* If chunk has no newlines, use all the chunk. */
317 bp = buf + n_buffered;
319 /* Output the chars as one output file. */
320 cwrite (1, buf, bp - buf);
322 /* Discard the chars we just output; move rest of chunk
323 down to be the start of the next chunk. Source and
324 destination probably overlap. */
325 n_buffered -= bp - buf;
/* memmove rather than memcpy because the two regions may overlap.  */
327 memmove (buf, bp, n_buffered);
/* Program entry point: parse the options, pick exactly one splitting
   mode, open the input, prepare the output file name buffer and
   dispatch to lines_split, bytes_split or line_bytes_split.
   NOTE(review): the case labels and several declarations are not
   visible in this listing; the comments below infer which option each
   branch serves from its error message and from the split_type value
   that the branch assigns.  */
334 main (int argc, char **argv)
336 struct stat stat_buf;
337 int num; /* numeric argument from command line */
/* How the input is to be split; type_undef until an option selects a
   mode.  */
340 type_undef, type_bytes, type_byteslines, type_lines, type_digits
341 } split_type = type_undef;
342 int in_blk_size; /* optimal block size of input file device */
343 char *buf; /* file i/o buffer */
/* argv index of the most recently seen digit option; 0 means that no
   digit option has been seen yet.  */
347 int digits_optind = 0;
349 program_name = argv[0];
350 setlocale (LC_ALL, "");
351 bindtextdomain (PACKAGE, LOCALEDIR);
352 textdomain (PACKAGE);
/* Register close_stdout to run at exit.  */
354 atexit (close_stdout);
356 /* Parse command line options. */
363 /* This is the argv-index of the option we will read next. */
364 int this_optind = optind ? optind : 1;
/* Digits are accepted as options so that -NUMBER can be parsed one
   digit at a time; b, l and C each take an argument.
   NOTE(review): the option string also accepts v, but no handler for
   it is visible in this listing -- confirm how it is treated.  */
367 c = getopt_long (argc, argv, "0123456789vb:l:C:", longopts, (int *) 0);
/* Byte-count mode (split_type becomes type_bytes).  A second
   mode-selecting option is rejected.  */
377 if (split_type != type_undef)
379 error (0, 0, _("cannot split in more than one way"));
380 usage (EXIT_FAILURE);
382 split_type = type_bytes;
/* The size may carry a b, k or m suffix; only values in
   [0, INT_MAX] are accepted so that the cast to int below cannot
   change the value.  */
383 if (xstrtol (optarg, NULL, 10, &tmp_long, "bkm") != LONGINT_OK
384 || tmp_long < 0 || tmp_long > INT_MAX)
386 error (0, 0, _("%s: invalid number of bytes"), optarg);
387 usage (EXIT_FAILURE);
389 accum = (int) tmp_long;
/* Line-count mode (split_type becomes type_lines).  */
393 if (split_type != type_undef)
395 error (0, 0, _("cannot split in more than one way"));
396 usage (EXIT_FAILURE);
398 split_type = type_lines;
/* No multiplier suffix is permitted for a line count (empty suffix
   list); the same [0, INT_MAX] range check applies.  */
399 if (xstrtol (optarg, NULL, 10, &tmp_long, "") != LONGINT_OK
400 || tmp_long < 0 || tmp_long > INT_MAX)
402 error (0, 0, _("%s: invalid number of lines"), optarg);
403 usage (EXIT_FAILURE);
405 accum = (int) tmp_long;
/* Line-bytes mode (split_type becomes type_byteslines).  */
409 if (split_type != type_undef)
411 error (0, 0, _("cannot split in more than one way"));
412 usage (EXIT_FAILURE);
415 split_type = type_byteslines;
416 if (xstrtol (optarg, NULL, 10, &tmp_long, "bkm") != LONGINT_OK
417 || tmp_long < 0 || tmp_long > INT_MAX)
419 error (0, 0, _("%s: invalid number of bytes"), optarg);
420 usage (EXIT_FAILURE);
422 accum = (int) tmp_long;
/* Digit option: part of a -NUMBER argument.  Digits may follow earlier
   digits but may not be combined with any other mode.  */
435 if (split_type != type_undef && split_type != type_digits)
437 error (0, 0, _("cannot split in more than one way"));
438 usage (EXIT_FAILURE);
/* A digit found in a different argv element than the previous digit
   starts a new number, so the earlier value is dropped.  */
440 if (digits_optind != 0 && digits_optind != this_optind)
441 accum = 0; /* More than one number given; ignore other. */
442 digits_optind = this_optind;
443 split_type = type_digits;
/* Append the decimal digit C to the accumulated value.
   NOTE(review): no overflow check on accum is visible here; a very
   long digit string could overflow the accumulator -- confirm.  */
444 accum = accum * 10 + c - '0';
451 case_GETOPT_HELP_CHAR;
453 case_GETOPT_VERSION_CHAR (PROGRAM_NAME, AUTHORS);
456 usage (EXIT_FAILURE);
460 /* Handle default case. */
/* With no mode option given, split by lines.  */
461 if (split_type == type_undef)
463 split_type = type_lines;
469 error (0, 0, _("invalid number"));
470 usage (EXIT_FAILURE);
474 /* Get out the filename arguments. */
/* The first operand, if any, names the input file and the second the
   output prefix; the guarding tests are not visible in this listing.  */
477 infile = argv[optind++];
480 outbase = argv[optind++];
484 error (0, 0, _("too many arguments"));
485 usage (EXIT_FAILURE);
488 /* Open the input file. */
/* An input name of "-" is handled separately from a real file name.  */
489 if (STREQ (infile, "-"))
493 input_desc = open (infile, O_RDONLY);
495 error (EXIT_FAILURE, errno, "%s", infile);
497 /* Binary I/O is safer when bytecounts are used. */
498 SET_BINARY (input_desc);
500 /* No output file is open now. */
503 /* Copy the output file prefix so we can add suffixes to it.
504 26**29 is certainly enough output files! */
/* The buffer holds the prefix plus 30 further bytes; the memset below
   zeroes exactly those 30 bytes, which also terminates the string
   after the suffix.  */
506 outfile = xmalloc (strlen (outbase) + 30);
507 strcpy (outfile, outbase);
508 outfile_mid = outfile + strlen (outfile);
/* The suffix is initially two characters long.  */
509 outfile_end = outfile_mid + 2;
510 memset (outfile_mid, 0, 30);
511 outfile_mid[0] = 'a';
512 outfile_mid[1] = 'a' - 1; /* first call to next_file_name makes it an 'a' */
514 /* Get the optimal block size of input device and make a buffer. */
516 if (fstat (input_desc, &stat_buf) < 0)
517 error (EXIT_FAILURE, errno, "%s", infile);
518 in_blk_size = ST_BLKSIZE (stat_buf);
/* One byte more than the block size is allocated, while the split
   functions below are told that the size is in_blk_size.  */
520 buf = xmalloc (in_blk_size + 1);
/* Dispatch on the selected mode.  */
526 lines_split (num, buf, in_blk_size);
530 bytes_split (num, buf, in_blk_size);
533 case type_byteslines:
534 line_bytes_split (num);
/* Close both descriptors and report any close failure; the output
   descriptor is closed only if one is open (output_desc >= 0).  */
541 if (close (input_desc) < 0)
542 error (EXIT_FAILURE, errno, "%s", infile);
543 if (output_desc >= 0 && close (output_desc) < 0)
544 error (EXIT_FAILURE, errno, "%s", outfile);