1 /* split.c -- split a file into pieces.
2 Copyright (C) 88, 91, 1995-2001 Free Software Foundation, Inc.
4 This program is free software; you can redistribute it and/or modify
5 it under the terms of the GNU General Public License as published by
6 the Free Software Foundation; either version 2, or (at your option)
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU General Public License for more details.
14 You should have received a copy of the GNU General Public License
15 along with this program; if not, write to the Free Software Foundation,
16 Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */
18 /* By tege@sics.se, with rms.
21 * Implement -t CHAR or -t REGEX to specify break characters other
28 #include <sys/types.h>
33 #include "full-write.h"
34 #include "safe-read.h"
37 /* The official name of this program (e.g., no `g' prefix). */
38 #define PROGRAM_NAME "split"
/* Author credit string; main passes it together with PROGRAM_NAME to
case_GETOPT_VERSION_CHAR.  NOTE(review): N_ is presumably the gettext
mark-for-translation macro -- confirm against the project headers. */
40 #define AUTHORS N_ ("Torbjorn Granlund and Richard M. Stallman")
/* File-scope state shared by the routines below.
NOTE(review): several comments in this group are not followed by a
declaration in this listing (program name, output base name, input file
name, diagnostic flag).  The code further down references program_name,
outbase, outfile and infile, so those declarations presumably exist in
the complete file -- confirm. */
42 /* The name this program was run with. */
45 /* Base name of output files. */
48 /* Pointer to the end of the prefix in OUTFILE.
49 Suffixes are inserted here. */
50 static char *outfile_mid;
52 /* Pointer to the end of OUTFILE. */
53 static char *outfile_end;
55 /* Name of input file. May be "-". */
58 /* Descriptor on which input file is open. */
59 static int input_desc;
61 /* Descriptor on which output file is open. */
/* cwrite and main only close this descriptor when it is >= 0, so a
negative value stands for: no output file is open. */
62 static int output_desc;
64 /* If nonzero, print a diagnostic on standard error just before each
65 output file is opened. */
/* Long-option table handed to getopt_long in main.  The first three
entries require an argument and map to the option characters b, l and C.
The verbose entry takes no argument and maps to the integer 2 rather
than to a letter.  The last two entries come from the project macros
for the help and version options.
NOTE(review): the braces around the initializer and any terminating
entry are not visible in this listing. */
68 static struct option const longopts[] =
70 {"bytes", required_argument, NULL, 'b'},
71 {"lines", required_argument, NULL, 'l'},
72 {"line-bytes", required_argument, NULL, 'C'},
73 {"verbose", no_argument, NULL, 2},
74 {GETOPT_HELP_OPTION_DECL},
75 {GETOPT_VERSION_OPTION_DECL},
/* Body of the usage routine.  Its header line is not visible in this
listing; main calls it as usage (EXIT_FAILURE) and the last statement
reads a variable named status.  The first visible statement prints the
try-help hint to stderr.  The lines that follow are the text of the help
message, the help/version option descriptions written to stdout, and the
bug-report address.  The routine ends by calling exit with EXIT_SUCCESS
when status is 0 and EXIT_FAILURE otherwise.
NOTE(review): the condition choosing between the short hint and the full
help text is not visible here -- confirm against the complete file. */
83 fprintf (stderr, _("Try `%s --help' for more information.\n"),
88 Usage: %s [OPTION] [INPUT [PREFIX]]\n\
92 Output fixed-size pieces of INPUT to PREFIXaa, PREFIXab, ...; default\n\
93 PREFIX is `x'. With no INPUT, or when INPUT is -, read standard input.\n\
97 Mandatory arguments to long options are mandatory for short options too.\n\
100 -b, --bytes=SIZE put SIZE bytes per output file\n\
101 -C, --line-bytes=SIZE put at most SIZE bytes of lines per output file\n\
102 -l, --lines=NUMBER put NUMBER lines per output file\n\
103 -NUMBER same as -l NUMBER\n\
104 --verbose print a diagnostic to standard error just\n\
105 before each output file is opened\n\
107 EMIT_HELP_DESCRIPTION (stdout);
108 EMIT_VERSION_DESCRIPTION (stdout);
111 SIZE may have a multiplier suffix: b for 512, k for 1K, m for 1 Meg.\n\
113 puts (_("\nReport bugs to <bug-textutils@gnu.org>."));
115 exit (status == 0 ? EXIT_SUCCESS : EXIT_FAILURE);
118 /* Compute the next sequential output file name suffix and store it
119 into the string `outfile' at the position pointed to by `outfile_mid'. */
/* Takes no arguments and works directly on the file-scope pointers
outfile_mid and outfile_end.  main seeds the suffix area with the
character a followed by the character just below a, so that the first
call produces the suffix aa. */
122 next_file_name (void)
/* Number of suffix characters (called digits in the comment further
down).  Starts at 2 and, being static, keeps its value across calls. */
124 static unsigned n_digits = 2;
127 /* Change any suffix of `z's to `a's. */
128 for (p = outfile_end - 1; *p == 'z'; p--)
133 /* Increment the rightmost non-`z' character that was present before the
134 above z/a substitutions. There is guaranteed to be such a character. */
137 /* If the result of that increment operation yielded a `z' and there
138 are only `z's to the left of it, then append two more `a' characters
139 to the end and add 1 (-1 + 2) to the number of digits (we're taking
140 out this `z' and adding two `a's). */
141 if (*p == 'z' && p == outfile_mid)
/* Each store below also advances outfile_end, so the name grows by two
characters.  NOTE(review): no bounds check is visible here; main
allocates strlen (outbase) + 30 bytes for the name -- confirm that this
growth can never run past that allocation. */
145 *outfile_end++ = 'a';
146 *outfile_end++ = 'a';
150 /* Write BYTES bytes at BP to an output file.
151 If NEW_FILE_FLAG is nonzero, open the next output file.
152 Otherwise add to the same output file already in use. */
/* Every failure visible here (closing the previous file, opening the
new one, an incomplete write) is reported through
error (EXIT_FAILURE, errno, ...) with the output file name. */
155 cwrite (int new_file_flag, const char *bp, int bytes)
/* Close the previously opened output file, if there is one
(output_desc >= 0). */
159 if (output_desc >= 0 && close (output_desc) < 0)
160 error (EXIT_FAILURE, errno, "%s", outfile);
/* Diagnostic naming the file about to be opened.  NOTE(review): the
condition guarding this statement is not visible in this listing; the
file-scope comment about printing a diagnostic before each output file
suggests it is only emitted in verbose mode -- confirm. */
164 fprintf (stderr, _("creating file `%s'\n"), outfile);
/* Open the output file write-only, creating it if needed and truncating
any existing content; 0666 is the mode argument passed to open. */
165 output_desc = open (outfile,
166 O_WRONLY | O_CREAT | O_TRUNC | O_BINARY, 0666);
168 error (EXIT_FAILURE, errno, "%s", outfile);
/* Anything other than a complete write of BYTES bytes is an error. */
170 if (full_write (output_desc, bp, bytes) != bytes)
171 error (EXIT_FAILURE, errno, "%s", outfile);
174 /* Read NCHARS bytes from the input file into BUF.
175 Return the number of bytes successfully read.
176 If this is less than NCHARS, do not call `stdread' again. */
179 stdread (char *buf, int nchars)
/* Number of bytes still wanted; reduced by each successful read. */
182 int to_be_read = nchars;
/* Read from the file-scope descriptor input_desc.
NOTE(review): the loop around this call and the checks on n_read are
not visible in this listing.  Every caller follows its stdread call with
an error report naming infile, so a failure return presumably exists --
confirm against the complete file. */
186 n_read = safe_read (input_desc, buf, to_be_read);
191 to_be_read -= n_read;
/* Requested count minus the shortfall, i.e. the bytes actually read. */
194 return nchars - to_be_read;
197 /* Split into pieces of exactly NCHARS bytes.
198 Use buffer BUF, whose size is BUFSIZE. */
201 bytes_split (int nchars, char *buf, int bufsize)
/* Starts nonzero so that the first cwrite call opens the first output
file. */
204 int new_file_flag = 1;
/* Initialised to the piece size NCHARS and compared below with the
number of bytes available in the buffer (to_read). */
206 int to_write = nchars;
/* Refill the buffer; the error call below reports a failed read against
the input file name. */
211 n_read = stdread (buf, bufsize);
213 error (EXIT_FAILURE, errno, "%s", infile);
/* Fewer bytes are available than the comparison value to_write: write
what is there, unless that is nothing at all. */
218 if (to_read < to_write)
220 if (to_read) /* do not write 0 bytes! */
222 cwrite (new_file_flag, bp_out, to_read);
/* NOTE(review): presumably the else branch (the else line itself is not
visible): enough data is available, so write to_write bytes. */
230 cwrite (new_file_flag, bp_out, to_write);
/* Keep going as long as stdread filled the whole buffer; a short read
ends the loop. */
238 while (n_read == bufsize);
241 /* Split into pieces of exactly NLINES lines.
242 Use buffer BUF, whose size is BUFSIZE. */
245 lines_split (int nlines, char *buf, int bufsize)
248 char *bp, *bp_out, *eob;
/* Starts nonzero so that the first cwrite call opens the first output
file. */
249 int new_file_flag = 1;
254 n_read = stdread (buf, bufsize);
256 error (EXIT_FAILURE, errno, "%s", infile);
/* Advance bp to just past the next newline.
NOTE(review): nothing visible bounds this scan.  main allocates
in_blk_size + 1 bytes for the buffer but passes in_blk_size as BUFSIZE,
which suggests a sentinel newline is stored after the data on a line not
shown in this listing -- confirm. */
262 while (*bp++ != '\n')
263 ; /* this semicolon takes most of the time */
/* Write the unwritten tail of the buffer (from bp_out up to eob),
unless it is empty. */
266 if (eob != bp_out) /* do not write 0 bytes! */
268 cwrite (new_file_flag, bp_out, eob - bp_out);
/* Write the bytes from bp_out up to bp, which sits just past a newline.
NOTE(review): the line-count test guarding this call is not visible. */
276 cwrite (new_file_flag, bp_out, bp - bp_out);
/* Keep going as long as stdread filled the whole buffer; a short read
ends the loop. */
283 while (n_read == bufsize);
286 /* Split into pieces that are as large as possible while still not more
287 than NCHARS bytes, and are split on line boundaries except
288 where lines longer than NCHARS bytes occur. */
291 line_bytes_split (int nchars)
/* Private buffer of exactly NCHARS bytes.  Unlike lines_split and
bytes_split, this routine is not handed the buffer allocated in main. */
297 char *buf = (char *) xmalloc (nchars);
301 /* Fill up the full buffer size from the input file. */
303 n_read = stdread (buf + n_buffered, nchars - n_buffered);
305 error (EXIT_FAILURE, errno, "%s", infile);
307 n_buffered += n_read;
/* The buffer could not be filled completely.  NOTE(review): the
statement guarded by this test is not visible in this listing;
presumably it records that the end of input was reached -- confirm. */
308 if (n_buffered != nchars)
311 /* Find where to end this chunk. */
312 bp = buf + n_buffered;
313 if (n_buffered == nchars)
/* Only when the buffer is full: search backwards for a position just
after a newline, stopping at the start of the buffer.  The loop body is
not visible here; presumably it decrements bp -- confirm. */
315 while (bp > buf && bp[-1] != '\n')
319 /* If chunk has no newlines, use all the chunk. */
321 bp = buf + n_buffered;
323 /* Output the chars as one output file. */
/* The constant first argument 1 makes cwrite open a new output file for
every chunk.  NOTE(review): bytes_split and lines_split both guard
against writing 0 bytes, but no such guard is visible here.  If nothing
is buffered when the end of input is reached, bp - buf is 0 and this
call would create an empty output file -- confirm and guard if so. */
324 cwrite (1, buf, bp - buf);
326 /* Discard the chars we just output; move rest of chunk
327 down to be the start of the next chunk. Source and
328 destination probably overlap. */
329 n_buffered -= bp - buf;
331 memmove (buf, bp, n_buffered);
/* Program entry point.  Parses the command-line options to pick one
split mode and its numeric argument, takes the optional input file and
output prefix arguments, opens the input, prepares the output file name
buffer, and dispatches to lines_split, bytes_split or line_bytes_split.
Both descriptors are closed at the end. */
338 main (int argc, char **argv)
340 struct stat stat_buf;
341 int num; /* numeric argument from command line */
344 type_undef, type_bytes, type_byteslines, type_lines, type_digits
345 } split_type = type_undef;
346 int in_blk_size; /* optimal block size of input file device */
347 char *buf; /* file i/o buffer */
/* argv index of the option word that supplied the most recent digit
option; 0 means no digit option has been seen yet. */
351 int digits_optind = 0;
353 program_name = argv[0];
354 setlocale (LC_ALL, "");
355 bindtextdomain (PACKAGE, LOCALEDIR);
356 textdomain (PACKAGE);
358 atexit (close_stdout);
360 /* Parse command line options. */
367 /* This is the argv-index of the option we will read next. */
368 int this_optind = optind ? optind : 1;
/* The ten digits are accepted as option characters so that a number
given as an option can be assembled one digit at a time below. */
371 c = getopt_long (argc, argv, "0123456789vb:l:C:", longopts, (int *) 0);
/* Bytes-per-file mode.  Only one mode may be chosen.  The argument may
carry one of the multiplier suffixes b, k or m and must be a
non-negative value no larger than INT_MAX. */
381 if (split_type != type_undef)
383 error (0, 0, _("cannot split in more than one way"));
384 usage (EXIT_FAILURE);
386 split_type = type_bytes;
387 if (xstrtol (optarg, NULL, 10, &tmp_long, "bkm") != LONGINT_OK
388 || tmp_long < 0 || tmp_long > INT_MAX)
390 error (0, 0, _("%s: invalid number of bytes"), optarg);
391 usage (EXIT_FAILURE);
393 accum = (int) tmp_long;
/* Lines-per-file mode.  No multiplier suffix is accepted here. */
397 if (split_type != type_undef)
399 error (0, 0, _("cannot split in more than one way"));
400 usage (EXIT_FAILURE);
402 split_type = type_lines;
403 if (xstrtol (optarg, NULL, 10, &tmp_long, "") != LONGINT_OK
404 || tmp_long < 0 || tmp_long > INT_MAX)
406 error (0, 0, _("%s: invalid number of lines"), optarg);
407 usage (EXIT_FAILURE);
409 accum = (int) tmp_long;
/* Line-bytes mode.  Same argument rules as the bytes-per-file mode. */
413 if (split_type != type_undef)
415 error (0, 0, _("cannot split in more than one way"));
416 usage (EXIT_FAILURE);
419 split_type = type_byteslines;
420 if (xstrtol (optarg, NULL, 10, &tmp_long, "bkm") != LONGINT_OK
421 || tmp_long < 0 || tmp_long > INT_MAX)
423 error (0, 0, _("%s: invalid number of bytes"), optarg);
424 usage (EXIT_FAILURE);
426 accum = (int) tmp_long;
/* Digit option.  It may repeat (the mode is then already type_digits)
but may not be combined with any other mode. */
439 if (split_type != type_undef && split_type != type_digits)
441 error (0, 0, _("cannot split in more than one way"));
442 usage (EXIT_FAILURE);
/* Digits that come from a different argv word than the previous digit
start a new number. */
444 if (digits_optind != 0 && digits_optind != this_optind)
445 accum = 0; /* More than one number given; ignore other. */
446 digits_optind = this_optind;
447 split_type = type_digits;
/* NOTE(review): no overflow check is visible on this accumulation, and
accum appears to be an int (see the casts above).  A long enough digit
string would overflow it -- confirm and add a range check if so. */
448 accum = accum * 10 + c - '0';
455 case_GETOPT_HELP_CHAR;
457 case_GETOPT_VERSION_CHAR (PROGRAM_NAME, AUTHORS);
460 usage (EXIT_FAILURE);
464 /* Handle default case. */
465 if (split_type == type_undef)
467 split_type = type_lines;
473 error (0, 0, _("invalid number"));
474 usage (EXIT_FAILURE);
478 /* Get out the filename arguments. */
481 infile = argv[optind++];
484 outbase = argv[optind++];
488 error (0, 0, _("too many arguments"));
489 usage (EXIT_FAILURE);
492 /* Open the input file. */
/* An input name of - is handled separately from a real file name; the
statement for that case is not visible in this listing.  The help text
says that - means standard input. */
493 if (STREQ (infile, "-"))
497 input_desc = open (infile, O_RDONLY);
499 error (EXIT_FAILURE, errno, "%s", infile);
501 /* Binary I/O is safer when bytecounts are used. */
502 SET_BINARY (input_desc);
504 /* No output file is open now. */
507 /* Copy the output file prefix so we can add suffixes to it.
508 26**29 is certainly enough output files! */
/* The allocation holds the prefix plus 30 further bytes for the suffix
and the terminator. */
510 outfile = xmalloc (strlen (outbase) + 30);
511 strcpy (outfile, outbase);
512 outfile_mid = outfile + strlen (outfile);
/* The suffix starts out two characters long. */
513 outfile_end = outfile_mid + 2;
514 memset (outfile_mid, 0, 30);
515 outfile_mid[0] = 'a';
516 outfile_mid[1] = 'a' - 1; /* first call to next_file_name makes it an 'a' */
518 /* Get the optimal block size of input device and make a buffer. */
520 if (fstat (input_desc, &stat_buf) < 0)
521 error (EXIT_FAILURE, errno, "%s", infile);
522 in_blk_size = ST_BLKSIZE (stat_buf);
/* One byte more than the block size is allocated, while the split
routines are told the size is in_blk_size.  NOTE(review): the extra byte
is presumably room for a sentinel used by lines_split -- confirm. */
524 buf = xmalloc (in_blk_size + 1);
530 lines_split (num, buf, in_blk_size);
534 bytes_split (num, buf, in_blk_size);
537 case type_byteslines:
538 line_bytes_split (num);
/* Close the input and, if one is open, the last output file; a failed
close is reported as an error against the corresponding file name. */
545 if (close (input_desc) < 0)
546 error (EXIT_FAILURE, errno, "%s", infile);
547 if (output_desc >= 0 && close (output_desc) < 0)
548 error (EXIT_FAILURE, errno, "%s", outfile);