1 /* split.c -- split a file into pieces.
2 Copyright (C) 88, 91, 1995-1999 Free Software Foundation, Inc.
4 This program is free software; you can redistribute it and/or modify
5 it under the terms of the GNU General Public License as published by
6 the Free Software Foundation; either version 2, or (at your option)
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU General Public License for more details.
14 You should have received a copy of the GNU General Public License
15 along with this program; if not, write to the Free Software Foundation,
16 Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */
18 /* By tege@sics.se, with rms.
21 * Implement -t CHAR or -t REGEX to specify break characters other
28 #include <sys/types.h>
32 #include "long-options.h"
33 #include "safe-read.h"
36 /* The official name of this program (e.g., no `g' prefix). */
37 #define PROGRAM_NAME "split"
41 /* The name this program was run with. */
44 /* Base name of output files. */
47 /* Pointer to the end of the prefix in OUTFILE.
48 Suffixes are inserted here. */
49 static char *outfile_mid;
51 /* Pointer to the end of OUTFILE. */
52 static char *outfile_end;
54 /* Name of input file. May be "-". */
57 /* Descriptor on which input file is open. */
58 static int input_desc;
60 /* Descriptor on which output file is open. */
61 static int output_desc;
63 /* If nonzero, print a diagnostic on standard error just before each
64 output file is opened. */
67 static struct option const longopts[] =
69 {"bytes", required_argument, NULL, 'b'},
70 {"lines", required_argument, NULL, 'l'},
71 {"line-bytes", required_argument, NULL, 'C'},
72 {"verbose", no_argument, NULL, 2},
80 fprintf (stderr, _("Try `%s --help' for more information.\n"),
85 Usage: %s [OPTION] [INPUT [PREFIX]]\n\
89 Output fixed-size pieces of INPUT to PREFIXaa, PREFIXab, ...; default\n\
90 PREFIX is `x'. With no INPUT, or when INPUT is -, read standard input.\n\
92 -b, --bytes=SIZE put SIZE bytes per output file\n\
93 -C, --line-bytes=SIZE put at most SIZE bytes of lines per output file\n\
94 -l, --lines=NUMBER put NUMBER lines per output file\n\
95 -NUMBER same as -l NUMBER\n\
96 --verbose print a diagnostic to standard error just\n\
97 before each output file is opened\n\
98 --help display this help and exit\n\
99 --version output version information and exit\n\
101 SIZE may have a multiplier suffix: b for 512, k for 1K, m for 1 Meg.\n\
103 puts (_("\nReport bugs to <bug-textutils@gnu.org>."));
105 exit (status == 0 ? EXIT_SUCCESS : EXIT_FAILURE);
108 /* Compute the next sequential output file name suffix and store it
109 into the string `outfile' at the position pointed to by `outfile_mid'. */
112 next_file_name (void)
114 static unsigned n_digits = 2;
117 /* Change any suffix of `z's to `a's. */
118 for (p = outfile_end - 1; *p == 'z'; p--)
123 /* Increment the rightmost non-`z' character that was present before the
124 above z/a substitutions. There is guaranteed to be such a character. */
127 /* If the result of that increment operation yielded a `z' and there
128 are only `z's to the left of it, then append two more `a' characters
129 to the end and add 1 (-1 + 2) to the number of digits (we're taking
130 out this `z' and adding two `a's). */
131 if (*p == 'z' && p == outfile_mid)
135 *outfile_end++ = 'a';
136 *outfile_end++ = 'a';
140 /* Write BYTES bytes at BP to an output file.
141 If NEW_FILE_FLAG is nonzero, open the next output file.
142 Otherwise add to the same output file already in use. */
145 cwrite (int new_file_flag, const char *bp, int bytes)
149 if (output_desc >= 0 && close (output_desc) < 0)
150 error (EXIT_FAILURE, errno, "%s", outfile);
154 fprintf (stderr, _("creating file `%s'\n"), outfile);
155 output_desc = open (outfile,
156 O_WRONLY | O_CREAT | O_TRUNC | O_BINARY, 0666);
158 error (EXIT_FAILURE, errno, "%s", outfile);
160 if (full_write (output_desc, bp, bytes) < 0)
161 error (EXIT_FAILURE, errno, "%s", outfile);
164 /* Read up to NCHARS bytes from the input file into BUF.
165 Return the number of bytes successfully read.
166 If this is less than NCHARS, do not call `stdread' again. */
169 stdread (char *buf, int nchars)
172 int to_be_read = nchars;
176 n_read = safe_read (input_desc, buf, to_be_read);
181 to_be_read -= n_read;
184 return nchars - to_be_read;
187 /* Split into pieces of NCHARS bytes each; the last piece may be shorter.
188 Use buffer BUF, whose size is BUFSIZE. */
191 bytes_split (int nchars, char *buf, int bufsize)
194 int new_file_flag = 1;
196 int to_write = nchars;
201 n_read = stdread (buf, bufsize);
203 error (EXIT_FAILURE, errno, "%s", infile);
208 if (to_read < to_write)
210 if (to_read) /* do not write 0 bytes! */
212 cwrite (new_file_flag, bp_out, to_read);
220 cwrite (new_file_flag, bp_out, to_write);
228 while (n_read == bufsize);
231 /* Split into pieces of NLINES lines each; the last piece may have fewer.
232 Use buffer BUF, whose size is BUFSIZE. */
235 lines_split (int nlines, char *buf, int bufsize)
238 char *bp, *bp_out, *eob;
239 int new_file_flag = 1;
244 n_read = stdread (buf, bufsize);
246 error (EXIT_FAILURE, errno, "%s", infile);
252 while (*bp++ != '\n')
253 ; /* this semicolon takes most of the time */
256 if (eob != bp_out) /* do not write 0 bytes! */
258 cwrite (new_file_flag, bp_out, eob - bp_out);
266 cwrite (new_file_flag, bp_out, bp - bp_out);
273 while (n_read == bufsize);
276 /* Split into pieces that are as large as possible while still not more
277 than NCHARS bytes, and are split on line boundaries except
278 where lines longer than NCHARS bytes occur. */
281 line_bytes_split (int nchars)
287 char *buf = (char *) xmalloc (nchars);
291 /* Fill up the full buffer size from the input file. */
293 n_read = stdread (buf + n_buffered, nchars - n_buffered);
295 error (EXIT_FAILURE, errno, "%s", infile);
297 n_buffered += n_read;
298 if (n_buffered != nchars)
301 /* Find where to end this chunk. */
302 bp = buf + n_buffered;
303 if (n_buffered == nchars)
305 while (bp > buf && bp[-1] != '\n')
309 /* If chunk has no newlines, use all the chunk. */
311 bp = buf + n_buffered;
313 /* Output the chars as one output file. */
314 cwrite (1, buf, bp - buf);
316 /* Discard the chars we just output; move rest of chunk
317 down to be the start of the next chunk. Source and
318 destination probably overlap. */
319 n_buffered -= bp - buf;
321 memmove (buf, bp, n_buffered);
328 main (int argc, char **argv)
330 struct stat stat_buf;
331 int num; /* numeric argument from command line */
334 type_undef, type_bytes, type_byteslines, type_lines, type_digits
335 } split_type = type_undef;
336 int in_blk_size; /* optimal block size of input file device */
337 char *buf; /* file i/o buffer */
341 int digits_optind = 0;
343 program_name = argv[0];
344 setlocale (LC_ALL, "");
345 bindtextdomain (PACKAGE, LOCALEDIR);
346 textdomain (PACKAGE);
348 parse_long_options (argc, argv, PROGRAM_NAME, GNU_PACKAGE, VERSION,
349 "Torbjorn Granlund and Richard M. Stallman", usage);
351 /* Parse command line options. */
358 /* This is the argv-index of the option we will read next. */
359 int this_optind = optind ? optind : 1;
362 c = getopt_long (argc, argv, "0123456789vb:l:C:", longopts, (int *) 0);
372 if (split_type != type_undef)
374 error (0, 0, _("cannot split in more than one way"));
375 usage (EXIT_FAILURE);
377 split_type = type_bytes;
378 if (xstrtol (optarg, NULL, 10, &tmp_long, "bkm") != LONGINT_OK
379 || tmp_long < 0 || tmp_long > INT_MAX)
381 error (0, 0, _("%s: invalid number of bytes"), optarg);
382 usage (EXIT_FAILURE);
384 accum = (int) tmp_long;
388 if (split_type != type_undef)
390 error (0, 0, _("cannot split in more than one way"));
391 usage (EXIT_FAILURE);
393 split_type = type_lines;
394 if (xstrtol (optarg, NULL, 10, &tmp_long, "") != LONGINT_OK
395 || tmp_long < 0 || tmp_long > INT_MAX)
397 error (0, 0, _("%s: invalid number of lines"), optarg);
398 usage (EXIT_FAILURE);
400 accum = (int) tmp_long;
404 if (split_type != type_undef)
406 error (0, 0, _("cannot split in more than one way"));
407 usage (EXIT_FAILURE);
410 split_type = type_byteslines;
411 if (xstrtol (optarg, NULL, 10, &tmp_long, "bkm") != LONGINT_OK
412 || tmp_long < 0 || tmp_long > INT_MAX)
414 error (0, 0, _("%s: invalid number of bytes"), optarg);
415 usage (EXIT_FAILURE);
417 accum = (int) tmp_long;
430 if (split_type != type_undef && split_type != type_digits)
432 error (0, 0, _("cannot split in more than one way"));
433 usage (EXIT_FAILURE);
435 if (digits_optind != 0 && digits_optind != this_optind)
436 accum = 0; /* More than one number given; ignore other. */
437 digits_optind = this_optind;
438 split_type = type_digits;
439 accum = accum * 10 + c - '0';
447 usage (EXIT_FAILURE);
451 /* Handle default case. */
452 if (split_type == type_undef)
454 split_type = type_lines;
460 error (0, 0, _("invalid number"));
461 usage (EXIT_FAILURE);
465 /* Extract the file name arguments. */
468 infile = argv[optind++];
471 outbase = argv[optind++];
475 error (0, 0, _("too many arguments"));
476 usage (EXIT_FAILURE);
479 /* Open the input file. */
480 if (STREQ (infile, "-"))
484 input_desc = open (infile, O_RDONLY);
486 error (EXIT_FAILURE, errno, "%s", infile);
488 /* Binary I/O is safer when byte counts are used. */
489 SET_BINARY (input_desc);
491 /* No output file is open now. */
494 /* Copy the output file prefix so we can add suffixes to it.
495 26**29 is certainly enough output files! */
497 outfile = xmalloc (strlen (outbase) + 30);
498 strcpy (outfile, outbase);
499 outfile_mid = outfile + strlen (outfile);
500 outfile_end = outfile_mid + 2;
501 memset (outfile_mid, 0, 30);
502 outfile_mid[0] = 'a';
503 outfile_mid[1] = 'a' - 1; /* first call to next_file_name makes it an 'a' */
505 /* Get the optimal block size of input device and make a buffer. */
507 if (fstat (input_desc, &stat_buf) < 0)
508 error (EXIT_FAILURE, errno, "%s", infile);
509 in_blk_size = ST_BLKSIZE (stat_buf);
511 buf = xmalloc (in_blk_size + 1);
517 lines_split (num, buf, in_blk_size);
521 bytes_split (num, buf, in_blk_size);
524 case type_byteslines:
525 line_bytes_split (num);
532 if (close (input_desc) < 0)
533 error (EXIT_FAILURE, errno, "%s", infile);
534 if (output_desc >= 0 && close (output_desc) < 0)
535 error (EXIT_FAILURE, errno, "%s", outfile);