1 /* split.c -- split a file into pieces.
2 Copyright (C) 1988, 1991, 1995 Free Software Foundation, Inc.
4 This program is free software; you can redistribute it and/or modify
5 it under the terms of the GNU General Public License as published by
6 the Free Software Foundation; either version 2, or (at your option)
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU General Public License for more details.
14 You should have received a copy of the GNU General Public License
15 along with this program; if not, write to the Free Software
16 Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */
18 /* By tege@sics.se, with rms.
21 * Implement -t CHAR or -t REGEX to specify break characters other
28 #include <sys/types.h>
/* Integer limits.  UINT_MAX is an unsigned int with every bit set;
   INT_MAX is that value shifted right by one bit.  main uses INT_MAX
   as the upper bound when range-checking numeric option arguments.
   NOTE(review): the preprocessor guards around these definitions are
   not visible here -- presumably they apply only when <limits.h> did
   not already supply the macros; confirm.  */
35 # define UINT_MAX ((unsigned int) ~(unsigned int) 0)
39 # define INT_MAX ((int) (UINT_MAX >> 1))
51 /* The name this program was run with; main sets it from argv[0]. */
54 /* Base name of output files. */
57 /* Pointer to the end of the prefix in OUTFILE.
58 Suffixes are inserted here.  next_file_name moves it forward by one
   each time it appends a 'z' to the prefix. */
59 static char *outfile_mid;
61 /* Pointer to the end of OUTFILE.  main initially places it two
   characters past outfile_mid; next_file_name starts its scan of the
   suffix at outfile_end - 1. */
62 static char *outfile_end;
64 /* Name of input file. May be "-". */
67 /* Descriptor on which input file is open. */
68 static int input_desc;
70 /* Descriptor on which output file is open.  cwrite and main close it
   only when it is >= 0. */
71 static int output_desc;
73 /* If nonzero, display usage information and exit. */
76 /* If nonzero, print the version on standard output then exit.
   The --version entry of longopts stores 1 here. */
77 static int show_version;
79 /* If nonzero, print a diagnostic on standard error just before each
80 output file is opened. */
/* Long options passed to getopt_long in main.
   --bytes, --lines and --line-bytes take an argument and are reported
   as the short options 'b', 'l' and 'C'.  --verbose has no short
   letter of its own here; getopt_long reports it as the code 2.
   --help and --version take no argument and store 1 into show_help
   and show_version respectively instead of returning a code.  */
83 static struct option const longopts[] =
85 {"bytes", required_argument, NULL, 'b'},
86 {"lines", required_argument, NULL, 'l'},
87 {"line-bytes", required_argument, NULL, 'C'},
88 {"verbose", no_argument, NULL, 2},
89 {"help", no_argument, &show_help, 1},
90 {"version", no_argument, &show_version, 1},
/* Print usage information.
   STATUS is the caller-supplied status code; main passes 2 for
   argument errors and 0 in one place.
   REASON is a short diagnostic printed to stderr as
   "program_name: reason"; main passes a null pointer when there is
   no specific reason to report.
   The function can also print a "Try --help" hint to stderr and the
   full option summary whose text follows.
   NOTE(review): the conditions selecting between the hint and the full
   summary, and the exit with STATUS, are not visible in this listing
   -- confirm against the complete function.  */
95 usage (int status, const char *reason)
98 fprintf (stderr, "%s: %s\n", program_name, reason);
101 fprintf (stderr, _("Try `%s --help' for more information.\n"),
106 Usage: %s [OPTION] [INPUT [PREFIX]]\n\
110 Output fixed-size pieces of INPUT to PREFIXaa, PREFIXab, ...; default\n\
111 PREFIX is `x'. With no INPUT, or when INPUT is -, read standard input.\n\
113 -C, --line-bytes=SIZE put at most SIZE bytes of lines per output file\n\
114 -b, --bytes=SIZE put SIZE bytes per output file\n\
115 -l, --lines=NUMBER put NUMBER lines per output file\n\
116 --verbose print a diagnostic to standard error just\n\
117 before each output file is opened\n\
118 -NUMBER same as -l NUMBER\n\
119 --help display this help and exit\n\
120 --version output version information and exit\n\
122 SIZE may have a multiplier suffix: b for 512, k for 1K, m for 1 Meg.\n\
128 /* Compute the next sequential output file name suffix and store it
129 into the string `outfile' at the position pointed to by `outfile_mid'.
   Takes no arguments; all state lives in the static variables below
   and in the file-scope pointers outfile_mid and outfile_end. */
132 next_file_name (void)
138 static int first_call = 1;
140 /* Status for outfile name generation. */
141 static unsigned outfile_count = 0;
/* Number of names available at the current suffix length.  It starts
   at 25 * 26, which matches the two-letter suffixes whose first letter
   is 'a'..'y'; the 'z' prefix is used for the next, longer generation
   (see the widening step below).  */
142 static unsigned outfile_name_limit = 25 * 26;
143 static unsigned outfile_name_generation = 1;
/* While names remain at the current length, step the suffix, scanning
   from its last character backwards.
   NOTE(review): the body of this scan is not visible here.  */
148 if (outfile_count < outfile_name_limit)
150 for (ne = outfile_end - 1; ; ne--)
/* Current length exhausted: allow 26 times as many names, append a
   'z' to the prefix (advancing outfile_mid past it), and reset the
   suffix.  The loop bound is inclusive, so it writes
   outfile_name_generation + 1 letters 'a'.  */
162 outfile_name_limit *= 26;
163 outfile_name_generation++;
164 *outfile_mid++ = 'z';
165 for (i = 0; i <= outfile_name_generation; i++)
166 outfile_mid[i] = 'a';
170 /* Write BYTES bytes at BP to an output file.
171 If NEW_FILE_FLAG is nonzero, open the next output file.
172 Otherwise add to the same output file already in use.
   Failures of close, open or the write are reported through error
   with status 1, errno and the current output file name. */
175 cwrite (int new_file_flag, const char *bp, int bytes)
/* Close the previous output file, if one is open (descriptor >= 0).  */
179 if (output_desc >= 0 && close (output_desc) < 0)
180 error (1, errno, "%s", outfile);
/* Diagnostic naming the file about to be created.
   NOTE(review): presumably printed only in verbose mode; the guarding
   condition is not visible here.  */
184 fprintf (stderr, _("creating file `%s'\n"), outfile);
/* Create the file or truncate an existing one, write-only, requesting
   mode 0666.  */
185 output_desc = open (outfile, O_WRONLY | O_CREAT | O_TRUNC, 0666);
187 error (1, errno, "%s", outfile);
/* A negative result from full_write is treated as a write failure.  */
189 if (full_write (output_desc, bp, bytes) < 0)
190 error (1, errno, "%s", outfile);
193 /* Read NCHARS bytes from the input file into BUF.
194 Return the number of bytes successfully read.
195 If this is less than NCHARS, do not call `stdread' again.
   Input comes from the file-scope descriptor input_desc via safe_read. */
198 stdread (char *buf, int nchars)
/* Bytes still wanted; decreases as data arrives.  */
201 int to_be_read = nchars;
/* NOTE(review): the loop around this read, and the handling of a zero
   or negative safe_read result, are not visible here.  */
205 n_read = safe_read (input_desc, buf, to_be_read);
210 to_be_read -= n_read;
/* Requested count minus what is still outstanding = bytes obtained.  */
213 return nchars - to_be_read;
216 /* Split into pieces of exactly NCHARS bytes.
217 Use buffer BUF, whose size is BUFSIZE.
   Input is pulled through stdread and output goes through cwrite. */
220 bytes_split (int nchars, char *buf, int bufsize)
/* Starts nonzero so that the first cwrite opens an output file.  */
223 int new_file_flag = 1;
/* Bytes still owed to the current output piece.  */
225 int to_write = nchars;
230 n_read = stdread (buf, bufsize);
/* NOTE(review): presumably reached only when stdread reports failure;
   the test is not visible here.  */
232 error (1, errno, "%s", infile);
/* Fewer bytes remain in the buffer than the current piece still
   needs: write what is left (unless that is nothing).  */
237 if (to_read < to_write)
239 if (to_read) /* do not write 0 bytes! */
241 cwrite (new_file_flag, bp_out, to_read);
/* Otherwise the buffer holds enough to complete the current piece.  */
249 cwrite (new_file_flag, bp_out, to_write);
/* A read that did not fill the whole buffer ends the loop.  */
257 while (n_read == bufsize);
260 /* Split into pieces of exactly NLINES lines.
261 Use buffer BUF, whose size is BUFSIZE.
   Input is pulled through stdread and output goes through cwrite. */
264 lines_split (int nlines, char *buf, int bufsize)
/* bp scans for newlines, bp_out marks the start of data not yet
   written, eob marks the end of the data in the buffer.  */
267 char *bp, *bp_out, *eob;
/* Starts nonzero so that the first cwrite opens an output file.  */
268 int new_file_flag = 1;
273 n_read = stdread (buf, bufsize);
/* NOTE(review): presumably reached only when stdread reports failure;
   the test is not visible here.  */
275 error (1, errno, "%s", infile);
/* This scan has no bounds check of its own.
   NOTE(review): presumably a '\n' sentinel is stored at *eob before
   the scan (main allocates BUF one byte larger than BUFSIZE) --
   confirm against the complete function.  */
281 while (*bp++ != '\n')
282 ; /* this semicolon takes most of the time */
/* Write the data from bp_out up to the end of the buffer.  */
285 if (eob != bp_out) /* do not write 0 bytes! */
287 cwrite (new_file_flag, bp_out, eob - bp_out);
/* Write the data from bp_out up to the scan position bp.  */
295 cwrite (new_file_flag, bp_out, bp - bp_out);
/* A read that did not fill the whole buffer ends the loop.  */
302 while (n_read == bufsize);
305 /* Split into pieces that are as large as possible while still not more
306 than NCHARS bytes, and are split on line boundaries except
307 where lines longer than NCHARS bytes occur.
   Each piece is written to a fresh output file. */
310 line_bytes_split (int nchars)
/* Work buffer of exactly NCHARS bytes, i.e. one maximum-size piece.  */
316 char *buf = (char *) xmalloc (nchars);
320 /* Fill up the full buffer size from the input file. */
/* Bytes carried over from the previous chunk sit at the start of buf,
   so reading resumes after them.  */
322 n_read = stdread (buf + n_buffered, nchars - n_buffered);
324 error (1, errno, "%s", infile);
326 n_buffered += n_read;
/* The buffer could not be filled completely.  */
327 if (n_buffered != nchars)
330 /* Find where to end this chunk. */
331 bp = buf + n_buffered;
/* With a full buffer, back bp up until it sits just after a newline
   (or reaches the start of the buffer).  */
332 if (n_buffered == nchars)
334 while (bp > buf && bp[-1] != '\n')
338 /* If chunk has no newlines, use all the chunk. */
340 bp = buf + n_buffered;
342 /* Output the chars as one output file. */
343 cwrite (1, buf, bp - buf);
345 /* Discard the chars we just output; move rest of chunk
346 down to be the start of the next chunk. Source and
347 destination probably overlap. */
348 n_buffered -= bp - buf;
/* memmove rather than memcpy because the regions may overlap.  */
350 memmove (buf, bp, n_buffered);
/* Program entry point.  Parses the options, determines the split mode
   and its numeric argument, opens the input, prepares the output file
   name buffer and the I/O buffer, then calls lines_split, bytes_split
   or line_bytes_split.  Both descriptors are closed at the end and
   close failures are reported through error.  */
357 main (int argc, char **argv)
359 struct stat stat_buf;
360 int num; /* numeric argument from command line */
363 type_undef, type_bytes, type_byteslines, type_lines, type_digits
364 } split_type = type_undef;
365 int in_blk_size; /* optimal block size of input file device */
366 char *buf; /* file i/o buffer */
/* argv index of the element that supplied the -NUMBER digits seen so
   far; 0 means no digit option has been seen yet.  */
370 int digits_optind = 0;
372 program_name = argv[0];
374 /* Parse command line options. */
381 /* This is the argv-index of the option we will read next. */
382 int this_optind = optind ? optind : 1;
/* Each digit is its own short option so that -NUMBER can be parsed.  */
385 c = getopt_long (argc, argv, "0123456789vb:l:C:", longopts, (int *) 0);
/* Byte-count mode.  Only one split mode may be selected.  The size may
   carry a b, k or m multiplier suffix and must not be negative or
   exceed INT_MAX.  */
395 if (split_type != type_undef)
396 usage (2, _("cannot split in more than one way"));
397 split_type = type_bytes;
398 if (xstrtol (optarg, NULL, 10, &tmp_long, "bkm") != LONGINT_OK
399 || tmp_long < 0 || tmp_long > INT_MAX)
400 usage (2, _("invalid number of bytes"));
401 accum = (int) tmp_long;
/* Line-count mode.  Same checks as above, but no multiplier suffix is
   accepted.  */
405 if (split_type != type_undef)
406 usage (2, _("cannot split in more than one way"));
407 split_type = type_lines;
408 if (xstrtol (optarg, NULL, 10, &tmp_long, "") != LONGINT_OK
409 || tmp_long < 0 || tmp_long > INT_MAX)
410 usage (2, _("invalid number of lines"));
411 accum = (int) tmp_long;
/* Line-bytes mode.  Same checks as byte-count mode.  */
415 if (split_type != type_undef)
416 usage (2, _("cannot split in more than one way"));
417 split_type = type_byteslines;
418 if (xstrtol (optarg, NULL, 10, &tmp_long, "bkm") != LONGINT_OK
419 || tmp_long < 0 || tmp_long > INT_MAX)
420 usage (2, _("invalid number of bytes"));
421 accum = (int) tmp_long;
/* Digit option (-NUMBER).  Digits may follow earlier digits but not
   another split mode.  Digits coming from a different argv element
   than the previous ones restart the number from zero.
   NOTE(review): no overflow check is visible on the decimal
   accumulation below; a very long digit string could overflow int --
   confirm.  */
434 if (split_type != type_undef && split_type != type_digits)
435 usage (2, _("cannot split in more than one way"));
436 if (digits_optind != 0 && digits_optind != this_optind)
437 accum = 0; /* More than one number given; ignore other. */
438 digits_optind = this_optind;
439 split_type = type_digits;
440 accum = accum * 10 + c - '0';
448 usage (2, (char *)0);
454 printf ("split - %s\n", version_string);
459 usage (0, (char *)0);
461 /* Handle default case. */
/* No mode given on the command line: split by lines.  */
462 if (split_type == type_undef)
464 split_type = type_lines;
469 usage (2, _("invalid number"));
472 /* Get out the filename arguments. */
475 infile = argv[optind++];
478 outbase = argv[optind++];
481 usage (2, _("too many arguments"));
483 /* Open the input file. */
/* An input name of "-" is handled separately from a real path, which
   is opened read-only below.  */
484 if (!strcmp (infile, "-"))
488 input_desc = open (infile, O_RDONLY);
490 error (1, errno, "%s", infile);
493 /* No output file is open now. */
496 /* Copy the output file prefix so we can add suffixes to it.
497 26**29 is certainly enough output files! */
/* 30 bytes beyond the prefix are allocated and then zeroed, so the
   name stays NUL-terminated while the suffix is written into them.  */
499 outfile = xmalloc (strlen (outbase) + 30);
500 strcpy (outfile, outbase);
501 outfile_mid = outfile + strlen (outfile);
502 outfile_end = outfile_mid + 2;
503 memset (outfile_mid, 0, 30);
504 outfile_mid[0] = 'a';
505 outfile_mid[1] = 'a' - 1; /* first call to next_file_name makes it an 'a' */
507 /* Get the optimal block size of input device and make a buffer. */
509 if (fstat (input_desc, &stat_buf) < 0)
510 error (1, errno, "%s", infile);
511 in_blk_size = ST_BLKSIZE (stat_buf);
/* The buffer is one byte larger than the size handed to the split
   functions below.  */
513 buf = xmalloc (in_blk_size + 1);
519 lines_split (num, buf, in_blk_size);
523 bytes_split (num, buf, in_blk_size);
526 case type_byteslines:
/* line_bytes_split allocates its own buffer of NUM bytes.  */
527 line_bytes_split (num);
534 if (close (input_desc) < 0)
535 error (1, errno, "%s", infile);
/* Close the last output file only if one was ever opened.  */
536 if (output_desc >= 0 && close (output_desc) < 0)
537 error (1, errno, "%s", outfile);