1 /* split.c -- split a file into pieces.
2 Copyright (C) 1988, 1991, 1995 Free Software Foundation, Inc.
4 This program is free software; you can redistribute it and/or modify
5 it under the terms of the GNU General Public License as published by
6 the Free Software Foundation; either version 2, or (at your option)
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU General Public License for more details.
14 You should have received a copy of the GNU General Public License
15 along with this program; if not, write to the Free Software
16 Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */
18 /* By tege@sics.se, with rms.
21 * Implement -t CHAR or -t REGEX to specify break characters other
28 #include <sys/types.h>
37 /* The name this program was run with.  main sets it from argv[0]
and usage prints it in front of its messages. */
40 /* Base name of output files. */
43 /* Pointer to the end of the prefix in OUTFILE.
44 Suffixes are inserted here.  main points it just past the copied
prefix; next_file_name advances it by one each time it lengthens
the suffix. */
45 static char *outfile_mid;
47 /* Pointer to the end of OUTFILE.  main starts it at outfile_mid + 2,
that is, just past the initial two-character suffix. */
48 static char *outfile_end;
50 /* Name of input file. May be "-". */
53 /* Descriptor on which input file is open.  For a named input file
main obtains it from open with O_RDONLY; stdread reads from it and
main closes it when the split is done. */
54 static int input_desc;
56 /* Descriptor on which output file is open.  cwrite and main close it
only when it is non-negative, so a negative value stands for no
output file being open. */
57 static int output_desc;
59 /* If non-zero, display usage information and exit. */
62 /* If non-zero, print the version on standard output then exit.
Stored as 1 by getopt_long through the version entry of longopts. */
63 static int show_version;
/* Long options handed to getopt_long in main.  The bytes, lines and
line-bytes entries require an argument and are reported as the short
options b, l and C.  The help and version entries take no argument;
for them getopt_long stores 1 into show_help or show_version instead.
NOTE(review): the braces and the terminating entry of this initializer
are not visible in this excerpt. */
65 static struct option const longopts[] =
67 {"bytes", required_argument, NULL, 'b'},
68 {"lines", required_argument, NULL, 'l'},
69 {"line-bytes", required_argument, NULL, 'C'},
70 {"help", no_argument, &show_help, 1},
71 {"version", no_argument, &show_version, 1},
/* Print a diagnostic and/or the help text for the program.
STATUS: main passes 2 together with every error message, and also
calls this with 0 and no message.
REASON: optional explanation; main passes a null pointer when it has
nothing specific to report.
NOTE(review): the conditions that choose between the messages below,
and whatever is finally done with STATUS, are not visible in this
excerpt. */
76 usage (int status, const char *reason)
/* Explain what went wrong, prefixed with the program name. */
79 fprintf (stderr, "%s: %s\n", program_name, reason);
/* Short hint on stderr pointing at the help option; the full usage
text follows further down. */
82 fprintf (stderr, _("Try `%s --help' for more information.\n"),
87 Usage: %s [OPTION] [INPUT [PREFIX]]\n\
91 Output fixed-size pieces of INPUT to PREFIXaa, PREFIXab, ...; default\n\
92 PREFIX is `x'. With no INPUT, or when INPUT is -, read standard input.\n\
94 -C, --line-bytes=SIZE put at most SIZE bytes of lines per output file\n\
95 -b, --bytes=SIZE put SIZE bytes per output file\n\
96 -l, --lines=NUMBER put NUMBER lines per output file\n\
97 -NUMBER same as -l NUMBER\n\
98 --help display this help and exit\n\
99 --version output version information and exit\n\
101 SIZE may have a multiplier suffix: b for 512, k for 1K, m for 1 Meg.\n\
107 /* Return nonzero if the string STR is composed entirely of decimal digits.
main uses it to validate the argument of the lines option before
converting that argument with atoi.
NOTE(review): the body is not visible in this excerpt, so the result
for an empty string cannot be stated here. */
110 isdigits (const char *str)
122 /* Put the value of the number in STR into *VAL.
123 STR can specify a positive integer, optionally ending in `k'
124 to mean kilo or `m' to mean mega.
125 Return 0 if STR is valid, -1 if not. */
/* NOTE(review): the usage text also advertises a b suffix meaning 512,
and the switch below strips the final character in three places, so
a third suffix is presumably handled too -- confirm and list it in
the comment above.  main calls this for the bytes and line-bytes
options, passing optarg. */
128 convint (const char *str, int *val)
131 int arglen = strlen (str);
/* Dispatch on the last character, which may be a multiplier suffix.
NOTE(review): no guard against an empty STR (arglen of 0) is visible
in this excerpt -- confirm one exists. */
135 switch (str[arglen - 1])
/* Each suffix case chops the suffix off so that only the number is
left for the conversion below.
NOTE(review): this stores through a pointer declared const, so the
string owned by the caller is modified in place. */
139 str[arglen - 1] = '\0';
143 str[arglen - 1] = '\0';
/* 1048576 is 2 to the power 20, the factor advertised as 1 Meg. */
146 multiplier = 1048576;
147 str[arglen - 1] = '\0';
/* atoi reports no errors and the product is not checked for overflow;
the call sites in main carry FIXME notes about using xstrtoul. */
153 *val = atoi (str) * multiplier;
157 /* Compute the next sequential output file name suffix and store it
158 into the string `outfile' at the position pointed to by `outfile_mid'. */
/* Takes no arguments; all state lives in the static locals below and
in the file-level pointers outfile_mid and outfile_end.  cwrite is
documented as opening the next output file, so it is presumably the
caller -- the call itself is not visible in this excerpt.
NOTE(review): several lines of this function are missing here; the
comments describe only the statements that are shown. */
161 next_file_name (void)
/* Nonzero until the function has run once (its use is not visible in
this excerpt). */
167 static int first_call = 1;
169 /* Status for outfile name generation. */
/* outfile_count is compared against outfile_name_limit below to decide
whether the current suffix length still has names left. */
170 static unsigned outfile_count = 0;
/* NOTE(review): 25 * 26 presumably reflects a first letter restricted
to 25 values, with z kept back to introduce the next, longer
generation (see the z stored through outfile_mid below) -- confirm. */
171 static unsigned outfile_name_limit = 25 * 26;
172 static unsigned outfile_name_generation = 1;
/* Names of the current length remain: step the suffix, scanning from
its last character towards the front. */
177 if (outfile_count < outfile_name_limit)
179 for (ne = outfile_end - 1; ; ne--)
/* Current length exhausted (presumably the else branch): allow 26
times as many names, put a z where the suffix used to start and
move outfile_mid past it, then reset the new suffix, which is
outfile_name_generation + 1 characters long, to all a. */
191 outfile_name_limit *= 26;
192 outfile_name_generation++;
193 *outfile_mid++ = 'z';
194 for (i = 0; i <= outfile_name_generation; i++)
195 outfile_mid[i] = 'a';
199 /* Write BYTES bytes at BP to an output file.
200 If NEW_FILE_FLAG is nonzero, open the next output file.
201 Otherwise add to the same output file already in use. */
/* Every failure (close, open or write) is reported through error with
status 1, errno and the current output file name.
NOTE(review): the test of NEW_FILE_FLAG and the step that generates
the next name are not visible in this excerpt. */
204 cwrite (int new_file_flag, const char *bp, int bytes)
/* Close the previous output file, if one is open, before moving on. */
208 if (output_desc >= 0 && close (output_desc) < 0)
209 error (1, errno, "%s", outfile);
/* Create the file if needed and truncate any existing contents; 0666
is the requested permission mode. */
212 output_desc = open (outfile, O_WRONLY | O_CREAT | O_TRUNC, 0666);
214 error (1, errno, "%s", outfile);
/* full_write is a helper defined outside this file; a negative result
is treated as failure. */
216 if (full_write (output_desc, bp, bytes) < 0)
217 error (1, errno, "%s", outfile);
220 /* Read NCHARS bytes from the input file into BUF.
221 Return the number of bytes successfully read.
222 If this is less than NCHARS, do not call `stdread' again. */
/* Reads come from the file-level descriptor input_desc via safe_read,
a helper defined outside this file.
NOTE(review): the loop structure, the advance of BUF and the handling
of a failed or zero-length read are not visible in this excerpt.
Callers follow a call with an error report, presumably guarded by a
test for a negative result -- confirm. */
225 stdread (char *buf, int nchars)
/* Bytes still wanted; reduced after each read. */
228 int to_be_read = nchars;
232 n_read = safe_read (input_desc, buf, to_be_read);
237 to_be_read -= n_read;
/* Requested amount minus what is still outstanding, that is, the
number of bytes actually obtained. */
240 return nchars - to_be_read;
243 /* Split into pieces of exactly NCHARS bytes.
244 Use buffer BUF, whose size is BUFSIZE. */
/* main calls this with the parsed byte count and a buffer sized from
the block size of the input.
NOTE(review): the inner loop, the updates of to_read, to_write, bp_out
and new_file_flag, and the loop header matching the final while are
not visible in this excerpt. */
247 bytes_split (int nchars, char *buf, int bufsize)
/* Starts at 1 so that the first write opens the first output file. */
250 int new_file_flag = 1;
/* Bytes still owed to the output piece currently being filled. */
252 int to_write = nchars;
/* Refill the buffer; the error call below reports a failed read under
the input file name. */
257 n_read = stdread (buf, bufsize);
259 error (1, errno, "%s", infile);
/* The buffer holds less than the current piece still needs: write out
what is left of the buffer (skipping an empty write). */
264 if (to_read < to_write)
266 if (to_read) /* do not write 0 bytes! */
268 cwrite (new_file_flag, bp_out, to_read);
/* Otherwise the buffer can finish the current piece: write exactly the
bytes that piece still needs. */
276 cwrite (new_file_flag, bp_out, to_write);
/* A completely filled buffer means more input may follow; per the
contract of stdread, a short read ends the loop. */
284 while (n_read == bufsize);
287 /* Split into pieces of exactly NLINES lines.
288 Use buffer BUF, whose size is BUFSIZE. */
/* main calls this with the parsed line count and a buffer sized from
the block size of the input.
NOTE(review): the line counting, the updates of bp, bp_out, eob and
new_file_flag, and the loop header matching the final while are not
visible in this excerpt. */
291 lines_split (int nlines, char *buf, int bufsize)
/* bp scans the buffer, bp_out marks the first byte not yet written,
and eob is compared with bp_out to detect an empty remainder
(presumably it marks the end of the data read -- confirm). */
294 char *bp, *bp_out, *eob;
/* Starts at 1 so that the first write opens the first output file. */
295 int new_file_flag = 1;
/* Refill the buffer; the error call below reports a failed read under
the input file name. */
300 n_read = stdread (buf, bufsize);
302 error (1, errno, "%s", infile);
/* Advance bp to just past the next newline.
NOTE(review): the scan has no bounds check of its own; presumably a
newline sentinel is stored after the data, which would explain why
main allocates one byte more than the block size -- confirm. */
308 while (*bp++ != '\n')
309 ; /* this semicolon takes most of the time */
/* Write out the unwritten remainder of the buffer, if there is any. */
312 if (eob != bp_out) /* do not write 0 bytes! */
314 cwrite (new_file_flag, bp_out, eob - bp_out);
/* Write everything from bp_out up to the scan position bp. */
322 cwrite (new_file_flag, bp_out, bp - bp_out);
/* A completely filled buffer means more input may follow; per the
contract of stdread, a short read ends the loop. */
329 while (n_read == bufsize);
332 /* Split into pieces that are as large as possible while still not more
333 than NCHARS bytes, and are split on line boundaries except
334 where lines longer than NCHARS bytes occur. */
/* Unlike the other splitters this one allocates its own buffer, of
exactly NCHARS bytes, so one full buffer is one maximal piece.
NOTE(review): the loop header, the end-of-input bookkeeping and any
release of the buffer are not visible in this excerpt. */
337 line_bytes_split (int nchars)
343 char *buf = (char *) xmalloc (nchars);
347 /* Fill up the full buffer size from the input file. */
/* n_buffered bytes carried over from the previous round already sit at
the start of buf, so only the rest of the buffer is requested. */
349 n_read = stdread (buf + n_buffered, nchars - n_buffered);
351 error (1, errno, "%s", infile);
353 n_buffered += n_read;
/* The buffer could not be filled, which by the contract of stdread
means the input is exhausted (the action taken is not visible
here). */
354 if (n_buffered != nchars)
357 /* Find where to end this chunk. */
358 bp = buf + n_buffered;
/* Only a full buffer is cut back to a line boundary: step bp backwards
until it sits just after a newline or reaches the buffer start.  A
partial buffer is output whole. */
359 if (n_buffered == nchars)
361 while (bp > buf && bp[-1] != '\n')
365 /* If chunk has no newlines, use all the chunk. */
367 bp = buf + n_buffered;
369 /* Output the chars as one output file. */
/* The flag is always 1 here, so every chunk goes to a new file. */
370 cwrite (1, buf, bp - buf);
372 /* Discard the chars we just output; move rest of chunk
373 down to be the start of the next chunk. Source and
374 destination probably overlap. */
375 n_buffered -= bp - buf;
/* memmove rather than memcpy because the two regions may overlap. */
377 memmove (buf, bp, n_buffered);
/* Entry point: parse the options into a split mode and a count, pick
up the input name and the output prefix, open the input, prepare the
output name buffer and the I/O buffer, run the chosen splitter, and
close both descriptors.
NOTE(review): a number of lines of this function (case labels, braces,
some declarations and assignments) are not visible in this excerpt;
the comments below describe only the statements that are shown. */
384 main (int argc, char **argv)
386 struct stat stat_buf;
387 int num; /* numeric argument from command line */
/* Split modes.  type_undef means no mode chosen yet; type_digits marks
a count given as bare digit options. */
390 type_undef, type_bytes, type_byteslines, type_lines, type_digits
391 } split_type = type_undef;
392 int in_blk_size; /* optimal block size of input file device */
393 char *buf; /* file i/o buffer */
/* argv index of the argument the most recent digit option came from;
0 while no digit option has been seen. */
397 int digits_optind = 0;
399 program_name = argv[0];
401 /* Parse command line options. */
408 /* This is the argv-index of the option we will read next. */
/* Falls back to 1 while optind is still 0. */
409 int this_optind = optind ? optind : 1;
/* The ten digits are accepted as options of their own so that a bare
number can be given in place of the lines option. */
411 c = getopt_long (argc, argv, "0123456789b:l:C:", longopts, (int *) 0);
/* Byte-count mode: refuse a second splitting mode, then parse the size
(with optional multiplier suffix) into accum. */
421 if (split_type != type_undef)
422 usage (2, _("cannot split in more than one way"));
423 split_type = type_bytes;
424 /* FIXME: use xstrtoul */
425 if (convint (optarg, &accum) == -1)
426 usage (2, _("invalid number of bytes"));
/* Line-count mode: refuse a second splitting mode; the argument must
be all digits and is converted without any range check. */
430 if (split_type != type_undef)
431 usage (2, _("cannot split in more than one way"));
432 split_type = type_lines;
433 if (!isdigits (optarg))
434 usage (2, _("invalid number of lines"));
435 /* FIXME: use xstrtoul */
436 accum = atoi (optarg);
/* Line-bytes mode: same checks and parsing as byte-count mode. */
440 if (split_type != type_undef)
441 usage (2, _("cannot split in more than one way"));
442 split_type = type_byteslines;
443 /* FIXME: use xstrtoul */
444 if (convint (optarg, &accum) == -1)
445 usage (2, _("invalid number of bytes"));
/* Digit option: allowed only when no other mode has been chosen.
Digits from the same argv element build up one decimal number;
digits from a different element start the number over. */
458 if (split_type != type_undef && split_type != type_digits)
459 usage (2, _("cannot split in more than one way"));
460 if (digits_optind != 0 && digits_optind != this_optind)
461 accum = 0; /* More than one number given; ignore other. */
462 digits_optind = this_optind;
463 split_type = type_digits;
464 accum = accum * 10 + c - '0';
/* Usage error with no specific message. */
468 usage (2, (char *)0);
/* Version banner on standard output. */
474 printf ("split - %s\n", version_string);
/* Usage with status 0 and no message. */
479 usage (0, (char *)0);
481 /* Handle default case. */
/* No mode requested: fall back to splitting by lines. */
482 if (split_type == type_undef)
484 split_type = type_lines;
489 usage (2, _("invalid number"));
492 /* Get out the filename arguments. */
/* Up to two operands, INPUT then PREFIX, are consumed in order;
anything beyond that is an error. */
495 infile = argv[optind++];
498 outbase = argv[optind++];
501 usage (2, _("too many arguments"));
503 /* Open the input file. */
/* An input name of - is handled separately from a named file, which is
opened read-only. */
504 if (!strcmp (infile, "-"))
508 input_desc = open (infile, O_RDONLY);
510 error (1, errno, "%s", infile);
513 /* No output file is open now. */
516 /* Copy the output file prefix so we can add suffixes to it.
517 26**29 is certainly enough output files! */
/* The allocation leaves exactly 30 bytes after the prefix, the same 30
bytes that the memset below clears starting at outfile_mid. */
519 outfile = xmalloc (strlen (outbase) + 30);
520 strcpy (outfile, outbase);
521 outfile_mid = outfile + strlen (outfile);
/* The suffix starts out two characters long. */
522 outfile_end = outfile_mid + 2;
523 memset (outfile_mid, 0, 30);
524 outfile_mid[0] = 'a';
525 outfile_mid[1] = 'a' - 1; /* first call to next_file_name makes it an 'a' */
527 /* Get the optimal block size of input device and make a buffer. */
529 if (fstat (input_desc, &stat_buf) < 0)
530 error (1, errno, "%s", infile);
531 in_blk_size = ST_BLKSIZE (stat_buf);
/* One byte more than the block size is allocated.
NOTE(review): presumably room for a sentinel used by lines_split --
confirm. */
533 buf = xmalloc (in_blk_size + 1);
/* Run the splitter for the selected mode.  The line and byte splitters
share buf; the line-bytes splitter allocates its own buffer. */
539 lines_split (num, buf, in_blk_size);
543 bytes_split (num, buf, in_blk_size);
546 case type_byteslines:
547 line_bytes_split (num);
/* Close the input, and the last output file if one was ever opened;
a failing close is reported as an error. */
554 if (close (input_desc) < 0)
555 error (1, errno, "%s", infile);
556 if (output_desc >= 0 && close (output_desc) < 0)
557 error (1, errno, "%s", outfile);