1 /* split.c -- split a file into pieces.
2 Copyright (C) 1988, 1991 Free Software Foundation, Inc.
4 This program is free software; you can redistribute it and/or modify
5 it under the terms of the GNU General Public License as published by
6 the Free Software Foundation; either version 2, or (at your option)
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU General Public License for more details.
14 You should have received a copy of the GNU General Public License
15 along with this program; if not, write to the Free Software
16 Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */
18 /* By tege@sics.se, with rms.
21 * Implement -t CHAR or -t REGEX to specify break characters other
25 #if defined (CONFIG_BROKETS)
26 /* We use <config.h> instead of "config.h" so that a compilation
27 using -I. -I$srcdir will use ./config.h rather than $srcdir/config.h
28 (which it would do because it found this file in $srcdir). */
37 #include <sys/types.h>
44 static int convint ();
45 static int isdigits ();
46 static int stdread ();
47 static void line_bytes_split ();
48 static void bytes_split ();
49 static void cwrite ();
50 static void lines_split ();
51 static void next_file_name ();
53 /* The name this program was run with. */
56 /* Base name of output files. */
59 /* Pointer to the end of the prefix in OUTFILE.
60 Suffixes are inserted here. */
61 static char *outfile_mid;
63 /* Pointer to the end of OUTFILE. */
64 static char *outfile_end;
66 /* Status for outfile name generation. */
67 static unsigned outfile_count = -1;
68 static unsigned outfile_name_limit = 25 * 26;
69 static unsigned outfile_name_generation = 1;
71 /* Name of input file. May be "-". */
74 /* Descriptor on which input file is open. */
75 static int input_desc;
77 /* Descriptor on which output file is open. */
78 static int output_desc;
80 /* If non-zero, display usage information and exit. */
83 /* If non-zero, print the version on standard output then exit. */
84 static int show_version;
86 static struct option const longopts[] =
88 {"bytes", required_argument, NULL, 'b'},
89 {"lines", required_argument, NULL, 'l'},
90 {"line-bytes", required_argument, NULL, 'C'},
91 {"help", no_argument, &show_help, 1},
92 {"version", no_argument, &show_version, 1},
97 usage (status, reason)
102 fprintf (stderr, "%s: %s\n", program_name, reason);
105 fprintf (stderr, "Try `%s --help' for more information.\n",
110 Usage: %s [OPTION] [INPUT [PREFIX]]\n\
115 -C, --line-bytes=SIZE put at most SIZE bytes of lines per output file\n\
116 -b, --bytes=SIZE put SIZE bytes per output file\n\
117 -l, --lines=NUMBER put NUMBER lines per output file\n\
118 -NUMBER same as -l NUMBER\n\
119 --help display this help and exit\n\
120 --version output version information and exit\n\
122 SIZE may have a multiplier suffix: b for 512, k for 1K, m for 1 Meg.\n\
123 With no PREFIX, use x. With no INPUT, or when INPUT is -, read\n\
135 struct stat stat_buf;
136 int num; /* numeric argument from command line */
139 type_undef, type_bytes, type_byteslines, type_lines, type_digits
140 } split_type = type_undef;
141 int in_blk_size; /* optimal block size of input file device */
142 char *buf; /* file i/o buffer */
146 int digits_optind = 0;
148 program_name = argv[0];
150 /* Parse command line options. */
157 /* This is the argv-index of the option we will read next. */
158 int this_optind = optind ? optind : 1;
160 c = getopt_long (argc, argv, "0123456789b:l:C:", longopts, (int *) 0);
170 if (split_type != type_undef)
171 usage (2, "cannot split in more than one way");
172 split_type = type_bytes;
173 if (convint (optarg, &accum) == -1)
174 usage (2, "invalid number of bytes");
178 if (split_type != type_undef)
179 usage (2, "cannot split in more than one way");
180 split_type = type_lines;
181 if (!isdigits (optarg))
182 usage (2, "invalid number of lines");
183 accum = atoi (optarg);
187 if (split_type != type_undef)
188 usage (2, "cannot split in more than one way");
189 split_type = type_byteslines;
190 if (convint (optarg, &accum) == -1)
191 usage (2, "invalid number of bytes");
204 if (split_type != type_undef && split_type != type_digits)
205 usage (2, "cannot split in more than one way");
206 if (digits_optind != 0 && digits_optind != this_optind)
207 accum = 0; /* More than one number given; ignore other. */
208 digits_optind = this_optind;
209 split_type = type_digits;
210 accum = accum * 10 + c - '0';
214 usage (2, (char *)0);
220 printf ("%s\n", version_string);
225 usage (0, (char *)0);
227 /* Handle default case. */
228 if (split_type == type_undef)
230 split_type = type_lines;
235 usage (2, "invalid number");
238 /* Get out the filename arguments. */
241 infile = argv[optind++];
244 outbase = argv[optind++];
247 usage (2, "too many arguments");
249 /* Open the input file. */
250 if (!strcmp (infile, "-"))
254 input_desc = open (infile, O_RDONLY);
256 error (1, errno, "%s", infile);
259 /* No output file is open now. */
262 /* Copy the output file prefix so we can add suffixes to it.
263 26**29 is certainly enough output files! */
265 outfile = xmalloc (strlen (outbase) + 30);
266 strcpy (outfile, outbase);
267 outfile_mid = outfile + strlen (outfile);
268 outfile_end = outfile_mid + 2;
269 bzero (outfile_mid, 30);
270 outfile_mid[0] = 'a';
271 outfile_mid[1] = 'a' - 1; /* first call to next_file_name makes it an 'a' */
273 /* Get the optimal block size of input device and make a buffer. */
275 if (fstat (input_desc, &stat_buf) < 0)
276 error (1, errno, "%s", infile);
277 in_blk_size = ST_BLKSIZE (stat_buf);
279 buf = xmalloc (in_blk_size + 1);
285 lines_split (num, buf, in_blk_size);
289 bytes_split (num, buf, in_blk_size);
292 case type_byteslines:
293 line_bytes_split (num);
300 if (close (input_desc) < 0)
301 error (1, errno, "%s", infile);
302 if (output_desc >= 0 && close (output_desc) < 0)
303 error (1, errno, "%s", outfile);
308 /* Return nonzero if the string STR is composed entirely of decimal digits. */
324 /* Put the value of the number in STR into *VAL.
325 STR can specify a positive integer, optionally ending in `b' (512),
326 `k' (kilo) or `m' (mega); a suffix found is overwritten with '\0' in STR.
327 Return 0 if STR is valid, -1 if not. */
335 int arglen = strlen (str);
339 switch (str[arglen - 1])
343 str[arglen - 1] = '\0';
347 str[arglen - 1] = '\0';
350 multiplier = 1048576;
351 str[arglen - 1] = '\0';
357 *val = atoi (str) * multiplier;
361 /* Split into pieces of exactly NCHARS bytes.
362 Use buffer BUF, whose size is BUFSIZE. */
365 bytes_split (nchars, buf, bufsize)
371 int new_file_flag = 1;
373 int to_write = nchars;
378 n_read = stdread (buf, bufsize);
380 error (1, errno, "%s", infile);
385 if (to_read < to_write)
387 if (to_read) /* do not write 0 bytes! */
389 cwrite (new_file_flag, bp_out, to_read);
397 cwrite (new_file_flag, bp_out, to_write);
405 while (n_read == bufsize);
408 /* Split into pieces of exactly NLINES lines.
409 Use buffer BUF, whose size is BUFSIZE. */
412 lines_split (nlines, buf, bufsize)
418 char *bp, *bp_out, *eob;
419 int new_file_flag = 1;
424 n_read = stdread (buf, bufsize);
426 error (1, errno, "%s", infile);
432 while (*bp++ != '\n')
433 ; /* this semicolon takes most of the time */
436 if (eob != bp_out) /* do not write 0 bytes! */
438 cwrite (new_file_flag, bp_out, eob - bp_out);
446 cwrite (new_file_flag, bp_out, bp - bp_out);
453 while (n_read == bufsize);
456 /* Split into pieces that are as large as possible while still not more
457 than NCHARS bytes, and are split on line boundaries except
458 where lines longer than NCHARS bytes occur. */
461 line_bytes_split (nchars)
468 char *buf = (char *) xmalloc (nchars);
472 /* Fill up the full buffer size from the input file. */
474 n_read = stdread (buf + n_buffered, nchars - n_buffered);
476 error (1, errno, "%s", infile);
478 n_buffered += n_read;
479 if (n_buffered != nchars)
482 /* Find where to end this chunk. */
483 bp = buf + n_buffered;
484 if (n_buffered == nchars)
486 while (bp > buf && bp[-1] != '\n')
490 /* If chunk has no newlines, use all the chunk. */
492 bp = buf + n_buffered;
494 /* Output the chars as one output file. */
495 cwrite (1, buf, bp - buf);
497 /* Discard the chars we just output; move rest of chunk
498 down to be the start of the next chunk. */
499 n_buffered -= bp - buf;
501 bcopy (bp, buf, n_buffered);
507 /* Write BYTES bytes at BP to an output file.
508 If NEW_FILE_FLAG is nonzero, open the next output file.
509 Otherwise add to the same output file already in use. */
512 cwrite (new_file_flag, bp, bytes)
519 if (output_desc >= 0 && close (output_desc) < 0)
520 error (1, errno, "%s", outfile);
523 output_desc = open (outfile, O_WRONLY | O_CREAT | O_TRUNC, 0666);
525 error (1, errno, "%s", outfile);
527 if (write (output_desc, bp, bytes) < 0)
528 error (1, errno, "%s", outfile);
531 /* Read NCHARS bytes from the input file into BUF.
532 Return the number of bytes successfully read.
533 If this is less than NCHARS, do not call `stdread' again. */
536 stdread (buf, nchars)
541 int to_be_read = nchars;
545 n_read = read (input_desc, buf, to_be_read);
550 to_be_read -= n_read;
553 return nchars - to_be_read;
556 /* Compute the next sequential output file name suffix and store it
557 into the string `outfile' at the position pointed to by `outfile_mid'. */
566 if (outfile_count < outfile_name_limit)
568 for (ne = outfile_end - 1; ; ne--)
580 outfile_name_limit *= 26;
581 outfile_name_generation++;
582 *outfile_mid++ = 'z';
583 for (x = 0; x <= outfile_name_generation; x++)
584 outfile_mid[x] = 'a';