1 /* split.c -- split a file into pieces.
2 Copyright (C) 1988, 1991 Free Software Foundation, Inc.
4 This program is free software; you can redistribute it and/or modify
5 it under the terms of the GNU General Public License as published by
6 the Free Software Foundation; either version 2, or (at your option)
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU General Public License for more details.
14 You should have received a copy of the GNU General Public License
15 along with this program; if not, write to the Free Software
16 Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */
18 /* By tege@sics.se, with rms.
21 * Implement -t CHAR or -t REGEX to specify break characters other
26 #include <sys/types.h>
/* Forward declarations of the file-local helpers.  The empty parameter
   lists make these old-style (non-prototype) declarations, so argument
   types are not checked at the call sites. */
32 static int convint (); /* parse a number with an optional size suffix */
33 static int isdigits (); /* test whether a string is all decimal digits */
34 static int stdread (); /* read a requested byte count from the input */
35 static void line_bytes_split (); /* split by size, preferring line boundaries */
36 static void bytes_split (); /* split into pieces of a given byte count */
37 static void cwrite (); /* write to the current or the next output file */
38 static void lines_split (); /* split into pieces of a given line count */
39 static void next_file_name (); /* advance the output file name suffix */
41 /* Name under which this program was invoked. */
44 /* Base name of output files. */
47 /* Pointer to the end of the prefix in OUTFILE.
48 Suffixes are inserted here. */
49 static char *outfile_mid;
51 /* Pointer to the end of OUTFILE. */
52 static char *outfile_end;
54 /* Status for outfile name generation.
   outfile_count is initialized with -1, which converts to the largest
   unsigned value; presumably it is incremented before it is first
   compared with outfile_name_limit -- TODO confirm.
   outfile_name_limit is multiplied by 26, and outfile_name_generation
   is incremented, each time the suffix scheme is lengthened. */
55 static unsigned outfile_count = -1;
56 static unsigned outfile_name_limit = 25 * 26;
57 static unsigned outfile_name_generation = 1;
59 /* Name of input file. May be "-". */
62 /* Descriptor on which input file is open. */
63 static int input_desc;
65 /* Descriptor on which output file is open.
   The close checks treat a negative value as "no output file open". */
66 static int output_desc;
/* Print REASON on stderr, prefixed with the program name and a colon.
   The lines after the call are the text of the usage synopsis. */
73 fprintf (stderr, "%s: %s\n", program_name, reason);
75 Usage: %s [-lines] [-l lines] [-b bytes[bkm]] [-C bytes[bkm]]\n\
76 [--lines=lines] [--bytes=bytes[bkm]] [--line-bytes=bytes[bkm]]\n\
77 [infile [outfile-prefix]]\n",
/* Long options handed to getopt_long.  Every entry takes an argument
   and has a NULL flag pointer, so getopt_long returns the short option
   letter given in the last field when the long form is used. */
82 static struct option const longopts[] =
84 {"bytes", required_argument, NULL, 'b'},
85 {"lines", required_argument, NULL, 'l'},
86 {"line-bytes", required_argument, NULL, 'C'},
96 int num; /* numeric argument from command line */
99 type_undef, type_bytes, type_byteslines, type_lines, type_digits
100 } split_type = type_undef;
101 int in_blk_size; /* optimal block size of input file device */
102 char *buf; /* file i/o buffer */
/* argv index at which the most recent digit option was seen;
   0 means no digit option has been seen yet. */
106 int digits_optind = 0;
108 program_name = argv[0];
110 /* Parse command line options. */
117 /* This is the argv-index of the option we will read next. */
118 int this_optind = optind ? optind : 1;
/* Each decimal digit is an option letter of its own, so a number given
   as -NNN arrives one digit per call; b, l and C take an argument. */
120 c = getopt_long (argc, argv, "0123456789b:l:C:", longopts, (int *) 0);
127 if (split_type != type_undef)
128 usage ("cannot split in more than one way");
129 split_type = type_bytes;
130 if (convint (optarg, &accum) == -1)
131 usage ("invalid number of bytes");
135 if (split_type != type_undef)
136 usage ("cannot split in more than one way");
137 split_type = type_lines;
138 if (!isdigits (optarg))
139 usage ("invalid number of lines");
/* The argument has just been checked with isdigits.
   NOTE(review): atoi gives no indication of overflow. */
140 accum = atoi (optarg);
144 if (split_type != type_undef)
145 usage ("cannot split in more than one way");
146 split_type = type_byteslines;
147 if (convint (optarg, &accum) == -1)
148 usage ("invalid number of bytes");
/* A digit option: build up the decimal value in ACCUM.  Digits that
   come from a different argv element than the previous ones restart
   the number from zero. */
161 if (split_type != type_undef && split_type != type_digits)
162 usage ("cannot split in more than one way");
163 if (digits_optind != 0 && digits_optind != this_optind)
164 accum = 0; /* More than one number given; ignore other. */
165 digits_optind = this_optind;
166 split_type = type_digits;
167 accum = accum * 10 + c - '0';
175 /* Handle default case. */
176 if (split_type == type_undef)
178 split_type = type_lines;
183 usage ("invalid number");
186 /* Get out the filename arguments. */
189 infile = argv[optind++];
192 outbase = argv[optind++];
195 usage ("too many arguments");
197 /* Open the input file. */
198 if (!strcmp (infile, "-"))
202 input_desc = open (infile, O_RDONLY);
204 error (1, errno, "%s", infile);
207 /* No output file is open now. */
210 /* Copy the output file prefix so we can add suffixes to it.
211 26**29 is certainly enough output files!
   The 30 bytes allocated beyond the prefix hold the suffix and its
   terminating NUL; all 30 are zeroed before the first suffix
   characters are stored. */
213 outfile = xmalloc (strlen (outbase) + 30);
214 strcpy (outfile, outbase);
215 outfile_mid = outfile + strlen (outfile);
/* The suffix starts out two characters long. */
216 outfile_end = outfile_mid + 2;
217 bzero (outfile_mid, 30);
218 outfile_mid[0] = 'a';
219 outfile_mid[1] = 'a' - 1; /* first call to next_file_name makes it an 'a' */
221 /* Get the optimal block size of input device and make a buffer. */
223 if (fstat (input_desc, &stat_buf) < 0)
224 error (1, errno, "%s", infile);
225 in_blk_size = ST_BLKSIZE (stat_buf);
/* One byte more than the block size is allocated.
   NOTE(review): presumably room for a sentinel byte after the data --
   confirm against lines_split. */
227 buf = xmalloc (in_blk_size + 1);
233 lines_split (num, buf, in_blk_size);
237 bytes_split (num, buf, in_blk_size);
240 case type_byteslines:
/* line_bytes_split is not handed BUF; it allocates its own buffer
   of NUM bytes. */
241 line_bytes_split (num);
248 if (close (input_desc) < 0)
249 error (1, errno, "%s", infile);
/* Close the output only if a descriptor is actually open (>= 0). */
250 if (output_desc >= 0 && close (output_desc) < 0)
251 error (1, errno, "%s", outfile);
256 /* Return nonzero if the string STR is composed entirely of decimal digits. */
272 /* Put the value of the number in STR into *VAL.
273 STR can specify a positive integer, optionally ending in `k'
274 to mean kilo or `m' to mean mega.
275 Return 0 if STR is valid, -1 if not.
   A recognized suffix character is overwritten with a NUL before the
   digits are converted, so STR is modified in place.
   NOTE(review): the usage message also advertises a `b' suffix, which
   this comment does not describe -- confirm and document it.
   NOTE(review): atoi reports no errors and the multiplication by the
   suffix multiplier is unchecked, so large inputs can overflow. */
283 int arglen = strlen (str);
287 switch (str[arglen - 1])
291 str[arglen - 1] = '\0';
295 str[arglen - 1] = '\0';
298 multiplier = 1048576;
299 str[arglen - 1] = '\0';
305 *val = atoi (str) * multiplier;
309 /* Split into pieces of exactly NCHARS bytes.
310 Use buffer BUF, whose size is BUFSIZE.
   Input is fetched BUFSIZE bytes at a time with stdread, and the loop
   ends after the first read that returns fewer than BUFSIZE bytes.
   Read failures are reported through error with the input file name.
   NEW_FILE_FLAG starts at 1 and is passed to cwrite to say whether the
   data begins a new output file; TO_WRITE starts at NCHARS.
   NOTE(review): the final piece presumably holds whatever input is
   left and so can be shorter than NCHARS -- confirm. */
313 bytes_split (nchars, buf, bufsize)
319 int new_file_flag = 1;
321 int to_write = nchars;
326 n_read = stdread (buf, bufsize);
328 error (1, errno, "%s", infile);
/* Fewer bytes are available than the current piece still needs:
   write what there is, if anything. */
333 if (to_read < to_write)
335 if (to_read) /* do not write 0 bytes! */
337 cwrite (new_file_flag, bp_out, to_read);
345 cwrite (new_file_flag, bp_out, to_write);
353 while (n_read == bufsize);
356 /* Split into pieces of exactly NLINES lines.
357 Use buffer BUF, whose size is BUFSIZE.
   Input is fetched BUFSIZE bytes at a time with stdread, and the loop
   ends after the first read that returns fewer than BUFSIZE bytes.
   Read failures are reported through error with the input file name.
   NEW_FILE_FLAG starts at 1 and is passed to cwrite to say whether the
   data begins a new output file. */
360 lines_split (nlines, buf, bufsize)
366 char *bp, *bp_out, *eob;
367 int new_file_flag = 1;
372 n_read = stdread (buf, bufsize);
374 error (1, errno, "%s", infile);
/* NOTE(review): this scan has no end-of-buffer test of its own, so it
   presumably relies on a newline sentinel stored at EOB -- confirm. */
380 while (*bp++ != '\n')
381 ; /* this semicolon takes most of the time */
384 if (eob != bp_out) /* do not write 0 bytes! */
386 cwrite (new_file_flag, bp_out, eob - bp_out);
394 cwrite (new_file_flag, bp_out, bp - bp_out);
401 while (n_read == bufsize);
404 /* Split into pieces that are as large as possible while still not more
405 than NCHARS bytes, and are split on line boundaries except
406 where lines longer than NCHARS bytes occur.
   Works in a private buffer of NCHARS bytes obtained from xmalloc.
   Every chunk is passed to cwrite with a new-file flag of 1, so each
   one goes to its own output file. */
409 line_bytes_split (nchars)
416 char *buf = (char *) xmalloc (nchars);
420 /* Fill up the full buffer size from the input file. */
422 n_read = stdread (buf + n_buffered, nchars - n_buffered);
424 error (1, errno, "%s", infile);
426 n_buffered += n_read;
427 if (n_buffered != nchars)
430 /* Find where to end this chunk. */
431 bp = buf + n_buffered;
/* Only a completely full buffer is cut back; BP moves backward until
   it sits just after a newline or reaches the start of the buffer. */
432 if (n_buffered == nchars)
434 while (bp > buf && bp[-1] != '\n')
438 /* If chunk has no newlines, use all the chunk. */
440 bp = buf + n_buffered;
442 /* Output the chars as one output file. */
443 cwrite (1, buf, bp - buf);
445 /* Discard the chars we just output; move rest of chunk
446 down to be the start of the next chunk.
   Source and destination lie in the same buffer and may overlap;
   bcopy is specified to copy correctly in that case. */
447 n_buffered -= bp - buf;
449 bcopy (bp, buf, n_buffered);
455 /* Write BYTES bytes at BP to an output file.
456 If NEW_FILE_FLAG is nonzero, open the next output file.
457 Otherwise add to the same output file already in use.
   Any descriptor that is still open (output_desc >= 0) is closed before
   the new file is opened.  The new file is opened write-only, created
   if missing with mode 0666, and truncated if it already exists.
   Failures of close and write are reported through error with the
   output file name.
   NOTE(review): only a negative return from write is treated as an
   error; a short write (fewer than BYTES bytes accepted) is neither
   retried nor reported. */
460 cwrite (new_file_flag, bp, bytes)
467 if (output_desc >= 0 && close (output_desc) < 0)
468 error (1, errno, "%s", outfile);
471 output_desc = open (outfile, O_WRONLY | O_CREAT | O_TRUNC, 0666);
473 error (1, errno, "%s", outfile);
475 if (write (output_desc, bp, bytes) < 0)
476 error (1, errno, "%s", outfile);
479 /* Read NCHARS bytes from the input file into BUF.
480 Return the number of bytes successfully read.
481 If this is less than NCHARS, do not call `stdread' again.
   Data comes from the descriptor input_desc.  TO_BE_READ counts the
   bytes still wanted, and the return value is NCHARS minus whatever
   remains in TO_BE_READ.
   NOTE(review): presumably read is repeated until the request is
   satisfied or the input ends -- confirm. */
484 stdread (buf, nchars)
489 int to_be_read = nchars;
493 n_read = read (input_desc, buf, to_be_read);
498 to_be_read -= n_read;
501 return nchars - to_be_read;
504 /* Compute the next sequential output file name suffix and store it
505 into the string `outfile' at the position pointed to by `outfile_mid'.
   While outfile_count is below outfile_name_limit, the existing suffix
   is processed backward starting from its last character
   (outfile_end - 1).  The remaining statements lengthen the scheme:
   the limit grows by a factor of 26, the generation counter is
   incremented, a `z' is stored at outfile_mid (which then advances
   past it), and outfile_name_generation + 1 characters starting at the
   new outfile_mid are set to `a'.
   NOTE(review): presumably the lengthening runs only once the limit
   has been reached -- confirm. */
514 if (outfile_count < outfile_name_limit)
516 for (ne = outfile_end - 1; ; ne--)
528 outfile_name_limit *= 26;
529 outfile_name_generation++;
530 *outfile_mid++ = 'z';
531 for (x = 0; x <= outfile_name_generation; x++)
532 outfile_mid[x] = 'a';