1 /* split.c -- split a file into pieces.
2 Copyright (C) 1988, 1991 Free Software Foundation, Inc.
4 This program is free software; you can redistribute it and/or modify
5 it under the terms of the GNU General Public License as published by
6 the Free Software Foundation; either version 2, or (at your option)
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU General Public License for more details.
14 You should have received a copy of the GNU General Public License
15 along with this program; if not, write to the Free Software
16 Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */
18 /* By tege@sics.se, with rms.
21 * Implement -t CHAR or -t REGEX to specify break characters other
27 #include <sys/types.h>
/* Forward declarations for the helper routines used in this file.
   All are static (file-local) and use old-style empty parameter lists,
   so argument types are not checked at the call sites. */
33 static int convint ();
34 static int isdigits ();
35 static int stdread ();
36 static void line_bytes_split ();
37 static void bytes_split ();
38 static void cwrite ();
39 static void lines_split ();
40 static void next_file_name ();
42 /* Name under which this program was invoked. */
45 /* Base name of output files. */
48 /* Pointer to the end of the prefix in OUTFILE.
49 Suffixes are inserted here. */
50 static char *outfile_mid;
52 /* Pointer to the end of OUTFILE. */
53 static char *outfile_end;
55 /* Status for outfile name generation. */
/* The -1 initializer converts to the largest unsigned value.
   NOTE(review): presumably the counter is incremented before its first
   comparison so that it wraps to 0 -- the increment is not visible in
   this extract; confirm. */
56 static unsigned outfile_count = -1;
/* 25 * 26 = 650.  next_file_name multiplies this limit by 26
   (presumably once the counter has reached it -- confirm). */
57 static unsigned outfile_name_limit = 25 * 26;
/* Incremented by next_file_name together with the limit above. */
58 static unsigned outfile_name_generation = 1;
60 /* Name of input file. May be "-". */
63 /* Descriptor on which input file is open. */
64 static int input_desc;
66 /* Descriptor on which output file is open. */
/* main and cwrite both test output_desc >= 0 before closing it. */
67 static int output_desc;
/* Fragment of the usage routine.  The fprintf below prints REASON to
   stderr, prefixed by the program name (any guard on REASON is not
   visible in this extract).  The lines after it are the text of the
   usage synopsis; its single %s is a placeholder whose argument is not
   visible here.  */
74 fprintf (stderr, "%s: %s\n", program_name, reason);
76 Usage: %s [-lines] [-l lines] [-b bytes[bkm]] [-C bytes[bkm]]\n\
77 [--lines=lines] [--bytes=bytes[bkm]] [--line-bytes=bytes[bkm]]\n\
78 [infile [outfile-prefix]]\n",
/* Long-option table handed to getopt_long in main.  Each visible entry
   is { name, has_arg, flag, val }: a has_arg of 1 means the option
   requires an argument, and the NULL flag makes getopt_long return the
   short option character given in val, so each long option is handled
   exactly like its short form.  */
83 static struct option const longopts[] =
85 {"bytes", 1, NULL, 'b'},
86 {"lines", 1, NULL, 'l'},
87 {"line-bytes", 1, NULL, 'C'},
97 int num; /* numeric argument from command line */
/* The ways the input can be split.  type_undef means no splitting
   option has been seen yet; type_digits marks a count given as bare
   digit options (see the digit handling below). */
100 type_undef, type_bytes, type_byteslines, type_lines, type_digits
101 } split_type = type_undef;
102 int in_blk_size; /* optimal block size of input file device */
103 char *buf; /* file i/o buffer */
/* argv index of the argument that supplied the most recent digit
   option; 0 until a digit option has been seen. */
107 int digits_optind = 0;
109 program_name = argv[0];
111 /* Parse command line options. */
118 /* This is the argv-index of the option we will read next. */
/* An optind of 0 is treated as 1 here. */
119 int this_optind = optind ? optind : 1;
/* Every decimal digit is itself an option character; b, l and C each
   take an argument (trailing colon in the option string). */
121 c = getopt_long (argc, argv, "0123456789b:l:C:", longopts, (int *) 0);
/* Byte-count mode (case label not visible in this extract): only one
   splitting mode may be chosen; the size is parsed by convint. */
128 if (split_type != type_undef)
129 usage ("cannot split in more than one way");
130 split_type = type_bytes;
131 if (convint (optarg, &accum) == -1)
132 usage ("invalid number of bytes");
/* Line-count mode: the argument must be all digits (no size suffix)
   and is converted with atoi. */
136 if (split_type != type_undef)
137 usage ("cannot split in more than one way");
138 split_type = type_lines;
139 if (!isdigits (optarg))
140 usage ("invalid number of lines");
141 accum = atoi (optarg);
/* Line-bytes mode: size parsed by convint, like byte-count mode. */
145 if (split_type != type_undef)
146 usage ("cannot split in more than one way");
147 split_type = type_byteslines;
148 if (convint (optarg, &accum) == -1)
149 usage ("invalid number of bytes");
/* Digit options: build a decimal number one digit at a time.  Digits
   may follow earlier digit options but no other splitting mode. */
162 if (split_type != type_undef && split_type != type_digits)
163 usage ("cannot split in more than one way");
/* Digits that come from a different argv element than the previous
   digit start a new number. */
164 if (digits_optind != 0 && digits_optind != this_optind)
165 accum = 0; /* More than one number given; ignore other. */
166 digits_optind = this_optind;
167 split_type = type_digits;
168 accum = accum * 10 + c - '0';
176 /* Handle default case. */
/* With no splitting option given, split by lines. */
177 if (split_type == type_undef)
179 split_type = type_lines;
184 usage ("invalid number");
187 /* Get out the filename arguments. */
/* Positional arguments are consumed in order: input file first, then
   the output prefix; anything beyond that is an error. */
190 infile = argv[optind++];
193 outbase = argv[optind++];
196 usage ("too many arguments");
198 /* Open the input file. */
/* An infile of - is handled separately from a named file, which is
   opened read-only. */
199 if (!strcmp (infile, "-"))
203 input_desc = open (infile, O_RDONLY);
205 error (1, errno, "%s", infile);
208 /* No output file is open now. */
211 /* Copy the output file prefix so we can add suffixes to it.
212 26**29 is certainly enough output files! */
/* The allocation leaves 30 bytes after the prefix, and bzero clears
   exactly those 30 bytes, so the suffix area starts out NUL-filled.
   The initial suffix is two characters long (outfile_end is
   outfile_mid + 2). */
214 outfile = xmalloc (strlen (outbase) + 30);
215 strcpy (outfile, outbase);
216 outfile_mid = outfile + strlen (outfile);
217 outfile_end = outfile_mid + 2;
218 bzero (outfile_mid, 30);
219 outfile_mid[0] = 'a';
220 outfile_mid[1] = 'a' - 1; /* first call to next_file_name makes it an 'a' */
222 /* Get the optimal block size of input device and make a buffer. */
224 if (fstat (input_desc, &stat_buf) < 0)
225 error (1, errno, "%s", infile);
226 in_blk_size = ST_BLKSIZE (stat_buf);
/* One byte more than the block size is allocated.  NOTE(review):
   presumably room for a sentinel byte used by lines_split, whose
   newline scan has no visible bound -- confirm. */
228 buf = xmalloc (in_blk_size + 1);
/* Dispatch on the chosen mode.  line_bytes_split allocates its own
   buffer, so it is passed only the size. */
234 lines_split (num, buf, in_blk_size);
238 bytes_split (num, buf, in_blk_size);
241 case type_byteslines:
242 line_bytes_split (num);
/* Close the input, then the last output file if one is open; a failed
   close is reported as a fatal error. */
249 if (close (input_desc) < 0)
250 error (1, errno, "%s", infile);
251 if (output_desc >= 0 && close (output_desc) < 0)
252 error (1, errno, "%s", outfile);
257 /* Return nonzero if the string STR is composed entirely of decimal digits. */
273 /* Put the value of the number in STR into *VAL.
274 STR can specify a positive integer, optionally ending in `k'
275 to mean kilo or `m' to mean mega.
276 Return 0 if STR is valid, -1 if not. */
/* The suffix character is overwritten with a NUL, so the string passed
   by the caller is modified in place.
   NOTE(review): three suffix cases are visible below and the usage
   synopsis lists b, k and m, but the comment above mentions only k and
   m -- confirm and document the third suffix.  */
284 int arglen = strlen (str);
/* NOTE(review): str[arglen - 1] is out of bounds for an empty STR
   unless an earlier check (not visible in this extract) rules it out. */
288 switch (str[arglen - 1])
292 str[arglen - 1] = '\0';
296 str[arglen - 1] = '\0';
/* 1048576 = 1024 * 1024. */
299 multiplier = 1048576;
300 str[arglen - 1] = '\0';
/* NOTE(review): atoi reports no conversion errors, and the
   multiplication is not checked for overflow. */
306 *val = atoi (str) * multiplier;
310 /* Split into pieces of exactly NCHARS bytes.
311 Use buffer BUF, whose size is BUFSIZE. */
314 bytes_split (nchars, buf, bufsize)
/* Passed to cwrite as its NEW_FILE_FLAG: nonzero asks cwrite to open
   the next output file.  Starts at 1 so that the first write opens a
   file. */
320 int new_file_flag = 1;
/* Number of bytes wanted by the next full write; starts at one whole
   piece of NCHARS bytes. */
322 int to_write = nchars;
/* Refill the buffer; a failed read is fatal. */
327 n_read = stdread (buf, bufsize);
329 error (1, errno, "%s", infile);
/* Fewer bytes are available than the pending write wants: write only
   what is available, skipping the call entirely when that is nothing. */
334 if (to_read < to_write)
336 if (to_read) /* do not write 0 bytes! */
338 cwrite (new_file_flag, bp_out, to_read);
/* Otherwise the full pending amount is written. */
346 cwrite (new_file_flag, bp_out, to_write);
/* Keep going only while the buffer was filled completely; per the
   stdread contract a short read must not be followed by another. */
354 while (n_read == bufsize);
357 /* Split into pieces of exactly NLINES lines.
358 Use buffer BUF, whose size is BUFSIZE. */
361 lines_split (nlines, buf, bufsize)
/* bp, bp_out and eob all point into BUF: eob - bp_out and bp - bp_out
   are the byte counts handed to cwrite below. */
367 char *bp, *bp_out, *eob;
/* Passed to cwrite as its NEW_FILE_FLAG; starts at 1 so that the first
   write opens a file. */
368 int new_file_flag = 1;
/* Refill the buffer; a failed read is fatal. */
373 n_read = stdread (buf, bufsize);
375 error (1, errno, "%s", infile);
/* Advance bp to just past the next newline.  The loop has no bound of
   its own.  NOTE(review): presumably a sentinel newline is stored just
   past the data read (main allocates one byte more than the block
   size) -- the store is not visible in this extract; confirm. */
381 while (*bp++ != '\n')
382 ; /* this semicolon takes most of the time */
/* Write the bytes from bp_out up to eob, unless there are none. */
385 if (eob != bp_out) /* do not write 0 bytes! */
387 cwrite (new_file_flag, bp_out, eob - bp_out);
/* Write the bytes from bp_out up to bp. */
395 cwrite (new_file_flag, bp_out, bp - bp_out);
/* Keep going only while the buffer was filled completely; per the
   stdread contract a short read must not be followed by another. */
402 while (n_read == bufsize);
405 /* Split into pieces that are as large as possible while still not more
406 than NCHARS bytes, and are split on line boundaries except
407 where lines longer than NCHARS bytes occur. */
410 line_bytes_split (nchars)
/* Working buffer of NCHARS bytes, allocated here rather than passed in
   by the caller.  NOTE(review): no release of this buffer is visible in
   this extract. */
417 char *buf = (char *) xmalloc (nchars);
421 /* Fill up the full buffer size from the input file. */
/* New data is read in after the n_buffered bytes already held, asking
   only for the space that remains. */
423 n_read = stdread (buf + n_buffered, nchars - n_buffered);
425 error (1, errno, "%s", infile);
427 n_buffered += n_read;
428 if (n_buffered != nchars)
431 /* Find where to end this chunk. */
432 bp = buf + n_buffered;
433 if (n_buffered == nchars)
/* The loop stops once bp has reached buf or the byte just before bp is
   a newline. */
435 while (bp > buf && bp[-1] != '\n')
439 /* If chunk has no newlines, use all the chunk. */
441 bp = buf + n_buffered;
443 /* Output the chars as one output file. */
/* The new-file flag is the constant 1, so every chunk written here goes
   to a newly opened output file. */
444 cwrite (1, buf, bp - buf);
446 /* Discard the chars we just output; move rest of chunk
447 down to be the start of the next chunk. */
448 n_buffered -= bp - buf;
/* Source and destination both lie inside buf and may overlap. */
450 bcopy (bp, buf, n_buffered);
456 /* Write BYTES bytes at BP to an output file.
457 If NEW_FILE_FLAG is nonzero, open the next output file.
458 Otherwise add to the same output file already in use. */
461 cwrite (new_file_flag, bp, bytes)
/* Close the output file that is currently open, if any; a failed close
   is fatal. */
468 if (output_desc >= 0 && close (output_desc) < 0)
469 error (1, errno, "%s", outfile);
/* Open the file named by outfile for writing, creating it if needed
   and truncating any existing contents; 0666 is the creation mode
   passed to open. */
472 output_desc = open (outfile, O_WRONLY | O_CREAT | O_TRUNC, 0666);
474 error (1, errno, "%s", outfile);
/* NOTE(review): only a negative return is treated as failure; a write
   that transfers fewer than BYTES bytes is not detected here. */
476 if (write (output_desc, bp, bytes) < 0)
477 error (1, errno, "%s", outfile);
480 /* Read NCHARS bytes from the input file into BUF.
481 Return the number of bytes successfully read.
482 If this is less than NCHARS, do not call `stdread' again. */
485 stdread (buf, nchars)
/* Number of bytes still wanted from the input. */
490 int to_be_read = nchars;
/* read may deliver fewer bytes than requested; the count it returns is
   subtracted from what is still wanted.  (Handling of an error or
   end-of-file return is not visible in this extract.) */
494 n_read = read (input_desc, buf, to_be_read);
499 to_be_read -= n_read;
/* Requested count minus what is still outstanding, i.e. the number of
   bytes actually obtained. */
502 return nchars - to_be_read;
505 /* Compute the next sequential output file name suffix and store it
506 into the string `outfile' at the position pointed to by `outfile_mid'. */
/* Branch taken while the counter is still below the current limit. */
515 if (outfile_count < outfile_name_limit)
/* Walk backward from the last character of the name; the loop has no
   condition of its own, so it must be left from inside its body (not
   visible in this extract). */
517 for (ne = outfile_end - 1; ; ne--)
/* Widen the name space: the limit grows by a factor of 26, a z is
   stored at outfile_mid (which then advances past it), and the
   generation + 1 characters starting at the new outfile_mid are set
   to a. */
529 outfile_name_limit *= 26;
530 outfile_name_generation++;
531 *outfile_mid++ = 'z';
532 for (x = 0; x <= outfile_name_generation; x++)
533 outfile_mid[x] = 'a';