1 /* split.c -- split a file into pieces.
2 Copyright (C) 1988, 1991 Free Software Foundation, Inc.
4 This program is free software; you can redistribute it and/or modify
5 it under the terms of the GNU General Public License as published by
6 the Free Software Foundation; either version 2, or (at your option)
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU General Public License for more details.
14 You should have received a copy of the GNU General Public License
15 along with this program; if not, write to the Free Software
16 Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */
18 /* By tege@sics.se, with rms.
21 * Implement -t CHAR or -t REGEX to specify break characters other
26 #include <sys/types.h>
/* Forward declarations of the file-local (static) helper functions.
   They are written old-style, with empty parentheses, so the compiler
   does not check argument types at the call sites. */
33 static int convint ();
34 static int isdigits ();
35 static int stdread ();
36 static void line_bytes_split ();
37 static void bytes_split ();
38 static void cwrite ();
39 static void lines_split ();
40 static void next_file_name ();
42 /* The name this program was run with. */
45 /* Base name of output files. */
48 /* Pointer to the end of the prefix in OUTFILE.
49 Suffixes are inserted here. */
50 static char *outfile_mid;
52 /* Pointer to the end of OUTFILE. */
53 static char *outfile_end;
55 /* Status for outfile name generation. */
/* Initialized with -1, which as an unsigned is the largest unsigned value.
   NOTE(review): presumably so that the first increment yields 0 -- the
   increment itself is not visible here; confirm. */
56 static unsigned outfile_count = -1;
/* Bound that outfile_count is compared against in next_file_name; it is
   multiplied by 26 there when a new generation of names starts. */
57 static unsigned outfile_name_limit = 25 * 26;
/* Incremented in next_file_name together with the limit above. */
58 static unsigned outfile_name_generation = 1;
60 /* Name of input file. May be "-". */
63 /* Descriptor on which input file is open. */
64 static int input_desc;
66 /* Descriptor on which output file is open. */
67 static int output_desc;
69 /* If non-zero, display usage information and exit. */
72 /* If non-zero, print the version on standard error. */
73 static int flag_version;
/* Long options handed to getopt_long.  --bytes, --lines and --line-bytes
   require an argument and are reported as the short option letters 'b',
   'l' and 'C'.  --help and --version take no argument; because their flag
   pointer is non-null, getopt_long stores 1 into flag_help / flag_version
   instead of returning an option letter. */
75 static struct option const longopts[] =
77 {"bytes", required_argument, NULL, 'b'},
78 {"lines", required_argument, NULL, 'l'},
79 {"line-bytes", required_argument, NULL, 'C'},
80 {"help", no_argument, &flag_help, 1},
81 {"version", no_argument, &flag_version, 1},
90 fprintf (stderr, "%s: %s\n", program_name, reason);
92 Usage: %s [-lines] [-l lines] [-b bytes[bkm]] [-C bytes[bkm]]\n\
93 [--lines=lines] [--bytes=bytes[bkm]] [--line-bytes=bytes[bkm]]\n\
94 [--help] [--version] [infile [outfile-prefix]]\n",
104 struct stat stat_buf;
105 int num; /* numeric argument from command line */
/* How the input is to be split.  type_digits is the state used while a
   bare numeric option is being accumulated one digit at a time. */
108 type_undef, type_bytes, type_byteslines, type_lines, type_digits
109 } split_type = type_undef;
110 int in_blk_size; /* optimal block size of input file device */
111 char *buf; /* file i/o buffer */
/* argv index of the argument in which digit options were last seen;
   0 means no digit option has been seen yet. */
115 int digits_optind = 0;
117 program_name = argv[0];
119 /* Parse command line options. */
126 /* This is the argv-index of the option we will read next. */
127 int this_optind = optind ? optind : 1;
/* Each digit 0-9 is itself an option character; b, l and C take an
   argument. */
129 c = getopt_long (argc, argv, "0123456789b:l:C:", longopts, (int *) 0);
/* Split by byte count: only one split method may be chosen, and the
   size (with optional suffix) is parsed by convint. */
139 if (split_type != type_undef)
140 usage ("cannot split in more than one way");
141 split_type = type_bytes;
142 if (convint (optarg, &accum) == -1)
143 usage ("invalid number of bytes");
/* Split by line count: the argument must consist of digits only. */
147 if (split_type != type_undef)
148 usage ("cannot split in more than one way");
149 split_type = type_lines;
150 if (!isdigits (optarg))
151 usage ("invalid number of lines");
/* NOTE(review): atoi gives no indication of overflow for very long
   digit strings. */
152 accum = atoi (optarg);
/* Split by bytes but on line boundaries (type_byteslines). */
156 if (split_type != type_undef)
157 usage ("cannot split in more than one way");
158 split_type = type_byteslines;
159 if (convint (optarg, &accum) == -1)
160 usage ("invalid number of bytes");
/* A digit option: append the digit to the number being accumulated.
   Digits that come from a different argv element than the previous
   ones restart the number from zero. */
173 if (split_type != type_undef && split_type != type_digits)
174 usage ("cannot split in more than one way");
175 if (digits_optind != 0 && digits_optind != this_optind)
176 accum = 0; /* More than one number given; ignore other. */
177 digits_optind = this_optind;
178 split_type = type_digits;
179 accum = accum * 10 + c - '0';
188 fprintf (stderr, "%s\n", version_string);
193 /* Handle default case. */
194 if (split_type == type_undef)
196 split_type = type_lines;
201 usage ("invalid number");
204 /* Get out the filename arguments. */
207 infile = argv[optind++];
210 outbase = argv[optind++];
213 usage ("too many arguments");
215 /* Open the input file. */
216 if (!strcmp (infile, "-"))
220 input_desc = open (infile, O_RDONLY);
222 error (1, errno, "%s", infile);
225 /* No output file is open now. */
228 /* Copy the output file prefix so we can add suffixes to it.
229 26**29 is certainly enough output files!
The buffer holds the prefix plus 30 further bytes; outfile_mid points
just past the prefix and bzero clears exactly those 30 bytes.  The
suffix initially occupies two characters (outfile_end is
outfile_mid + 2). */
231 outfile = xmalloc (strlen (outbase) + 30);
232 strcpy (outfile, outbase);
233 outfile_mid = outfile + strlen (outfile);
234 outfile_end = outfile_mid + 2;
235 bzero (outfile_mid, 30);
236 outfile_mid[0] = 'a';
237 outfile_mid[1] = 'a' - 1; /* first call to next_file_name makes it an 'a' */
239 /* Get the optimal block size of input device and make a buffer. */
241 if (fstat (input_desc, &stat_buf) < 0)
242 error (1, errno, "%s", infile);
243 in_blk_size = ST_BLKSIZE (stat_buf);
/* One byte more than the block size is allocated, while the split
   functions are told the size is in_blk_size.
   NOTE(review): presumably the extra byte holds a sentinel for the
   newline scan in lines_split -- confirm. */
245 buf = xmalloc (in_blk_size + 1);
251 lines_split (num, buf, in_blk_size);
255 bytes_split (num, buf, in_blk_size);
258 case type_byteslines:
/* line_bytes_split allocates its own buffer, so BUF is not passed. */
259 line_bytes_split (num);
266 if (close (input_desc) < 0)
267 error (1, errno, "%s", infile);
/* Only close the output descriptor when it is non-negative, i.e. when
   an output file is actually open. */
268 if (output_desc >= 0 && close (output_desc) < 0)
269 error (1, errno, "%s", outfile);
274 /* Return nonzero if the string STR is composed entirely of decimal digits. */
290 /* Put the value of the number in STR into *VAL.
291 STR can specify a positive integer, optionally ending in `k'
292 to mean kilo or `m' to mean mega (the usage text also lists `b').
A recognized suffix is overwritten with a NUL, so STR is modified
in place.
293 Return 0 if STR is valid, -1 if not. */
301 int arglen = strlen (str);
/* Dispatch on the last character of STR, the possible size suffix. */
305 switch (str[arglen - 1])
309 str[arglen - 1] = '\0';
313 str[arglen - 1] = '\0';
316 multiplier = 1048576;
317 str[arglen - 1] = '\0';
/* NOTE(review): atoi reports no errors, and the int multiplication can
   overflow for large counts combined with a suffix. */
323 *val = atoi (str) * multiplier;
327 /* Split into pieces of exactly NCHARS bytes.
328 Use buffer BUF, whose size is BUFSIZE. */
331 bytes_split (nchars, buf, bufsize)
/* Passed to cwrite as its NEW_FILE_FLAG: nonzero asks cwrite to open
   the next output file before writing. */
337 int new_file_flag = 1;
/* Starts at the full piece size NCHARS.
   NOTE(review): appears to track the bytes still owed to the current
   piece -- the updates are not visible here; confirm. */
339 int to_write = nchars;
/* Fill BUF from the input. */
344 n_read = stdread (buf, bufsize);
346 error (1, errno, "%s", infile);
/* Fewer bytes are available than the current piece still needs:
   write what there is. */
351 if (to_read < to_write)
353 if (to_read) /* do not write 0 bytes! */
355 cwrite (new_file_flag, bp_out, to_read);
/* Enough bytes are available: write the remainder of the piece. */
363 cwrite (new_file_flag, bp_out, to_write);
/* A read that did not fill the buffer ends the loop. */
371 while (n_read == bufsize);
374 /* Split into pieces of exactly NLINES lines.
375 Use buffer BUF, whose size is BUFSIZE. */
378 lines_split (nlines, buf, bufsize)
384 char *bp, *bp_out, *eob;
/* Passed to cwrite as its NEW_FILE_FLAG: nonzero asks cwrite to open
   the next output file before writing. */
385 int new_file_flag = 1;
/* Fill BUF from the input. */
390 n_read = stdread (buf, bufsize);
392 error (1, errno, "%s", infile);
/* Advance BP just past the next newline.  The loop itself has no
   bounds check.
   NOTE(review): presumably a newline sentinel is stored at EOB in a
   line not shown (main allocates one byte more than BUFSIZE) --
   confirm. */
398 while (*bp++ != '\n')
399 ; /* this semicolon takes most of the time */
/* Write the data between BP_OUT and the end of the buffer. */
402 if (eob != bp_out) /* do not write 0 bytes! */
404 cwrite (new_file_flag, bp_out, eob - bp_out);
/* Write the data between BP_OUT and BP. */
412 cwrite (new_file_flag, bp_out, bp - bp_out);
/* A read that did not fill the buffer ends the loop. */
419 while (n_read == bufsize);
422 /* Split into pieces that are as large as possible while still not more
423 than NCHARS bytes, and are split on line boundaries except
424 where lines longer than NCHARS bytes occur. */
427 line_bytes_split (nchars)
/* Private buffer of NCHARS bytes, the size of the largest possible
   piece. */
434 char *buf = (char *) xmalloc (nchars);
438 /* Fill up the full buffer size from the input file. */
/* N_BUFFERED bytes carried over from the previous chunk are already at
   the front of BUF; read only into the space after them. */
440 n_read = stdread (buf + n_buffered, nchars - n_buffered);
442 error (1, errno, "%s", infile);
444 n_buffered += n_read;
445 if (n_buffered != nchars)
448 /* Find where to end this chunk. */
449 bp = buf + n_buffered;
/* Only a completely full buffer is scanned backwards for a newline. */
450 if (n_buffered == nchars)
/* Stop when BP sits just after a newline, or at the start of BUF when
   there is none. */
452 while (bp > buf && bp[-1] != '\n')
456 /* If chunk has no newlines, use all the chunk. */
458 bp = buf + n_buffered;
460 /* Output the chars as one output file. */
/* The first argument 1 makes cwrite open a new output file for every
   chunk. */
461 cwrite (1, buf, bp - buf);
463 /* Discard the chars we just output; move rest of chunk
464 down to be the start of the next chunk. */
465 n_buffered -= bp - buf;
/* Source and destination lie in the same buffer and may overlap;
   bcopy is specified to copy correctly in that case. */
467 bcopy (bp, buf, n_buffered);
473 /* Write BYTES bytes at BP to an output file.
474 If NEW_FILE_FLAG is nonzero, open the next output file.
475 Otherwise add to the same output file already in use.
Failures of close, open and write are reported through error with
the output file name. */
478 cwrite (new_file_flag, bp, bytes)
/* Close the output file that is currently open, if any. */
485 if (output_desc >= 0 && close (output_desc) < 0)
486 error (1, errno, "%s", outfile);
/* Create the file, or truncate an existing one, with mode 0666. */
489 output_desc = open (outfile, O_WRONLY | O_CREAT | O_TRUNC, 0666);
491 error (1, errno, "%s", outfile);
/* NOTE(review): only a negative return is treated as an error.  write
   may also return a count smaller than BYTES (a short write), which
   goes undetected here and would silently drop data. */
493 if (write (output_desc, bp, bytes) < 0)
494 error (1, errno, "%s", outfile);
497 /* Read NCHARS bytes from the input file into BUF.
498 Return the number of bytes successfully read.
499 If this is less than NCHARS, do not call `stdread' again. */
502 stdread (buf, nchars)
/* Number of bytes still wanted. */
507 int to_be_read = nchars;
/* Reads from the file-level descriptor input_desc. */
511 n_read = read (input_desc, buf, to_be_read);
516 to_be_read -= n_read;
/* Bytes requested minus bytes still missing: the count obtained. */
519 return nchars - to_be_read;
522 /* Compute the next sequential output file name suffix and store it
523 into the string `outfile' at the position pointed to by `outfile_mid'. */
532 if (outfile_count < outfile_name_limit)
534 for (ne = outfile_end - 1; ; ne--)
546 outfile_name_limit *= 26;
547 outfile_name_generation++;
548 *outfile_mid++ = 'z';
549 for (x = 0; x <= outfile_name_generation; x++)
550 outfile_mid[x] = 'a';