1 /* split.c -- split a file into pieces.
2 Copyright (C) 1988, 1991 Free Software Foundation, Inc.
4 This program is free software; you can redistribute it and/or modify
5 it under the terms of the GNU General Public License as published by
6 the Free Software Foundation; either version 2, or (at your option)
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU General Public License for more details.
14 You should have received a copy of the GNU General Public License
15 along with this program; if not, write to the Free Software
16 Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */
18 /* By tege@sics.se, with rms.
21 * Implement -t CHAR or -t REGEX to specify break characters other
25 #if defined (CONFIG_BROKETS)
26 /* We use <config.h> instead of "config.h" so that a compilation
27 using -I. -I$srcdir will use ./config.h rather than $srcdir/config.h
28 (which it would do because it found this file in $srcdir). */
37 #include <sys/types.h>
46 static int convint ();	/* parse a size with optional multiplier suffix into *VAL; 0 if valid, -1 if not */
47 static int isdigits ();	/* nonzero if the string is composed entirely of decimal digits */
48 static int stdread ();	/* read from the input file into a buffer; returns the number of bytes read */
49 static void line_bytes_split ();	/* split into pieces of at most NCHARS bytes, on line boundaries where possible */
50 static void bytes_split ();	/* split into pieces of NCHARS bytes */
51 static void cwrite ();	/* write bytes to an output file, opening the next one if asked to */
52 static void lines_split ();	/* split into pieces of NLINES lines */
53 static void next_file_name ();	/* store the next sequential suffix into `outfile' at `outfile_mid' */
55 /* The name this program was run with. */
58 /* Base name of output files. */
61 /* Pointer to the end of the prefix in OUTFILE.
62 Suffixes are inserted here. */
63 static char *outfile_mid;
65 /* Pointer to the end of OUTFILE. */
66 static char *outfile_end;
68 /* Status for outfile name generation.  All three are consulted and updated by next_file_name. */
69 static unsigned outfile_count = -1;	/* compared against outfile_name_limit; -1 converts to the largest unsigned value (presumably bumped to 0 before the first comparison -- confirm in next_file_name) */
70 static unsigned outfile_name_limit = 25 * 26;	/* multiplied by 26 when next_file_name appends a `z' to the prefix */
71 static unsigned outfile_name_generation = 1;	/* incremented together with the limit; bounds the loop that refills the suffix with `a' */
73 /* Name of input file. May be "-". */
76 /* Descriptor on which input file is open. */
77 static int input_desc;
79 /* Descriptor on which output file is open. */
80 static int output_desc;
82 /* If non-zero, display usage information and exit. */
85 /* If non-zero, print the version on standard output then exit. */
86 static int show_version;
88 static struct option const longopts[] =
90 {"bytes", required_argument, NULL, 'b'},
91 {"lines", required_argument, NULL, 'l'},
92 {"line-bytes", required_argument, NULL, 'C'},
93 {"help", no_argument, &show_help, 1},
94 {"version", no_argument, &show_version, 1},
99 usage (status, reason)
104 fprintf (stderr, "%s: %s\n", program_name, reason);
107 fprintf (stderr, "Try `%s --help' for more information.\n",
112 Usage: %s [OPTION] [INPUT [PREFIX]]\n\
117 -C, --line-bytes=SIZE put at most SIZE bytes of lines per output file\n\
118 -b, --bytes=SIZE put SIZE bytes per output file\n\
119 -l, --lines=NUMBER put NUMBER lines per output file\n\
120 -NUMBER same as -l NUMBER\n\
121 --help display this help and exit\n\
122 --version output version information and exit\n\
124 SIZE may have a multiplier suffix: b for 512, k for 1K, m for 1 Meg.\n\
125 With no PREFIX, use x. With no INPUT, or when INPUT is -, read\n\
137 struct stat stat_buf;
138 int num; /* numeric argument from command line */
141 type_undef, type_bytes, type_byteslines, type_lines, type_digits
142 } split_type = type_undef;
143 int in_blk_size; /* optimal block size of input file device */
144 char *buf; /* file i/o buffer */
148 int digits_optind = 0;
150 program_name = argv[0];
152 /* Parse command line options. */
159 /* This is the argv-index of the option we will read next. */
160 int this_optind = optind ? optind : 1;
162 c = getopt_long (argc, argv, "0123456789b:l:C:", longopts, (int *) 0);
172 if (split_type != type_undef)
173 usage (2, "cannot split in more than one way");
174 split_type = type_bytes;
175 if (convint (optarg, &accum) == -1)
176 usage (2, "invalid number of bytes");
180 if (split_type != type_undef)
181 usage (2, "cannot split in more than one way");
182 split_type = type_lines;
183 if (!isdigits (optarg))
184 usage (2, "invalid number of lines");
185 accum = atoi (optarg);
189 if (split_type != type_undef)
190 usage (2, "cannot split in more than one way");
191 split_type = type_byteslines;
192 if (convint (optarg, &accum) == -1)
193 usage (2, "invalid number of bytes");
206 if (split_type != type_undef && split_type != type_digits)
207 usage (2, "cannot split in more than one way");
208 if (digits_optind != 0 && digits_optind != this_optind)
209 accum = 0; /* More than one number given; ignore other. */
210 digits_optind = this_optind;
211 split_type = type_digits;
212 accum = accum * 10 + c - '0';
216 usage (2, (char *)0);
222 printf ("%s\n", version_string);
227 usage (0, (char *)0);
229 /* Handle default case. */
230 if (split_type == type_undef)
232 split_type = type_lines;
237 usage (2, "invalid number");
240 /* Get out the filename arguments. */
243 infile = argv[optind++];
246 outbase = argv[optind++];
249 usage (2, "too many arguments");
251 /* Open the input file. */
252 if (!strcmp (infile, "-"))
256 input_desc = open (infile, O_RDONLY);
258 error (1, errno, "%s", infile);
261 /* No output file is open now. */
264 /* Copy the output file prefix so we can add suffixes to it.
265 26**29 is certainly enough output files! */
267 outfile = xmalloc (strlen (outbase) + 30);
268 strcpy (outfile, outbase);
269 outfile_mid = outfile + strlen (outfile);
270 outfile_end = outfile_mid + 2;
271 bzero (outfile_mid, 30);
272 outfile_mid[0] = 'a';
273 outfile_mid[1] = 'a' - 1; /* first call to next_file_name makes it an 'a' */
275 /* Get the optimal block size of input device and make a buffer. */
277 if (fstat (input_desc, &stat_buf) < 0)
278 error (1, errno, "%s", infile);
279 in_blk_size = ST_BLKSIZE (stat_buf);
281 buf = xmalloc (in_blk_size + 1);
287 lines_split (num, buf, in_blk_size);
291 bytes_split (num, buf, in_blk_size);
294 case type_byteslines:
295 line_bytes_split (num);
302 if (close (input_desc) < 0)
303 error (1, errno, "%s", infile);
304 if (output_desc >= 0 && close (output_desc) < 0)
305 error (1, errno, "%s", outfile);
310 /* Return nonzero if the string STR is composed entirely of decimal digits. */
326 /* Put the value of the number in STR into *VAL.
327 STR can specify a positive integer, optionally ending in `b' to mean
328 512-byte blocks, `k' to mean kilo or `m' to mean mega; the suffix is
329 overwritten with a NUL in STR.  Return 0 if STR is valid, -1 if not. */
337 int arglen = strlen (str);
341 switch (str[arglen - 1])
345 str[arglen - 1] = '\0';
349 str[arglen - 1] = '\0';
352 multiplier = 1048576;
353 str[arglen - 1] = '\0';
359 *val = atoi (str) * multiplier;
363 /* Split into pieces of exactly NCHARS bytes (the last piece may be
364 shorter).  Use buffer BUF, whose size is BUFSIZE. */
367 bytes_split (nchars, buf, bufsize)
373 int new_file_flag = 1;
375 int to_write = nchars;
380 n_read = stdread (buf, bufsize);
382 error (1, errno, "%s", infile);
387 if (to_read < to_write)
389 if (to_read) /* do not write 0 bytes! */
391 cwrite (new_file_flag, bp_out, to_read);
399 cwrite (new_file_flag, bp_out, to_write);
407 while (n_read == bufsize);
410 /* Split into pieces of exactly NLINES lines (the last piece may have
411 fewer).  Use buffer BUF, whose size is BUFSIZE. */
414 lines_split (nlines, buf, bufsize)
420 char *bp, *bp_out, *eob;
421 int new_file_flag = 1;
426 n_read = stdread (buf, bufsize);
428 error (1, errno, "%s", infile);
434 while (*bp++ != '\n')
435 ; /* this semicolon takes most of the time */
438 if (eob != bp_out) /* do not write 0 bytes! */
440 cwrite (new_file_flag, bp_out, eob - bp_out);
448 cwrite (new_file_flag, bp_out, bp - bp_out);
455 while (n_read == bufsize);
458 /* Split into pieces that are as large as possible while still not more
459 than NCHARS bytes, and are split on line boundaries except
460 where lines longer than NCHARS bytes occur. */
463 line_bytes_split (nchars)
470 char *buf = (char *) xmalloc (nchars);
474 /* Fill up the full buffer size from the input file. */
476 n_read = stdread (buf + n_buffered, nchars - n_buffered);
478 error (1, errno, "%s", infile);
480 n_buffered += n_read;
481 if (n_buffered != nchars)
484 /* Find where to end this chunk. */
485 bp = buf + n_buffered;
486 if (n_buffered == nchars)
488 while (bp > buf && bp[-1] != '\n')
492 /* If chunk has no newlines, use all the chunk. */
494 bp = buf + n_buffered;
496 /* Output the chars as one output file. */
497 cwrite (1, buf, bp - buf);
499 /* Discard the chars we just output; move rest of chunk
500 down to be the start of the next chunk. */
501 n_buffered -= bp - buf;
503 bcopy (bp, buf, n_buffered);
509 /* Write BYTES bytes at BP to an output file.
510 If NEW_FILE_FLAG is nonzero, open the next output file.
511 Otherwise add to the same output file already in use. */
514 cwrite (new_file_flag, bp, bytes)
521 if (output_desc >= 0 && close (output_desc) < 0)
522 error (1, errno, "%s", outfile);
525 output_desc = open (outfile, O_WRONLY | O_CREAT | O_TRUNC, 0666);
527 error (1, errno, "%s", outfile);
529 if (full_write (output_desc, bp, bytes) < 0)
530 error (1, errno, "%s", outfile);
533 /* Read NCHARS bytes from the input file into BUF.
534 Return the number of bytes successfully read.
535 If this is less than NCHARS, do not call `stdread' again. */
538 stdread (buf, nchars)
543 int to_be_read = nchars;
547 n_read = safe_read (input_desc, buf, to_be_read);
552 to_be_read -= n_read;
555 return nchars - to_be_read;
558 /* Compute the next sequential output file name suffix and store it
559 into the string `outfile' at the position pointed to by `outfile_mid'. */
568 if (outfile_count < outfile_name_limit)
570 for (ne = outfile_end - 1; ; ne--)
582 outfile_name_limit *= 26;
583 outfile_name_generation++;
584 *outfile_mid++ = 'z';
585 for (x = 0; x <= outfile_name_generation; x++)
586 outfile_mid[x] = 'a';