1 /* split.c -- split a file into pieces.
2 Copyright (C) 1988, 1991 Free Software Foundation, Inc.
4 This program is free software; you can redistribute it and/or modify
5 it under the terms of the GNU General Public License as published by
6 the Free Software Foundation; either version 2, or (at your option)
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU General Public License for more details.
14 You should have received a copy of the GNU General Public License
15 along with this program; if not, write to the Free Software
16 Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */
18 /* By tege@sics.se, with rms.
21 * Implement -t CHAR or -t REGEX to specify break characters other
than newline. */
25 #if defined (CONFIG_BROKETS)
26 /* We use <config.h> instead of "config.h" so that a compilation
27 using -I. -I$srcdir will use ./config.h rather than $srcdir/config.h
28 (which it would do because it found this file in $srcdir). */
37 #include <sys/types.h>
/* Forward declarations of the file-local helpers defined further down.
   The empty parentheses make these old-style (unprototyped) declarations,
   so the compiler does not check argument counts or types at call sites. */
44 static int convint ();
45 static int isdigits ();
46 static int stdread ();
47 static void line_bytes_split ();
48 static void bytes_split ();
49 static void cwrite ();
50 static void lines_split ();
51 static void next_file_name ();
53 /* The name this program was run with. */
56 /* Base name of output files. */
59 /* Pointer to the end of the prefix in OUTFILE.
60 Suffixes are inserted here. */
61 static char *outfile_mid;
63 /* Pointer to the end of OUTFILE. */
64 static char *outfile_end;
66 /* Status for outfile name generation. */
/* The initializer -1 converts to UINT_MAX because the object is unsigned.
   NOTE(review): presumably next_file_name increments the counter before
   comparing it with outfile_name_limit, so that the first call wraps it
   to 0 -- the increment is not visible here; confirm. */
67 static unsigned outfile_count = -1;
/* Upper bound that next_file_name compares outfile_count against; it is
   multiplied by 26 there whenever the suffix is extended. */
68 static unsigned outfile_name_limit = 25 * 26;
/* Bumped by next_file_name at the same time as the limit; it also sets how
   many a characters are written when the suffix is extended. */
69 static unsigned outfile_name_generation = 1;
71 /* Name of input file. May be "-". */
74 /* Descriptor on which input file is open. */
75 static int input_desc;
77 /* Descriptor on which output file is open. */
/* A negative value means no output file is open: both close calls on this
   descriptor are guarded by a test for output_desc >= 0. */
78 static int output_desc;
80 /* If non-zero, display usage information and exit. */
83 /* If non-zero, print the version on standard output then exit. */
84 static int show_version;
/* Long option table handed to getopt_long.  The bytes, lines and line-bytes
   entries require an argument and are reported as the short option letters
   b, l and C.  The help and version entries take no argument and only store
   1 into show_help and show_version respectively. */
86 static struct option const longopts[] =
88 {"bytes", required_argument, NULL, 'b'},
89 {"lines", required_argument, NULL, 'l'},
90 {"line-bytes", required_argument, NULL, 'C'},
91 {"help", no_argument, &show_help, 1},
92 {"version", no_argument, &show_version, 1},
/* Report REASON on stderr, prefixed with the program name.  The lines that
   follow are the body of the usage text, which documents the -NUMBER form
   as [-lines] and the b, k and m size suffixes. */
101 fprintf (stderr, "%s: %s\n", program_name, reason);
103 Usage: %s [-lines] [-l lines] [-b bytes[bkm]] [-C bytes[bkm]]\n\
104 [--lines=lines] [--bytes=bytes[bkm]] [--line-bytes=bytes[bkm]]\n\
105 [--help] [--version] [infile [outfile-prefix]]\n",
/* Body of the program entry point: parse the options into a split mode and
   a count, open the input, prepare the output name buffer, run the selected
   splitter, then close both descriptors. */
115 struct stat stat_buf;
116 int num; /* numeric argument from command line */
/* Which way of splitting was requested.  type_digits is the state used
   while a bare numeric option is being collected digit by digit. */
119 type_undef, type_bytes, type_byteslines, type_lines, type_digits
120 } split_type = type_undef;
121 int in_blk_size; /* optimal block size of input file device */
122 char *buf; /* file i/o buffer */
/* argv index of the element the digits seen so far came from; 0 means no
   digit option has been seen yet. */
126 int digits_optind = 0;
128 program_name = argv[0];
130 /* Parse command line options. */
137 /* This is the argv-index of the option we will read next. */
138 int this_optind = optind ? optind : 1;
/* Every decimal digit is also an option letter, which lets a bare numeric
   option (the [-lines] form in the usage message) be collected one digit
   per getopt_long call. */
140 c = getopt_long (argc, argv, "0123456789b:l:C:", longopts, (int *) 0);
/* Byte-count mode.  Each mode refuses to combine with one chosen earlier. */
150 if (split_type != type_undef)
151 usage ("cannot split in more than one way");
152 split_type = type_bytes;
153 if (convint (optarg, &accum) == -1)
154 usage ("invalid number of bytes");
/* Line-count mode: the argument must be all digits, with no size suffix. */
158 if (split_type != type_undef)
159 usage ("cannot split in more than one way");
160 split_type = type_lines;
161 if (!isdigits (optarg))
162 usage ("invalid number of lines");
/* NOTE(review): atoi gives no overflow indication; isdigits only ensures
   that the text consists of digits. */
163 accum = atoi (optarg);
/* Line-bytes mode: size argument parsed the same way as for byte mode. */
167 if (split_type != type_undef)
168 usage ("cannot split in more than one way");
169 split_type = type_byteslines;
170 if (convint (optarg, &accum) == -1)
171 usage ("invalid number of bytes");
/* A digit option.  Digits may follow earlier digits, but not another mode. */
184 if (split_type != type_undef && split_type != type_digits)
185 usage ("cannot split in more than one way");
/* Digits that come from a different argv element than the previous ones
   start a new number instead of extending the old one. */
186 if (digits_optind != 0 && digits_optind != this_optind)
187 accum = 0; /* More than one number given; ignore other. */
188 digits_optind = this_optind;
189 split_type = type_digits;
/* Append this digit to the decimal value collected so far. */
190 accum = accum * 10 + c - '0';
200 printf ("%s\n", version_string);
207 /* Handle default case. */
208 if (split_type == type_undef)
210 split_type = type_lines;
215 usage ("invalid number");
218 /* Get out the filename arguments. */
221 infile = argv[optind++];
224 outbase = argv[optind++];
227 usage ("too many arguments");
229 /* Open the input file. */
230 if (!strcmp (infile, "-"))
234 input_desc = open (infile, O_RDONLY);
236 error (1, errno, "%s", infile);
239 /* No output file is open now. */
242 /* Copy the output file prefix so we can add suffixes to it.
243 26**29 is certainly enough output files! */
/* The allocation leaves 30 bytes after the prefix for suffix characters
   and the terminating NUL. */
245 outfile = xmalloc (strlen (outbase) + 30);
246 strcpy (outfile, outbase);
247 outfile_mid = outfile + strlen (outfile);
/* The initial suffix is two characters long. */
248 outfile_end = outfile_mid + 2;
/* Zero exactly the 30 spare bytes, so whatever suffix is stored later is
   followed by a NUL. */
249 bzero (outfile_mid, 30);
250 outfile_mid[0] = 'a';
251 outfile_mid[1] = 'a' - 1; /* first call to next_file_name makes it an 'a' */
253 /* Get the optimal block size of input device and make a buffer. */
255 if (fstat (input_desc, &stat_buf) < 0)
256 error (1, errno, "%s", infile);
257 in_blk_size = ST_BLKSIZE (stat_buf);
/* One byte more than the block size is allocated.
   NOTE(review): presumably the spare byte holds a sentinel newline for the
   unbounded scan in lines_split -- the store is not visible here; confirm. */
259 buf = xmalloc (in_blk_size + 1);
265 lines_split (num, buf, in_blk_size);
269 bytes_split (num, buf, in_blk_size);
/* This mode is not passed the shared buffer; line_bytes_split allocates
   its own. */
272 case type_byteslines:
273 line_bytes_split (num);
/* Close failures are reported as errors too.  The output descriptor is only
   closed if a file is actually open. */
280 if (close (input_desc) < 0)
281 error (1, errno, "%s", infile);
282 if (output_desc >= 0 && close (output_desc) < 0)
283 error (1, errno, "%s", outfile);
288 /* Return nonzero if the string STR is composed entirely of decimal digits. */
304 /* Put the value of the number in STR into *VAL.
305 STR can specify a positive integer, optionally ending in `k'
306 to mean kilo or `m' to mean mega.
307 Return 0 if STR is valid, -1 if not.
The usage message also lists b as a suffix letter, and three statements
below overwrite the final character of STR with a NUL.  Stripping the
suffix this way means STR itself is modified by the call. */
315 int arglen = strlen (str);
/* Dispatch on the last character of STR.
   NOTE(review): for an empty STR this would index one byte before the
   string; a length guard may exist in lines not shown here -- confirm. */
319 switch (str[arglen - 1])
/* Each recognized suffix is removed so that only the number remains. */
323 str[arglen - 1] = '\0';
327 str[arglen - 1] = '\0';
/* 1048576 is 1024 * 1024. */
330 multiplier = 1048576;
331 str[arglen - 1] = '\0';
/* NOTE(review): atoi reports no errors and the product is computed in int,
   so a large argument can overflow without any diagnostic. */
337 *val = atoi (str) * multiplier;
341 /* Split into pieces of exactly NCHARS bytes.
342 Use buffer BUF, whose size is BUFSIZE.
Input is taken one buffer at a time through stdread and every piece of
output is emitted through cwrite. */
345 bytes_split (nchars, buf, bufsize)
/* Starts at 1 so that the first cwrite call opens the first output file. */
351 int new_file_flag = 1;
/* NOTE(review): presumably the number of bytes the current output file
   still needs; it starts as one whole piece -- confirm against the lines
   not shown here. */
353 int to_write = nchars;
358 n_read = stdread (buf, bufsize);
360 error (1, errno, "%s", infile);
/* The buffer holds less than the current piece still needs: write what is
   there, if anything. */
365 if (to_read < to_write)
367 if (to_read) /* do not write 0 bytes! */
369 cwrite (new_file_flag, bp_out, to_read);
/* Otherwise write exactly the amount the current piece still needs. */
377 cwrite (new_file_flag, bp_out, to_write);
/* A short read means the input is exhausted (see the stdread contract),
   so the loop stops after processing it. */
385 while (n_read == bufsize);
388 /* Split into pieces of exactly NLINES lines.
389 Use buffer BUF, whose size is BUFSIZE.
Input is taken one buffer at a time through stdread and every piece of
output is emitted through cwrite. */
392 lines_split (nlines, buf, bufsize)
/* bp is the scan position, bp_out the start of the data not yet written,
   and eob is compared with bp_out to find the unwritten tail. */
398 char *bp, *bp_out, *eob;
/* Starts at 1 so that the first cwrite call opens the first output file. */
399 int new_file_flag = 1;
404 n_read = stdread (buf, bufsize);
406 error (1, errno, "%s", infile);
/* Advance bp to just past the next newline.  The loop has no bounds test.
   NOTE(review): presumably a sentinel newline is stored at eob before the
   scan (the caller allocates one byte more than BUFSIZE) -- the store is
   not visible here; confirm. */
412 while (*bp++ != '\n')
413 ; /* this semicolon takes most of the time */
/* Write the unwritten remainder of the buffer, from bp_out up to eob. */
416 if (eob != bp_out) /* do not write 0 bytes! */
418 cwrite (new_file_flag, bp_out, eob - bp_out);
/* Write everything from bp_out through the newline that was just passed. */
426 cwrite (new_file_flag, bp_out, bp - bp_out);
/* A short read means the input is exhausted (see the stdread contract),
   so the loop stops after processing it. */
433 while (n_read == bufsize);
436 /* Split into pieces that are as large as possible while still not more
437 than NCHARS bytes, and are split on line boundaries except
438 where lines longer than NCHARS bytes occur. */
441 line_bytes_split (nchars)
/* Private working buffer of exactly NCHARS bytes; this function is not
   passed the block-sized buffer that the other splitters receive. */
448 char *buf = (char *) xmalloc (nchars);
452 /* Fill up the full buffer size from the input file. */
/* New data is appended after the n_buffered bytes carried over from the
   previous chunk. */
454 n_read = stdread (buf + n_buffered, nchars - n_buffered);
456 error (1, errno, "%s", infile);
458 n_buffered += n_read;
/* Buffer not full: per the stdread contract the input has ended. */
459 if (n_buffered != nchars)
462 /* Find where to end this chunk. */
463 bp = buf + n_buffered;
/* With a full buffer, back up to just after the last newline in it. */
464 if (n_buffered == nchars)
466 while (bp > buf && bp[-1] != '\n')
470 /* If chunk has no newlines, use all the chunk. */
472 bp = buf + n_buffered;
474 /* Output the chars as one output file. */
/* The new-file flag is always 1 here, so each chunk starts a new file. */
475 cwrite (1, buf, bp - buf);
477 /* Discard the chars we just output; move rest of chunk
478 down to be the start of the next chunk. */
479 n_buffered -= bp - buf;
/* NOTE(review): source and destination are in the same buffer and may
   overlap; this relies on bcopy coping with overlapping regions. */
481 bcopy (bp, buf, n_buffered);
487 /* Write BYTES bytes at BP to an output file.
488 If NEW_FILE_FLAG is nonzero, open the next output file.
489 Otherwise add to the same output file already in use.
Any failure of close, open or write is reported through error with a
nonzero status argument. */
492 cwrite (new_file_flag, bp, bytes)
/* Close the output file that is currently open, if there is one. */
499 if (output_desc >= 0 && close (output_desc) < 0)
500 error (1, errno, "%s", outfile);
/* Create the file, or truncate an existing one; mode 0666 is subject to
   the process umask. */
503 output_desc = open (outfile, O_WRONLY | O_CREAT | O_TRUNC, 0666);
505 error (1, errno, "%s", outfile);
/* NOTE(review): only a negative return value is treated as failure, so a
   short write (fewer than BYTES bytes written) is not detected or retried
   in the lines visible here. */
507 if (write (output_desc, bp, bytes) < 0)
508 error (1, errno, "%s", outfile);
511 /* Read NCHARS bytes from the input file into BUF.
512 Return the number of bytes successfully read.
513 If this is less than NCHARS, do not call `stdread' again.
The callers treat a negative return value as a read error. */
516 stdread (buf, nchars)
/* Number of bytes still wanted; starts as the whole request. */
521 int to_be_read = nchars;
525 n_read = read (input_desc, buf, to_be_read);
/* Deduct what this read call delivered.
   NOTE(review): presumably this sits in a loop that advances BUF and
   retries after a partial read -- the loop is not visible here; confirm. */
530 to_be_read -= n_read;
/* Bytes actually obtained: the request minus what is still outstanding. */
533 return nchars - to_be_read;
536 /* Compute the next sequential output file name suffix and store it
537 into the string `outfile' at the position pointed to by `outfile_mid'.
No value is returned; the result is left in the file-scope name buffer
and the file-scope counters are updated. */
/* Usual case: names of the current suffix length are not used up yet. */
546 if (outfile_count < outfile_name_limit)
/* Walk backwards from the last character of the suffix. */
548 for (ne = outfile_end - 1; ; ne--)
/* Names of the current length are used up: raise the limit by a factor
   of 26 and move on to the next generation. */
560 outfile_name_limit *= 26;
561 outfile_name_generation++;
/* Store a z where the suffix used to begin and move the suffix start one
   position to the right, past it. */
562 *outfile_mid++ = 'z';
/* Reset the new suffix to outfile_name_generation + 1 a characters. */
563 for (x = 0; x <= outfile_name_generation; x++)
564 outfile_mid[x] = 'a';