1 /* split.c -- split a file into pieces.
2 Copyright (C) 1988, 1991, 1995 Free Software Foundation, Inc.
4 This program is free software; you can redistribute it and/or modify
5 it under the terms of the GNU General Public License as published by
6 the Free Software Foundation; either version 2, or (at your option)
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU General Public License for more details.
14 You should have received a copy of the GNU General Public License
15 along with this program; if not, write to the Free Software
16 Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */
18 /* By tege@sics.se, with rms.
21 * Implement -t CHAR or -t REGEX to specify break characters other
28 #include <sys/types.h>
/* Forward declarations of the file-local helper functions.
   They are written with empty parameter lists (pre-ANSI style), so
   these declarations give the compiler no argument types to check
   the calls against. */
37 static int convint ();
38 static int isdigits ();
39 static int stdread ();
40 static void line_bytes_split ();
41 static void bytes_split ();
42 static void cwrite ();
43 static void lines_split ();
44 static void next_file_name ();
46 /* The name this program was run with. */
49 /* Base name of output files. */
52 /* Pointer to the end of the prefix in OUTFILE.
53 Suffixes are inserted here.  It is first set to the position of the
   terminating NUL of the copied prefix. */
54 static char *outfile_mid;
56 /* Pointer to the end of OUTFILE.  It starts out two characters past
   outfile_mid. */
57 static char *outfile_end;
59 /* Name of input file. May be "-". */
62 /* Descriptor on which input file is open. */
63 static int input_desc;
65 /* Descriptor on which output file is open.  Code that closes it first
   checks that it is >= 0. */
66 static int output_desc;
68 /* If non-zero, display usage information and exit. */
71 /* If non-zero, print the version on standard output then exit. */
72 static int show_version;
/* Long-option table handed to getopt_long.
   The bytes, lines and line-bytes entries require an argument and have
   a null flag pointer, so getopt_long returns the short option letter
   given in the last field.  The help and version entries take no
   argument and instead name a flag variable together with the value 1
   to store in it. */
74 static struct option const longopts[] =
76 {"bytes", required_argument, NULL, 'b'},
77 {"lines", required_argument, NULL, 'l'},
78 {"line-bytes", required_argument, NULL, 'C'},
79 {"help", no_argument, &show_help, 1},
80 {"version", no_argument, &show_version, 1},
/* Report how to invoke the program.
   STATUS and REASON are the two parameters.  REASON is printed to
   stderr after the program name, and a short hint pointing at the help
   option is also written to stderr.  The long text that follows is the
   full option summary.
   NOTE(review): the conditions selecting between the short hint and the
   full summary, the handling of a null REASON (some callers pass one),
   and the use made of STATUS are not visible in this excerpt; confirm
   against the complete function. */
85 usage (status, reason)
90 fprintf (stderr, "%s: %s\n", program_name, reason);
93 fprintf (stderr, "Try `%s --help' for more information.\n",
98 Usage: %s [OPTION] [INPUT [PREFIX]]\n\
102 Output fixed-size pieces of INPUT to PREFIXaa, PREFIXab, ...; default\n\
103 PREFIX is `x'. With no INPUT, or when INPUT is -, read standard input.\n\
105 -C, --line-bytes=SIZE put at most SIZE bytes of lines per output file\n\
106 -b, --bytes=SIZE put SIZE bytes per output file\n\
107 -l, --lines=NUMBER put NUMBER lines per output file\n\
108 -NUMBER same as -l NUMBER\n\
109 --help display this help and exit\n\
110 --version output version information and exit\n\
112 SIZE may have a multiplier suffix: b for 512, k for 1K, m for 1 Meg.\n\
/* Body of the program entry point (it reads argc and argv; the function
   header itself is not visible in this excerpt).  It parses the options,
   opens the input, prepares the output-name buffer, sizes an I/O buffer
   from the input block size, runs the selected splitter and finally
   closes the descriptors. */
123 struct stat stat_buf;
124 int num; /* numeric argument from command line */
127 type_undef, type_bytes, type_byteslines, type_lines, type_digits
128 } split_type = type_undef;
129 int in_blk_size; /* optimal block size of input file device */
130 char *buf; /* file i/o buffer */
/* Value of this_optind recorded for the most recent digit option;
   stays 0 until a digit option has been seen. */
134 int digits_optind = 0;
136 program_name = argv[0];
138 /* Parse command line options. */
145 /* This is the argv-index of the option we will read next. */
146 int this_optind = optind ? optind : 1;
/* The ten digits are listed as option characters so that -NUMBER can
   be accumulated one digit at a time. */
148 c = getopt_long (argc, argv, "0123456789b:l:C:", longopts, (int *) 0);
/* Byte-count mode: rejected if another mode was already selected. */
158 if (split_type != type_undef)
159 usage (2, "cannot split in more than one way");
160 split_type = type_bytes;
161 if (convint (optarg, &accum) == -1)
162 usage (2, "invalid number of bytes");
/* Line-count mode: the argument must consist of digits only. */
166 if (split_type != type_undef)
167 usage (2, "cannot split in more than one way");
168 split_type = type_lines;
169 if (!isdigits (optarg))
170 usage (2, "invalid number of lines");
/* NOTE(review): atoi does not report overflow for very long digit
   strings. */
171 accum = atoi (optarg);
/* Line-bytes mode: the size is parsed by convint, as in byte-count
   mode. */
175 if (split_type != type_undef)
176 usage (2, "cannot split in more than one way");
177 split_type = type_byteslines;
178 if (convint (optarg, &accum) == -1)
179 usage (2, "invalid number of bytes");
/* Digit option: allowed only on its own or following earlier digits. */
192 if (split_type != type_undef && split_type != type_digits)
193 usage (2, "cannot split in more than one way");
194 if (digits_optind != 0 && digits_optind != this_optind)
195 accum = 0; /* More than one number given; ignore other. */
196 digits_optind = this_optind;
197 split_type = type_digits;
198 accum = accum * 10 + c - '0';
202 usage (2, (char *)0);
208 printf ("split - %s\n", version_string);
213 usage (0, (char *)0);
215 /* Handle default case. */
216 if (split_type == type_undef)
218 split_type = type_lines;
223 usage (2, "invalid number");
226 /* Get out the filename arguments. */
229 infile = argv[optind++];
232 outbase = argv[optind++];
235 usage (2, "too many arguments");
237 /* Open the input file. */
/* An input name consisting of a single dash is tested for before the
   open call below. */
238 if (!strcmp (infile, "-"))
242 input_desc = open (infile, O_RDONLY);
244 error (1, errno, "%s", infile);
247 /* No output file is open now. */
250 /* Copy the output file prefix so we can add suffixes to it.
251 26**29 is certainly enough output files! */
/* The 30 spare bytes allocated here are exactly the 30 bytes that the
   memset below clears starting at outfile_mid. */
253 outfile = xmalloc (strlen (outbase) + 30);
254 strcpy (outfile, outbase);
255 outfile_mid = outfile + strlen (outfile);
256 outfile_end = outfile_mid + 2;
257 memset (outfile_mid, 0, 30);
258 outfile_mid[0] = 'a';
259 outfile_mid[1] = 'a' - 1; /* first call to next_file_name makes it an 'a' */
261 /* Get the optimal block size of input device and make a buffer. */
263 if (fstat (input_desc, &stat_buf) < 0)
264 error (1, errno, "%s", infile);
265 in_blk_size = ST_BLKSIZE (stat_buf);
/* One byte more than the block size is allocated, although the
   splitters below are told the size is in_blk_size.
   NOTE(review): presumably the extra byte holds a sentinel for the
   newline scan in lines_split; confirm. */
267 buf = xmalloc (in_blk_size + 1);
273 lines_split (num, buf, in_blk_size);
277 bytes_split (num, buf, in_blk_size);
280 case type_byteslines:
/* line_bytes_split is not given BUF; it allocates its own buffer. */
281 line_bytes_split (num);
/* Failures to close are reported through error, like failures to open. */
288 if (close (input_desc) < 0)
289 error (1, errno, "%s", infile);
290 if (output_desc >= 0 && close (output_desc) < 0)
291 error (1, errno, "%s", outfile);
296 /* Return nonzero if the string STR is composed entirely of decimal digits. */
312 /* Put the value of the number in STR into *VAL.
313 STR can specify a positive integer, optionally ending in a
314 multiplier suffix; the usage text lists `b' (512), `k' (1K) and `m' (1 Meg).
315 Return 0 if STR is valid, -1 if not.
   A recognized suffix character is overwritten with a NUL, so STR is
   modified in place.
   NOTE(review): the product atoi (str) * multiplier has no visible
   overflow check. */
323 int arglen = strlen (str);
/* NOTE(review): this indexes the last character of STR; whether an
   empty STR is excluded beforehand is not visible in this excerpt. */
327 switch (str[arglen - 1])
331 str[arglen - 1] = '\0';
335 str[arglen - 1] = '\0';
338 multiplier = 1048576;
339 str[arglen - 1] = '\0';
345 *val = atoi (str) * multiplier;
349 /* Split into pieces of exactly NCHARS bytes.
350 Use buffer BUF, whose size is BUFSIZE.
   Input is fetched BUFSIZE bytes at a time with stdread and handed to
   cwrite in runs that never exceed what the current piece may still
   take. */
353 bytes_split (nchars, buf, bufsize)
/* Passed to cwrite as its NEW_FILE_FLAG; starts at 1 so that the first
   write opens an output file. */
359 int new_file_flag = 1;
/* Starts at the full piece size.
   NOTE(review): presumably counts down as bytes are written to the
   current piece; the updates are not visible in this excerpt. */
361 int to_write = nchars;
366 n_read = stdread (buf, bufsize);
368 error (1, errno, "%s", infile);
/* Fewer bytes are on hand than the current piece can still take:
   write what there is (if anything). */
373 if (to_read < to_write)
375 if (to_read) /* do not write 0 bytes! */
377 cwrite (new_file_flag, bp_out, to_read);
/* Otherwise write exactly the amount the current piece still needs. */
385 cwrite (new_file_flag, bp_out, to_write);
/* A short read ends the loop, in line with the rule documented on
   stdread that it must not be called again after one. */
393 while (n_read == bufsize);
396 /* Split into pieces of exactly NLINES lines.
397 Use buffer BUF, whose size is BUFSIZE.
   Input is fetched BUFSIZE bytes at a time with stdread; each buffer is
   scanned for newlines and the text between split points is handed to
   cwrite. */
400 lines_split (nlines, buf, bufsize)
406 char *bp, *bp_out, *eob;
/* Passed to cwrite as its NEW_FILE_FLAG; starts at 1 so that the first
   write opens an output file. */
407 int new_file_flag = 1;
412 n_read = stdread (buf, bufsize);
414 error (1, errno, "%s", infile);
/* This scan has no bounds test of its own.
   NOTE(review): it presumably relies on a newline stored at eob as a
   sentinel (the caller allocates BUFSIZE + 1 bytes); the store is not
   visible in this excerpt, so confirm. */
420 while (*bp++ != '\n')
421 ; /* this semicolon takes most of the time */
/* Write the text from bp_out up to the end of the buffered data. */
424 if (eob != bp_out) /* do not write 0 bytes! */
426 cwrite (new_file_flag, bp_out, eob - bp_out);
/* Write the text from bp_out up to the current scan position. */
434 cwrite (new_file_flag, bp_out, bp - bp_out);
/* A short read ends the loop, in line with the rule documented on
   stdread that it must not be called again after one. */
441 while (n_read == bufsize);
444 /* Split into pieces that are as large as possible while still not more
445 than NCHARS bytes, and are split on line boundaries except
446 where lines longer than NCHARS bytes occur.
   This function takes no caller-supplied buffer; it allocates its own
   buffer of NCHARS bytes. */
449 line_bytes_split (nchars)
/* NOTE(review): no release of this buffer is visible in this excerpt. */
456 char *buf = (char *) xmalloc (nchars);
460 /* Fill up the full buffer size from the input file. */
/* New data is read in after the n_buffered bytes already held. */
462 n_read = stdread (buf + n_buffered, nchars - n_buffered);
464 error (1, errno, "%s", infile);
466 n_buffered += n_read;
467 if (n_buffered != nchars)
470 /* Find where to end this chunk. */
471 bp = buf + n_buffered;
/* Only a completely full buffer is searched backwards for a newline. */
472 if (n_buffered == nchars)
474 while (bp > buf && bp[-1] != '\n')
478 /* If chunk has no newlines, use all the chunk. */
480 bp = buf + n_buffered;
482 /* Output the chars as one output file. */
/* The flag is always 1 here, so every chunk goes to a new output file. */
483 cwrite (1, buf, bp - buf);
485 /* Discard the chars we just output; move rest of chunk
486 down to be the start of the next chunk. Source and
487 destination probably overlap. */
488 n_buffered -= bp - buf;
/* memmove is used because of that possible overlap. */
490 memmove (buf, bp, n_buffered);
496 /* Write BYTES bytes at BP to an output file.
497 If NEW_FILE_FLAG is nonzero, open the next output file.
498 Otherwise add to the same output file already in use.
   Every failing close, open or write is reported through error with
   status 1 and errno, naming OUTFILE. */
501 cwrite (new_file_flag, bp, bytes)
/* Close the previous output file, if output_desc holds one. */
508 if (output_desc >= 0 && close (output_desc) < 0)
509 error (1, errno, "%s", outfile);
/* The file is created if missing and truncated if it already exists;
   the requested mode is 0666. */
512 output_desc = open (outfile, O_WRONLY | O_CREAT | O_TRUNC, 0666);
514 error (1, errno, "%s", outfile);
516 if (full_write (output_desc, bp, bytes) < 0)
517 error (1, errno, "%s", outfile);
520 /* Read NCHARS bytes from the input file into BUF.
521 Return the number of bytes successfully read.
522 If this is less than NCHARS, do not call `stdread' again.
   NOTE(review): callers report an error right after calling this, so a
   negative return presumably signals a read failure; that path is not
   visible in this excerpt. */
525 stdread (buf, nchars)
/* Number of bytes still wanted; reduced after each successful read. */
530 int to_be_read = nchars;
534 n_read = safe_read (input_desc, buf, to_be_read);
539 to_be_read -= n_read;
/* Requested amount minus what is still missing, i.e. the total
   actually obtained. */
542 return nchars - to_be_read;
545 /* Compute the next sequential output file name suffix and store it
546 into the string `outfile' at the position pointed to by `outfile_mid'. */
555 static int first_call = 1;
557 /* Status for outfile name generation. */
558 static unsigned outfile_count = 0;
559 static unsigned outfile_name_limit = 25 * 26;
560 static unsigned outfile_name_generation = 1;
565 if (outfile_count < outfile_name_limit)
567 for (ne = outfile_end - 1; ; ne--)
579 outfile_name_limit *= 26;
580 outfile_name_generation++;
581 *outfile_mid++ = 'z';
582 for (i = 0; i <= outfile_name_generation; i++)
583 outfile_mid[i] = 'a';