1 /* split.c -- split a file into pieces.
2 Copyright (C) 1988, 1991, 1995 Free Software Foundation, Inc.
4 This program is free software; you can redistribute it and/or modify
5 it under the terms of the GNU General Public License as published by
6 the Free Software Foundation; either version 2, or (at your option)
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU General Public License for more details.
14 You should have received a copy of the GNU General Public License
15 along with this program; if not, write to the Free Software
16 Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */
18 /* By tege@sics.se, with rms.
21 * Implement -t CHAR or -t REGEX to specify break characters other
28 #include <sys/types.h>
/* Forward declarations of the file-local helpers.  They are written
   with old-style empty parameter lists, so the compiler does not
   check argument counts or types at the call sites. */
37 static int convint ();
38 static int isdigits ();
39 static int stdread ();
40 static void line_bytes_split ();
41 static void bytes_split ();
42 static void cwrite ();
43 static void lines_split ();
44 static void next_file_name ();
46 /* The name this program was run with. */
49 /* Base name of output files. */
52 /* Pointer to the end of the prefix in OUTFILE.
53 Suffixes are inserted here. */
54 static char *outfile_mid;
56 /* Pointer to the end of OUTFILE. */
57 static char *outfile_end;
59 /* Status for outfile name generation. */
/* The initializer -1 converts to UINT_MAX because the object is unsigned.
   NOTE(review): presumably next_file_name increments the counter before
   comparing it, so the first name sees 0 -- that increment is not in the
   visible lines; confirm. */
60 static unsigned outfile_count = -1;
/* 25 * 26 = 650.  next_file_name compares outfile_count against this
   and multiplies it by 26 when it starts a new name generation. */
61 static unsigned outfile_name_limit = 25 * 26;
/* Incremented by next_file_name together with the limit above; it also
   bounds the loop there that refills the suffix with 'a'. */
62 static unsigned outfile_name_generation = 1;
64 /* Name of input file. May be "-". */
67 /* Descriptor on which input file is open. */
68 static int input_desc;
70 /* Descriptor on which output file is open. */
/* The close calls on this descriptor are guarded by output_desc >= 0,
   so a negative value stands for "no output file open". */
71 static int output_desc;
73 /* If non-zero, display usage information and exit. */
76 /* If non-zero, print the version on standard output then exit. */
77 static int show_version;
/* Long-option table handed to getopt_long.
   --bytes, --lines and --line-bytes each require an argument and are
   reported as the short option characters 'b', 'l' and 'C'.
   --help and --version carry a flag pointer instead: getopt_long
   stores 1 into show_help or show_version rather than returning an
   option character. */
79 static struct option const longopts[] =
81 {"bytes", required_argument, NULL, 'b'},
82 {"lines", required_argument, NULL, 'l'},
83 {"line-bytes", required_argument, NULL, 'C'},
84 {"help", no_argument, &show_help, 1},
85 {"version", no_argument, &show_version, 1},
/* Report a command-line problem and/or print the help text.
   REASON is a message printed to stderr after the program name;
   callers pass a null pointer when they have no message.
   The visible lines also print a "Try ... --help" hint to stderr and
   hold the full usage text.
   NOTE(review): how STATUS chooses between the hint and the full
   text, and whether this function exits, is decided in lines that
   are not visible here -- confirm before relying on either. */
90 usage (status, reason)
95 fprintf (stderr, "%s: %s\n", program_name, reason);
98 fprintf (stderr, "Try `%s --help' for more information.\n",
103 Usage: %s [OPTION] [INPUT [PREFIX]]\n\
108 -C, --line-bytes=SIZE put at most SIZE bytes of lines per output file\n\
109 -b, --bytes=SIZE put SIZE bytes per output file\n\
110 -l, --lines=NUMBER put NUMBER lines per output file\n\
111 -NUMBER same as -l NUMBER\n\
112 --help display this help and exit\n\
113 --version output version information and exit\n\
115 SIZE may have a multiplier suffix: b for 512, k for 1K, m for 1 Meg.\n\
116 With no PREFIX, use x. With no INPUT, or when INPUT is -, read\n\
128 struct stat stat_buf;
129 int num; /* numeric argument from command line */
132 type_undef, type_bytes, type_byteslines, type_lines, type_digits
133 } split_type = type_undef;
134 int in_blk_size; /* optimal block size of input file device */
135 char *buf; /* file i/o buffer */
/* argv index at which the most recent digit option was read;
   0 until a digit option has been seen. */
139 int digits_optind = 0;
141 program_name = argv[0];
143 /* Parse command line options. */
150 /* This is the argv-index of the option we will read next. */
151 int this_optind = optind ? optind : 1;
/* The ten digits are listed as option characters so that the
   -NUMBER form can be collected one digit at a time below. */
153 c = getopt_long (argc, argv, "0123456789b:l:C:", longopts, (int *) 0);
/* Byte-count split: the size is parsed by convint, which also
   handles a multiplier suffix. */
163 if (split_type != type_undef)
164 usage (2, "cannot split in more than one way");
165 split_type = type_bytes;
166 if (convint (optarg, &accum) == -1)
167 usage (2, "invalid number of bytes");
/* Line-count split: the argument must consist of digits only, so no
   multiplier suffix is accepted here.
   NOTE(review): atoi gives no indication of overflow. */
171 if (split_type != type_undef)
172 usage (2, "cannot split in more than one way");
173 split_type = type_lines;
174 if (!isdigits (optarg))
175 usage (2, "invalid number of lines");
176 accum = atoi (optarg);
/* Line-bytes split: the size is parsed the same way as for the
   byte-count case. */
180 if (split_type != type_undef)
181 usage (2, "cannot split in more than one way");
182 split_type = type_byteslines;
183 if (convint (optarg, &accum) == -1)
184 usage (2, "invalid number of bytes");
/* A digit used as an option character.  Digits are accumulated into
   accum one at a time; a digit that comes from a different argv
   element than the previous digit restarts the number. */
197 if (split_type != type_undef && split_type != type_digits)
198 usage (2, "cannot split in more than one way");
199 if (digits_optind != 0 && digits_optind != this_optind)
200 accum = 0; /* More than one number given; ignore other. */
201 digits_optind = this_optind;
202 split_type = type_digits;
203 accum = accum * 10 + c - '0';
207 usage (2, (char *)0);
213 printf ("split - %s\n", version_string);
218 usage (0, (char *)0);
220 /* Handle default case. */
221 if (split_type == type_undef)
223 split_type = type_lines;
228 usage (2, "invalid number");
231 /* Get out the filename arguments. */
234 infile = argv[optind++];
237 outbase = argv[optind++];
240 usage (2, "too many arguments");
242 /* Open the input file. */
243 if (!strcmp (infile, "-"))
247 input_desc = open (infile, O_RDONLY);
249 error (1, errno, "%s", infile);
252 /* No output file is open now. */
255 /* Copy the output file prefix so we can add suffixes to it.
256 26**29 is certainly enough output files! */
258 outfile = xmalloc (strlen (outbase) + 30);
259 strcpy (outfile, outbase);
260 outfile_mid = outfile + strlen (outfile);
/* The suffix starts out two characters long. */
261 outfile_end = outfile_mid + 2;
/* Zero the 30 bytes that follow the prefix.  Prefix length plus 30 is
   exactly the size allocated above, and the zeros leave a NUL after
   the two suffix characters stored next. */
262 memset (outfile_mid, 0, 30);
263 outfile_mid[0] = 'a';
264 outfile_mid[1] = 'a' - 1; /* first call to next_file_name makes it an 'a' */
266 /* Get the optimal block size of input device and make a buffer. */
268 if (fstat (input_desc, &stat_buf) < 0)
269 error (1, errno, "%s", infile);
270 in_blk_size = ST_BLKSIZE (stat_buf);
/* NOTE(review): one byte more than in_blk_size is allocated, while
   in_blk_size itself is what gets passed as the buffer size below.
   Presumably the spare byte holds a sentinel written by a split
   routine -- that store is not visible here; confirm. */
272 buf = xmalloc (in_blk_size + 1);
278 lines_split (num, buf, in_blk_size);
282 bytes_split (num, buf, in_blk_size);
285 case type_byteslines:
/* line_bytes_split is given only the size; it allocates its own
   buffer. */
286 line_bytes_split (num);
293 if (close (input_desc) < 0)
294 error (1, errno, "%s", infile);
/* The close is skipped when output_desc is negative. */
295 if (output_desc >= 0 && close (output_desc) < 0)
296 error (1, errno, "%s", outfile);
301 /* Return nonzero if the string STR is composed entirely of decimal digits. */
317 /* Put the value of the number in STR into *VAL.
318 STR can specify a positive integer, optionally ending in a
319 one-letter multiplier; the usage text lists `b' (512), `k' (1K)
320 and `m' (1 Meg).  Return 0 if STR is valid, -1 if not. */
/* A recognized suffix letter is overwritten with a NUL, so STR is
   modified in place. */
328 int arglen = strlen (str);
/* The last character of STR selects the multiplier.
   NOTE(review): for an empty STR, arglen - 1 is -1 and this would
   read before the start of the string unless a check in a line not
   visible here excludes that case -- confirm. */
332 switch (str[arglen - 1])
336 str[arglen - 1] = '\0';
340 str[arglen - 1] = '\0';
343 multiplier = 1048576;
344 str[arglen - 1] = '\0';
/* NOTE(review): atoi reports no errors, and the multiplication is not
   checked for overflow. */
350 *val = atoi (str) * multiplier;
354 /* Split into pieces of exactly NCHARS bytes.
355 Use buffer BUF, whose size is BUFSIZE. */
/* Input is fetched with stdread, BUFSIZE bytes per call, and handed
   to cwrite.  Failures are reported through error (1, errno, ...)
   with the input file name. */
358 bytes_split (nchars, buf, bufsize)
/* Passed to cwrite, where a nonzero value means "open the next
   output file". */
364 int new_file_flag = 1;
/* Starts at the size of a full piece.
   NOTE(review): presumably tracks how many bytes the current output
   file still needs; its updates are in lines not visible here. */
366 int to_write = nchars;
371 n_read = stdread (buf, bufsize);
373 error (1, errno, "%s", infile);
/* When to_read is smaller than to_write, only to_read bytes are
   written (nothing at all if it is zero); the other visible call
   writes to_write bytes. */
378 if (to_read < to_write)
380 if (to_read) /* do not write 0 bytes! */
382 cwrite (new_file_flag, bp_out, to_read);
390 cwrite (new_file_flag, bp_out, to_write);
/* A read shorter than BUFSIZE ends the loop. */
398 while (n_read == bufsize);
401 /* Split into pieces of exactly NLINES lines.
402 Use buffer BUF, whose size is BUFSIZE. */
/* Input is fetched with stdread, BUFSIZE bytes per call, and handed
   to cwrite.  Failures are reported through error (1, errno, ...)
   with the input file name. */
405 lines_split (nlines, buf, bufsize)
411 char *bp, *bp_out, *eob;
/* Passed to cwrite, where a nonzero value means "open the next
   output file". */
412 int new_file_flag = 1;
417 n_read = stdread (buf, bufsize);
419 error (1, errno, "%s", infile);
/* Advance bp to just past the next newline.  The scan has no bounds
   check of its own.
   NOTE(review): presumably a sentinel newline is stored at eob in a
   line not visible here (main allocates one byte more than BUFSIZE
   for the buffer) -- confirm. */
425 while (*bp++ != '\n')
426 ; /* this semicolon takes most of the time */
/* Write the eob - bp_out bytes starting at bp_out, if there are any. */
429 if (eob != bp_out) /* do not write 0 bytes! */
431 cwrite (new_file_flag, bp_out, eob - bp_out);
/* Write the bp - bp_out bytes starting at bp_out. */
439 cwrite (new_file_flag, bp_out, bp - bp_out);
/* A read shorter than BUFSIZE ends the loop. */
446 while (n_read == bufsize);
449 /* Split into pieces that are as large as possible while still not more
450 than NCHARS bytes, and are split on line boundaries except
451 where lines longer than NCHARS bytes occur. */
454 line_bytes_split (nchars)
/* Working buffer sized to hold one maximum-size piece.
   NOTE(review): no matching free appears in the visible lines. */
461 char *buf = (char *) xmalloc (nchars);
465 /* Fill up the full buffer size from the input file. */
/* New data is appended after the n_buffered bytes carried over from
   the previous chunk. */
467 n_read = stdread (buf + n_buffered, nchars - n_buffered);
469 error (1, errno, "%s", infile);
471 n_buffered += n_read;
/* The buffer did not fill completely.  (What is done about it is in
   a line not visible here.) */
472 if (n_buffered != nchars)
475 /* Find where to end this chunk. */
476 bp = buf + n_buffered;
/* Only a completely full buffer is trimmed back to a line boundary. */
477 if (n_buffered == nchars)
/* Back up until bp sits just after a newline or reaches the start of
   the buffer. */
479 while (bp > buf && bp[-1] != '\n')
483 /* If chunk has no newlines, use all the chunk. */
485 bp = buf + n_buffered;
487 /* Output the chars as one output file. */
/* The flag is always 1 here, so every chunk goes to a new output file. */
488 cwrite (1, buf, bp - buf);
490 /* Discard the chars we just output; move rest of chunk
491 down to be the start of the next chunk. Source and
492 destination probably overlap. */
493 n_buffered -= bp - buf;
495 memmove (buf, bp, n_buffered);
501 /* Write BYTES bytes at BP to an output file.
502 If NEW_FILE_FLAG is nonzero, open the next output file.
503 Otherwise add to the same output file already in use. */
/* Every failure (close, open, write) is reported through
   error (1, errno, ...) with the output file name. */
506 cwrite (new_file_flag, bp, bytes)
/* Close the previous output file first, if one is open. */
513 if (output_desc >= 0 && close (output_desc) < 0)
514 error (1, errno, "%s", outfile);
/* An existing file of the same name is truncated; a new file is
   created with mode 0666.
   NOTE(review): presumably next_file_name has updated outfile just
   before this, in a line not visible here. */
517 output_desc = open (outfile, O_WRONLY | O_CREAT | O_TRUNC, 0666);
519 error (1, errno, "%s", outfile);
/* full_write is defined elsewhere; a negative return is treated as a
   write error. */
521 if (full_write (output_desc, bp, bytes) < 0)
522 error (1, errno, "%s", outfile);
525 /* Read NCHARS bytes from the input file into BUF.
526 Return the number of bytes successfully read.
527 If this is less than NCHARS, do not call `stdread' again. */
530 stdread (buf, nchars)
/* Number of bytes still wanted. */
535 int to_be_read = nchars;
/* safe_read is defined elsewhere and reads from the global
   input_desc.
   NOTE(review): the surrounding loop, its termination test and any
   advance of BUF between reads are in lines not visible here. */
539 n_read = safe_read (input_desc, buf, to_be_read);
544 to_be_read -= n_read;
/* Bytes obtained = bytes requested minus bytes still outstanding. */
547 return nchars - to_be_read;
550 /* Compute the next sequential output file name suffix and store it
551 into the string `outfile' at the position pointed to by `outfile_mid'. */
/* The counter is still below the limit for the current generation of
   names. */
561 if (outfile_count < outfile_name_limit)
/* Walk backwards from the last suffix character.
   NOTE(review): the loop body and its exit are not visible here;
   presumably it increments the suffix with carry -- confirm. */
563 for (ne = outfile_end - 1; ; ne--)
/* The remaining statements start a new generation of names: the
   limit grows by a factor of 26 and the generation number goes up
   by one. */
575 outfile_name_limit *= 26;
576 outfile_name_generation++;
/* Store a 'z' at outfile_mid and advance outfile_mid past it, so the
   'z' becomes part of the fixed prefix... */
577 *outfile_mid++ = 'z';
/* ...then set the outfile_name_generation + 1 characters that follow
   to 'a'. */
578 for (i = 0; i <= outfile_name_generation; i++)
579 outfile_mid[i] = 'a';