1 /* split.c -- split a file into pieces.
2 Copyright (C) 88, 91, 95, 96, 1997 Free Software Foundation, Inc.
4 This program is free software; you can redistribute it and/or modify
5 it under the terms of the GNU General Public License as published by
6 the Free Software Foundation; either version 2, or (at your option)
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU General Public License for more details.
14 You should have received a copy of the GNU General Public License
15 along with this program; if not, write to the Free Software Foundation,
16 Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */
18 /* By tege@sics.se, with rms.
21 * Implement -t CHAR or -t REGEX to specify break characters other
28 #include <sys/types.h>
35 # define UINT_MAX ((unsigned int) ~(unsigned int) 0)
39 # define INT_MAX ((int) (UINT_MAX >> 1))
49 /* The name this program was run with. */
52 /* Base name of output files. */
55 /* Pointer to the end of the prefix in OUTFILE.
56 Suffixes are inserted here. */
57 static char *outfile_mid;
/* main points this at the byte that follows the copied prefix;
   next_file_name moves it forward by one each time it writes the
   letter z while lengthening the suffix.  */
59 /* Pointer to the end of OUTFILE. */
60 static char *outfile_end;
/* main initially sets this to outfile_mid + 2 (a two-character suffix);
   next_file_name starts its backward scan at outfile_end - 1.  */
62 /* Name of input file. May be "-". */
65 /* Descriptor on which input file is open. */
66 static int input_desc;
/* Read by stdread through safe_read; closed by main after splitting.  */
68 /* Descriptor on which output file is open. */
69 static int output_desc;
/* cwrite and main close this only when it is >= 0, so a negative value
   evidently stands for no output file being open.  */
71 /* If nonzero, display usage information and exit. */
74 /* If nonzero, print the version on standard output then exit. */
75 static int show_version;
/* Set to 1 by getopt_long through the version entry of longopts.  */
77 /* If nonzero, print a diagnostic on standard error just before each
78 output file is opened. */
/* Long options handed to getopt_long by main.  An entry whose third
   field is the address of a variable makes getopt_long store the fourth
   field in that variable and return 0; an entry with NULL there makes
   getopt_long return the fourth field itself.  */
81 static struct option const longopts[] =
83 {"bytes", required_argument, NULL, 'b'},
84 {"lines", required_argument, NULL, 'l'},
85 {"line-bytes", required_argument, NULL, 'C'},
/* The verbose option is reported as the plain code 2 rather than as an
   option letter.  */
86 {"verbose", no_argument, NULL, 2},
/* These two only set their flag variable to 1.  */
87 {"help", no_argument, &show_help, 1},
88 {"version", no_argument, &show_version, 1},
/* Body of the usage routine (its header line is not visible in this
   listing).  It prints a one-line hint pointing at the help option on
   standard error and/or the full option summary, then ends the process.
   Which of the two texts is printed depends on a test that is not
   visible here -- presumably on STATUS, TODO confirm.  All user-facing
   text goes through the _() translation macro.  */
96 fprintf (stderr, _("Try `%s --help' for more information.\n"),
101 Usage: %s [OPTION] [INPUT [PREFIX]]\n\
105 Output fixed-size pieces of INPUT to PREFIXaa, PREFIXab, ...; default\n\
106 PREFIX is `x'. With no INPUT, or when INPUT is -, read standard input.\n\
108 -b, --bytes=SIZE put SIZE bytes per output file\n\
109 -C, --line-bytes=SIZE put at most SIZE bytes of lines per output file\n\
110 -l, --lines=NUMBER put NUMBER lines per output file\n\
111 -NUMBER same as -l NUMBER\n\
112 --verbose print a diagnostic to standard error just\n\
113 before each output file is opened\n\
114 --help display this help and exit\n\
115 --version output version information and exit\n\
117 SIZE may have a multiplier suffix: b for 512, k for 1K, m for 1 Meg.\n\
119 puts (_("\nReport bugs to <textutils-bugs@gnu.org>."));
/* Never returns to the caller: a zero STATUS becomes EXIT_SUCCESS and
   any other value becomes EXIT_FAILURE.  */
121 exit (status == 0 ? EXIT_SUCCESS : EXIT_FAILURE);
124 /* Compute the next sequential output file name suffix and store it
125 into the string `outfile' at the position pointed to by `outfile_mid'. */
/* Takes no arguments; works on the file-scope pointers outfile_mid and
   outfile_end plus the function-local statics below, which keep their
   values from one call to the next.  */
128 next_file_name (void)
/* Flag for the very first call.  main seeds the suffix with the letter a
   followed by the character just before a, and notes that the first call
   here turns that second character into an a.  */
134 static int first_call = 1;
136 /* Status for outfile name generation. */
/* Compared against the limit below; presumably the number of names
   already produced at the current suffix length (its updates are not
   visible in this listing).  */
137 static unsigned outfile_count = 0;
/* Number of names available at the current suffix length.  It starts at
   25 * 26 rather than 26 * 26, presumably because a leading z is kept
   back as the marker written below when the suffix is lengthened --
   TODO confirm.  */
138 static unsigned outfile_name_limit = 25 * 26;
/* Incremented each time the suffix is lengthened; starts at 1.  */
139 static unsigned outfile_name_generation = 1;
/* Names remain at the current length.  The scan starts at the last
   suffix character and moves towards the front; the loop body is not
   visible here (presumably an increment with carry).  */
144 if (outfile_count < outfile_name_limit)
146 for (ne = outfile_end - 1; ; ne--)
/* Current length exhausted: allow 26 times as many names, store a z at
   the old start of the suffix and step outfile_mid past it, then set the
   outfile_name_generation + 1 characters that follow to the letter a.  */
158 outfile_name_limit *= 26;
159 outfile_name_generation++;
160 *outfile_mid++ = 'z';
161 for (i = 0; i <= outfile_name_generation; i++)
162 outfile_mid[i] = 'a';
166 /* Write BYTES bytes at BP to an output file.
167 If NEW_FILE_FLAG is nonzero, open the next output file.
168 Otherwise add to the same output file already in use. */
/* Failures of close, open and write are all reported through error with
   EXIT_FAILURE and the current output file name, so no status is handed
   back to the caller.  Uses the file-scope output_desc and outfile.  */
171 cwrite (int new_file_flag, const char *bp, int bytes)
/* Close the previous piece first; a negative output_desc means that no
   piece is open yet.  A failed close is treated as fatal.  */
175 if (output_desc >= 0 && close (output_desc) < 0)
176 error (EXIT_FAILURE, errno, "%s", outfile);
/* Announce the new file on standard error (the guard is not visible in
   this listing; presumably the verbose flag), then create the file or
   truncate an existing one.  The 0666 mode is further restricted by the
   process umask.  */
180 fprintf (stderr, _("creating file `%s'\n"), outfile);
181 output_desc = open (outfile, O_WRONLY | O_CREAT | O_TRUNC, 0666);
183 error (EXIT_FAILURE, errno, "%s", outfile);
/* full_write is a project helper; a negative result is taken as a write
   failure.  */
185 if (full_write (output_desc, bp, bytes) < 0)
186 error (EXIT_FAILURE, errno, "%s", outfile);
189 /* Read NCHARS bytes from the input file into BUF.
190 Return the number of bytes successfully read.
191 If this is less than NCHARS, do not call `stdread' again. */
/* Reads from the file-scope descriptor input_desc.  BUF must have room
   for NCHARS bytes.  */
194 stdread (char *buf, int nchars)
/* Bytes still wanted; reduced by each successful read.  */
197 int to_be_read = nchars;
/* safe_read is a project helper that evidently may deliver fewer bytes
   than requested, hence the running count.  The loop header and the
   tests applied to n_read are not visible in this listing.  */
201 n_read = safe_read (input_desc, buf, to_be_read);
206 to_be_read -= n_read;
/* Requested size minus what is still outstanding, i.e. the number of
   bytes actually obtained.  */
209 return nchars - to_be_read;
212 /* Split into pieces of exactly NCHARS bytes.
213 Use buffer BUF, whose size is BUFSIZE. */
/* A piece may span several buffer refills, and the last piece is shorter
   when the input ends part-way through it.  All output goes through
   cwrite; a read failure is reported through error with EXIT_FAILURE.  */
216 bytes_split (int nchars, char *buf, int bufsize)
/* Passed to cwrite: nonzero asks it to open the next output file.  */
219 int new_file_flag = 1;
/* Bytes still owed to the current output file.  */
221 int to_write = nchars;
/* Refill the buffer; to_read and bp_out (set on lines not visible here)
   track the unwritten part of it.  */
226 n_read = stdread (buf, bufsize);
228 error (EXIT_FAILURE, errno, "%s", infile);
/* Less data buffered than the current piece still needs: write what is
   there, unless that is nothing at all.  */
233 if (to_read < to_write)
235 if (to_read) /* do not write 0 bytes! */
237 cwrite (new_file_flag, bp_out, to_read);
/* Enough data buffered to complete the current piece (the branch keyword
   and the bookkeeping that follows are not visible here).  */
245 cwrite (new_file_flag, bp_out, to_write);
/* A read that did not fill the buffer ends the loop, matching the
   stdread rule that it must not be called again after a short count.  */
253 while (n_read == bufsize);
256 /* Split into pieces of exactly NLINES lines.
257 Use buffer BUF, whose size is BUFSIZE. */
/* A line here ends at a newline character.  Output goes through cwrite;
   a read failure is reported through error with EXIT_FAILURE.  */
260 lines_split (int nlines, char *buf, int bufsize)
/* bp scans the buffered data and bp_out marks the first byte not yet
   written; eob presumably marks the end of the data just read (its
   assignment is not visible in this listing).  */
263 char *bp, *bp_out, *eob;
/* Passed to cwrite: nonzero asks it to open the next output file.  */
264 int new_file_flag = 1;
269 n_read = stdread (buf, bufsize);
271 error (EXIT_FAILURE, errno, "%s", infile);
/* NOTE(review): this scan has no bounds check of its own, so it has to
   be stopped by a newline stored just past the data (that store is not
   visible here).  main allocates BUF one byte larger than BUFSIZE,
   presumably for exactly that purpose -- confirm.  */
277 while (*bp++ != '\n')
278 ; /* this semicolon takes most of the time */
/* Write the unwritten data up to eob, unless there is none.  */
281 if (eob != bp_out) /* do not write 0 bytes! */
283 cwrite (new_file_flag, bp_out, eob - bp_out);
/* bp was advanced past the newline, so this writes everything up to and
   including it.  The line-counting test that guards this call is not
   visible here.  */
291 cwrite (new_file_flag, bp_out, bp - bp_out);
/* A read that did not fill the buffer ends the loop.  */
298 while (n_read == bufsize);
301 /* Split into pieces that are as large as possible while still not more
302 than NCHARS bytes, and are split on line boundaries except
303 where lines longer than NCHARS bytes occur. */
/* Every chunk is written with a new-file flag of 1, so each chunk becomes
   its own output file.  A read failure is reported through error with
   EXIT_FAILURE.  */
306 line_bytes_split (int nchars)
/* Private buffer holding at most one maximal piece.  xmalloc is a project
   helper (presumably it does not return on allocation failure).
   NOTE(review): no matching free is visible in this listing.  */
312 char *buf = (char *) xmalloc (nchars);
316 /* Fill up the full buffer size from the input file. */
/* n_buffered bytes are already present from the previous round, so only
   the remainder of the buffer is requested.  */
318 n_read = stdread (buf + n_buffered, nchars - n_buffered);
320 error (EXIT_FAILURE, errno, "%s", infile);
322 n_buffered += n_read;
/* The buffer could not be filled, which by the stdread rule means no
   further reads; the action taken here is not visible in this listing.  */
323 if (n_buffered != nchars)
326 /* Find where to end this chunk. */
327 bp = buf + n_buffered;
/* Only a completely full buffer is cut back to a line boundary: bp is
   stepped backwards until it sits just after a newline or reaches the
   start of the buffer.  */
328 if (n_buffered == nchars)
330 while (bp > buf && bp[-1] != '\n')
334 /* If chunk has no newlines, use all the chunk. */
336 bp = buf + n_buffered;
338 /* Output the chars as one output file. */
339 cwrite (1, buf, bp - buf);
341 /* Discard the chars we just output; move rest of chunk
342 down to be the start of the next chunk. Source and
343 destination probably overlap. */
344 n_buffered -= bp - buf;
/* memmove rather than memcpy because of that possible overlap.  */
346 memmove (buf, bp, n_buffered);
/* Entry point.  Parses the options, settles on exactly one splitting
   mode, opens the input, prepares the output name buffer and the I/O
   buffer, runs the matching splitter and finally closes both
   descriptors.  Short lines such as case labels and braces are absent
   from this listing, so the notes below describe the visible statements
   only and mark everything else as an assumption.  */
353 main (int argc, char **argv)
355 struct stat stat_buf;
356 int num; /* numeric argument from command line */
/* type_undef means that no mode has been chosen yet; type_digits marks
   the -NUMBER form, which the usage text describes as the same as
   -l NUMBER.  */
359 type_undef, type_bytes, type_byteslines, type_lines, type_digits
360 } split_type = type_undef;
361 int in_blk_size; /* optimal block size of input file device */
362 char *buf; /* file i/o buffer */
/* argv index of the element that supplied the -NUMBER digits seen so
   far; 0 until a digit option has been seen.  */
366 int digits_optind = 0;
368 program_name = argv[0];
369 setlocale (LC_ALL, "");
370 bindtextdomain (PACKAGE, LOCALEDIR);
371 textdomain (PACKAGE);
373 /* Parse command line options. */
380 /* This is the argv-index of the option we will read next. */
381 int this_optind = optind ? optind : 1;
/* Every digit is an option letter of its own, so -NUMBER arrives one
   digit per call.  NOTE(review): the option string also lists v, but no
   handler for it is visible in this listing -- confirm.  */
384 c = getopt_long (argc, argv, "0123456789vb:l:C:", longopts, (int *) 0);
/* Byte-count mode (type_bytes).  Like every mode below, it first refuses
   to combine with a mode chosen earlier; usage does not return.  */
394 if (split_type != type_undef)
396 error (0, 0, _("cannot split in more than one way"));
397 usage (EXIT_FAILURE);
399 split_type = type_bytes;
/* The b, k and m multiplier suffixes are accepted and the value must fit
   in an int.  Zero passes this test.  */
400 if (xstrtol (optarg, NULL, 10, &tmp_long, "bkm") != LONGINT_OK
401 || tmp_long < 0 || tmp_long > INT_MAX)
403 error (0, 0, _("%s: invalid number of bytes"), optarg);
404 usage (EXIT_FAILURE);
406 accum = (int) tmp_long;
/* Line-count mode (type_lines): same pattern, but no multiplier suffix
   is accepted.  */
410 if (split_type != type_undef)
412 error (0, 0, _("cannot split in more than one way"));
413 usage (EXIT_FAILURE);
415 split_type = type_lines;
416 if (xstrtol (optarg, NULL, 10, &tmp_long, "") != LONGINT_OK
417 || tmp_long < 0 || tmp_long > INT_MAX)
419 error (0, 0, _("%s: invalid number of lines"), optarg);
420 usage (EXIT_FAILURE);
422 accum = (int) tmp_long;
/* Line-bytes mode (type_byteslines): same pattern and suffixes as the
   byte-count mode.  */
426 if (split_type != type_undef)
428 error (0, 0, _("cannot split in more than one way"));
429 usage (EXIT_FAILURE);
432 split_type = type_byteslines;
433 if (xstrtol (optarg, NULL, 10, &tmp_long, "bkm") != LONGINT_OK
434 || tmp_long < 0 || tmp_long > INT_MAX)
436 error (0, 0, _("%s: invalid number of bytes"), optarg);
437 usage (EXIT_FAILURE);
439 accum = (int) tmp_long;
/* Digit option: allowed only when no mode or only earlier digits were
   seen.  */
452 if (split_type != type_undef && split_type != type_digits)
454 error (0, 0, _("cannot split in more than one way"));
455 usage (EXIT_FAILURE);
/* Digits coming from a different argv element start a new number.  */
457 if (digits_optind != 0 && digits_optind != this_optind)
458 accum = 0; /* More than one number given; ignore other. */
459 digits_optind = this_optind;
460 split_type = type_digits;
/* NOTE(review): no overflow check is visible here; a long enough digit
   string would overflow the int accumulator, which is undefined
   behaviour for a signed int.  Consider bounding accum against INT_MAX
   before the multiplication.  */
461 accum = accum * 10 + c - '0';
/* Presumably the fallback for unrecognised options -- the label is not
   visible in this listing.  */
469 usage (EXIT_FAILURE);
/* Presumably guarded by show_version; the test is not visible here.  */
475 printf ("split (%s) %s\n", GNU_PACKAGE, VERSION);
482 /* Handle default case. */
/* No mode option was given, so line mode is used (the default count is
   set on a line that is not visible in this listing).  */
483 if (split_type == type_undef)
485 split_type = type_lines;
/* Presumably rejects a non-positive count, which the per-option checks
   above let through when it is zero -- the condition is not visible.  */
491 error (0, 0, _("invalid number"));
492 usage (EXIT_FAILURE);
496 /* Get out the filename arguments. */
/* Optional operands, in this order: input file name, output prefix.  */
499 infile = argv[optind++];
502 outbase = argv[optind++];
506 error (0, 0, _("too many arguments"));
507 usage (EXIT_FAILURE);
510 /* Open the input file. */
/* A lone dash as input name is special-cased: the usage text says it
   means standard input (the assignment itself is not visible here).  */
511 if (!strcmp (infile, "-"))
515 input_desc = open (infile, O_RDONLY);
517 error (EXIT_FAILURE, errno, "%s", infile);
520 /* No output file is open now. */
523 /* Copy the output file prefix so we can add suffixes to it.
524 26**29 is certainly enough output files! */
/* 30 bytes beyond the prefix are reserved for the suffix and its
   terminator.  The memset below zero-fills exactly those 30 bytes, so
   suffixes stored later need no explicit terminator as long as they
   stay inside that area.  */
526 outfile = xmalloc (strlen (outbase) + 30);
527 strcpy (outfile, outbase);
528 outfile_mid = outfile + strlen (outfile);
/* Start with a two-character suffix.  */
529 outfile_end = outfile_mid + 2;
530 memset (outfile_mid, 0, 30);
531 outfile_mid[0] = 'a';
532 outfile_mid[1] = 'a' - 1; /* first call to next_file_name makes it an 'a' */
534 /* Get the optimal block size of input device and make a buffer. */
536 if (fstat (input_desc, &stat_buf) < 0)
537 error (EXIT_FAILURE, errno, "%s", infile);
538 in_blk_size = ST_BLKSIZE (stat_buf);
/* One byte more than the block size is allocated while the splitters
   are told the size is in_blk_size; lines_split presumably uses the
   extra byte for a sentinel newline -- confirm.  */
540 buf = xmalloc (in_blk_size + 1);
/* Dispatch on the chosen mode (most case labels are not visible here).
   line_bytes_split allocates its own buffer and so takes only the
   count.  */
546 lines_split (num, buf, in_blk_size);
550 bytes_split (num, buf, in_blk_size);
553 case type_byteslines:
554 line_bytes_split (num);
/* Close errors are fatal for both descriptors; the output descriptor is
   closed only when it is non-negative.  */
561 if (close (input_desc) < 0)
562 error (EXIT_FAILURE, errno, "%s", infile);
563 if (output_desc >= 0 && close (output_desc) < 0)
564 error (EXIT_FAILURE, errno, "%s", outfile);