1 /* split.c -- split a file into pieces.
2 Copyright (C) 88, 91, 95, 1996 Free Software Foundation, Inc.
4 This program is free software; you can redistribute it and/or modify
5 it under the terms of the GNU General Public License as published by
6 the Free Software Foundation; either version 2, or (at your option)
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU General Public License for more details.
14 You should have received a copy of the GNU General Public License
15 along with this program; if not, write to the Free Software Foundation,
16 Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */
18 /* By tege@sics.se, with rms.
21 * Implement -t CHAR or -t REGEX to specify break characters other
28 #include <sys/types.h>
35 # define UINT_MAX ((unsigned int) ~(unsigned int) 0)
39 # define INT_MAX ((int) (UINT_MAX >> 1))
50 /* The name this program was run with. */
53 /* Base name of output files. */
56 /* Pointer to the end of the prefix in OUTFILE.
57 Suffixes are inserted here. */
/* next_file_name advances this pointer by one each time it lengthens
   the suffix.  */
58 static char *outfile_mid;
60 /* Pointer to the end of OUTFILE. */
/* main initializes this to outfile_mid + 2; next_file_name begins its
   backward scan of the suffix at outfile_end - 1.  */
61 static char *outfile_end;
63 /* Name of input file. May be "-". */
66 /* Descriptor on which input file is open. */
/* stdread reads from this descriptor; main closes it when done.  */
67 static int input_desc;
69 /* Descriptor on which output file is open. */
/* cwrite and main close it only when its value is >= 0.  */
70 static int output_desc;
72 /* If nonzero, display usage information and exit. */
75 /* If nonzero, print the version on standard output then exit. */
/* Stored to by getopt_long through the "version" entry of longopts.  */
76 static int show_version;
78 /* If nonzero, print a diagnostic on standard error just before each
79 output file is opened. */
/* Long-option table handed to getopt_long in main.
   Entries whose third field is NULL make getopt_long return the fourth
   field: the option letters 'b', 'l' and 'C', and the value 2 for
   --verbose.  Entries whose third field is a pointer make getopt_long
   store the fourth field (1) into that variable instead.  */
82 static struct option const longopts[] =
84 {"bytes", required_argument, NULL, 'b'},
85 {"lines", required_argument, NULL, 'l'},
86 {"line-bytes", required_argument, NULL, 'C'},
87 {"verbose", no_argument, NULL, 2},
88 {"help", no_argument, &show_help, 1},
89 {"version", no_argument, &show_version, 1},
97 fprintf (stderr, _("Try `%s --help' for more information.\n"),
102 Usage: %s [OPTION] [INPUT [PREFIX]]\n\
106 Output fixed-size pieces of INPUT to PREFIXaa, PREFIXab, ...; default\n\
107 PREFIX is `x'. With no INPUT, or when INPUT is -, read standard input.\n\
109 -b, --bytes=SIZE put SIZE bytes per output file\n\
110 -C, --line-bytes=SIZE put at most SIZE bytes of lines per output file\n\
111 -l, --lines=NUMBER put NUMBER lines per output file\n\
112 -NUMBER same as -l NUMBER\n\
113 --verbose print a diagnostic to standard error just\n\
114 before each output file is opened\n\
115 --help display this help and exit\n\
116 --version output version information and exit\n\
118 SIZE may have a multiplier suffix: b for 512, k for 1K, m for 1 Meg.\n\
120 puts (_("\nReport bugs to <textutils-bugs@gnu.ai.mit.edu>."));
122 exit (status == 0 ? EXIT_SUCCESS : EXIT_FAILURE);
125 /* Compute the next sequential output file name suffix and store it
126 into the string `outfile' at the position pointed to by `outfile_mid'. */
/* Takes no arguments and keeps its progress in the function-local
   statics below; it also modifies the file-scope pointer outfile_mid.  */
129 next_file_name (void)
135 static int first_call = 1;
137 /* Status for outfile name generation. */
138 static unsigned outfile_count = 0;
/* 25 * 26 == 650 names are allowed at the initial suffix length.
   NOTE(review): presumably the leading letter 'z' is held back to mark a
   longer suffix, which is why the factor is 25 rather than 26 -- confirm
   against the elided lines.  */
139 static unsigned outfile_name_limit = 25 * 26;
140 static unsigned outfile_name_generation = 1;
/* While the current suffix length still has names left, update the
   existing suffix in place.  */
145 if (outfile_count < outfile_name_limit)
/* Walk backwards from the last suffix character; the loop has no
   condition of its own, so its exit is in the elided body.  */
147 for (ne = outfile_end - 1; ; ne--)
/* The current suffix length is exhausted: allow 26 times as many names
   for the next length.  */
159 outfile_name_limit *= 26;
160 outfile_name_generation++;
/* Write a 'z' at the old insertion point and move the insertion point
   past it.  */
161 *outfile_mid++ = 'z';
/* Reset outfile_name_generation + 1 characters after the new insertion
   point to 'a'.  */
162 for (i = 0; i <= outfile_name_generation; i++)
163 outfile_mid[i] = 'a';
167 /* Write BYTES bytes at BP to an output file.
168 If NEW_FILE_FLAG is nonzero, open the next output file.
169 Otherwise add to the same output file already in use. */
/* Every failure path visible here calls error with EXIT_FAILURE and the
   output file name, so the function reports no status to its caller.  */
172 cwrite (int new_file_flag, const char *bp, int bytes)
/* Close the previously opened output file, if there is one; a failed
   close is treated as fatal.  */
176 if (output_desc >= 0 && close (output_desc) < 0)
177 error (EXIT_FAILURE, errno, "%s", outfile);
/* Diagnostic naming the file about to be created (the condition that
   guards it is elided from this listing).  */
181 fprintf (stderr, _("creating file `%s'\n"), outfile);
/* Create the file, or truncate an existing one, for writing only, with
   requested mode 0666.  */
182 output_desc = open (outfile, O_WRONLY | O_CREAT | O_TRUNC, 0666);
184 error (EXIT_FAILURE, errno, "%s", outfile);
/* A negative return from full_write is treated as a fatal write error.  */
186 if (full_write (output_desc, bp, bytes) < 0)
187 error (EXIT_FAILURE, errno, "%s", outfile);
190 /* Read NCHARS bytes from the input file into BUF.
191 Return the number of bytes successfully read.
192 If this is less than NCHARS, do not call `stdread' again. */
195 stdread (char *buf, int nchars)
/* Number of bytes still wanted; decreased after each read.  */
198 int to_be_read = nchars;
/* Reads come from the file-scope descriptor input_desc.  */
202 n_read = safe_read (input_desc, buf, to_be_read);
207 to_be_read -= n_read;
/* Bytes requested minus bytes still outstanding, i.e. the count
   actually obtained.  */
210 return nchars - to_be_read;
213 /* Split into pieces of exactly NCHARS bytes.
214 Use buffer BUF, whose size is BUFSIZE. */
217 bytes_split (int nchars, char *buf, int bufsize)
/* Passed to cwrite so that the first write opens the first output
   file.  */
220 int new_file_flag = 1;
/* Bytes still to be written to the current output file; starts as a
   whole piece.  */
222 int to_write = nchars;
/* Refill the buffer from the input.  */
227 n_read = stdread (buf, bufsize);
/* Report the failure against the input file name and exit.  */
229 error (EXIT_FAILURE, errno, "%s", infile);
/* Fewer bytes are available than the current piece still needs: write
   what is available.  */
234 if (to_read < to_write)
236 if (to_read) /* do not write 0 bytes! */
238 cwrite (new_file_flag, bp_out, to_read);
/* Otherwise write the to_write bytes that the current piece still
   needs.  */
246 cwrite (new_file_flag, bp_out, to_write);
/* Keep going only while stdread fills the whole buffer; per stdread's
   contract a short count means it must not be called again.  */
254 while (n_read == bufsize);
257 /* Split into pieces of exactly NLINES lines.
258 Use buffer BUF, whose size is BUFSIZE. */
261 lines_split (int nlines, char *buf, int bufsize)
264 char *bp, *bp_out, *eob;
/* Passed to cwrite so that the first write opens the first output
   file.  */
265 int new_file_flag = 1;
/* Refill the buffer from the input.  */
270 n_read = stdread (buf, bufsize);
/* Report the failure against the input file name and exit.  */
272 error (EXIT_FAILURE, errno, "%s", infile);
/* Advance bp to just past the next newline.  The scan has no bounds
   check of its own.
   NOTE(review): presumably the elided code stores a sentinel newline at
   the end of the data so the scan terminates -- confirm.  */
278 while (*bp++ != '\n')
279 ; /* this semicolon takes most of the time */
/* Write the bytes between bp_out and eob, if there are any.  */
282 if (eob != bp_out) /* do not write 0 bytes! */
284 cwrite (new_file_flag, bp_out, eob - bp_out);
/* Write the bytes from bp_out up to bp, the position just past the
   newline found by the scan.  */
292 cwrite (new_file_flag, bp_out, bp - bp_out);
/* Keep going only while stdread fills the whole buffer; per stdread's
   contract a short count means it must not be called again.  */
299 while (n_read == bufsize);
302 /* Split into pieces that are as large as possible while still not more
303 than NCHARS bytes, and are split on line boundaries except
304 where lines longer than NCHARS bytes occur. */
307 line_bytes_split (int nchars)
/* Private buffer of exactly NCHARS bytes, the largest piece that can be
   written.  */
313 char *buf = (char *) xmalloc (nchars);
317 /* Fill up the full buffer size from the input file. */
/* Read after the n_buffered bytes carried over from the previous
   chunk.  */
319 n_read = stdread (buf + n_buffered, nchars - n_buffered);
/* Report the failure against the input file name and exit.  */
321 error (EXIT_FAILURE, errno, "%s", infile);
323 n_buffered += n_read;
/* The buffer could not be filled (the action taken is elided from this
   listing).  */
324 if (n_buffered != nchars)
327 /* Find where to end this chunk. */
328 bp = buf + n_buffered;
/* Only a full buffer is cut back to a line boundary: move bp backwards
   until the byte before it is a newline or the start of the buffer is
   reached.  */
329 if (n_buffered == nchars)
331 while (bp > buf && bp[-1] != '\n')
335 /* If chunk has no newlines, use all the chunk. */
337 bp = buf + n_buffered;
339 /* Output the chars as one output file. */
/* new_file_flag is always 1 here, so each chunk starts a new file.  */
340 cwrite (1, buf, bp - buf);
342 /* Discard the chars we just output; move rest of chunk
343 down to be the start of the next chunk. Source and
344 destination probably overlap. */
345 n_buffered -= bp - buf;
/* memmove, not memcpy, because the two regions may overlap.  */
347 memmove (buf, bp, n_buffered);
/* Program entry point: parse the options, pick one splitting mode, open
   the input, prepare the output file name buffer, and dispatch to
   lines_split, bytes_split or line_bytes_split.  */
354 main (int argc, char **argv)
356 struct stat stat_buf;
357 int num; /* numeric argument from command line */
360 type_undef, type_bytes, type_byteslines, type_lines, type_digits
361 } split_type = type_undef;
362 int in_blk_size; /* optimal block size of input file device */
363 char *buf; /* file i/o buffer */
/* argv index of the element that supplied the most recent digit option;
   0 means no digit option has been seen yet.  */
367 int digits_optind = 0;
369 program_name = argv[0];
370 setlocale (LC_ALL, "");
371 bindtextdomain (PACKAGE, LOCALEDIR);
372 textdomain (PACKAGE);
374 /* Parse command line options. */
381 /* This is the argv-index of the option we will read next. */
/* An optind of 0 is treated as 1.  */
382 int this_optind = optind ? optind : 1;
/* Each decimal digit is accepted as an option character of its own, so
   that -NUMBER can be assembled one digit at a time below.  */
385 c = getopt_long (argc, argv, "0123456789vb:l:C:", longopts, (int *) 0);
/* Byte-count mode.  A second mode-selecting option is rejected.  */
395 if (split_type != type_undef)
397 error (0, 0, _("cannot split in more than one way"));
398 usage (EXIT_FAILURE);
400 split_type = type_bytes;
/* Parse the size, passing "bkm" as the permitted suffix letters, and
   reject values that are negative or do not fit in an int.  */
401 if (xstrtol (optarg, NULL, 10, &tmp_long, "bkm") != LONGINT_OK
402 || tmp_long < 0 || tmp_long > INT_MAX)
404 error (0, 0, _("%s: invalid number of bytes"), optarg);
405 usage (EXIT_FAILURE);
407 accum = (int) tmp_long;
/* Line-count mode.  */
411 if (split_type != type_undef)
413 error (0, 0, _("cannot split in more than one way"));
414 usage (EXIT_FAILURE);
416 split_type = type_lines;
/* Same validation as for bytes, but with an empty suffix list.  */
417 if (xstrtol (optarg, NULL, 10, &tmp_long, "") != LONGINT_OK
418 || tmp_long < 0 || tmp_long > INT_MAX)
420 error (0, 0, _("%s: invalid number of lines"), optarg);
421 usage (EXIT_FAILURE);
423 accum = (int) tmp_long;
/* Line-bytes mode.  */
427 if (split_type != type_undef)
429 error (0, 0, _("cannot split in more than one way"));
430 usage (EXIT_FAILURE);
433 split_type = type_byteslines;
434 if (xstrtol (optarg, NULL, 10, &tmp_long, "bkm") != LONGINT_OK
435 || tmp_long < 0 || tmp_long > INT_MAX)
437 error (0, 0, _("%s: invalid number of bytes"), optarg);
438 usage (EXIT_FAILURE);
440 accum = (int) tmp_long;
/* Digit option: allowed when no mode has been chosen yet or when an
   earlier digit option chose the mode.  */
453 if (split_type != type_undef && split_type != type_digits)
455 error (0, 0, _("cannot split in more than one way"));
456 usage (EXIT_FAILURE);
458 if (digits_optind != 0 && digits_optind != this_optind)
459 accum = 0; /* This digit comes from a different argv element than the
                  previous one: discard the earlier number and start over. */
460 digits_optind = this_optind;
461 split_type = type_digits;
/* Append this decimal digit to the number being accumulated.  */
462 accum = accum * 10 + c - '0';
470 usage (EXIT_FAILURE);
476 printf ("split (%s) %s\n", GNU_PACKAGE, VERSION);
483 /* Handle default case. */
/* With no mode option given, split by lines.  */
484 if (split_type == type_undef)
486 split_type = type_lines;
492 error (0, 0, _("invalid number"));
493 usage (EXIT_FAILURE);
497 /* Get out the filename arguments. */
500 infile = argv[optind++];
503 outbase = argv[optind++];
507 error (0, 0, _("too many arguments"));
508 usage (EXIT_FAILURE);
511 /* Open the input file. */
/* The name "-" is handled separately from a real path (that branch is
   elided from this listing).  */
512 if (!strcmp (infile, "-"))
516 input_desc = open (infile, O_RDONLY);
518 error (EXIT_FAILURE, errno, "%s", infile);
521 /* No output file is open now. */
524 /* Copy the output file prefix so we can add suffixes to it.
525 26**29 is certainly enough output files! */
/* 30 bytes beyond the prefix are reserved for the suffix and its
   terminator.  */
527 outfile = xmalloc (strlen (outbase) + 30);
528 strcpy (outfile, outbase);
/* The suffix starts where the copied prefix ends and is initially two
   characters long.  */
529 outfile_mid = outfile + strlen (outfile);
530 outfile_end = outfile_mid + 2;
/* Zero all 30 reserved bytes so the name stays NUL-terminated as the
   suffix grows.  */
531 memset (outfile_mid, 0, 30);
532 outfile_mid[0] = 'a';
533 outfile_mid[1] = 'a' - 1; /* first call to next_file_name makes it an 'a' */
535 /* Get the optimal block size of input device and make a buffer. */
537 if (fstat (input_desc, &stat_buf) < 0)
538 error (EXIT_FAILURE, errno, "%s", infile);
539 in_blk_size = ST_BLKSIZE (stat_buf);
/* NOTE(review): the one extra byte presumably leaves room for a sentinel
   used by lines_split's newline scan -- confirm.  */
541 buf = xmalloc (in_blk_size + 1);
/* Dispatch on the chosen mode.  The line and byte splitters share BUF;
   line_bytes_split allocates its own buffer.  */
547 lines_split (num, buf, in_blk_size);
551 bytes_split (num, buf, in_blk_size);
554 case type_byteslines:
555 line_bytes_split (num);
/* Close the input and any output file still open; a failed close is
   fatal.  */
562 if (close (input_desc) < 0)
563 error (EXIT_FAILURE, errno, "%s", infile);
564 if (output_desc >= 0 && close (output_desc) < 0)
565 error (EXIT_FAILURE, errno, "%s", outfile);