1 /* split.c -- split a file into pieces.
2 Copyright (C) 88, 91, 1995-2002 Free Software Foundation, Inc.
4 This program is free software; you can redistribute it and/or modify
5 it under the terms of the GNU General Public License as published by
6 the Free Software Foundation; either version 2, or (at your option)
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU General Public License for more details.
14 You should have received a copy of the GNU General Public License
15 along with this program; if not, write to the Free Software Foundation,
16 Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */
18 /* By tege@sics.se, with rms.
21 * Implement -t CHAR or -t REGEX to specify break characters other
28 #include <sys/types.h>
33 #include "full-write.h"
34 #include "safe-read.h"
37 /* The official name of this program (e.g., no `g' prefix). */
38 #define PROGRAM_NAME "split"
40 #define AUTHORS N_ ("Torbjorn Granlund and Richard M. Stallman")
42 /* The name this program was run with. */
45 /* Base name of output files. */
48 /* Pointer to the end of the prefix in OUTFILE.
49 Suffixes are inserted here. */
50 static char *outfile_mid;
52 /* Pointer to the end of OUTFILE. */
53 static char *outfile_end;
55 /* Name of input file. May be "-". */
58 /* Descriptor on which input file is open. */
59 static int input_desc;
61 /* Descriptor on which output file is open. */
62 static int output_desc;
64 /* If nonzero, print a diagnostic on standard error just before each
65 output file is opened. */
68 static char const shortopts[] = "vb:l:C:"
69 #if POSIX2_VERSION < 200112
74 static struct option const longopts[] =
76 {"bytes", required_argument, NULL, 'b'},
77 {"lines", required_argument, NULL, 'l'},
78 {"line-bytes", required_argument, NULL, 'C'},
79 {"verbose", no_argument, NULL, 2},
80 {GETOPT_HELP_OPTION_DECL},
81 {GETOPT_VERSION_OPTION_DECL},
89 fprintf (stderr, _("Try `%s --help' for more information.\n"),
94 Usage: %s [OPTION] [INPUT [PREFIX]]\n\
98 Output fixed-size pieces of INPUT to PREFIXaa, PREFIXab, ...; default\n\
99 PREFIX is `x'. With no INPUT, or when INPUT is -, read standard input.\n\
103 Mandatory arguments to long options are mandatory for short options too.\n\
106 -b, --bytes=SIZE put SIZE bytes per output file\n\
107 -C, --line-bytes=SIZE put at most SIZE bytes of lines per output file\n\
108 -l, --lines=NUMBER put NUMBER lines per output file\n\
110 if (POSIX2_VERSION < 200112)
112 -NUMBER (obsolete) same as -l NUMBER\n\
115 --verbose print a diagnostic to standard error just\n\
116 before each output file is opened\n\
118 fputs (HELP_OPTION_DESCRIPTION, stdout);
119 fputs (VERSION_OPTION_DESCRIPTION, stdout);
122 SIZE may have a multiplier suffix: b for 512, k for 1K, m for 1 Meg.\n\
124 puts (_("\nReport bugs to <bug-textutils@gnu.org>."));
126 exit (status == 0 ? EXIT_SUCCESS : EXIT_FAILURE);
129 /* Compute the next sequential output file name suffix and store it
130 into the string `outfile' at the position pointed to by `outfile_mid'. */
/* The suffix is the run of characters from `outfile_mid' up to, but not
   including, `outfile_end'; it is updated in place on every call.  */
133 next_file_name (void)
/* Number of suffix characters (the "digits" mentioned further down).
   Being static, the value is kept from one call to the next; it
   starts at 2.  */
135 static unsigned n_digits = 2;
138 /* Change any suffix of `z's to `a's. */
/* Walk leftwards from the last suffix character for as long as a `z'
   is found, so P stops on the rightmost character that is not a `z'.  */
139 for (p = outfile_end - 1; *p == 'z'; p--)
144 /* Increment the rightmost non-`z' character that was present before the
145 above z/a substitutions. There is guaranteed to be such a character. */
148 /* If the result of that increment operation yielded a `z' and there
149 are only `z's to the left of it, then append two more `a' characters
150 to the end and add 1 (-1 + 2) to the number of digits (we're taking
151 out this `z' and adding two `a's). */
/* P == outfile_mid means the character just examined is the first
   character of the suffix.  */
152 if (*p == 'z' && p == outfile_mid)
/* Each store writes one `a' at the current end of the name and then
   moves `outfile_end' one character to the right.  */
156 *outfile_end++ = 'a';
157 *outfile_end++ = 'a';
161 /* Write BYTES bytes at BP to an output file.
162 If NEW_FILE_FLAG is nonzero, open the next output file.
163 Otherwise add to the same output file already in use. */
/* Failures to close, create or write the output file are all reported
   through error () with EXIT_FAILURE, errno and the current `outfile'
   name.  */
166 cwrite (int new_file_flag, const char *bp, int bytes)
/* Close the output file that is currently open, if any; a negative
   `output_desc' means there is none and the close is not attempted.  */
170 if (output_desc >= 0 && close (output_desc) < 0)
171 error (EXIT_FAILURE, errno, "%s", outfile);
/* Diagnostic on standard error naming the file about to be created.  */
175 fprintf (stderr, _("creating file `%s'\n"), outfile);
/* Open for writing only, creating the file if needed and truncating
   any existing contents; 0666 is the permission mode requested.  */
176 output_desc = open (outfile,
177 O_WRONLY | O_CREAT | O_TRUNC | O_BINARY, 0666);
179 error (EXIT_FAILURE, errno, "%s", outfile);
/* Anything other than a complete write of BYTES bytes is an error.  */
181 if (full_write (output_desc, bp, bytes) != bytes)
182 error (EXIT_FAILURE, errno, "%s", outfile);
185 /* Read NCHARS bytes from the input file into BUF.
186 Return the number of bytes successfully read.
187 If this is less than NCHARS, do not call `stdread' again. */
/* Input is taken from the file-scope descriptor `input_desc'.  */
190 stdread (char *buf, int nchars)
/* Number of bytes still wanted; starts at the full request and is
   reduced by the size of each read.  */
193 int to_be_read = nchars;
/* Ask for everything that is still outstanding.  */
197 n_read = safe_read (input_desc, buf, to_be_read);
202 to_be_read -= n_read;
/* Bytes requested minus bytes still outstanding, i.e. the number of
   bytes actually placed in BUF.  */
205 return nchars - to_be_read;
208 /* Split into pieces of exactly NCHARS bytes.
209 Use buffer BUF, whose size is BUFSIZE. */
/* Input is read through stdread () and output is produced through
   cwrite ().  */
212 bytes_split (int nchars, char *buf, int bufsize)
/* Handed to cwrite () as its NEW_FILE_FLAG argument; it starts at 1,
   so the first write opens a new output file.  */
215 int new_file_flag = 1;
/* Starts at the requested piece size NCHARS.  */
217 int to_write = nchars;
/* Refill the buffer, asking for a full BUFSIZE bytes.  */
222 n_read = stdread (buf, bufsize);
224 error (EXIT_FAILURE, errno, "%s", infile);
/* Fewer bytes are on hand (TO_READ) than TO_WRITE asks for: write only
   what is available, and nothing at all when that is zero.  */
229 if (to_read < to_write)
231 if (to_read) /* do not write 0 bytes! */
233 cwrite (new_file_flag, bp_out, to_read);
/* Write TO_WRITE bytes starting at BP_OUT.  */
241 cwrite (new_file_flag, bp_out, to_write);
/* Keep looping only while the last read filled the whole buffer; per
   the stdread () contract a shorter read must not be followed by
   another call.  */
249 while (n_read == bufsize);
252 /* Split into pieces of exactly NLINES lines.
253 Use buffer BUF, whose size is BUFSIZE. */
/* Input is read through stdread () and output is produced through
   cwrite ().  */
256 lines_split (int nlines, char *buf, int bufsize)
/* Three cursors into BUF.  In the code below BP scans for newlines,
   BP_OUT marks the start of the bytes passed to cwrite (), and EOB is
   compared against BP_OUT to measure what is left to write.  */
259 char *bp, *bp_out, *eob;
/* Handed to cwrite () as its NEW_FILE_FLAG argument; it starts at 1,
   so the first write opens a new output file.  */
260 int new_file_flag = 1;
/* Refill the buffer, asking for a full BUFSIZE bytes.  */
265 n_read = stdread (buf, bufsize);
267 error (EXIT_FAILURE, errno, "%s", infile);
/* Advance BP to just past the next newline.
   NOTE(review): this scan has no bounds check of its own; presumably a
   sentinel newline is stored after the data (main allocates
   in_blk_size + 1 bytes for BUF) -- confirm.  */
273 while (*bp++ != '\n')
274 ; /* this semicolon takes most of the time */
/* Write the bytes from BP_OUT up to EOB, unless there are none.  */
277 if (eob != bp_out) /* do not write 0 bytes! */
279 cwrite (new_file_flag, bp_out, eob - bp_out);
/* Write the bytes from BP_OUT up to, but not including, BP.  */
287 cwrite (new_file_flag, bp_out, bp - bp_out);
/* Keep looping only while the last read filled the whole buffer.  */
294 while (n_read == bufsize);
297 /* Split into pieces that are as large as possible while still not more
298 than NCHARS bytes, and are split on line boundaries except
299 where lines longer than NCHARS bytes occur. */
/* Unlike the other split routines visible here, this one takes no
   caller-supplied buffer; it allocates its own.  */
302 line_bytes_split (int nchars)
/* The working buffer is exactly NCHARS bytes, the maximum size of one
   output piece.  */
308 char *buf = (char *) xmalloc (nchars);
312 /* Fill up the full buffer size from the input file. */
/* Read into the part of the buffer after the N_BUFFERED bytes that are
   already there, asking for just enough to fill it.  */
314 n_read = stdread (buf + n_buffered, nchars - n_buffered);
316 error (EXIT_FAILURE, errno, "%s", infile);
318 n_buffered += n_read;
/* The buffer could not be filled completely.  */
319 if (n_buffered != nchars)
322 /* Find where to end this chunk. */
/* Start from the end of the buffered data.  */
323 bp = buf + n_buffered;
/* Only a completely full buffer is searched for a line boundary.  */
324 if (n_buffered == nchars)
/* Back BP up until it sits just after a newline, or reaches the start
   of the buffer when there is no newline at all.  */
326 while (bp > buf && bp[-1] != '\n')
330 /* If chunk has no newlines, use all the chunk. */
332 bp = buf + n_buffered;
334 /* Output the chars as one output file. */
/* The constant 1 is cwrite ()'s NEW_FILE_FLAG, so every chunk goes to a
   new output file.  */
335 cwrite (1, buf, bp - buf);
337 /* Discard the chars we just output; move rest of chunk
338 down to be the start of the next chunk. Source and
339 destination probably overlap. */
/* After this subtraction N_BUFFERED counts only the unwritten bytes.  */
340 n_buffered -= bp - buf;
/* memmove rather than memcpy, since the regions may overlap.  */
342 memmove (buf, bp, n_buffered);
/* Program entry point: parse the command line options, settle on a
   split type, open the input file, set up the output file name buffer,
   run the matching split routine and close the descriptors.  */
349 main (int argc, char **argv)
351 struct stat stat_buf;
352 int num; /* numeric argument from command line */
355 type_undef, type_bytes, type_byteslines, type_lines, type_digits
356 } split_type = type_undef;
357 int in_blk_size; /* optimal block size of input file device */
358 char *buf; /* file i/o buffer */
/* Nonzero once a digit option has been seen; it is then assigned the
   value of THIS_OPTIND for that digit.  */
362 int digits_optind = 0;
364 program_name = argv[0];
365 setlocale (LC_ALL, "");
366 bindtextdomain (PACKAGE, LOCALEDIR);
367 textdomain (PACKAGE);
369 atexit (close_stdout);
371 /* Parse command line options. */
378 /* This is the argv-index of the option we will read next. */
379 int this_optind = optind ? optind : 1;
382 c = getopt_long (argc, argv, shortopts, longopts, NULL);
/* Only one split type may be chosen; asking for a second one is a
   usage error.  The same test guards each of the option cases below.  */
392 if (split_type != type_undef)
394 error (0, 0, _("cannot split in more than one way"));
395 usage (EXIT_FAILURE);
397 split_type = type_bytes;
/* Parse the size in base 10 with "bkm" as the permitted multiplier
   suffixes.  Negative values and values above INT_MAX are rejected,
   since the result is stored in an int below.  */
398 if (xstrtol (optarg, NULL, 10, &tmp_long, "bkm") != LONGINT_OK
399 || tmp_long < 0 || tmp_long > INT_MAX)
401 error (0, 0, _("%s: invalid number of bytes"), optarg);
402 usage (EXIT_FAILURE);
404 accum = (int) tmp_long;
408 if (split_type != type_undef)
410 error (0, 0, _("cannot split in more than one way"));
411 usage (EXIT_FAILURE);
413 split_type = type_lines;
/* A line count is parsed with an empty suffix list, so no multiplier
   suffix is permitted; the same range check applies.  */
414 if (xstrtol (optarg, NULL, 10, &tmp_long, "") != LONGINT_OK
415 || tmp_long < 0 || tmp_long > INT_MAX)
417 error (0, 0, _("%s: invalid number of lines"), optarg);
418 usage (EXIT_FAILURE);
420 accum = (int) tmp_long;
424 if (split_type != type_undef)
426 error (0, 0, _("cannot split in more than one way"));
427 usage (EXIT_FAILURE);
430 split_type = type_byteslines;
/* Same parsing and range check as for the byte count above.  */
431 if (xstrtol (optarg, NULL, 10, &tmp_long, "bkm") != LONGINT_OK
432 || tmp_long < 0 || tmp_long > INT_MAX)
434 error (0, 0, _("%s: invalid number of bytes"), optarg);
435 usage (EXIT_FAILURE);
437 accum = (int) tmp_long;
440 #if POSIX2_VERSION < 200112
/* Digit options may follow one another (the type is already
   type_digits then), but may not be combined with another split type.  */
451 if (split_type != type_undef && split_type != type_digits)
453 error (0, 0, _("cannot split in more than one way"));
454 usage (EXIT_FAILURE);
/* A digit coming from a different argv element than the previous one
   starts a new number.  */
456 if (digits_optind != 0 && digits_optind != this_optind)
457 accum = 0; /* More than one number given; ignore other. */
458 digits_optind = this_optind;
459 split_type = type_digits;
/* Append the digit character C to the decimal value built so far.  */
460 accum = accum * 10 + c - '0';
468 case_GETOPT_HELP_CHAR;
470 case_GETOPT_VERSION_CHAR (PROGRAM_NAME, AUTHORS);
473 usage (EXIT_FAILURE);
/* Warn about the obsolete digit form only when warnings are enabled, a
   digit option was actually seen, and POSIXLY_CORRECT is not set in the
   environment.  */
477 if (OBSOLETE_OPTION_WARNINGS
478 && digits_optind && ! getenv ("POSIXLY_CORRECT"))
479 error (0, 0, _("warning: `split -%d' is obsolete; use `split -l %d'"),
482 /* Handle default case. */
483 if (split_type == type_undef)
/* With no split option given, splitting by lines is used.  */
485 split_type = type_lines;
491 error (0, 0, _("invalid number"));
492 usage (EXIT_FAILURE);
496 /* Get out the filename arguments. */
/* The first remaining argument is the input file name and the second
   is the output base name.  */
499 infile = argv[optind++];
502 outbase = argv[optind++];
506 error (0, 0, _("too many arguments"));
507 usage (EXIT_FAILURE);
510 /* Open the input file. */
/* An input name of "-" is special-cased; any other name is opened
   read-only.  */
511 if (STREQ (infile, "-"))
515 input_desc = open (infile, O_RDONLY);
517 error (EXIT_FAILURE, errno, "%s", infile);
519 /* Binary I/O is safer when bytecounts are used. */
520 SET_BINARY (input_desc);
522 /* No output file is open now. */
525 /* Copy the output file prefix so we can add suffixes to it.
526 26**29 is certainly enough output files! */
/* 30 bytes are allocated beyond the prefix.  The memset below zeroes
   exactly those 30 bytes, starting where the prefix ends, so the name
   stays NUL-terminated while the suffix is shorter than that.  */
528 outfile = xmalloc (strlen (outbase) + 30);
529 strcpy (outfile, outbase);
530 outfile_mid = outfile + strlen (outfile);
/* The suffix starts out two characters long.  */
531 outfile_end = outfile_mid + 2;
532 memset (outfile_mid, 0, 30);
533 outfile_mid[0] = 'a';
534 outfile_mid[1] = 'a' - 1; /* first call to next_file_name makes it an 'a' */
536 /* Get the optimal block size of input device and make a buffer. */
538 if (fstat (input_desc, &stat_buf) < 0)
539 error (EXIT_FAILURE, errno, "%s", infile);
540 in_blk_size = ST_BLKSIZE (stat_buf);
/* NOTE(review): one byte more than the block size is allocated,
   presumably as room for a sentinel used by lines_split -- confirm.  */
542 buf = xmalloc (in_blk_size + 1);
/* Run the split routine that matches the chosen split type.  The two
   buffer-based routines share BUF; line_bytes_split takes only the
   count.  */
548 lines_split (num, buf, in_blk_size);
552 bytes_split (num, buf, in_blk_size);
555 case type_byteslines:
556 line_bytes_split (num);
/* Close the input descriptor, and the output descriptor unless it is
   negative; a failing close is reported against the matching file
   name.  */
563 if (close (input_desc) < 0)
564 error (EXIT_FAILURE, errno, "%s", infile);
565 if (output_desc >= 0 && close (output_desc) < 0)
566 error (EXIT_FAILURE, errno, "%s", outfile);