1 /* split.c -- split a file into pieces.
2 Copyright (C) 88, 91, 1995-2003 Free Software Foundation, Inc.
4 This program is free software; you can redistribute it and/or modify
5 it under the terms of the GNU General Public License as published by
6 the Free Software Foundation; either version 2, or (at your option) any later version.
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU General Public License for more details.
14 You should have received a copy of the GNU General Public License
15 along with this program; if not, write to the Free Software Foundation,
16 Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */
18 /* By tege@sics.se, with rms.
21 * To do: implement -t CHAR or -t REGEX to specify break characters other than newline. */
28 #include <sys/types.h>
34 #include "full-read.h"
35 #include "full-write.h"
37 #include "safe-read.h"
40 /* The official name of this program (e.g., no `g' prefix). */
41 #define PROGRAM_NAME "split"
/* Author list passed to the version-option handler in main. */
43 #define AUTHORS N_ ("Torbjorn Granlund and Richard M. Stallman")
/* Suffix length used unless -a / --suffix-length overrides it. */
45 #define DEFAULT_SUFFIX_LENGTH 2
47 /* The name this program was run with. */
50 /* Base name of output files; every output name starts with it. */
51 static char const *outbase;
53 /* Name of the current output file: OUTBASE followed by the suffix. */
56 /* Pointer to the end of the prefix in OUTFILE.
57 Suffixes are inserted here. */
58 static char *outfile_mid;
60 /* Length of OUTFILE's suffix. */
61 static size_t suffix_length = DEFAULT_SUFFIX_LENGTH;
63 /* Name of input file. May be "-". */
66 /* Descriptor on which input file is open. */
67 static int input_desc;
69 /* Descriptor on which output file is open.
A negative value means that no output file is open. */
70 static int output_desc;
72 /* If nonzero, print a diagnostic on standard error just before each
73 output file is opened. */
/* Long options recognized by getopt_long.  Entries whose third field
   is NULL make getopt_long return the fourth field, which is the
   matching short option letter.  --verbose has no short form: its
   third field points at the verbose flag, and getopt_long stores the
   fourth field there, so that field has to be nonzero for the option
   to switch the diagnostics on. */
76 static struct option const longopts[] =
78 {"bytes", required_argument, NULL, 'b'},
79 {"lines", required_argument, NULL, 'l'},
80 {"line-bytes", required_argument, NULL, 'C'},
81 {"suffix-length", required_argument, NULL, 'a'},
82 {"verbose", no_argument, &verbose, 1},
83 {GETOPT_HELP_OPTION_DECL},
84 {GETOPT_VERSION_OPTION_DECL},
/* NOTE(review): the function header is not visible in this listing;
   these statements presumably form the body of usage -- confirm.
   A short hint is written to standard error, the option summary is
   written to standard output, and the last statement exits. */
92 fprintf (stderr, _("Try `%s --help' for more information.\n"),
97 Usage: %s [OPTION] [INPUT [PREFIX]]\n\
101 Output fixed-size pieces of INPUT to PREFIXaa, PREFIXab, ...; default\n\
102 PREFIX is `x'. With no INPUT, or when INPUT is -, read standard input.\n\
106 Mandatory arguments to long options are mandatory for short options too.\n\
108 fprintf (stdout, _("\
109 -a, --suffix-length=N use suffixes of length N (default %d)\n\
110 -b, --bytes=SIZE put SIZE bytes per output file\n\
111 -C, --line-bytes=SIZE put at most SIZE bytes of lines per output file\n\
112 -l, --lines=NUMBER put NUMBER lines per output file\n\
113 "), DEFAULT_SUFFIX_LENGTH);
115 --verbose print a diagnostic to standard error just\n\
116 before each output file is opened\n\
118 fputs (HELP_OPTION_DESCRIPTION, stdout);
119 fputs (VERSION_OPTION_DESCRIPTION, stdout);
122 SIZE may have a multiplier suffix: b for 512, k for 1K, m for 1 Meg.\n\
124 printf (_("\nReport bugs to <%s>.\n"), PACKAGE_BUGREPORT);
/* A zero STATUS exits with EXIT_SUCCESS; any other value exits with
   EXIT_FAILURE. */
126 exit (status == 0 ? EXIT_SUCCESS : EXIT_FAILURE);
129 /* Compute the next sequential output file name and store it into the string `outfile'. */
133 next_file_name (void)
137 /* Allocate and initialize the first file name. */
/* The name buffer holds OUTBASE, then SUFFIX_LENGTH suffix characters,
   then a terminating zero byte. */
139 size_t outbase_length = strlen (outbase);
140 size_t outfile_length = outbase_length + suffix_length;
/* A size computation that wrapped around shows up as a result that is
   smaller than one of its operands. */
141 if (outfile_length + 1 < outbase_length)
143 outfile = xmalloc (outfile_length + 1);
144 outfile_mid = outfile + outbase_length;
145 memcpy (outfile, outbase, outbase_length);
/* The initial suffix consists entirely of the letter a. */
146 memset (outfile_mid, 'a', suffix_length);
147 outfile[outfile_length] = 0;
149 #if ! _POSIX_NO_TRUNC && HAVE_PATHCONF && defined _PC_NAME_MAX
150 /* POSIX requires that if the output file name is too long for
151 its directory, `split' must fail without creating any files.
152 This must be checked for explicitly on operating systems that
153 silently truncate file names. */
155 char *dir = dir_name (outfile);
156 long name_max = pathconf (dir, _PC_NAME_MAX);
/* A negative result from pathconf is not treated as a limit. */
157 if (0 <= name_max && name_max < base_len (base_name (outfile)))
158 error (EXIT_FAILURE, ENAMETOOLONG, "%s", outfile);
165 /* Increment the suffix in place, if possible. */
/* The scan runs from the last suffix character toward OUTFILE_MID;
   each loop step moves one position left and resets that character
   to the letter a. */
168 for (p = outfile_mid + suffix_length; outfile_mid < p; *--p = 'a')
/* NOTE(review): the loop body is not visible in this listing;
   presumably this is reached only when every suffix position has
   wrapped around -- confirm. */
171 error (EXIT_FAILURE, 0, _("Output file suffixes exhausted"));
175 /* Write BYTES bytes at BP to an output file.
176 If NEW_FILE_FLAG is nonzero, open the next output file.
177 Otherwise add to the same output file already in use.
A failure to close, open or write is reported through error with
status EXIT_FAILURE, naming OUTFILE. */
180 cwrite (int new_file_flag, const char *bp, size_t bytes)
/* A nonnegative OUTPUT_DESC means an output file is open; close it
   before moving on. */
184 if (output_desc >= 0 && close (output_desc) < 0)
185 error (EXIT_FAILURE, errno, "%s", outfile);
189 fprintf (stderr, _("creating file `%s'\n"), outfile);
/* Create OUTFILE, or truncate it if it already exists, and open it
   write-only. */
190 output_desc = open (outfile,
191 O_WRONLY | O_CREAT | O_TRUNC | O_BINARY, 0666);
193 error (EXIT_FAILURE, errno, "%s", outfile);
/* Any result other than the full BYTES count is treated as fatal. */
195 if (full_write (output_desc, bp, bytes) != bytes)
196 error (EXIT_FAILURE, errno, "%s", outfile);
199 /* Split into pieces of exactly N_BYTES bytes.
200 Use buffer BUF, whose size is BUFSIZE. */
203 bytes_split (size_t n_bytes, char *buf, size_t bufsize)
/* Start out requesting a new output file from cwrite. */
206 int new_file_flag = 1;
/* NOTE(review): presumably the number of bytes the current piece
   still needs; the statements that update it are not visible in this
   listing -- confirm. */
208 size_t to_write = n_bytes;
/* Refill BUF from the input; a read error is fatal. */
213 n_read = full_read (input_desc, buf, bufsize);
214 if (n_read == SAFE_READ_ERROR)
215 error (EXIT_FAILURE, errno, "%s", infile);
220 if (to_read < to_write)
222 if (to_read) /* do not write 0 bytes! */
224 cwrite (new_file_flag, bp_out, to_read);
231 cwrite (new_file_flag, bp_out, to_write);
/* The loop continues only while the last read filled BUF completely. */
238 while (n_read == bufsize);
241 /* Split into pieces of exactly NLINES lines.
242 Use buffer BUF, whose size is BUFSIZE. */
245 lines_split (size_t nlines, char *buf, size_t bufsize)
248 char *bp, *bp_out, *eob;
/* Start out requesting a new output file from cwrite. */
249 int new_file_flag = 1;
/* Refill BUF from the input; a read error is fatal. */
254 n_read = full_read (input_desc, buf, bufsize);
255 if (n_read == SAFE_READ_ERROR)
256 error (EXIT_FAILURE, errno, "%s", infile);
/* The search length includes the byte at EOB itself.
   NOTE(review): presumably a sentinel newline is stored at EOB (main
   allocates one byte more than the block size) -- confirm. */
262 bp = memchr (bp, '\n', eob - bp + 1);
265 if (eob != bp_out) /* do not write 0 bytes! */
267 size_t len = eob - bp_out;
268 cwrite (new_file_flag, bp_out, len);
/* Write the span from BP_OUT up to BP. */
277 cwrite (new_file_flag, bp_out, bp - bp_out);
/* The loop continues only while the last read filled BUF completely. */
284 while (n_read == bufsize);
287 /* Split into pieces that are as large as possible while still not more
288 than N_BYTES bytes, and are split on line boundaries except
289 where lines longer than N_BYTES bytes occur. */
292 line_bytes_split (size_t n_bytes)
/* Count of bytes currently held in BUF. */
297 size_t n_buffered = 0;
/* The working buffer has room for exactly N_BYTES bytes. */
298 char *buf = (char *) xmalloc (n_bytes);
302 /* Fill up the full buffer size from the input file. */
304 n_read = full_read (input_desc, buf + n_buffered, n_bytes - n_buffered);
305 if (n_read == SAFE_READ_ERROR)
306 error (EXIT_FAILURE, errno, "%s", infile);
308 n_buffered += n_read;
309 if (n_buffered != n_bytes)
312 /* Find where to end this chunk. */
313 bp = buf + n_buffered;
/* Only a completely full buffer is trimmed back to a line boundary. */
314 if (n_buffered == n_bytes)
/* Back up until BP sits just past a newline or reaches the start
   of BUF. */
316 while (bp > buf && bp[-1] != '\n')
320 /* If chunk has no newlines, use all the chunk. */
322 bp = buf + n_buffered;
324 /* Output the chars as one output file. */
325 cwrite (1, buf, bp - buf);
327 /* Discard the chars we just output; move rest of chunk
328 down to be the start of the next chunk. Source and
329 destination probably overlap. */
330 n_buffered -= bp - buf;
332 memmove (buf, bp, n_buffered);
/* Diagnose an attempt to select more than one splitting mode, then
   hand off to usage with EXIT_FAILURE. */
338 #define FAIL_ONLY_ONE_WAY() \
341 error (0, 0, _("cannot split in more than one way")); \
342 usage (EXIT_FAILURE); \
347 main (int argc, char **argv)
/* Entry point: parse the options into a split mode and a count, pick
   up the input and output names, open the input, run the matching
   splitter, then close the descriptors. */
349 struct stat stat_buf;
350 size_t num; /* numeric argument from command line */
353 type_undef, type_bytes, type_byteslines, type_lines, type_digits
354 } split_type = type_undef;
355 size_t in_blk_size; /* optimal block size of input file device */
356 char *buf; /* file i/o buffer */
/* Argv index of the most recent digit option; zero when none was seen. */
359 int digits_optind = 0;
361 program_name = argv[0];
362 setlocale (LC_ALL, "");
363 bindtextdomain (PACKAGE, LOCALEDIR);
364 textdomain (PACKAGE);
366 atexit (close_stdout);
368 /* Parse command line options. */
375 /* This is the argv-index of the option we will read next. */
/* An optind of zero is treated as index 1. */
376 int this_optind = optind ? optind : 1;
379 c = getopt_long (argc, argv, "0123456789C:a:b:l:", longopts, NULL);
391 if (xstrtoul (optarg, NULL, 10, &tmp, "") != LONGINT_OK
394 error (0, 0, _("%s: invalid suffix length"), optarg);
395 usage (EXIT_FAILURE);
/* A mode option is accepted only while no other mode has been chosen. */
402 if (split_type != type_undef)
403 FAIL_ONLY_ONE_WAY ();
404 split_type = type_bytes;
/* Byte counts accept the b, k and m multiplier suffixes and have to
   lie between zero and INT_MAX. */
405 if (xstrtol (optarg, NULL, 10, &tmp_long, "bkm") != LONGINT_OK
406 || tmp_long < 0 || tmp_long > INT_MAX)
408 error (0, 0, _("%s: invalid number of bytes"), optarg);
409 usage (EXIT_FAILURE);
411 accum = /* FIXME: */ (int) tmp_long;
415 if (split_type != type_undef)
416 FAIL_ONLY_ONE_WAY ();
417 split_type = type_lines;
/* Line counts take no multiplier suffix. */
418 if (xstrtol (optarg, NULL, 10, &tmp_long, "") != LONGINT_OK
419 || tmp_long < 0 || tmp_long > INT_MAX)
421 error (0, 0, _("%s: invalid number of lines"), optarg);
422 usage (EXIT_FAILURE);
424 accum = /* FIXME */ (int) tmp_long;
428 if (split_type != type_undef)
429 FAIL_ONLY_ONE_WAY ();
430 split_type = type_byteslines;
431 if (xstrtol (optarg, NULL, 10, &tmp_long, "bkm") != LONGINT_OK
432 || tmp_long < 0 || tmp_long > INT_MAX)
434 error (0, 0, _("%s: invalid number of bytes"), optarg);
435 usage (EXIT_FAILURE);
437 accum = /* FIXME */ (int) tmp_long;
/* A digit option is compatible only with earlier digit options. */
450 if (split_type != type_undef && split_type != type_digits)
451 FAIL_ONLY_ONE_WAY ();
452 if (digits_optind != 0 && digits_optind != this_optind)
453 accum = 0; /* More than one number given; ignore other. */
454 digits_optind = this_optind;
455 split_type = type_digits;
/* Fold this digit into the decimal value accumulated so far. */
456 accum = accum * 10 + c - '0';
459 case_GETOPT_HELP_CHAR;
461 case_GETOPT_VERSION_CHAR (PROGRAM_NAME, AUTHORS);
464 usage (EXIT_FAILURE);
/* When posix2_version reports 200112 or later, the digit form is
   rejected as obsolete. */
468 if (digits_optind && 200112 <= posix2_version ())
470 error (0, 0, _("`-%d' option is obsolete; use `-l %d'"), accum, accum);
471 usage (EXIT_FAILURE);
474 /* Handle default case. */
475 if (split_type == type_undef)
477 split_type = type_lines;
483 error (0, 0, _("invalid number"));
484 usage (EXIT_FAILURE);
488 /* Get out the filename arguments. */
491 infile = argv[optind++];
494 outbase = argv[optind++];
498 error (0, 0, _("too many arguments"));
499 usage (EXIT_FAILURE);
502 /* Open the input file. */
503 if (STREQ (infile, "-"))
507 input_desc = open (infile, O_RDONLY);
509 error (EXIT_FAILURE, errno, "%s", infile);
511 /* Binary I/O is safer when bytecounts are used. */
512 SET_BINARY (input_desc);
514 /* No output file is open now. */
517 /* Get the optimal block size of input device and make a buffer. */
519 if (fstat (input_desc, &stat_buf) < 0)
520 error (EXIT_FAILURE, errno, "%s", infile);
521 in_blk_size = ST_BLKSIZE (stat_buf);
/* One byte more than the block size is allocated.
   NOTE(review): presumably room for a sentinel byte used by
   lines_split -- confirm. */
523 buf = xmalloc (in_blk_size + 1);
529 lines_split (num, buf, in_blk_size);
533 bytes_split (num, buf, in_blk_size);
536 case type_byteslines:
537 line_bytes_split (num);
/* A failure to close either descriptor is fatal. */
544 if (close (input_desc) < 0)
545 error (EXIT_FAILURE, errno, "%s", infile);
546 if (output_desc >= 0 && close (output_desc) < 0)
547 error (EXIT_FAILURE, errno, "%s", outfile);