1 /* split.c -- split a file into pieces.
2 Copyright (C) 88, 91, 1995-2002 Free Software Foundation, Inc.
4 This program is free software; you can redistribute it and/or modify
5 it under the terms of the GNU General Public License as published by
6 the Free Software Foundation; either version 2, or (at your option)
any later version.
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU General Public License for more details.
14 You should have received a copy of the GNU General Public License
15 along with this program; if not, write to the Free Software Foundation,
16 Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */
18 /* By tege@sics.se, with rms.
21 * Implement -t CHAR or -t REGEX to specify break characters other
than newline. */
28 #include <sys/types.h>
34 #include "full-write.h"
36 #include "safe-read.h"
39 /* The official name of this program (e.g., no `g' prefix). */
40 #define PROGRAM_NAME "split"
42 #define AUTHORS N_ ("Torbjorn Granlund and Richard M. Stallman")
/* Suffix length used unless the -a/--suffix-length option overrides it.
   It is the initial value of suffix_length and the default printed in
   the help text. */
44 #define DEFAULT_SUFFIX_LENGTH 2
46 /* The name this program was run with. */
49 /* Base name of output files. */
50 static char const *outbase;
52 /* Name of output files. */
55 /* Pointer to the end of the prefix in OUTFILE.
56 Suffixes are inserted here. */
57 static char *outfile_mid;
59 /* Length of OUTFILE's suffix. */
60 static size_t suffix_length = DEFAULT_SUFFIX_LENGTH;
62 /* Name of input file. May be "-". */
65 /* Descriptor on which input file is open. */
66 static int input_desc;
68 /* Descriptor on which output file is open.
   The closing code in cwrite and main only closes it when it is >= 0,
   so a negative value stands for no output file being open. */
69 static int output_desc;
71 /* If nonzero, print a diagnostic on standard error just before each
72 output file is opened. */
/* Long options handed to getopt_long in main.  The last field of each
   entry is the value getopt_long reports for that option; for the first
   four it is the matching short-option character. */
75 static struct option const longopts[] =
77 {"bytes", required_argument, NULL, 'b'},
78 {"lines", required_argument, NULL, 'l'},
79 {"line-bytes", required_argument, NULL, 'C'},
80 {"suffix-length", required_argument, NULL, 'a'},
/* --verbose takes no argument and has no letter in the short-option
   string passed to getopt_long, so it is reported as the value 2
   rather than as a character. */
81 {"verbose", no_argument, NULL, 2},
82 {GETOPT_HELP_OPTION_DECL},
83 {GETOPT_VERSION_OPTION_DECL},
91 fprintf (stderr, _("Try `%s --help' for more information.\n"),
96 Usage: %s [OPTION] [INPUT [PREFIX]]\n\
100 Output fixed-size pieces of INPUT to PREFIXaa, PREFIXab, ...; default\n\
101 PREFIX is `x'. With no INPUT, or when INPUT is -, read standard input.\n\
105 Mandatory arguments to long options are mandatory for short options too.\n\
107 fprintf (stdout, _("\
108 -a, --suffix-length=N use suffixes of length N (default %d)\n\
109 -b, --bytes=SIZE put SIZE bytes per output file\n\
110 -C, --line-bytes=SIZE put at most SIZE bytes of lines per output file\n\
111 -l, --lines=NUMBER put NUMBER lines per output file\n\
112 "), DEFAULT_SUFFIX_LENGTH);
114 --verbose print a diagnostic to standard error just\n\
115 before each output file is opened\n\
117 fputs (HELP_OPTION_DESCRIPTION, stdout);
118 fputs (VERSION_OPTION_DESCRIPTION, stdout);
121 SIZE may have a multiplier suffix: b for 512, k for 1K, m for 1 Meg.\n\
123 printf (_("\nReport bugs to <%s>.\n"), PACKAGE_BUGREPORT);
125 exit (status == 0 ? EXIT_SUCCESS : EXIT_FAILURE);
128 /* Compute the next sequential output file name and store it into the
string `outfile'. */
132 next_file_name (void)
136 /* Allocate and initialize the first file name. */
/* The name is the base name followed by suffix_length suffix characters. */
138 size_t outbase_length = strlen (outbase);
139 size_t outfile_length = outbase_length + suffix_length;
/* Detect size_t wraparound: the total plus the terminating NUL must not
   come out smaller than the base name alone.
   NOTE(review): the statement this test guards is not visible here. */
140 if (outfile_length + 1 < outbase_length)
142 outfile = xmalloc (outfile_length + 1);
143 outfile_mid = outfile + outbase_length;
144 memcpy (outfile, outbase, outbase_length);
/* The first suffix consists of suffix_length copies of the letter a. */
145 memset (outfile_mid, 'a', suffix_length);
146 outfile[outfile_length] = 0;
148 #if ! _POSIX_NO_TRUNC && HAVE_PATHCONF && defined _PC_NAME_MAX
149 /* POSIX requires that if the output file name is too long for
150 its directory, `split' must fail without creating any files.
151 This must be checked for explicitly on operating systems that
152 silently truncate file names. */
154 char *dir = dir_name (outfile);
/* A negative pathconf result is not treated as a limit; only a
   non-negative NAME_MAX is compared with the length of the last
   file name component. */
155 long name_max = pathconf (dir, _PC_NAME_MAX);
156 if (0 <= name_max && name_max < base_len (base_name (outfile)))
157 error (EXIT_FAILURE, ENAMETOOLONG, "%s", outfile);
164 /* Increment the suffix in place, if possible. */
/* Walk backward from the last suffix character toward outfile_mid.  The
   step expression resets each position it leaves behind to the letter a.
   NOTE(review): the loop body is not visible here. */
167 for (p = outfile_mid + suffix_length; outfile_mid < p; *--p = 'a')
/* NOTE(review): presumably reached only once the loop has run past the
   front of the suffix without finding a character to increment --
   confirm against the full loop body. */
170 error (EXIT_FAILURE, 0, _("Output file suffixes exhausted"));
174 /* Write BYTES bytes at BP to an output file.
175 If NEW_FILE_FLAG is nonzero, open the next output file.
176 Otherwise add to the same output file already in use. */
/* Every failure below (close, open, short write) is reported with the
   output file name and ends the program with EXIT_FAILURE. */
179 cwrite (int new_file_flag, const char *bp, size_t bytes)
/* Close the output file that is currently open, if there is one
   (a negative output_desc means none). */
183 if (output_desc >= 0 && close (output_desc) < 0)
184 error (EXIT_FAILURE, errno, "%s", outfile);
/* NOTE(review): the condition guarding this diagnostic is not visible
   here; the help text ties it to the --verbose option. */
188 fprintf (stderr, _("creating file `%s'\n"), outfile);
/* Create the file if needed and truncate any existing contents. */
189 output_desc = open (outfile,
190 O_WRONLY | O_CREAT | O_TRUNC | O_BINARY, 0666);
192 error (EXIT_FAILURE, errno, "%s", outfile);
/* Anything other than a complete write of BYTES bytes is fatal. */
194 if (full_write (output_desc, bp, bytes) != bytes)
195 error (EXIT_FAILURE, errno, "%s", outfile);
198 /* Split into pieces of exactly NCHARS bytes.
199 Use buffer BUF, whose size is BUFSIZE. */
/* Input comes from input_desc through safe_read and output goes through
   cwrite.  A read error is fatal. */
202 bytes_split (size_t nchars, char *buf, size_t bufsize)
/* Nonzero so that the first cwrite call opens an output file. */
205 int new_file_flag = 1;
/* Starts at a full piece of NCHARS bytes.
   NOTE(review): presumably the number of bytes the current output file
   can still take; the updates to it are not visible here. */
207 size_t to_write = nchars;
212 n_read = safe_read (input_desc, buf, bufsize);
213 if (n_read == SAFE_READ_ERROR)
214 error (EXIT_FAILURE, errno, "%s", infile);
/* When the buffered data is smaller than what the current piece still
   needs, write what is there (if anything). */
219 if (to_read < to_write)
221 if (to_read) /* do not write 0 bytes! */
223 cwrite (new_file_flag, bp_out, to_read);
/* Otherwise write exactly the amount the current piece still needs. */
231 cwrite (new_file_flag, bp_out, to_write);
/* A read that did not fill the whole buffer ends the loop. */
239 while (n_read == bufsize);
242 /* Split into pieces of exactly NLINES lines.
243 Use buffer BUF, whose size is BUFSIZE. */
/* Input comes from input_desc through safe_read and output goes through
   cwrite.  A read error is fatal. */
246 lines_split (size_t nlines, char *buf, size_t bufsize)
249 char *bp, *bp_out, *eob;
/* Nonzero so that the first cwrite call opens an output file. */
250 int new_file_flag = 1;
255 n_read = safe_read (input_desc, buf, bufsize);
256 if (n_read == SAFE_READ_ERROR)
257 error (EXIT_FAILURE, errno, "%s", infile);
/* The search length includes the byte at eob itself.
   NOTE(review): presumably a sentinel newline is stored at eob so the
   search always succeeds (main allocates one byte more than BUFSIZE);
   the store is not visible here -- confirm. */
263 bp = memchr (bp, '\n', eob - bp + 1);
/* Write whatever lies between bp_out and eob, unless it is empty. */
266 if (eob != bp_out) /* do not write 0 bytes! */
268 size_t len = eob - bp_out;
269 cwrite (new_file_flag, bp_out, len);
/* Write the bytes from bp_out up to bp. */
278 cwrite (new_file_flag, bp_out, bp - bp_out);
/* A read that did not fill the whole buffer ends the loop. */
285 while (n_read == bufsize);
288 /* Split into pieces that are as large as possible while still not more
289 than NCHARS bytes, and are split on line boundaries except
290 where lines longer than NCHARS bytes occur. */
/* Unlike the other split routines this one allocates its own buffer,
   sized to hold one maximal piece. */
293 line_bytes_split (size_t nchars)
/* Count of bytes currently held in buf. */
298 size_t n_buffered = 0;
299 char *buf = (char *) xmalloc (nchars);
303 /* Fill up the full buffer size from the input file. */
/* New data is appended after the bytes kept from the previous chunk, and
   the request is limited to the space still free in buf. */
305 n_read = safe_read (input_desc, buf + n_buffered, nchars - n_buffered);
306 if (n_read == SAFE_READ_ERROR)
307 error (EXIT_FAILURE, errno, "%s", infile);
309 n_buffered += n_read;
310 if (n_buffered != nchars)
313 /* Find where to end this chunk. */
/* By default the chunk ends at the end of the buffered data.  The
   backward scan for a newline is attempted only when the buffer is
   completely full. */
314 bp = buf + n_buffered;
315 if (n_buffered == nchars)
317 while (bp > buf && bp[-1] != '\n')
321 /* If chunk has no newlines, use all the chunk. */
323 bp = buf + n_buffered;
325 /* Output the chars as one output file. */
/* The constant 1 is the new-file flag, so each chunk gets its own file. */
326 cwrite (1, buf, bp - buf);
328 /* Discard the chars we just output; move rest of chunk
329 down to be the start of the next chunk. Source and
330 destination probably overlap. */
331 n_buffered -= bp - buf;
333 memmove (buf, bp, n_buffered);
/* Used by the option parser in main when a split mode has already been
   chosen: print a diagnostic, then call usage with EXIT_FAILURE. */
339 #define FAIL_ONLY_ONE_WAY() \
342 error (0, 0, _("cannot split in more than one way")); \
343 usage (EXIT_FAILURE); \
/* Program entry point: parse the options, choose the split mode, open
   the input, allocate the I/O buffer and run the matching split
   routine, then close the input and any open output file. */
348 main (int argc, char **argv)
350 struct stat stat_buf;
351 size_t num; /* numeric argument from command line */
/* The split mode selected so far; type_undef until an option picks one. */
354 type_undef, type_bytes, type_byteslines, type_lines, type_digits
355 } split_type = type_undef;
356 size_t in_blk_size; /* optimal block size of input file device */
357 char *buf; /* file i/o buffer */
/* argv index of the element that supplied digit options; 0 means that no
   digit option has been seen. */
360 int digits_optind = 0;
362 program_name = argv[0];
363 setlocale (LC_ALL, "");
364 bindtextdomain (PACKAGE, LOCALEDIR);
365 textdomain (PACKAGE);
367 atexit (close_stdout);
369 /* Parse command line options. */
376 /* This is the argv-index of the option we will read next. */
377 int this_optind = optind ? optind : 1;
/* Each digit is itself an option character, so the obsolete -NUMBER form
   arrives one digit at a time and is accumulated further down. */
380 c = getopt_long (argc, argv, "0123456789C:a:b:l:", longopts, NULL);
/* Suffix length (identified by the diagnostic below): parsed as an
   unsigned number with no multiplier suffix allowed.
   NOTE(review): the case labels and the rest of this condition are not
   visible here. */
392 if (xstrtoul (optarg, NULL, 10, &tmp, "") != LONGINT_OK
395 error (0, 0, _("%s: invalid suffix length"), optarg);
396 usage (EXIT_FAILURE);
/* Byte-count mode.  Only one mode may be given.  The argument may carry
   a b, k or m multiplier suffix and must lie in the range 0..INT_MAX. */
403 if (split_type != type_undef)
404 FAIL_ONLY_ONE_WAY ();
405 split_type = type_bytes;
406 if (xstrtol (optarg, NULL, 10, &tmp_long, "bkm") != LONGINT_OK
407 || tmp_long < 0 || tmp_long > INT_MAX)
409 error (0, 0, _("%s: invalid number of bytes"), optarg);
410 usage (EXIT_FAILURE);
/* NOTE(review): the value is capped at INT_MAX and narrowed to int here
   although the split routines take a size_t count; the existing FIXME
   marks this limitation. */
412 accum = /* FIXME: */ (int) tmp_long;
/* Line-count mode.  Same checks as above, but no multiplier suffix is
   accepted. */
416 if (split_type != type_undef)
417 FAIL_ONLY_ONE_WAY ();
418 split_type = type_lines;
419 if (xstrtol (optarg, NULL, 10, &tmp_long, "") != LONGINT_OK
420 || tmp_long < 0 || tmp_long > INT_MAX)
422 error (0, 0, _("%s: invalid number of lines"), optarg);
423 usage (EXIT_FAILURE);
425 accum = /* FIXME */ (int) tmp_long;
/* Line-bytes mode.  Same checks and suffixes as byte-count mode. */
429 if (split_type != type_undef)
430 FAIL_ONLY_ONE_WAY ();
431 split_type = type_byteslines;
432 if (xstrtol (optarg, NULL, 10, &tmp_long, "bkm") != LONGINT_OK
433 || tmp_long < 0 || tmp_long > INT_MAX)
435 error (0, 0, _("%s: invalid number of bytes"), optarg);
436 usage (EXIT_FAILURE);
438 accum = /* FIXME */ (int) tmp_long;
/* Digit option.  It may follow earlier digits but not any other mode.
   Digits coming from a different argv element than the previous ones
   start a new number; otherwise the digit is appended in base 10. */
451 if (split_type != type_undef && split_type != type_digits)
452 FAIL_ONLY_ONE_WAY ();
453 if (digits_optind != 0 && digits_optind != this_optind)
454 accum = 0; /* More than one number given; ignore other. */
455 digits_optind = this_optind;
456 split_type = type_digits;
457 accum = accum * 10 + c - '0';
464 case_GETOPT_HELP_CHAR;
466 case_GETOPT_VERSION_CHAR (PROGRAM_NAME, AUTHORS);
469 usage (EXIT_FAILURE);
/* The obsolete -NUMBER form is rejected when posix2_version reports
   200112 or later. */
473 if (digits_optind && 200112 <= posix2_version ())
475 error (0, 0, _("`-%d' option is obsolete; use `-l %d'"), accum, accum);
476 usage (EXIT_FAILURE);
479 /* Handle default case. */
/* With no mode option at all, the input is split by lines. */
480 if (split_type == type_undef)
482 split_type = type_lines;
488 error (0, 0, _("invalid number"));
489 usage (EXIT_FAILURE);
493 /* Get out the filename arguments. */
/* The first remaining argument is taken as the input file and the second
   as the output base name.
   NOTE(review): the tests guarding these assignments are not visible
   here. */
496 infile = argv[optind++];
499 outbase = argv[optind++];
503 error (0, 0, _("too many arguments"));
504 usage (EXIT_FAILURE);
507 /* Open the input file. */
/* NOTE(review): the branch taken when the input name is "-" is not
   visible here; the help text says standard input is read then. */
508 if (STREQ (infile, "-"))
512 input_desc = open (infile, O_RDONLY);
514 error (EXIT_FAILURE, errno, "%s", infile);
516 /* Binary I/O is safer when bytecounts are used. */
517 SET_BINARY (input_desc);
519 /* No output file is open now. */
522 /* Get the optimal block size of input device and make a buffer. */
524 if (fstat (input_desc, &stat_buf) < 0)
525 error (EXIT_FAILURE, errno, "%s", infile);
526 in_blk_size = ST_BLKSIZE (stat_buf);
/* One byte more than the block size is allocated, while the split
   routines are told the size is in_blk_size.
   NOTE(review): presumably room for the extra byte that lines_split
   includes in its newline search -- confirm. */
528 buf = xmalloc (in_blk_size + 1);
/* Run the routine that matches the selected mode. */
534 lines_split (num, buf, in_blk_size);
538 bytes_split (num, buf, in_blk_size);
541 case type_byteslines:
542 line_bytes_split (num);
/* Close the input and any output file still open; a failure of either
   close is fatal. */
549 if (close (input_desc) < 0)
550 error (EXIT_FAILURE, errno, "%s", infile);
551 if (output_desc >= 0 && close (output_desc) < 0)
552 error (EXIT_FAILURE, errno, "%s", outfile);