1 /* split.c -- split a file into pieces.
2 Copyright (C) 88, 91, 1995-2002 Free Software Foundation, Inc.
4 This program is free software; you can redistribute it and/or modify
5 it under the terms of the GNU General Public License as published by
6 the Free Software Foundation; either version 2, or (at your option)
any later version.
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU General Public License for more details.
14 You should have received a copy of the GNU General Public License
15 along with this program; if not, write to the Free Software Foundation,
16 Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */
18 /* By tege@sics.se, with rms.
21 * Implement -t CHAR or -t REGEX to specify break characters other
than newline. */
28 #include <sys/types.h>
34 #include "full-write.h"
35 #include "safe-read.h"
38 /* The official name of this program (e.g., no `g' prefix). */
39 #define PROGRAM_NAME "split"
41 #define AUTHORS N_ ("Torbjorn Granlund and Richard M. Stallman")
/* Suffix length used unless -a/--suffix-length overrides it; it is
   also the value printed as the default in the help text.  */
43 #define DEFAULT_SUFFIX_LENGTH 2
45 /* The name this program was run with. */
48 /* Base name of output files. */
49 static char const *outbase;
51 /* Name of output files. */
54 /* Pointer to the end of the prefix in OUTFILE.
55 Suffixes are inserted here. */
56 static char *outfile_mid;
58 /* Length of OUTFILE's suffix. */
59 static size_t suffix_length = DEFAULT_SUFFIX_LENGTH;
61 /* Name of input file. May be "-". */
64 /* Descriptor on which input file is open. */
65 static int input_desc;
67 /* Descriptor on which output file is open. */
/* Code that closes this descriptor first tests output_desc >= 0, so a
   negative value stands for: no output file is open.  */
68 static int output_desc;
70 /* If nonzero, print a diagnostic on standard error just before each
71 output file is opened. */
/* Short option letters handed to getopt_long.  The colon after each
   of a, b, l and C marks that option as requiring an argument.
   NOTE(review): no terminating semicolon is visible on the next line,
   so the literal presumably continues inside the conditional that
   follows - confirm.  */
74 static char const shortopts[] = "a:b:l:C:"
75 #if POSIX2_VERSION < 200112
/* Long option table.  Every entry visible here has a NULL flag
   pointer, so getopt_long returns the final field: the letter of the
   equivalent short option, or the code 2 for --verbose, which has no
   short form in this table.  */
80 static struct option const longopts[] =
82 {"bytes", required_argument, NULL, 'b'},
83 {"lines", required_argument, NULL, 'l'},
84 {"line-bytes", required_argument, NULL, 'C'},
85 {"suffix-length", required_argument, NULL, 'a'},
86 {"verbose", no_argument, NULL, 2},
87 {GETOPT_HELP_OPTION_DECL},
88 {GETOPT_VERSION_OPTION_DECL},
/* usage: print help text and terminate the program.
   From the lines visible here: a short hint pointing at --help is
   written to standard error; the full synopsis and option summary are
   written to standard output; the process then exits with
   EXIT_SUCCESS when STATUS is 0 and with EXIT_FAILURE otherwise.
   The -NUMBER form is only described when POSIX2_VERSION < 200112.
   NOTE(review): the function header and the test that chooses
   between the hint and the full text are not visible in this
   excerpt - confirm against the complete file.  */
96 fprintf (stderr, _("Try `%s --help' for more information.\n"),
101 Usage: %s [OPTION] [INPUT [PREFIX]]\n\
105 Output fixed-size pieces of INPUT to PREFIXaa, PREFIXab, ...; default\n\
106 PREFIX is `x'. With no INPUT, or when INPUT is -, read standard input.\n\
110 Mandatory arguments to long options are mandatory for short options too.\n\
112 fprintf (stdout, _("\
113 -a, --suffix-length=N use suffixes of length N (default %d)\n\
114 -b, --bytes=SIZE put SIZE bytes per output file\n\
115 -C, --line-bytes=SIZE put at most SIZE bytes of lines per output file\n\
116 -l, --lines=NUMBER put NUMBER lines per output file\n\
117 "), DEFAULT_SUFFIX_LENGTH);
118 if (POSIX2_VERSION < 200112)
120 -NUMBER (obsolete) same as -l NUMBER\n\
123 --verbose print a diagnostic to standard error just\n\
124 before each output file is opened\n\
126 fputs (HELP_OPTION_DESCRIPTION, stdout);
127 fputs (VERSION_OPTION_DESCRIPTION, stdout);
130 SIZE may have a multiplier suffix: b for 512, k for 1K, m for 1 Meg.\n\
132 puts (_("\nReport bugs to <bug-textutils@gnu.org>."));
134 exit (status == 0 ? EXIT_SUCCESS : EXIT_FAILURE);
/* next_file_name: set the global OUTFILE to the next output name.
   The first name is OUTBASE followed by SUFFIX_LENGTH copies of the
   letter a, built in storage obtained from xmalloc; OUTFILE_MID is
   left pointing at the first suffix character.  Later names are made
   by incrementing that suffix in place.  When no further suffix is
   available the program exits with the diagnostic
   Output file suffixes exhausted.  Takes no arguments.  */
137 /* Compute the next sequential output file name and store it into the
141 next_file_name (void)
145 /* Allocate and initialize the first file name. */
147 size_t outbase_length = strlen (outbase);
148 size_t outfile_length = outbase_length + suffix_length;
/* size_t arithmetic wraps around, so a total (counting the
   terminating NUL) that is smaller than the prefix length means one
   of the additions overflowed.
   NOTE(review): the statement run in that case is not visible.  */
149 if (outfile_length + 1 < outbase_length)
151 outfile = xmalloc (outfile_length + 1);
152 outfile_mid = outfile + outbase_length;
153 memcpy (outfile, outbase, outbase_length);
154 memset (outfile_mid, 'a', suffix_length);
155 outfile[outfile_length] = 0;
157 #if ! _POSIX_NO_TRUNC && HAVE_PATHCONF && defined _PC_NAME_MAX
158 /* POSIX requires that if the output file name is too long for
159 its directory, `split' must fail without creating any files.
160 This must be checked for explicitly on operating systems that
161 silently truncate file names. */
163 char *dir = dir_name (outfile);
164 long name_max = pathconf (dir, _PC_NAME_MAX);
/* pathconf yields -1 both on error and when there is no limit; the
   0 <= name_max guard skips the length comparison in those cases.  */
165 if (0 <= name_max && name_max < base_len (base_name (outfile)))
166 error (EXIT_FAILURE, ENAMETOOLONG, "%s", outfile);
173 /* Increment the suffix in place, if possible. */
/* P starts one past the last suffix character and moves toward
   OUTFILE_MID.  The update expression of the for statement resets
   each character the loop steps past to the letter a, which is the
   carry into the next position to the left.
   NOTE(review): the loop body is not visible; presumably it bumps
   the character before P and returns unless that character was
   already the last letter - confirm.  */
176 for (p = outfile_mid + suffix_length; outfile_mid < p; *--p = 'a')
179 error (EXIT_FAILURE, 0, _("Output file suffixes exhausted"));
183 /* Write BYTES bytes at BP to an output file.
184 If NEW_FILE_FLAG is nonzero, open the next output file.
185 Otherwise add to the same output file already in use. */
/* Every failure visible below (closing the previous file, opening
   the new one, an incomplete write) is reported through error with
   EXIT_FAILURE as the status argument and the output name as the
   message, so none of them returns to the caller.  */
188 cwrite (int new_file_flag, const char *bp, int bytes)
/* A descriptor below zero means nothing has been opened yet, so
   there is nothing to close.  */
192 if (output_desc >= 0 && close (output_desc) < 0)
193 error (EXIT_FAILURE, errno, "%s", outfile);
197 fprintf (stderr, _("creating file `%s'\n"), outfile);
/* Create the file, or truncate it if it already exists, for writing
   only.  0666 is the requested permission mode.  */
198 output_desc = open (outfile,
199 O_WRONLY | O_CREAT | O_TRUNC | O_BINARY, 0666);
201 error (EXIT_FAILURE, errno, "%s", outfile);
/* Anything other than a complete write of BYTES bytes is fatal.  */
203 if (full_write (output_desc, bp, bytes) != bytes)
204 error (EXIT_FAILURE, errno, "%s", outfile);
207 /* Read NCHARS bytes from the input file into BUF.
208 Return the number of bytes successfully read.
209 If this is less than NCHARS, do not call `stdread' again. */
212 stdread (char *buf, int nchars)
/* Bytes still wanted; reduced by each successful read.  */
215 int to_be_read = nchars;
/* Reads from the global INPUT_DESC.
   NOTE(review): the loop around this call, the handling of a zero
   or negative result, and any advance of BUF between reads are not
   visible here - confirm against the complete file.  */
219 n_read = safe_read (input_desc, buf, to_be_read);
224 to_be_read -= n_read;
/* Requested count minus the shortfall, that is, the total number of
   bytes obtained.  */
227 return nchars - to_be_read;
230 /* Split into pieces of exactly NCHARS bytes.
231 Use buffer BUF, whose size is BUFSIZE. */
234 bytes_split (int nchars, char *buf, int bufsize)
/* Starts nonzero, so the first call to cwrite opens the first output
   file.  */
237 int new_file_flag = 1;
/* Starts at the piece size and is the byte count handed to the
   second cwrite call below; presumably it tracks how much the
   current piece still needs - confirm.  */
239 int to_write = nchars;
/* Refill BUF with up to BUFSIZE bytes of input.  */
244 n_read = stdread (buf, bufsize);
246 error (EXIT_FAILURE, errno, "%s", infile);
/* When fewer bytes are left to consume than TO_WRITE, whatever is
   left is written (an empty write is skipped); otherwise TO_WRITE
   bytes are written.
   NOTE(review): the bookkeeping between these calls is not visible
   in this excerpt.  */
251 if (to_read < to_write)
253 if (to_read) /* do not write 0 bytes! */
255 cwrite (new_file_flag, bp_out, to_read);
263 cwrite (new_file_flag, bp_out, to_write);
/* The loop repeats only while a read filled the whole buffer; a
   short read ends it.  */
271 while (n_read == bufsize);
274 /* Split into pieces of exactly NLINES lines.
275 Use buffer BUF, whose size is BUFSIZE. */
278 lines_split (int nlines, char *buf, int bufsize)
281 char *bp, *bp_out, *eob;
/* Starts nonzero, so the first call to cwrite opens the first output
   file.  */
282 int new_file_flag = 1;
/* Refill BUF with up to BUFSIZE bytes of input.  */
287 n_read = stdread (buf, bufsize);
289 error (EXIT_FAILURE, errno, "%s", infile);
/* Advance BP to just past the next newline.  The scan itself has no
   bounds test.
   NOTE(review): it presumably relies on a newline stored at EOB as a
   sentinel (main allocates one byte more than the block size) -
   confirm against the lines not shown.  */
295 while (*bp++ != '\n')
296 ; /* this semicolon takes most of the time */
/* Two write sites are visible: one for the unwritten tail that runs
   up to EOB, one for the span that ends at BP.  */
299 if (eob != bp_out) /* do not write 0 bytes! */
301 cwrite (new_file_flag, bp_out, eob - bp_out);
309 cwrite (new_file_flag, bp_out, bp - bp_out);
/* The loop repeats only while a read filled the whole buffer; a
   short read ends it.  */
316 while (n_read == bufsize);
319 /* Split into pieces that are as large as possible while still not more
320 than NCHARS bytes, and are split on line boundaries except
321 where lines longer than NCHARS bytes occur. */
324 line_bytes_split (int nchars)
/* Private buffer of NCHARS bytes: room for one maximal piece.  */
330 char *buf = (char *) xmalloc (nchars);
334 /* Fill up the full buffer size from the input file. */
/* Data carried over from the previous round occupies the first
   N_BUFFERED bytes, so only the remainder is requested.  */
336 n_read = stdread (buf + n_buffered, nchars - n_buffered);
338 error (EXIT_FAILURE, errno, "%s", infile);
340 n_buffered += n_read;
/* NOTE(review): the statement governed by this test is not visible;
   a buffer that could not be filled presumably marks end of input -
   confirm.  */
341 if (n_buffered != nchars)
344 /* Find where to end this chunk. */
345 bp = buf + n_buffered;
/* Only a completely full buffer is searched backwards for a newline;
   a partly filled one is used as it is.
   NOTE(review): the body of the while loop is not visible;
   presumably it steps BP back one byte per iteration - confirm.  */
346 if (n_buffered == nchars)
348 while (bp > buf && bp[-1] != '\n')
352 /* If chunk has no newlines, use all the chunk. */
354 bp = buf + n_buffered;
356 /* Output the chars as one output file. */
/* The flag argument is the constant 1, so every chunk written here
   goes to a newly opened output file.  */
357 cwrite (1, buf, bp - buf);
359 /* Discard the chars we just output; move rest of chunk
360 down to be the start of the next chunk. Source and
361 destination probably overlap. */
362 n_buffered -= bp - buf;
/* memmove is used because it is defined for overlapping regions.  */
364 memmove (buf, bp, n_buffered);
/* main: program entry point.
   Steps visible in this excerpt: locale and message catalog setup;
   option parsing with getopt_long; taking the input file name and
   the output prefix from the remaining arguments; opening the input;
   allocating an I/O buffer sized from the block size of the input;
   running one of lines_split, bytes_split or line_bytes_split; and
   finally closing the input and any open output descriptor.
   ARGC and ARGV are the usual command line count and vector.
   NOTE(review): case labels, braces and several statements are not
   visible here, so the notes below describe only what is shown.  */
371 main (int argc, char **argv)
373 struct stat stat_buf;
374 int num; /* numeric argument from command line */
377 type_undef, type_bytes, type_byteslines, type_lines, type_digits
378 } split_type = type_undef;
379 int in_blk_size; /* optimal block size of input file device */
380 char *buf; /* file i/o buffer */
/* argv index at which obsolete digit options were last seen; zero
   until the first one.  */
383 int digits_optind = 0;
385 program_name = argv[0];
386 setlocale (LC_ALL, "");
387 bindtextdomain (PACKAGE, LOCALEDIR);
388 textdomain (PACKAGE);
390 atexit (close_stdout);
392 /* Parse command line options. */
399 /* This is the argv-index of the option we will read next. */
/* A zero optind is treated as index 1.  */
400 int this_optind = optind ? optind : 1;
403 c = getopt_long (argc, argv, shortopts, longopts, NULL);
/* Suffix length, going by the diagnostic below: must parse as a
   decimal number and be nonnegative.
   NOTE(review): tmp_long is presumably a long; where size_t is at
   least as wide as long, the comparison against SIZE_MAX converts
   tmp_long to an unsigned type and can never be true, which leaves
   the negative test as the only effective bound - confirm.  */
413 if (xstrtol (optarg, NULL, 10, &tmp_long, "") != LONGINT_OK
414 || tmp_long < 0 || tmp_long > SIZE_MAX)
416 error (0, 0, _("%s: invalid suffix length"), optarg);
417 usage (EXIT_FAILURE);
419 suffix_length = tmp_long;
/* Byte count mode.  A second mode selection on the same command
   line is rejected.  The value must fit in an int; the final xstrtol
   argument lists the b, k and m multipliers named in the help
   text.  */
423 if (split_type != type_undef)
425 error (0, 0, _("cannot split in more than one way"));
426 usage (EXIT_FAILURE);
428 split_type = type_bytes;
429 if (xstrtol (optarg, NULL, 10, &tmp_long, "bkm") != LONGINT_OK
430 || tmp_long < 0 || tmp_long > INT_MAX)
432 error (0, 0, _("%s: invalid number of bytes"), optarg);
433 usage (EXIT_FAILURE);
435 accum = (int) tmp_long;
/* Line count mode: same pattern, but no multiplier letters are
   passed to xstrtol.  */
439 if (split_type != type_undef)
441 error (0, 0, _("cannot split in more than one way"));
442 usage (EXIT_FAILURE);
444 split_type = type_lines;
445 if (xstrtol (optarg, NULL, 10, &tmp_long, "") != LONGINT_OK
446 || tmp_long < 0 || tmp_long > INT_MAX)
448 error (0, 0, _("%s: invalid number of lines"), optarg);
449 usage (EXIT_FAILURE);
451 accum = (int) tmp_long;
/* Line-bytes mode: same pattern as the byte count mode.  */
455 if (split_type != type_undef)
457 error (0, 0, _("cannot split in more than one way"));
458 usage (EXIT_FAILURE);
461 split_type = type_byteslines;
462 if (xstrtol (optarg, NULL, 10, &tmp_long, "bkm") != LONGINT_OK
463 || tmp_long < 0 || tmp_long > INT_MAX)
465 error (0, 0, _("%s: invalid number of bytes"), optarg);
466 usage (EXIT_FAILURE);
468 accum = (int) tmp_long;
471 #if POSIX2_VERSION < 200112
/* Obsolete digit options: each digit arrives as its own option
   character and is folded into ACCUM in base ten.  Digits may follow
   earlier digits but may not be combined with another mode.  */
482 if (split_type != type_undef && split_type != type_digits)
484 error (0, 0, _("cannot split in more than one way"));
485 usage (EXIT_FAILURE);
/* Digits coming from a different argv element than the previous
   ones begin a fresh number.  */
487 if (digits_optind != 0 && digits_optind != this_optind)
488 accum = 0; /* More than one number given; ignore other. */
489 digits_optind = this_optind;
490 split_type = type_digits;
491 accum = accum * 10 + c - '0';
499 case_GETOPT_HELP_CHAR;
501 case_GETOPT_VERSION_CHAR (PROGRAM_NAME, AUTHORS);
504 usage (EXIT_FAILURE);
/* The obsolescence warning is issued only when digit options were
   used and POSIXLY_CORRECT is not set in the environment.  */
508 if (OBSOLETE_OPTION_WARNINGS
509 && digits_optind && ! getenv ("POSIXLY_CORRECT"))
510 error (0, 0, _("warning: `split -%d' is obsolete; use `split -l %d'"),
513 /* Handle default case. */
/* With no mode chosen, splitting is by lines.  */
514 if (split_type == type_undef)
516 split_type = type_lines;
522 error (0, 0, _("invalid number"));
523 usage (EXIT_FAILURE);
527 /* Get out the filename arguments. */
530 infile = argv[optind++];
533 outbase = argv[optind++];
537 error (0, 0, _("too many arguments"));
538 usage (EXIT_FAILURE);
541 /* Open the input file. */
/* The name - is handled separately from a real path, which is opened
   read-only.  */
542 if (STREQ (infile, "-"))
546 input_desc = open (infile, O_RDONLY);
548 error (EXIT_FAILURE, errno, "%s", infile);
550 /* Binary I/O is safer when bytecounts are used. */
551 SET_BINARY (input_desc);
553 /* No output file is open now. */
556 /* Get the optimal block size of input device and make a buffer. */
558 if (fstat (input_desc, &stat_buf) < 0)
559 error (EXIT_FAILURE, errno, "%s", infile);
560 in_blk_size = ST_BLKSIZE (stat_buf);
/* One byte more than the block size is allocated.
   NOTE(review): the extra byte is presumably room for the newline
   sentinel used by lines_split - confirm.  */
562 buf = xmalloc (in_blk_size + 1);
568 lines_split (num, buf, in_blk_size);
572 bytes_split (num, buf, in_blk_size);
575 case type_byteslines:
576 line_bytes_split (num);
/* Close errors on either descriptor are reported as fatal; the
   output descriptor is closed only if one is open.  */
583 if (close (input_desc) < 0)
584 error (EXIT_FAILURE, errno, "%s", infile);
585 if (output_desc >= 0 && close (output_desc) < 0)
586 error (EXIT_FAILURE, errno, "%s", outfile);