1 /* csplit - split a file into sections determined by context lines
2 Copyright (C) 91, 1995-2007 Free Software Foundation, Inc.
4 This program is free software: you can redistribute it and/or modify
5 it under the terms of the GNU General Public License as published by
6 the Free Software Foundation, either version 3 of the License, or
7 (at your option) any later version.
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU General Public License for more details.
14 You should have received a copy of the GNU General Public License
15 along with this program. If not, see <http://www.gnu.org/licenses/>. */
17 /* Written by Stuart Kemp, cpsrk@groper.jcu.edu.au.
18 Modified by David MacKenzie, djm@gnu.ai.mit.edu. */
23 #include <sys/types.h>
31 #include "fd-reopen.h"
34 #include "safe-read.h"
38 /* Use SA_NOCLDSTOP as a proxy for whether the sigaction machinery is present. */
41 # define SA_NOCLDSTOP 0
42 # define sigprocmask(How, Set, Oset) /* empty */
44 # if ! HAVE_SIGINTERRUPT
45 # define siginterrupt(sig, flag) /* empty */
49 /* The official name of this program (e.g., no `g' prefix). */
50 #define PROGRAM_NAME "csplit"
52 #define AUTHORS "Stuart Kemp", "David MacKenzie"
54 /* Increment size of area for control records. */
57 /* The default prefix for output file names. */
58 #define DEFAULT_PREFIX "xx"
60 /* A compiled pattern arg. */
63 intmax_t offset; /* Offset from regexp to split at. */
64 uintmax_t lines_required; /* Number of lines required. */
65 uintmax_t repeat; /* Repeat count. */
66 int argnum; /* ARGV index. */
67 bool repeat_forever; /* True if `*' used as a repeat count. */
68 bool ignore; /* If true, produce no output (for regexp). */
69 bool regexpr; /* True if regular expression was used. */
70 struct re_pattern_buffer re_compiled; /* Compiled regular expression. */
73 /* Initial size of data area in buffers. */
74 #define START_SIZE 8191
76 /* Increment size for data area. */
77 #define INCR_SIZE 2048
79 /* Number of lines kept in each node in line list. */
83 /* Some small values to test the algorithms. */
84 # define START_SIZE 200
89 /* A string with a length count. */
96 /* Pointers to the beginnings of lines in the buffer area.
97 These structures are linked together if needed. */
100 size_t used; /* Number of offsets used in this struct. */
101 size_t insert_index; /* Next offset to use when inserting line. */
102 size_t retrieve_index; /* Next index to use when retrieving line. */
103 struct cstring starts[CTRL_SIZE]; /* Lines in the data area. */
104 struct line *next; /* Next in linked list. */
107 /* The structure to hold the input lines.
108 Contains a pointer to the data area and a list containing
109 pointers to the individual lines. */
112 size_t bytes_alloc; /* Size of the buffer area. */
113 size_t bytes_used; /* Bytes used in the buffer area. */
114 uintmax_t start_line; /* First line number in this buffer. */
115 uintmax_t first_available; /* First line that can be retrieved. */
116 size_t num_lines; /* Number of complete lines in this buffer. */
117 char *buffer; /* Data area. */
118 struct line *line_start; /* Head of list of pointers to lines. */
119 struct line *curr_line; /* The line start record currently in use. */
120 struct buffer_record *next;
123 static void close_output_file (void);
124 static void create_output_file (void);
125 static void delete_all_files (bool);
126 static void save_line_to_file (const struct cstring *line);
127 void usage (int status);
129 /* The name this program was run with. */
132 /* Start of buffer list. */
133 static struct buffer_record *head = NULL;
135 /* Partially read line. */
136 static char *hold_area = NULL;
138 /* Number of bytes in `hold_area'. */
139 static size_t hold_count = 0;
141 /* Number of the last line in the buffers. */
142 static uintmax_t last_line_number = 0;
144 /* Number of the line currently being examined. */
145 static uintmax_t current_line = 0;
147 /* If true, we have read EOF. */
148 static bool have_read_eof = false;
150 /* Name of output files. */
151 static char *volatile filename_space = NULL;
153 /* Prefix part of output file names. */
154 static char const *volatile prefix = NULL;
156 /* Suffix part of output file names. */
157 static char *volatile suffix = NULL;
159 /* Number of digits to use in output file names. */
160 static int volatile digits = 2;
162 /* Number of files created so far. */
163 static unsigned int volatile files_created = 0;
165 /* Number of bytes written to current file. */
166 static uintmax_t bytes_written;
168 /* Output file pointer. */
169 static FILE *output_stream = NULL;
171 /* Output file name. */
172 static char *output_filename = NULL;
174 /* Perhaps it would be cleaner to pass arg values instead of indexes. */
175 static char **global_argv;
177 /* If true, do not print the count of bytes in each output file. */
178 static bool suppress_count;
180 /* If true, remove output files on error. */
181 static bool volatile remove_files;
183 /* If true, remove all output files which have a zero length. */
184 static bool elide_empty_files;
186 /* The compiled pattern arguments, which determine how to split the input file. */
188 static struct control *controls;
190 /* Number of elements in `controls'. */
191 static size_t control_used;
193 /* The set of signals that are caught. */
194 static sigset_t caught_signals;
196 static struct option const longopts[] =
198 {"digits", required_argument, NULL, 'n'},
199 {"quiet", no_argument, NULL, 'q'},
200 {"silent", no_argument, NULL, 's'},
201 {"keep-files", no_argument, NULL, 'k'},
202 {"elide-empty-files", no_argument, NULL, 'z'},
203 {"prefix", required_argument, NULL, 'f'},
204 {"suffix-format", required_argument, NULL, 'b'},
205 {GETOPT_HELP_OPTION_DECL},
206 {GETOPT_VERSION_OPTION_DECL},
210 /* Optionally remove files created so far; then exit.
211 Called when an error is detected. */
218 close_output_file ();
220 sigprocmask (SIG_BLOCK, &caught_signals, &oldset);
221 delete_all_files (false);
222 sigprocmask (SIG_SETMASK, &oldset, NULL);
225 static void cleanup_fatal (void) ATTRIBUTE_NORETURN;
236 error (0, 0, "%s", _("memory exhausted"));
241 interrupt_handler (int sig)
244 signal (sig, SIG_IGN);
246 delete_all_files (true);
248 signal (sig, SIG_DFL);
252 /* Keep track of NUM bytes of a partial line in buffer START.
253 These bytes will be retrieved later when another large buffer is read. */
256 save_to_hold_area (char *start, size_t num)
263 /* Read up to MAX_N_BYTES bytes from the input stream into DEST.
264 Return the number of bytes read. */
267 read_input (char *dest, size_t max_n_bytes)
271 if (max_n_bytes == 0)
274 bytes_read = safe_read (STDIN_FILENO, dest, max_n_bytes);
277 have_read_eof = true;
279 if (bytes_read == SAFE_READ_ERROR)
281 error (0, errno, _("read error"));
288 /* Initialize existing line record P. */
291 clear_line_control (struct line *p)
295 p->retrieve_index = 0;
298 /* Return a new, initialized line record. */
301 new_line_control (void)
303 struct line *p = xmalloc (sizeof *p);
306 clear_line_control (p);
311 /* Record LINE_START, which is the address of the start of a line
312 of length LINE_LEN in the large buffer, in the lines buffer of B. */
315 keep_new_line (struct buffer_record *b, char *line_start, size_t line_len)
319 /* If there is no existing area to keep line info, get some. */
320 if (b->line_start == NULL)
321 b->line_start = b->curr_line = new_line_control ();
323 /* If existing area for lines is full, get more. */
324 if (b->curr_line->used == CTRL_SIZE)
326 b->curr_line->next = new_line_control ();
327 b->curr_line = b->curr_line->next;
332 /* Record the start of the line, and update counters. */
333 l->starts[l->insert_index].str = line_start;
334 l->starts[l->insert_index].len = line_len;
339 /* Scan the buffer in B for newline characters
340 and record the line start locations and lengths in B.
341 Return the number of lines found in this buffer.
343 There may be an incomplete line at the end of the buffer;
344 a pointer is kept to this area, which will be used when
345 the next buffer is filled. */
348 record_line_starts (struct buffer_record *b)
350 char *line_start; /* Start of current line. */
351 char *line_end; /* End of each line found. */
352 size_t bytes_left; /* Length of incomplete last line. */
353 size_t lines; /* Number of lines found. */
354 size_t line_length; /* Length of each line found. */
356 if (b->bytes_used == 0)
360 line_start = b->buffer;
361 bytes_left = b->bytes_used;
365 line_end = memchr (line_start, '\n', bytes_left);
366 if (line_end == NULL)
368 line_length = line_end - line_start + 1;
369 keep_new_line (b, line_start, line_length);
370 bytes_left -= line_length;
371 line_start = line_end + 1;
375 /* Check for an incomplete last line. */
380 keep_new_line (b, line_start, bytes_left);
384 save_to_hold_area (xmemdup (line_start, bytes_left), bytes_left);
387 b->num_lines = lines;
388 b->first_available = b->start_line = last_line_number + 1;
389 last_line_number += lines;
394 /* Return a new buffer with room to store SIZE bytes, plus
395 an extra byte for safety. */
397 static struct buffer_record *
398 create_new_buffer (size_t size)
400 struct buffer_record *new_buffer = xmalloc (sizeof *new_buffer);
402 new_buffer->buffer = xmalloc (size + 1);
404 new_buffer->bytes_alloc = size;
405 new_buffer->line_start = new_buffer->curr_line = NULL;
410 /* Return a new buffer of at least MIN_SIZE bytes. If a buffer of at
411 least that size is currently free, use it, otherwise create a new one. */
413 static struct buffer_record *
414 get_new_buffer (size_t min_size)
416 struct buffer_record *new_buffer; /* Buffer to return. */
417 size_t alloc_size; /* Actual size that will be requested. */
419 alloc_size = START_SIZE;
420 if (alloc_size < min_size)
422 size_t s = min_size - alloc_size + INCR_SIZE - 1;
423 alloc_size += s - s % INCR_SIZE;
426 new_buffer = create_new_buffer (alloc_size);
428 new_buffer->num_lines = 0;
429 new_buffer->bytes_used = 0;
430 new_buffer->start_line = new_buffer->first_available = last_line_number + 1;
431 new_buffer->next = NULL;
437 free_buffer (struct buffer_record *buf)
443 /* Append buffer BUF to the linked list of buffers that contain
444 some data yet to be processed. */
447 save_buffer (struct buffer_record *buf)
449 struct buffer_record *p;
452 buf->curr_line = buf->line_start;
458 for (p = head; p->next; p = p->next)
464 /* Fill a buffer of input.
466 Set the initial size of the buffer to a default.
467 Fill the buffer (from the hold area and input stream)
468 and find the individual lines.
469 If no lines are found (the buffer is too small to hold the next line),
470 release the current buffer (whose contents would have been put in the
471 hold area) and repeat the process with another large buffer until at least
472 one entire line has been read.
474 Return true if a new buffer was obtained, otherwise false
475 (in which case end-of-file must have been encountered). */
480 struct buffer_record *b;
481 size_t bytes_wanted = START_SIZE; /* Minimum buffer size. */
482 size_t bytes_avail; /* Size of new buffer created. */
483 size_t lines_found; /* Number of lines in this new buffer. */
484 char *p; /* Place to load into buffer. */
489 /* We must make the buffer at least as large as the amount of data
490 in the partial line left over from the last call. */
491 if (bytes_wanted < hold_count)
492 bytes_wanted = hold_count;
496 b = get_new_buffer (bytes_wanted);
497 bytes_avail = b->bytes_alloc; /* Size of buffer returned. */
500 /* First check the `holding' area for a partial line. */
503 memcpy (p, hold_area, hold_count);
505 b->bytes_used += hold_count;
506 bytes_avail -= hold_count;
510 b->bytes_used += read_input (p, bytes_avail);
512 lines_found = record_line_starts (b);
516 if (lines_found || have_read_eof)
519 if (xalloc_oversized (2, b->bytes_alloc))
521 bytes_wanted = 2 * b->bytes_alloc;
531 return lines_found != 0;
534 /* Return the line number of the first line that has not yet been retrieved. */
537 get_first_line_in_buffer (void)
539 if (head == NULL && !load_buffer ())
540 error (EXIT_FAILURE, errno, _("input disappeared"));
542 return head->first_available;
545 /* Return a pointer to the logical first line in the buffer and make the
546 next line the logical first line.
547 Return NULL if there is no more input. */
549 static struct cstring *
552 /* If non-NULL, this is the buffer for which the previous call
553 returned the final line. So now, presuming that line has been
554 processed, we can free the buffer and reset this pointer. */
555 static struct buffer_record *prev_buf = NULL;
557 struct cstring *line; /* Return value. */
558 struct line *l; /* For convenience. */
562 free_buffer (prev_buf);
566 if (head == NULL && !load_buffer ())
569 if (current_line < head->first_available)
570 current_line = head->first_available;
572 ++(head->first_available);
576 line = &l->starts[l->retrieve_index];
578 /* Advance index to next line. */
579 if (++l->retrieve_index == l->used)
581 /* Go on to the next line record. */
582 head->curr_line = l->next;
583 if (head->curr_line == NULL || head->curr_line->used == 0)
585 /* Go on to the next data block,
586 but first record the current one so we can free it
587 once the line we're returning has been processed. */
596 /* Search the buffers for line LINENUM, reading more input if necessary.
597 Return a pointer to the line, or NULL if it is not found in the file. */
599 static struct cstring *
600 find_line (uintmax_t linenum)
602 struct buffer_record *b;
604 if (head == NULL && !load_buffer ())
607 if (linenum < head->start_line)
612 if (linenum < b->start_line + b->num_lines)
614 /* The line is in this buffer. */
616 size_t offset; /* How far into the buffer the line is. */
619 offset = linenum - b->start_line;
620 /* Find the control record. */
621 while (offset >= CTRL_SIZE)
626 return &l->starts[offset];
628 if (b->next == NULL && !load_buffer ())
630 b = b->next; /* Try the next data block. */
634 /* Return true if no more lines are available for input. */
639 return find_line (current_line + 1) == NULL;
642 /* Open NAME as standard input. */
645 set_input_file (const char *name)
647 if (! STREQ (name, "-") && fd_reopen (STDIN_FILENO, name, O_RDONLY, 0) < 0)
648 error (EXIT_FAILURE, errno, _("cannot open %s for reading"), quote (name));
651 /* Write all lines from the beginning of the buffer up to, but
652 not including, line LAST_LINE, to the current output file.
653 If IGNORE is true, do not output lines selected here.
654 ARGNUM is the index in ARGV of the current pattern. */
657 write_to_file (uintmax_t last_line, bool ignore, int argnum)
659 struct cstring *line;
660 uintmax_t first_line; /* First available input line. */
661 uintmax_t lines; /* Number of lines to output. */
664 first_line = get_first_line_in_buffer ();
666 if (first_line > last_line)
668 error (0, 0, _("%s: line number out of range"), global_argv[argnum]);
672 lines = last_line - first_line;
674 for (i = 0; i < lines; i++)
676 line = remove_line ();
679 error (0, 0, _("%s: line number out of range"), global_argv[argnum]);
683 save_line_to_file (line);
687 /* Output any lines left after all regexps have been processed. */
690 dump_rest_of_file (void)
692 struct cstring *line;
694 while ((line = remove_line ()) != NULL)
695 save_line_to_file (line);
698 /* Handle an attempt to read beyond EOF under the control of record P,
699 on iteration REPETITION if nonzero. */
701 static void handle_line_error (const struct control *, uintmax_t)
704 handle_line_error (const struct control *p, uintmax_t repetition)
706 char buf[INT_BUFSIZE_BOUND (uintmax_t)];
708 fprintf (stderr, _("%s: %s: line number out of range"),
709 program_name, quote (umaxtostr (p->lines_required, buf)));
711 fprintf (stderr, _(" on repetition %s\n"), umaxtostr (repetition, buf));
713 fprintf (stderr, "\n");
718 /* Determine the line number that marks the end of this file,
719 then get those lines and save them to the output file.
720 P is the control record.
721 REPETITION is the repetition number. */
724 process_line_count (const struct control *p, uintmax_t repetition)
727 uintmax_t last_line_to_save = p->lines_required * (repetition + 1);
728 struct cstring *line;
730 create_output_file ();
732 linenum = get_first_line_in_buffer ();
734 while (linenum++ < last_line_to_save)
736 line = remove_line ();
738 handle_line_error (p, repetition);
739 save_line_to_file (line);
742 close_output_file ();
744 /* Ensure that the line number specified is not 1 greater than
745 the number of lines in the file. */
746 if (no_more_lines ())
747 handle_line_error (p, repetition);
750 static void regexp_error (struct control *, uintmax_t, bool) ATTRIBUTE_NORETURN;
752 regexp_error (struct control *p, uintmax_t repetition, bool ignore)
754 fprintf (stderr, _("%s: %s: match not found"),
755 program_name, quote (global_argv[p->argnum]));
759 char buf[INT_BUFSIZE_BOUND (uintmax_t)];
760 fprintf (stderr, _(" on repetition %s\n"), umaxtostr (repetition, buf));
763 fprintf (stderr, "\n");
767 dump_rest_of_file ();
768 close_output_file ();
773 /* Read the input until a line matches the regexp in P, outputting
774 it unless P->IGNORE is true.
775 REPETITION is this repeat-count; 0 means the first time. */
778 process_regexp (struct control *p, uintmax_t repetition)
780 struct cstring *line; /* From input file. */
781 size_t line_len; /* To make "$" in regexps work. */
782 uintmax_t break_line; /* First line number of next file. */
783 bool ignore = p->ignore; /* If true, skip this section. */
787 create_output_file ();
789 /* If there is no offset for the regular expression, or
790 it is positive, then it is not necessary to buffer the lines. */
796 line = find_line (++current_line);
799 if (p->repeat_forever)
803 dump_rest_of_file ();
804 close_output_file ();
809 regexp_error (p, repetition, ignore);
811 line_len = line->len;
812 if (line->str[line_len - 1] == '\n')
814 ret = re_search (&p->re_compiled, line->str, line_len,
818 error (0, 0, _("error in regular expression search"));
823 line = remove_line ();
825 save_line_to_file (line);
833 /* Buffer the lines. */
836 line = find_line (++current_line);
839 if (p->repeat_forever)
843 dump_rest_of_file ();
844 close_output_file ();
849 regexp_error (p, repetition, ignore);
851 line_len = line->len;
852 if (line->str[line_len - 1] == '\n')
854 ret = re_search (&p->re_compiled, line->str, line_len,
858 error (0, 0, _("error in regular expression search"));
866 /* Account for any offset from this regexp. */
867 break_line = current_line + p->offset;
869 write_to_file (break_line, ignore, p->argnum);
872 close_output_file ();
875 current_line = break_line;
878 /* Split the input file according to the control records we have built. */
885 for (i = 0; i < control_used; i++)
888 if (controls[i].regexpr)
890 for (j = 0; (controls[i].repeat_forever
891 || j <= controls[i].repeat); j++)
892 process_regexp (&controls[i], j);
896 for (j = 0; (controls[i].repeat_forever
897 || j <= controls[i].repeat); j++)
898 process_line_count (&controls[i], j);
902 create_output_file ();
903 dump_rest_of_file ();
904 close_output_file ();
907 /* Return the name of output file number NUM.
909 This function is called from a signal handler, so it should invoke
910 only reentrant functions that are async-signal-safe. POSIX does
911 not guarantee this for the functions called below, but we don't
912 know of any hosts where this implementation isn't safe. */
915 make_filename (unsigned int num)
917 strcpy (filename_space, prefix);
919 sprintf (filename_space + strlen (prefix), suffix, num);
921 sprintf (filename_space + strlen (prefix), "%0*u", digits, num);
922 return filename_space;
925 /* Create the next output file. */
928 create_output_file (void)
934 output_filename = make_filename (files_created);
936 /* Create the output file in a critical section, to avoid races. */
937 sigprocmask (SIG_BLOCK, &caught_signals, &oldset);
938 output_stream = fopen (output_filename, "w");
939 fopen_ok = (output_stream != NULL);
941 files_created += fopen_ok;
942 sigprocmask (SIG_SETMASK, &oldset, NULL);
946 error (0, fopen_errno, "%s", output_filename);
952 /* If requested, delete all the files we have created. This function
953 must be called only from critical sections. */
956 delete_all_files (bool in_signal_handler)
963 for (i = 0; i < files_created; i++)
965 const char *name = make_filename (i);
966 if (unlink (name) != 0 && !in_signal_handler)
967 error (0, errno, "%s", name);
973 /* Close the current output file and print the count
974 of characters in this file. */
977 close_output_file (void)
981 if (ferror (output_stream))
983 error (0, 0, _("write error for %s"), quote (output_filename));
984 output_stream = NULL;
987 if (fclose (output_stream) != 0)
989 error (0, errno, "%s", output_filename);
990 output_stream = NULL;
993 if (bytes_written == 0 && elide_empty_files)
999 /* Remove the output file in a critical section, to avoid races. */
1000 sigprocmask (SIG_BLOCK, &caught_signals, &oldset);
1001 unlink_ok = (unlink (output_filename) == 0);
1002 unlink_errno = errno;
1003 files_created -= unlink_ok;
1004 sigprocmask (SIG_SETMASK, &oldset, NULL);
1007 error (0, unlink_errno, "%s", output_filename);
1011 if (!suppress_count)
1013 char buf[INT_BUFSIZE_BOUND (uintmax_t)];
1014 fprintf (stdout, "%s\n", umaxtostr (bytes_written, buf));
1017 output_stream = NULL;
1021 /* Save line LINE to the output file and
1022 increment the character count for the current file. */
1025 save_line_to_file (const struct cstring *line)
1027 fwrite (line->str, sizeof (char), line->len, output_stream);
1028 bytes_written += line->len;
1031 /* Return a new, initialized control record. */
1033 static struct control *
1034 new_control_record (void)
1036 static size_t control_allocated = 0; /* Total space allocated. */
1039 if (control_used == control_allocated)
1040 controls = X2NREALLOC (controls, &control_allocated);
1041 p = &controls[control_used++];
1044 p->repeat_forever = false;
1045 p->lines_required = 0;
1050 /* Check if there is a numeric offset after a regular expression.
1051 STR is the entire command line argument.
1052 P is the control record for this regular expression.
1053 NUM is the numeric part of STR. */
1056 check_for_offset (struct control *p, const char *str, const char *num)
1058 if (xstrtoimax (num, NULL, 10, &p->offset, "") != LONGINT_OK)
1059 error (EXIT_FAILURE, 0, _("%s: integer expected after delimiter"), str);
1062 /* Given that the first character of command line arg STR is '{',
1063 make sure that the rest of the string is a valid repeat count
1064 and store its value in P.
1065 ARGNUM is the ARGV index of STR. */
1068 parse_repeat_count (int argnum, struct control *p, char *str)
1073 end = str + strlen (str) - 1;
1075 error (EXIT_FAILURE, 0, _("%s: `}' is required in repeat count"), str);
1078 if (str+1 == end-1 && *(str+1) == '*')
1079 p->repeat_forever = true;
1082 if (xstrtoumax (str + 1, NULL, 10, &val, "") != LONGINT_OK)
1084 error (EXIT_FAILURE, 0,
1085 _("%s}: integer required between `{' and `}'"),
1086 global_argv[argnum]);
1094 /* Extract the regular expression from STR and check for a numeric offset.
1095 STR should start with the regexp delimiter character.
1096 Return a new control record for the regular expression.
1097 ARGNUM is the ARGV index of STR.
1098 Unless IGNORE is true, mark these lines for output. */
1100 static struct control *
1101 extract_regexp (int argnum, bool ignore, char const *str)
1103 size_t len; /* Number of bytes in this regexp. */
1105 char const *closing_delim;
1109 closing_delim = strrchr (str + 1, delim);
1110 if (closing_delim == NULL)
1111 error (EXIT_FAILURE, 0,
1112 _("%s: closing delimiter `%c' missing"), str, delim);
1114 len = closing_delim - str - 1;
1115 p = new_control_record ();
1120 p->re_compiled.buffer = NULL;
1121 p->re_compiled.allocated = 0;
1122 p->re_compiled.fastmap = xmalloc (UCHAR_MAX + 1);
1123 p->re_compiled.translate = NULL;
1125 RE_SYNTAX_POSIX_BASIC & ~RE_CONTEXT_INVALID_DUP & ~RE_NO_EMPTY_RANGES;
1126 err = re_compile_pattern (str + 1, len, &p->re_compiled);
1129 error (0, 0, _("%s: invalid regular expression: %s"), str, err);
1133 if (closing_delim[1])
1134 check_for_offset (p, str, closing_delim + 1);
1139 /* Extract the break patterns from args START through ARGC - 1 of ARGV.
1140 After each pattern, check if the next argument is a repeat count. */
1143 parse_patterns (int argc, int start, char **argv)
1145 int i; /* Index into ARGV. */
1146 struct control *p; /* New control record created. */
1148 static uintmax_t last_val = 0;
1150 for (i = start; i < argc; i++)
1152 if (*argv[i] == '/' || *argv[i] == '%')
1154 p = extract_regexp (i, *argv[i] == '%', argv[i]);
1158 p = new_control_record ();
1161 if (xstrtoumax (argv[i], NULL, 10, &val, "") != LONGINT_OK)
1162 error (EXIT_FAILURE, 0, _("%s: invalid pattern"), argv[i]);
1164 error (EXIT_FAILURE, 0,
1165 _("%s: line number must be greater than zero"),
1169 char buf[INT_BUFSIZE_BOUND (uintmax_t)];
1170 error (EXIT_FAILURE, 0,
1171 _("line number %s is smaller than preceding line number, %s"),
1172 quote (argv[i]), umaxtostr (last_val, buf));
1175 if (val == last_val)
1177 _("warning: line number %s is the same as preceding line number"),
1182 p->lines_required = val;
1185 if (i + 1 < argc && *argv[i + 1] == '{')
1187 /* We have a repeat count. */
1189 parse_repeat_count (i, p, argv[i]);
1195 get_format_flags (char **format_ptr)
1197 unsigned int count = 0;
1199 for (; **format_ptr; (*format_ptr)++)
1201 switch (**format_ptr)
1212 count |= 2; /* Allow for 0x prefix preceding an `x' conversion. */
1223 get_format_width (char **format_ptr)
1225 unsigned long int val = 0;
1227 if (ISDIGIT (**format_ptr)
1228 && (xstrtoul (*format_ptr, format_ptr, 10, &val, NULL) != LONGINT_OK
1230 error (EXIT_FAILURE, 0, _("invalid format width"));
1232 /* Allow for enough octal digits to represent the value of UINT_MAX,
1233 even if the field width is less than that. */
1234 return MAX (val, (sizeof (unsigned int) * CHAR_BIT + 2) / 3);
1238 get_format_prec (char **format_ptr)
1240 if (**format_ptr != '.')
1244 if (! ISDIGIT (**format_ptr))
1248 unsigned long int val;
1249 if (xstrtoul (*format_ptr, format_ptr, 10, &val, NULL) != LONGINT_OK
1251 error (EXIT_FAILURE, 0, _("invalid format precision"));
1257 get_format_conv_type (char **format_ptr)
1259 unsigned char ch = *(*format_ptr)++;
1272 error (EXIT_FAILURE, 0, _("missing conversion specifier in suffix"));
1277 error (EXIT_FAILURE, 0,
1278 _("invalid conversion specifier in suffix: %c"), ch);
1280 error (EXIT_FAILURE, 0,
1281 _("invalid conversion specifier in suffix: \\%.3o"), ch);
1286 max_out (char *format)
1288 size_t out_count = 0;
1289 bool percent = false;
1293 if (*format++ != '%')
1295 else if (*format == '%')
1303 error (EXIT_FAILURE, 0,
1304 _("too many %% conversion specifications in suffix"));
1306 out_count += get_format_flags (&format);
1308 size_t width = get_format_width (&format);
1309 size_t prec = get_format_prec (&format);
1311 out_count += MAX (width, prec);
1313 get_format_conv_type (&format);
1318 error (EXIT_FAILURE, 0,
1319 _("missing %% conversion specification in suffix"));
1325 main (int argc, char **argv)
1328 unsigned long int val;
1330 initialize_main (&argc, &argv);
1331 program_name = argv[0];
1332 setlocale (LC_ALL, "");
1333 bindtextdomain (PACKAGE, LOCALEDIR);
1334 textdomain (PACKAGE);
1336 atexit (close_stdout);
1341 suppress_count = false;
1342 remove_files = true;
1343 prefix = DEFAULT_PREFIX;
1345 while ((optc = getopt_long (argc, argv, "f:b:kn:sqz", longopts, NULL)) != -1)
1357 remove_files = false;
1361 if (xstrtoul (optarg, NULL, 10, &val, "") != LONGINT_OK
1363 error (EXIT_FAILURE, 0, _("%s: invalid number"), optarg);
1369 suppress_count = true;
1373 elide_empty_files = true;
1376 case_GETOPT_HELP_CHAR;
1378 case_GETOPT_VERSION_CHAR (PROGRAM_NAME, AUTHORS);
1381 usage (EXIT_FAILURE);
1384 if (argc - optind < 2)
1387 error (0, 0, _("missing operand"));
1389 error (0, 0, _("missing operand after %s"), quote (argv[argc - 1]));
1390 usage (EXIT_FAILURE);
1394 filename_space = xmalloc (strlen (prefix) + max_out (suffix) + 2);
1396 filename_space = xmalloc (strlen (prefix) + digits + 2);
1398 set_input_file (argv[optind++]);
1400 parse_patterns (argc, optind, argv);
1404 static int const sig[] =
1406 /* The usual suspects. */
1407 SIGALRM, SIGHUP, SIGINT, SIGPIPE, SIGQUIT, SIGTERM,
1424 enum { nsigs = sizeof sig / sizeof sig[0] };
1427 struct sigaction act;
1429 sigemptyset (&caught_signals);
1430 for (i = 0; i < nsigs; i++)
1432 sigaction (sig[i], NULL, &act);
1433 if (act.sa_handler != SIG_IGN)
1434 sigaddset (&caught_signals, sig[i]);
1437 act.sa_handler = interrupt_handler;
1438 act.sa_mask = caught_signals;
1441 for (i = 0; i < nsigs; i++)
1442 if (sigismember (&caught_signals, sig[i]))
1443 sigaction (sig[i], &act, NULL);
1445 for (i = 0; i < nsigs; i++)
1446 if (signal (sig[i], SIG_IGN) != SIG_IGN)
1448 signal (sig[i], interrupt_handler);
1449 siginterrupt (sig[i], 1);
1456 if (close (STDIN_FILENO) != 0)
1458 error (0, errno, _("read error"));
1462 exit (EXIT_SUCCESS);
1468 if (status != EXIT_SUCCESS)
1469 fprintf (stderr, _("Try `%s --help' for more information.\n"),
1474 Usage: %s [OPTION]... FILE PATTERN...\n\
1478 Output pieces of FILE separated by PATTERN(s) to files `xx00', `xx01', ...,\n\
1479 and output byte counts of each piece to standard output.\n\
1483 Mandatory arguments to long options are mandatory for short options too.\n\
1486 -b, --suffix-format=FORMAT use sprintf FORMAT instead of %02d\n\
1487 -f, --prefix=PREFIX use PREFIX instead of `xx'\n\
1488 -k, --keep-files do not remove output files on errors\n\
1491 -n, --digits=DIGITS use specified number of digits instead of 2\n\
1492 -s, --quiet, --silent do not print counts of output file sizes\n\
1493 -z, --elide-empty-files remove empty output files\n\
1495 fputs (HELP_OPTION_DESCRIPTION, stdout);
1496 fputs (VERSION_OPTION_DESCRIPTION, stdout);
1499 Read standard input if FILE is -. Each PATTERN may be:\n\
1503 INTEGER copy up to but not including specified line number\n\
1504 /REGEXP/[OFFSET] copy up to but not including a matching line\n\
1505 %REGEXP%[OFFSET] skip to, but not including a matching line\n\
1506 {INTEGER} repeat the previous pattern specified number of times\n\
1507 {*} repeat the previous pattern as many times as possible\n\
1509 A line OFFSET is a required `+' or `-' followed by a positive integer.\n\
1511 emit_bug_reporting_address ();