1 /* csplit - split a file into sections determined by context lines
2 Copyright (C) 91, 1995-2008 Free Software Foundation, Inc.
4 This program is free software: you can redistribute it and/or modify
5 it under the terms of the GNU General Public License as published by
6 the Free Software Foundation, either version 3 of the License, or
7 (at your option) any later version.
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU General Public License for more details.
14 You should have received a copy of the GNU General Public License
15 along with this program. If not, see <http://www.gnu.org/licenses/>. */
17 /* Written by Stuart Kemp, cpsrk@groper.jcu.edu.au.
18 Modified by David MacKenzie, djm@gnu.ai.mit.edu. */
23 #include <sys/types.h>
31 #include "fd-reopen.h"
33 #include "safe-read.h"
37 /* Use SA_NOCLDSTOP as a proxy for whether the sigaction machinery is
40 # define SA_NOCLDSTOP 0
41 # define sigprocmask(How, Set, Oset) /* empty */
43 # if ! HAVE_SIGINTERRUPT
44 # define siginterrupt(sig, flag) /* empty */
48 /* The official name of this program (e.g., no `g' prefix). */
49 #define PROGRAM_NAME "csplit"
52 proper_name ("Stuart Kemp"), \
53 proper_name ("David MacKenzie")
55 /* Increment size of area for control records. */
58 /* The default prefix for output file names. */
59 #define DEFAULT_PREFIX "xx"
61 /* A compiled pattern arg. */
64 intmax_t offset; /* Offset from regexp to split at. */
65 uintmax_t lines_required; /* Number of lines required. */
66 uintmax_t repeat; /* Repeat count. */
67 int argnum; /* ARGV index. */
68 bool repeat_forever; /* True if `*' used as a repeat count. */
69 bool ignore; /* If true, produce no output (for regexp). */
70 bool regexpr; /* True if regular expression was used. */
71 struct re_pattern_buffer re_compiled; /* Compiled regular expression. */
74 /* Initial size of data area in buffers. */
75 #define START_SIZE 8191
77 /* Increment size for data area. */
78 #define INCR_SIZE 2048
80 /* Number of lines kept in each node in line list. */
84 /* Some small values to test the algorithms. */
85 # define START_SIZE 200
90 /* A string with a length count. */
97 /* Pointers to the beginnings of lines in the buffer area.
98 These structures are linked together if needed. */
101 size_t used; /* Number of offsets used in this struct. */
102 size_t insert_index; /* Next offset to use when inserting line. */
103 size_t retrieve_index; /* Next index to use when retrieving line. */
104 struct cstring starts[CTRL_SIZE]; /* Lines in the data area. */
105 struct line *next; /* Next in linked list. */
108 /* The structure to hold the input lines.
109 Contains a pointer to the data area and a list containing
110 pointers to the individual lines. */
113 size_t bytes_alloc; /* Size of the buffer area. */
114 size_t bytes_used; /* Bytes used in the buffer area. */
115 uintmax_t start_line; /* First line number in this buffer. */
116 uintmax_t first_available; /* First line that can be retrieved. */
117 size_t num_lines; /* Number of complete lines in this buffer. */
118 char *buffer; /* Data area. */
119 struct line *line_start; /* Head of list of pointers to lines. */
120 struct line *curr_line; /* The line start record currently in use. */
121 struct buffer_record *next;
124 static void close_output_file (void);
125 static void create_output_file (void);
126 static void delete_all_files (bool);
127 static void save_line_to_file (const struct cstring *line);
128 void usage (int status);
130 /* Start of buffer list. */
131 static struct buffer_record *head = NULL;
133 /* Partially read line. */
134 static char *hold_area = NULL;
136 /* Number of bytes in `hold_area'. */
137 static size_t hold_count = 0;
139 /* Number of the last line in the buffers. */
140 static uintmax_t last_line_number = 0;
142 /* Number of the line currently being examined. */
143 static uintmax_t current_line = 0;
145 /* If true, we have read EOF. */
146 static bool have_read_eof = false;
148 /* Name of output files. */
149 static char *volatile filename_space = NULL;
151 /* Prefix part of output file names. */
152 static char const *volatile prefix = NULL;
154 /* Suffix part of output file names. */
155 static char *volatile suffix = NULL;
157 /* Number of digits to use in output file names. */
158 static int volatile digits = 2;
160 /* Number of files created so far. */
161 static unsigned int volatile files_created = 0;
163 /* Number of bytes written to current file. */
164 static uintmax_t bytes_written;
166 /* Output file pointer. */
167 static FILE *output_stream = NULL;
169 /* Output file name. */
170 static char *output_filename = NULL;
172 /* Perhaps it would be cleaner to pass arg values instead of indexes. */
173 static char **global_argv;
175 /* If true, do not print the count of bytes in each output file. */
176 static bool suppress_count;
178 /* If true, remove output files on error. */
179 static bool volatile remove_files;
181 /* If true, remove all output files which have a zero length. */
182 static bool elide_empty_files;
184 /* The compiled pattern arguments, which determine how to split
186 static struct control *controls;
188 /* Number of elements in `controls'. */
189 static size_t control_used;
191 /* The set of signals that are caught. */
192 static sigset_t caught_signals;
194 static struct option const longopts[] =
196 {"digits", required_argument, NULL, 'n'},
197 {"quiet", no_argument, NULL, 'q'},
198 {"silent", no_argument, NULL, 's'},
199 {"keep-files", no_argument, NULL, 'k'},
200 {"elide-empty-files", no_argument, NULL, 'z'},
201 {"prefix", required_argument, NULL, 'f'},
202 {"suffix-format", required_argument, NULL, 'b'},
203 {GETOPT_HELP_OPTION_DECL},
204 {GETOPT_VERSION_OPTION_DECL},
208 /* Optionally remove files created so far; then exit.
209 Called when an error is detected. */
216 close_output_file ();
218 sigprocmask (SIG_BLOCK, &caught_signals, &oldset);
219 delete_all_files (false);
220 sigprocmask (SIG_SETMASK, &oldset, NULL);
223 static void cleanup_fatal (void) ATTRIBUTE_NORETURN;
234 error (0, 0, "%s", _("memory exhausted"));
239 interrupt_handler (int sig)
242 signal (sig, SIG_IGN);
244 delete_all_files (true);
246 signal (sig, SIG_DFL);
250 /* Keep track of NUM bytes of a partial line in buffer START.
251 These bytes will be retrieved later when another large buffer is read. */
254 save_to_hold_area (char *start, size_t num)
261 /* Read up to MAX_N_BYTES bytes from the input stream into DEST.
262 Return the number of bytes read. */
265 read_input (char *dest, size_t max_n_bytes)
269 if (max_n_bytes == 0)
272 bytes_read = safe_read (STDIN_FILENO, dest, max_n_bytes);
275 have_read_eof = true;
277 if (bytes_read == SAFE_READ_ERROR)
279 error (0, errno, _("read error"));
286 /* Initialize existing line record P. */
289 clear_line_control (struct line *p)
293 p->retrieve_index = 0;
296 /* Return a new, initialized line record. */
299 new_line_control (void)
301 struct line *p = xmalloc (sizeof *p);
304 clear_line_control (p);
309 /* Record LINE_START, which is the address of the start of a line
310 of length LINE_LEN in the large buffer, in the lines buffer of B. */
313 keep_new_line (struct buffer_record *b, char *line_start, size_t line_len)
317 /* If there is no existing area to keep line info, get some. */
318 if (b->line_start == NULL)
319 b->line_start = b->curr_line = new_line_control ();
321 /* If existing area for lines is full, get more. */
322 if (b->curr_line->used == CTRL_SIZE)
324 b->curr_line->next = new_line_control ();
325 b->curr_line = b->curr_line->next;
330 /* Record the start of the line, and update counters. */
331 l->starts[l->insert_index].str = line_start;
332 l->starts[l->insert_index].len = line_len;
337 /* Scan the buffer in B for newline characters
338 and record the line start locations and lengths in B.
339 Return the number of lines found in this buffer.
341 There may be an incomplete line at the end of the buffer;
342 a pointer is kept to this area, which will be used when
343 the next buffer is filled. */
346 record_line_starts (struct buffer_record *b)
348 char *line_start; /* Start of current line. */
349 char *line_end; /* End of each line found. */
350 size_t bytes_left; /* Length of incomplete last line. */
351 size_t lines; /* Number of lines found. */
352 size_t line_length; /* Length of each line found. */
354 if (b->bytes_used == 0)
358 line_start = b->buffer;
359 bytes_left = b->bytes_used;
363 line_end = memchr (line_start, '\n', bytes_left);
364 if (line_end == NULL)
366 line_length = line_end - line_start + 1;
367 keep_new_line (b, line_start, line_length);
368 bytes_left -= line_length;
369 line_start = line_end + 1;
373 /* Check for an incomplete last line. */
378 keep_new_line (b, line_start, bytes_left);
382 save_to_hold_area (xmemdup (line_start, bytes_left), bytes_left);
385 b->num_lines = lines;
386 b->first_available = b->start_line = last_line_number + 1;
387 last_line_number += lines;
392 /* Return a new buffer with room to store SIZE bytes, plus
393 an extra byte for safety. */
395 static struct buffer_record *
396 create_new_buffer (size_t size)
398 struct buffer_record *new_buffer = xmalloc (sizeof *new_buffer);
400 new_buffer->buffer = xmalloc (size + 1);
402 new_buffer->bytes_alloc = size;
403 new_buffer->line_start = new_buffer->curr_line = NULL;
408 /* Return a newly allocated buffer of at least MIN_SIZE bytes: START_SIZE
409 bytes, enlarged in INCR_SIZE steps when MIN_SIZE is larger than that. */
411 static struct buffer_record *
412 get_new_buffer (size_t min_size)
414 struct buffer_record *new_buffer; /* Buffer to return. */
415 size_t alloc_size; /* Actual size that will be requested. */
417 alloc_size = START_SIZE;
418 if (alloc_size < min_size)
420 size_t s = min_size - alloc_size + INCR_SIZE - 1;
421 alloc_size += s - s % INCR_SIZE;
424 new_buffer = create_new_buffer (alloc_size);
426 new_buffer->num_lines = 0;
427 new_buffer->bytes_used = 0;
428 new_buffer->start_line = new_buffer->first_available = last_line_number + 1;
429 new_buffer->next = NULL;
435 free_buffer (struct buffer_record *buf)
441 /* Append buffer BUF to the linked list of buffers that contain
442 some data yet to be processed. */
445 save_buffer (struct buffer_record *buf)
447 struct buffer_record *p;
450 buf->curr_line = buf->line_start;
456 for (p = head; p->next; p = p->next)
462 /* Fill a buffer of input.
464 Set the initial size of the buffer to a default.
465 Fill the buffer (from the hold area and input stream)
466 and find the individual lines.
467 If no lines are found (the buffer is too small to hold the next line),
468 release the current buffer (whose contents would have been put in the
469 hold area) and repeat the process with another large buffer until at least
470 one entire line has been read.
472 Return true if a new buffer was obtained, otherwise false
473 (in which case end-of-file must have been encountered). */
478 struct buffer_record *b;
479 size_t bytes_wanted = START_SIZE; /* Minimum buffer size. */
480 size_t bytes_avail; /* Size of new buffer created. */
481 size_t lines_found; /* Number of lines in this new buffer. */
482 char *p; /* Place to load into buffer. */
487 /* We must make the buffer at least as large as the amount of data
488 in the partial line left over from the last call. */
489 if (bytes_wanted < hold_count)
490 bytes_wanted = hold_count;
494 b = get_new_buffer (bytes_wanted);
495 bytes_avail = b->bytes_alloc; /* Size of buffer returned. */
498 /* First check the `holding' area for a partial line. */
501 memcpy (p, hold_area, hold_count);
503 b->bytes_used += hold_count;
504 bytes_avail -= hold_count;
508 b->bytes_used += read_input (p, bytes_avail);
510 lines_found = record_line_starts (b);
514 if (lines_found || have_read_eof)
517 if (xalloc_oversized (2, b->bytes_alloc))
519 bytes_wanted = 2 * b->bytes_alloc;
529 return lines_found != 0;
532 /* Return the line number of the first line that has not yet been retrieved. */
535 get_first_line_in_buffer (void)
537 if (head == NULL && !load_buffer ())
538 error (EXIT_FAILURE, errno, _("input disappeared"));
540 return head->first_available;
543 /* Return a pointer to the logical first line in the buffer and make the
544 next line the logical first line.
545 Return NULL if there is no more input. */
547 static struct cstring *
550 /* If non-NULL, this is the buffer for which the previous call
551 returned the final line. So now, presuming that line has been
552 processed, we can free the buffer and reset this pointer. */
553 static struct buffer_record *prev_buf = NULL;
555 struct cstring *line; /* Return value. */
556 struct line *l; /* For convenience. */
560 free_buffer (prev_buf);
564 if (head == NULL && !load_buffer ())
567 if (current_line < head->first_available)
568 current_line = head->first_available;
570 ++(head->first_available);
574 line = &l->starts[l->retrieve_index];
576 /* Advance index to next line. */
577 if (++l->retrieve_index == l->used)
579 /* Go on to the next line record. */
580 head->curr_line = l->next;
581 if (head->curr_line == NULL || head->curr_line->used == 0)
583 /* Go on to the next data block,
584 but first record the current one so we can free it
585 once the line we're returning has been processed. */
594 /* Search the buffers for line LINENUM, reading more input if necessary.
595 Return a pointer to the line, or NULL if it is not found in the file. */
597 static struct cstring *
598 find_line (uintmax_t linenum)
600 struct buffer_record *b;
602 if (head == NULL && !load_buffer ())
605 if (linenum < head->start_line)
610 if (linenum < b->start_line + b->num_lines)
612 /* The line is in this buffer. */
614 size_t offset; /* How far into the buffer the line is. */
617 offset = linenum - b->start_line;
618 /* Find the control record. */
619 while (offset >= CTRL_SIZE)
624 return &l->starts[offset];
626 if (b->next == NULL && !load_buffer ())
628 b = b->next; /* Try the next data block. */
632 /* Return true if no more lines are available for input. */
637 return find_line (current_line + 1) == NULL;
640 /* Open NAME as standard input. */
643 set_input_file (const char *name)
645 if (! STREQ (name, "-") && fd_reopen (STDIN_FILENO, name, O_RDONLY, 0) < 0)
646 error (EXIT_FAILURE, errno, _("cannot open %s for reading"), quote (name));
649 /* Write all lines from the beginning of the buffer up to, but
650 not including, line LAST_LINE, to the current output file.
651 If IGNORE is true, do not output lines selected here.
652 ARGNUM is the index in ARGV of the current pattern. */
655 write_to_file (uintmax_t last_line, bool ignore, int argnum)
657 struct cstring *line;
658 uintmax_t first_line; /* First available input line. */
659 uintmax_t lines; /* Number of lines to output. */
662 first_line = get_first_line_in_buffer ();
664 if (first_line > last_line)
666 error (0, 0, _("%s: line number out of range"), global_argv[argnum]);
670 lines = last_line - first_line;
672 for (i = 0; i < lines; i++)
674 line = remove_line ();
677 error (0, 0, _("%s: line number out of range"), global_argv[argnum]);
681 save_line_to_file (line);
685 /* Output any lines left after all regexps have been processed. */
688 dump_rest_of_file (void)
690 struct cstring *line;
692 while ((line = remove_line ()) != NULL)
693 save_line_to_file (line);
696 /* Handle an attempt to read beyond EOF under the control of record P,
697 on iteration REPETITION if nonzero. */
699 static void handle_line_error (const struct control *, uintmax_t)
702 handle_line_error (const struct control *p, uintmax_t repetition)
704 char buf[INT_BUFSIZE_BOUND (uintmax_t)];
706 fprintf (stderr, _("%s: %s: line number out of range"),
707 program_name, quote (umaxtostr (p->lines_required, buf)));
709 fprintf (stderr, _(" on repetition %s\n"), umaxtostr (repetition, buf));
711 fprintf (stderr, "\n");
716 /* Determine the line number that marks the end of this file,
717 then get those lines and save them to the output file.
718 P is the control record.
719 REPETITION is the repetition number. */
722 process_line_count (const struct control *p, uintmax_t repetition)
725 uintmax_t last_line_to_save = p->lines_required * (repetition + 1);
726 struct cstring *line;
728 create_output_file ();
730 linenum = get_first_line_in_buffer ();
732 while (linenum++ < last_line_to_save)
734 line = remove_line ();
736 handle_line_error (p, repetition);
737 save_line_to_file (line);
740 close_output_file ();
742 /* Ensure that the line number specified is not 1 greater than
743 the number of lines in the file. */
744 if (no_more_lines ())
745 handle_line_error (p, repetition);
748 static void regexp_error (struct control *, uintmax_t, bool) ATTRIBUTE_NORETURN;
750 regexp_error (struct control *p, uintmax_t repetition, bool ignore)
752 fprintf (stderr, _("%s: %s: match not found"),
753 program_name, quote (global_argv[p->argnum]));
757 char buf[INT_BUFSIZE_BOUND (uintmax_t)];
758 fprintf (stderr, _(" on repetition %s\n"), umaxtostr (repetition, buf));
761 fprintf (stderr, "\n");
765 dump_rest_of_file ();
766 close_output_file ();
771 /* Read the input until a line matches the regexp in P, outputting
772 it unless P->IGNORE is true.
773 REPETITION is this repeat-count; 0 means the first time. */
776 process_regexp (struct control *p, uintmax_t repetition)
778 struct cstring *line; /* From input file. */
779 size_t line_len; /* To make "$" in regexps work. */
780 uintmax_t break_line; /* First line number of next file. */
781 bool ignore = p->ignore; /* If true, skip this section. */
785 create_output_file ();
787 /* If there is no offset for the regular expression, or
788 it is positive, then it is not necessary to buffer the lines. */
794 line = find_line (++current_line);
797 if (p->repeat_forever)
801 dump_rest_of_file ();
802 close_output_file ();
807 regexp_error (p, repetition, ignore);
809 line_len = line->len;
810 if (line->str[line_len - 1] == '\n')
812 ret = re_search (&p->re_compiled, line->str, line_len,
816 error (0, 0, _("error in regular expression search"));
821 line = remove_line ();
823 save_line_to_file (line);
831 /* Buffer the lines. */
834 line = find_line (++current_line);
837 if (p->repeat_forever)
841 dump_rest_of_file ();
842 close_output_file ();
847 regexp_error (p, repetition, ignore);
849 line_len = line->len;
850 if (line->str[line_len - 1] == '\n')
852 ret = re_search (&p->re_compiled, line->str, line_len,
856 error (0, 0, _("error in regular expression search"));
864 /* Account for any offset from this regexp. */
865 break_line = current_line + p->offset;
867 write_to_file (break_line, ignore, p->argnum);
870 close_output_file ();
873 current_line = break_line;
876 /* Split the input file according to the control records we have built. */
883 for (i = 0; i < control_used; i++)
886 if (controls[i].regexpr)
888 for (j = 0; (controls[i].repeat_forever
889 || j <= controls[i].repeat); j++)
890 process_regexp (&controls[i], j);
894 for (j = 0; (controls[i].repeat_forever
895 || j <= controls[i].repeat); j++)
896 process_line_count (&controls[i], j);
900 create_output_file ();
901 dump_rest_of_file ();
902 close_output_file ();
905 /* Return the name of output file number NUM.
907 This function is called from a signal handler, so it should invoke
908 only reentrant functions that are async-signal-safe. POSIX does
909 not guarantee this for the functions called below, but we don't
910 know of any hosts where this implementation isn't safe. */
913 make_filename (unsigned int num)
915 strcpy (filename_space, prefix);
917 sprintf (filename_space + strlen (prefix), suffix, num);
919 sprintf (filename_space + strlen (prefix), "%0*u", digits, num);
920 return filename_space;
923 /* Create the next output file. */
926 create_output_file (void)
932 output_filename = make_filename (files_created);
934 /* Create the output file in a critical section, to avoid races. */
935 sigprocmask (SIG_BLOCK, &caught_signals, &oldset);
936 output_stream = fopen (output_filename, "w");
937 fopen_ok = (output_stream != NULL);
939 files_created += fopen_ok;
940 sigprocmask (SIG_SETMASK, &oldset, NULL);
944 error (0, fopen_errno, "%s", output_filename);
950 /* If requested, delete all the files we have created. This function
951 must be called only from critical sections. */
954 delete_all_files (bool in_signal_handler)
961 for (i = 0; i < files_created; i++)
963 const char *name = make_filename (i);
964 if (unlink (name) != 0 && !in_signal_handler)
965 error (0, errno, "%s", name);
971 /* Close the current output file and, unless counts are suppressed,
972 print the number of bytes written to it. */
975 close_output_file (void)
979 if (ferror (output_stream))
981 error (0, 0, _("write error for %s"), quote (output_filename));
982 output_stream = NULL;
985 if (fclose (output_stream) != 0)
987 error (0, errno, "%s", output_filename);
988 output_stream = NULL;
991 if (bytes_written == 0 && elide_empty_files)
997 /* Remove the output file in a critical section, to avoid races. */
998 sigprocmask (SIG_BLOCK, &caught_signals, &oldset);
999 unlink_ok = (unlink (output_filename) == 0);
1000 unlink_errno = errno;
1001 files_created -= unlink_ok;
1002 sigprocmask (SIG_SETMASK, &oldset, NULL);
1005 error (0, unlink_errno, "%s", output_filename);
1009 if (!suppress_count)
1011 char buf[INT_BUFSIZE_BOUND (uintmax_t)];
1012 fprintf (stdout, "%s\n", umaxtostr (bytes_written, buf));
1015 output_stream = NULL;
1019 /* Save line LINE to the output file and
1020 add its length to the byte count for the current file. */
1023 save_line_to_file (const struct cstring *line)
1025 fwrite (line->str, sizeof (char), line->len, output_stream);
1026 bytes_written += line->len;
1029 /* Return a new, initialized control record. */
1031 static struct control *
1032 new_control_record (void)
1034 static size_t control_allocated = 0; /* Total space allocated. */
1037 if (control_used == control_allocated)
1038 controls = X2NREALLOC (controls, &control_allocated);
1039 p = &controls[control_used++];
1042 p->repeat_forever = false;
1043 p->lines_required = 0;
1048 /* Check if there is a numeric offset after a regular expression.
1049 STR is the entire command line argument.
1050 P is the control record for this regular expression.
1051 NUM is the numeric part of STR. */
1054 check_for_offset (struct control *p, const char *str, const char *num)
1056 if (xstrtoimax (num, NULL, 10, &p->offset, "") != LONGINT_OK)
1057 error (EXIT_FAILURE, 0, _("%s: integer expected after delimiter"), str);
1060 /* Given that the first character of command line arg STR is '{',
1061 make sure that the rest of the string is a valid repeat count
1062 and store its value in P.
1063 ARGNUM is the ARGV index of STR. */
1066 parse_repeat_count (int argnum, struct control *p, char *str)
1071 end = str + strlen (str) - 1;
1073 error (EXIT_FAILURE, 0, _("%s: `}' is required in repeat count"), str);
1076 if (str+1 == end-1 && *(str+1) == '*')
1077 p->repeat_forever = true;
1080 if (xstrtoumax (str + 1, NULL, 10, &val, "") != LONGINT_OK)
1082 error (EXIT_FAILURE, 0,
1083 _("%s}: integer required between `{' and `}'"),
1084 global_argv[argnum]);
1092 /* Extract the regular expression from STR and check for a numeric offset.
1093 STR should start with the regexp delimiter character.
1094 Return a new control record for the regular expression.
1095 ARGNUM is the ARGV index of STR.
1096 Unless IGNORE is true, mark these lines for output. */
1098 static struct control *
1099 extract_regexp (int argnum, bool ignore, char const *str)
1101 size_t len; /* Number of bytes in this regexp. */
1103 char const *closing_delim;
1107 closing_delim = strrchr (str + 1, delim);
1108 if (closing_delim == NULL)
1109 error (EXIT_FAILURE, 0,
1110 _("%s: closing delimiter `%c' missing"), str, delim);
1112 len = closing_delim - str - 1;
1113 p = new_control_record ();
1118 p->re_compiled.buffer = NULL;
1119 p->re_compiled.allocated = 0;
1120 p->re_compiled.fastmap = xmalloc (UCHAR_MAX + 1);
1121 p->re_compiled.translate = NULL;
1123 RE_SYNTAX_POSIX_BASIC & ~RE_CONTEXT_INVALID_DUP & ~RE_NO_EMPTY_RANGES;
1124 err = re_compile_pattern (str + 1, len, &p->re_compiled);
1127 error (0, 0, _("%s: invalid regular expression: %s"), str, err);
1131 if (closing_delim[1])
1132 check_for_offset (p, str, closing_delim + 1);
1137 /* Extract the break patterns from args START through ARGC - 1 of ARGV.
1138 After each pattern, check if the next argument is a repeat count. */
1141 parse_patterns (int argc, int start, char **argv)
1143 int i; /* Index into ARGV. */
1144 struct control *p; /* New control record created. */
1146 static uintmax_t last_val = 0;
1148 for (i = start; i < argc; i++)
1150 if (*argv[i] == '/' || *argv[i] == '%')
1152 p = extract_regexp (i, *argv[i] == '%', argv[i]);
1156 p = new_control_record ();
1159 if (xstrtoumax (argv[i], NULL, 10, &val, "") != LONGINT_OK)
1160 error (EXIT_FAILURE, 0, _("%s: invalid pattern"), argv[i]);
1162 error (EXIT_FAILURE, 0,
1163 _("%s: line number must be greater than zero"),
1167 char buf[INT_BUFSIZE_BOUND (uintmax_t)];
1168 error (EXIT_FAILURE, 0,
1169 _("line number %s is smaller than preceding line number, %s"),
1170 quote (argv[i]), umaxtostr (last_val, buf));
1173 if (val == last_val)
1175 _("warning: line number %s is the same as preceding line number"),
1180 p->lines_required = val;
1183 if (i + 1 < argc && *argv[i + 1] == '{')
1185 /* We have a repeat count. */
1187 parse_repeat_count (i, p, argv[i]);
1193 get_format_flags (char **format_ptr)
1195 unsigned int count = 0;
1197 for (; **format_ptr; (*format_ptr)++)
1199 switch (**format_ptr)
1210 count |= 2; /* Allow for 0x prefix preceding an `x' conversion. */
1221 get_format_width (char **format_ptr)
1223 unsigned long int val = 0;
1225 if (ISDIGIT (**format_ptr)
1226 && (xstrtoul (*format_ptr, format_ptr, 10, &val, NULL) != LONGINT_OK
1228 error (EXIT_FAILURE, 0, _("invalid format width"));
1230 /* Allow for enough octal digits to represent the value of UINT_MAX,
1231 even if the field width is less than that. */
1232 return MAX (val, (sizeof (unsigned int) * CHAR_BIT + 2) / 3);
1236 get_format_prec (char **format_ptr)
1238 if (**format_ptr != '.')
1242 if (! ISDIGIT (**format_ptr))
1246 unsigned long int val;
1247 if (xstrtoul (*format_ptr, format_ptr, 10, &val, NULL) != LONGINT_OK
1249 error (EXIT_FAILURE, 0, _("invalid format precision"));
1255 get_format_conv_type (char **format_ptr)
1257 unsigned char ch = *(*format_ptr)++;
1270 error (EXIT_FAILURE, 0, _("missing conversion specifier in suffix"));
1275 error (EXIT_FAILURE, 0,
1276 _("invalid conversion specifier in suffix: %c"), ch);
1278 error (EXIT_FAILURE, 0,
1279 _("invalid conversion specifier in suffix: \\%.3o"), ch);
1284 max_out (char *format)
1286 size_t out_count = 0;
1287 bool percent = false;
1291 if (*format++ != '%')
1293 else if (*format == '%')
1301 error (EXIT_FAILURE, 0,
1302 _("too many %% conversion specifications in suffix"));
1304 out_count += get_format_flags (&format);
1306 size_t width = get_format_width (&format);
1307 size_t prec = get_format_prec (&format);
1309 out_count += MAX (width, prec);
1311 get_format_conv_type (&format);
1316 error (EXIT_FAILURE, 0,
1317 _("missing %% conversion specification in suffix"));
1323 main (int argc, char **argv)
1326 unsigned long int val;
1328 initialize_main (&argc, &argv);
1329 set_program_name (argv[0]);
1330 setlocale (LC_ALL, "");
1331 bindtextdomain (PACKAGE, LOCALEDIR);
1332 textdomain (PACKAGE);
1334 atexit (close_stdout);
1339 suppress_count = false;
1340 remove_files = true;
1341 prefix = DEFAULT_PREFIX;
1343 while ((optc = getopt_long (argc, argv, "f:b:kn:sqz", longopts, NULL)) != -1)
1355 remove_files = false;
1359 if (xstrtoul (optarg, NULL, 10, &val, "") != LONGINT_OK
1361 error (EXIT_FAILURE, 0, _("%s: invalid number"), optarg);
1367 suppress_count = true;
1371 elide_empty_files = true;
1374 case_GETOPT_HELP_CHAR;
1376 case_GETOPT_VERSION_CHAR (PROGRAM_NAME, AUTHORS);
1379 usage (EXIT_FAILURE);
1382 if (argc - optind < 2)
1385 error (0, 0, _("missing operand"));
1387 error (0, 0, _("missing operand after %s"), quote (argv[argc - 1]));
1388 usage (EXIT_FAILURE);
1392 filename_space = xmalloc (strlen (prefix) + max_out (suffix) + 2);
1394 filename_space = xmalloc (strlen (prefix) + digits + 2);
1396 set_input_file (argv[optind++]);
1398 parse_patterns (argc, optind, argv);
1402 static int const sig[] =
1404 /* The usual suspects. */
1405 SIGALRM, SIGHUP, SIGINT, SIGPIPE, SIGQUIT, SIGTERM,
1422 enum { nsigs = sizeof sig / sizeof sig[0] };
1425 struct sigaction act;
1427 sigemptyset (&caught_signals);
1428 for (i = 0; i < nsigs; i++)
1430 sigaction (sig[i], NULL, &act);
1431 if (act.sa_handler != SIG_IGN)
1432 sigaddset (&caught_signals, sig[i]);
1435 act.sa_handler = interrupt_handler;
1436 act.sa_mask = caught_signals;
1439 for (i = 0; i < nsigs; i++)
1440 if (sigismember (&caught_signals, sig[i]))
1441 sigaction (sig[i], &act, NULL);
1443 for (i = 0; i < nsigs; i++)
1444 if (signal (sig[i], SIG_IGN) != SIG_IGN)
1446 signal (sig[i], interrupt_handler);
1447 siginterrupt (sig[i], 1);
1454 if (close (STDIN_FILENO) != 0)
1456 error (0, errno, _("read error"));
1460 exit (EXIT_SUCCESS);
1466 if (status != EXIT_SUCCESS)
1467 fprintf (stderr, _("Try `%s --help' for more information.\n"),
1472 Usage: %s [OPTION]... FILE PATTERN...\n\
1476 Output pieces of FILE separated by PATTERN(s) to files `xx00', `xx01', ...,\n\
1477 and output byte counts of each piece to standard output.\n\
1481 Mandatory arguments to long options are mandatory for short options too.\n\
1484 -b, --suffix-format=FORMAT use sprintf FORMAT instead of %02d\n\
1485 -f, --prefix=PREFIX use PREFIX instead of `xx'\n\
1486 -k, --keep-files do not remove output files on errors\n\
1489 -n, --digits=DIGITS use specified number of digits instead of 2\n\
1490 -s, --quiet, --silent do not print counts of output file sizes\n\
1491 -z, --elide-empty-files remove empty output files\n\
1493 fputs (HELP_OPTION_DESCRIPTION, stdout);
1494 fputs (VERSION_OPTION_DESCRIPTION, stdout);
1497 Read standard input if FILE is -. Each PATTERN may be:\n\
1501 INTEGER copy up to but not including specified line number\n\
1502 /REGEXP/[OFFSET] copy up to but not including a matching line\n\
1503 %REGEXP%[OFFSET] skip to, but not including a matching line\n\
1504 {INTEGER} repeat the previous pattern specified number of times\n\
1505 {*} repeat the previous pattern as many times as possible\n\
1507 A line OFFSET is a required `+' or `-' followed by a positive integer.\n\
1509 emit_bug_reporting_address ();