1 /* csplit - split a file into sections determined by context lines
2 Copyright (C) 91, 1995-2008 Free Software Foundation, Inc.
4 This program is free software: you can redistribute it and/or modify
5 it under the terms of the GNU General Public License as published by
6 the Free Software Foundation, either version 3 of the License, or
7 (at your option) any later version.
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU General Public License for more details.
14 You should have received a copy of the GNU General Public License
15 along with this program. If not, see <http://www.gnu.org/licenses/>. */
17 /* Written by Stuart Kemp, cpsrk@groper.jcu.edu.au.
18 Modified by David MacKenzie, djm@gnu.ai.mit.edu. */
23 #include <sys/types.h>
31 #include "fd-reopen.h"
34 #include "safe-read.h"
38 /* Use SA_NOCLDSTOP as a proxy for whether the sigaction machinery is present. */
41 # define SA_NOCLDSTOP 0
42 # define sigprocmask(How, Set, Oset) /* empty */
44 # if ! HAVE_SIGINTERRUPT
45 # define siginterrupt(sig, flag) /* empty */
49 /* The official name of this program (e.g., no `g' prefix). */
50 #define PROGRAM_NAME "csplit"
53 proper_name ("Stuart Kemp"), \
54 proper_name ("David MacKenzie")
56 /* Increment size of area for control records. */
59 /* The default prefix for output file names. */
60 #define DEFAULT_PREFIX "xx"
62 /* A compiled pattern arg. */
65 intmax_t offset; /* Offset from regexp to split at. */
66 uintmax_t lines_required; /* Number of lines required. */
67 uintmax_t repeat; /* Repeat count. */
68 int argnum; /* ARGV index. */
69 bool repeat_forever; /* True if `*' used as a repeat count. */
70 bool ignore; /* If true, produce no output (for regexp). */
71 bool regexpr; /* True if regular expression was used. */
72 struct re_pattern_buffer re_compiled; /* Compiled regular expression. */
75 /* Initial size of data area in buffers. */
76 #define START_SIZE 8191
78 /* Increment size for data area. */
79 #define INCR_SIZE 2048
81 /* Number of lines kept in each node in line list. */
85 /* Some small values to test the algorithms. */
86 # define START_SIZE 200
91 /* A string with a length count. */
98 /* Pointers to the beginnings of lines in the buffer area.
99 These structures are linked together if needed. */
102 size_t used; /* Number of offsets used in this struct. */
103 size_t insert_index; /* Next offset to use when inserting line. */
104 size_t retrieve_index; /* Next index to use when retrieving line. */
105 struct cstring starts[CTRL_SIZE]; /* Lines in the data area. */
106 struct line *next; /* Next in linked list. */
109 /* The structure to hold the input lines.
110 Contains a pointer to the data area and a list containing
111 pointers to the individual lines. */
114 size_t bytes_alloc; /* Size of the buffer area. */
115 size_t bytes_used; /* Bytes used in the buffer area. */
116 uintmax_t start_line; /* First line number in this buffer. */
117 uintmax_t first_available; /* First line that can be retrieved. */
118 size_t num_lines; /* Number of complete lines in this buffer. */
119 char *buffer; /* Data area. */
120 struct line *line_start; /* Head of list of pointers to lines. */
121 struct line *curr_line; /* The line start record currently in use. */
122 struct buffer_record *next;
125 static void close_output_file (void);
126 static void create_output_file (void);
127 static void delete_all_files (bool);
128 static void save_line_to_file (const struct cstring *line);
129 void usage (int status);
131 /* Start of buffer list. */
132 static struct buffer_record *head = NULL;
134 /* Partially read line. */
135 static char *hold_area = NULL;
137 /* Number of bytes in `hold_area'. */
138 static size_t hold_count = 0;
140 /* Number of the last line in the buffers. */
141 static uintmax_t last_line_number = 0;
143 /* Number of the line currently being examined. */
144 static uintmax_t current_line = 0;
146 /* If true, we have read EOF. */
147 static bool have_read_eof = false;
149 /* Name of output files. */
150 static char *volatile filename_space = NULL;
152 /* Prefix part of output file names. */
153 static char const *volatile prefix = NULL;
155 /* Suffix part of output file names. */
156 static char *volatile suffix = NULL;
158 /* Number of digits to use in output file names. */
159 static int volatile digits = 2;
161 /* Number of files created so far. */
162 static unsigned int volatile files_created = 0;
164 /* Number of bytes written to current file. */
165 static uintmax_t bytes_written;
167 /* Output file pointer. */
168 static FILE *output_stream = NULL;
170 /* Output file name. */
171 static char *output_filename = NULL;
173 /* Perhaps it would be cleaner to pass arg values instead of indexes. */
174 static char **global_argv;
176 /* If true, do not print the count of bytes in each output file. */
177 static bool suppress_count;
179 /* If true, remove output files on error. */
180 static bool volatile remove_files;
182 /* If true, remove all output files which have a zero length. */
183 static bool elide_empty_files;
185 /* The compiled pattern arguments, which determine how to split the input file. */
187 static struct control *controls;
189 /* Number of elements in `controls'. */
190 static size_t control_used;
192 /* The set of signals that are caught. */
193 static sigset_t caught_signals;
195 static struct option const longopts[] =
197 {"digits", required_argument, NULL, 'n'},
198 {"quiet", no_argument, NULL, 'q'},
199 {"silent", no_argument, NULL, 's'},
200 {"keep-files", no_argument, NULL, 'k'},
201 {"elide-empty-files", no_argument, NULL, 'z'},
202 {"prefix", required_argument, NULL, 'f'},
203 {"suffix-format", required_argument, NULL, 'b'},
204 {GETOPT_HELP_OPTION_DECL},
205 {GETOPT_VERSION_OPTION_DECL},
209 /* Optionally remove files created so far; then exit.
210 Called when an error detected. */
217 close_output_file ();
219 sigprocmask (SIG_BLOCK, &caught_signals, &oldset);
220 delete_all_files (false);
221 sigprocmask (SIG_SETMASK, &oldset, NULL);
224 static void cleanup_fatal (void) ATTRIBUTE_NORETURN;
235 error (0, 0, "%s", _("memory exhausted"));
/* Handler installed for the caught signals (see the sigaction/signal
   calls in main).  SIG is the signal being delivered.  */
240 interrupt_handler (int sig)
/* Ignore further deliveries of SIG while the output files are handled.  */
243 signal (sig, SIG_IGN);
/* Passing true marks the call as coming from a signal handler, which
   makes delete_all_files skip its error report when unlink fails.  */
245 delete_all_files (true);
/* Put the default action for SIG back in place.  */
247 signal (sig, SIG_DFL);
251 /* Keep track of NUM bytes of a partial line in buffer START.
252 These bytes will be retrieved later when another large buffer is read. */
255 save_to_hold_area (char *start, size_t num)
262 /* Read up to MAX_N_BYTES bytes from the input stream into DEST.
263 Return the number of bytes read. */
266 read_input (char *dest, size_t max_n_bytes)
270 if (max_n_bytes == 0)
273 bytes_read = safe_read (STDIN_FILENO, dest, max_n_bytes);
276 have_read_eof = true;
278 if (bytes_read == SAFE_READ_ERROR)
280 error (0, errno, _("read error"));
287 /* Initialize existing line record P. */
290 clear_line_control (struct line *p)
294 p->retrieve_index = 0;
297 /* Return a new, initialized line record. */
300 new_line_control (void)
302 struct line *p = xmalloc (sizeof *p);
305 clear_line_control (p);
310 /* Record LINE_START, which is the address of the start of a line
311 of length LINE_LEN in the large buffer, in the lines buffer of B. */
314 keep_new_line (struct buffer_record *b, char *line_start, size_t line_len)
318 /* If there is no existing area to keep line info, get some. */
319 if (b->line_start == NULL)
320 b->line_start = b->curr_line = new_line_control ();
322 /* If existing area for lines is full, get more. */
323 if (b->curr_line->used == CTRL_SIZE)
325 b->curr_line->next = new_line_control ();
326 b->curr_line = b->curr_line->next;
331 /* Record the start of the line, and update counters. */
332 l->starts[l->insert_index].str = line_start;
333 l->starts[l->insert_index].len = line_len;
338 /* Scan the buffer in B for newline characters
339 and record the line start locations and lengths in B.
340 Return the number of lines found in this buffer.
342 There may be an incomplete line at the end of the buffer;
   it is either recorded as a line of its own or a copy of it (made
   with xmemdup) is passed to save_to_hold_area for use when
344 the next buffer is filled.
   NOTE(review): the condition choosing between those two cases is not
   visible in this extract.  */
347 record_line_starts (struct buffer_record *b)
349 char *line_start; /* Start of current line. */
350 char *line_end; /* End of each line found. */
351 size_t bytes_left; /* Length of incomplete last line. */
352 size_t lines; /* Number of lines found. */
353 size_t line_length; /* Length of each line found. */
355 if (b->bytes_used == 0)
359 line_start = b->buffer;
360 bytes_left = b->bytes_used;
364 line_end = memchr (line_start, '\n', bytes_left);
365 if (line_end == NULL)
/* The recorded length includes the terminating newline.  */
367 line_length = line_end - line_start + 1;
368 keep_new_line (b, line_start, line_length);
369 bytes_left -= line_length;
370 line_start = line_end + 1;
374 /* Check for an incomplete last line. */
379 keep_new_line (b, line_start, bytes_left);
/* The hold area receives a fresh copy, not a pointer into B's data.  */
383 save_to_hold_area (xmemdup (line_start, bytes_left), bytes_left);
/* Number this buffer's lines right after the last line seen so far.  */
386 b->num_lines = lines;
387 b->first_available = b->start_line = last_line_number + 1;
388 last_line_number += lines;
393 /* Return a new buffer with room to store SIZE bytes, plus
394 an extra byte for safety. */
396 static struct buffer_record *
397 create_new_buffer (size_t size)
399 struct buffer_record *new_buffer = xmalloc (sizeof *new_buffer);
401 new_buffer->buffer = xmalloc (size + 1);
403 new_buffer->bytes_alloc = size;
404 new_buffer->line_start = new_buffer->curr_line = NULL;
409 /* Return a new buffer of at least MIN_SIZE bytes.  The requested size
   starts at START_SIZE and, when that is too small, grows by the
   smallest multiple of INCR_SIZE that makes it reach MIN_SIZE.
   NOTE(review): the statements visible here always call
   create_new_buffer; no reuse of a previously freed buffer is visible,
   so the older wording about using a free buffer needs confirming.  */
412 static struct buffer_record *
413 get_new_buffer (size_t min_size)
415 struct buffer_record *new_buffer; /* Buffer to return. */
416 size_t alloc_size; /* Actual size that will be requested. */
418 alloc_size = START_SIZE;
419 if (alloc_size < min_size)
/* Round the shortfall (min_size - alloc_size) up to a multiple of
   INCR_SIZE before adding it.  */
421 size_t s = min_size - alloc_size + INCR_SIZE - 1;
422 alloc_size += s - s % INCR_SIZE;
425 new_buffer = create_new_buffer (alloc_size);
/* The buffer starts out empty and unlinked; its first line number
   follows the last line recorded so far.  */
427 new_buffer->num_lines = 0;
428 new_buffer->bytes_used = 0;
429 new_buffer->start_line = new_buffer->first_available = last_line_number + 1;
430 new_buffer->next = NULL;
436 free_buffer (struct buffer_record *buf)
442 /* Append buffer BUF to the linked list of buffers that contain
443 some data yet to be processed. */
446 save_buffer (struct buffer_record *buf)
448 struct buffer_record *p;
451 buf->curr_line = buf->line_start;
457 for (p = head; p->next; p = p->next)
463 /* Fill a buffer of input.
465 Set the initial size of the buffer to a default.
466 Fill the buffer (from the hold area and input stream)
467 and find the individual lines.
468 If no lines are found (the buffer is too small to hold the next line),
469 release the current buffer (whose contents would have been put in the
470 hold area) and repeat the process with another large buffer until at least
471 one entire line has been read.
473 Return true if a new buffer was obtained, otherwise false
474 (in which case end-of-file must have been encountered). */
479 struct buffer_record *b;
480 size_t bytes_wanted = START_SIZE; /* Minimum buffer size. */
481 size_t bytes_avail; /* Size of new buffer created. */
482 size_t lines_found; /* Number of lines in this new buffer. */
483 char *p; /* Place to load into buffer. */
488 /* We must make the buffer at least as large as the amount of data
489 in the partial line left over from the last call. */
490 if (bytes_wanted < hold_count)
491 bytes_wanted = hold_count;
495 b = get_new_buffer (bytes_wanted);
496 bytes_avail = b->bytes_alloc; /* Size of buffer returned. */
499 /* First check the `holding' area for a partial line. */
/* The held bytes are copied in first and count against the space left
   for newly read input.  */
502 memcpy (p, hold_area, hold_count);
504 b->bytes_used += hold_count;
505 bytes_avail -= hold_count;
/* Top the buffer up from the input, then index the lines it holds.  */
509 b->bytes_used += read_input (p, bytes_avail);
511 lines_found = record_line_starts (b);
/* Done once a line was recorded or end-of-file has been seen.  */
515 if (lines_found || have_read_eof)
/* Otherwise ask for twice the space on the next attempt.
   NOTE(review): xalloc_oversized presumably rejects a doubling that
   would overflow -- confirm against its definition.  */
518 if (xalloc_oversized (2, b->bytes_alloc))
520 bytes_wanted = 2 * b->bytes_alloc;
/* The result is true only when at least one line was recorded.  */
530 return lines_found != 0;
533 /* Return the line number of the first line that has not yet been retrieved. */
536 get_first_line_in_buffer (void)
538 if (head == NULL && !load_buffer ())
539 error (EXIT_FAILURE, errno, _("input disappeared"));
541 return head->first_available;
544 /* Return a pointer to the logical first line in the buffer and make the
545 next line the logical first line.
546 Return NULL if there is no more input. */
548 static struct cstring *
551 /* If non-NULL, this is the buffer for which the previous call
552 returned the final line. So now, presuming that line has been
553 processed, we can free the buffer and reset this pointer. */
554 static struct buffer_record *prev_buf = NULL;
556 struct cstring *line; /* Return value. */
557 struct line *l; /* For convenience. */
561 free_buffer (prev_buf);
565 if (head == NULL && !load_buffer ())
568 if (current_line < head->first_available)
569 current_line = head->first_available;
571 ++(head->first_available);
575 line = &l->starts[l->retrieve_index];
577 /* Advance index to next line. */
578 if (++l->retrieve_index == l->used)
580 /* Go on to the next line record. */
581 head->curr_line = l->next;
582 if (head->curr_line == NULL || head->curr_line->used == 0)
584 /* Go on to the next data block,
585 but first record the current one so we can free it
586 once the line we're returning has been processed. */
595 /* Search the buffers for line LINENUM, reading more input if necessary.
596 Return a pointer to the line, or NULL if it is not found in the file.
   The returned pointer refers to an entry stored inside a buffer's line
   record, not to a copy.  */
598 static struct cstring *
599 find_line (uintmax_t linenum)
601 struct buffer_record *b;
/* Make sure at least one buffer is loaded before searching.  */
603 if (head == NULL && !load_buffer ())
/* LINENUM lies before the first line of the oldest buffer still held.  */
606 if (linenum < head->start_line)
611 if (linenum < b->start_line + b->num_lines)
613 /* The line is in this buffer. */
615 size_t offset; /* How far into the buffer the line is. */
618 offset = linenum - b->start_line;
619 /* Find the control record. */
/* Each line record has CTRL_SIZE slots, so OFFSET must end up below
   CTRL_SIZE to index a single record.
   NOTE(review): the loop body is not visible in this extract.  */
620 while (offset >= CTRL_SIZE)
625 return &l->starts[offset];
/* Not in this buffer: move on, loading more input when the list ends.  */
627 if (b->next == NULL && !load_buffer ())
629 b = b->next; /* Try the next data block. */
633 /* Return true if no more lines are available for input, i.e. the line after CURRENT_LINE cannot be found. */
638 return find_line (current_line + 1) == NULL;
641 /* Open NAME as standard input. */
644 set_input_file (const char *name)
646 if (! STREQ (name, "-") && fd_reopen (STDIN_FILENO, name, O_RDONLY, 0) < 0)
647 error (EXIT_FAILURE, errno, _("cannot open %s for reading"), quote (name));
650 /* Write all lines from the beginning of the buffer up to, but
651 not including, line LAST_LINE, to the current output file.
652 If IGNORE is true, do not output lines selected here.
653 ARGNUM is the index in ARGV of the current pattern. */
656 write_to_file (uintmax_t last_line, bool ignore, int argnum)
658 struct cstring *line;
659 uintmax_t first_line; /* First available input line. */
660 uintmax_t lines; /* Number of lines to output. */
663 first_line = get_first_line_in_buffer ();
665 if (first_line > last_line)
667 error (0, 0, _("%s: line number out of range"), global_argv[argnum]);
671 lines = last_line - first_line;
673 for (i = 0; i < lines; i++)
675 line = remove_line ();
678 error (0, 0, _("%s: line number out of range"), global_argv[argnum]);
682 save_line_to_file (line);
686 /* Output any lines left after all regexps have been processed. */
689 dump_rest_of_file (void)
691 struct cstring *line;
693 while ((line = remove_line ()) != NULL)
694 save_line_to_file (line);
697 /* Handle an attempt to read beyond EOF under the control of record P,
698 on iteration REPETITION if nonzero. */
700 static void handle_line_error (const struct control *, uintmax_t)
703 handle_line_error (const struct control *p, uintmax_t repetition)
705 char buf[INT_BUFSIZE_BOUND (uintmax_t)];
707 fprintf (stderr, _("%s: %s: line number out of range"),
708 program_name, quote (umaxtostr (p->lines_required, buf)));
710 fprintf (stderr, _(" on repetition %s\n"), umaxtostr (repetition, buf));
712 fprintf (stderr, "\n");
717 /* Determine the line number that marks the end of this file,
718 then get those lines and save them to the output file.
719 P is the control record.
720 REPETITION is the repetition number; the caller's loop starts it at 0. */
723 process_line_count (const struct control *p, uintmax_t repetition)
/* Despite its name this is the first line number that is NOT saved:
   the loop below stops before reaching it.  REPETITION is zero-based,
   hence the + 1.
   NOTE(review): the product is computed in uintmax_t and wraps around
   for very large operands.  */
726 uintmax_t last_line_to_save = p->lines_required * (repetition + 1);
727 struct cstring *line;
729 create_output_file ();
731 linenum = get_first_line_in_buffer ();
/* One line is consumed per iteration, from the first buffered line up
   to, but not including, LAST_LINE_TO_SAVE.  */
733 while (linenum++ < last_line_to_save)
735 line = remove_line ();
/* NOTE(review): the test guarding this call is not visible in this
   extract; presumably it fires when remove_line returned NULL.  */
737 handle_line_error (p, repetition);
738 save_line_to_file (line);
741 close_output_file ();
743 /* Ensure that the line number specified is not 1 greater than
744 the number of lines in the file. */
745 if (no_more_lines ())
746 handle_line_error (p, repetition);
749 static void regexp_error (struct control *, uintmax_t, bool) ATTRIBUTE_NORETURN;
751 regexp_error (struct control *p, uintmax_t repetition, bool ignore)
753 fprintf (stderr, _("%s: %s: match not found"),
754 program_name, quote (global_argv[p->argnum]));
758 char buf[INT_BUFSIZE_BOUND (uintmax_t)];
759 fprintf (stderr, _(" on repetition %s\n"), umaxtostr (repetition, buf));
762 fprintf (stderr, "\n");
766 dump_rest_of_file ();
767 close_output_file ();
772 /* Read the input until a line matches the regexp in P, outputting
773 it unless P->IGNORE is true.
774 REPETITION is this repeat-count; 0 means the first time. */
777 process_regexp (struct control *p, uintmax_t repetition)
779 struct cstring *line; /* From input file. */
780 size_t line_len; /* To make "$" in regexps work. */
781 uintmax_t break_line; /* First line number of next file. */
782 bool ignore = p->ignore; /* If true, skip this section. */
786 create_output_file ();
788 /* If there is no offset for the regular expression, or
789 it is positive, then it is not necessary to buffer the lines. */
795 line = find_line (++current_line);
798 if (p->repeat_forever)
802 dump_rest_of_file ();
803 close_output_file ();
808 regexp_error (p, repetition, ignore);
810 line_len = line->len;
811 if (line->str[line_len - 1] == '\n')
813 ret = re_search (&p->re_compiled, line->str, line_len,
817 error (0, 0, _("error in regular expression search"));
822 line = remove_line ();
824 save_line_to_file (line);
832 /* Buffer the lines. */
835 line = find_line (++current_line);
838 if (p->repeat_forever)
842 dump_rest_of_file ();
843 close_output_file ();
848 regexp_error (p, repetition, ignore);
850 line_len = line->len;
851 if (line->str[line_len - 1] == '\n')
853 ret = re_search (&p->re_compiled, line->str, line_len,
857 error (0, 0, _("error in regular expression search"));
865 /* Account for any offset from this regexp. */
866 break_line = current_line + p->offset;
868 write_to_file (break_line, ignore, p->argnum);
871 close_output_file ();
874 current_line = break_line;
877 /* Split the input file according to the control records we have built. */
884 for (i = 0; i < control_used; i++)
887 if (controls[i].regexpr)
889 for (j = 0; (controls[i].repeat_forever
890 || j <= controls[i].repeat); j++)
891 process_regexp (&controls[i], j);
895 for (j = 0; (controls[i].repeat_forever
896 || j <= controls[i].repeat); j++)
897 process_line_count (&controls[i], j);
901 create_output_file ();
902 dump_rest_of_file ();
903 close_output_file ();
906 /* Return the name of output file number NUM.
   The name is built in, and returned as, the shared FILENAME_SPACE
   buffer, so every call overwrites the result of the previous one;
   this includes OUTPUT_FILENAME, which create_output_file sets from
   this function's return value.
908 This function is called from a signal handler, so it should invoke
909 only reentrant functions that are async-signal-safe. POSIX does
910 not guarantee this for the functions called below, but we don't
911 know of any hosts where this implementation isn't safe. */
914 make_filename (unsigned int num)
/* The name always begins with PREFIX.  */
916 strcpy (filename_space, prefix);
/* The numeric part is appended using either SUFFIX as the format or a
   zero-padded field of DIGITS digits.
   NOTE(review): the test selecting between the two calls is not
   visible in this extract; presumably it checks whether SUFFIX is set.  */
918 sprintf (filename_space + strlen (prefix), suffix, num);
920 sprintf (filename_space + strlen (prefix), "%0*u", digits, num);
921 return filename_space;
924 /* Create the next output file.  Its name is derived from the number
   of files created so far, and OUTPUT_STREAM is set to the result of
   opening it for writing.  */
927 create_output_file (void)
933 output_filename = make_filename (files_created);
935 /* Create the output file in a critical section, to avoid races. */
/* The caught signals stay blocked from before the fopen until after
   FILES_CREATED is updated; the signal handler's delete_all_files
   loops over that counter.  */
936 sigprocmask (SIG_BLOCK, &caught_signals, &oldset);
937 output_stream = fopen (output_filename, "w");
938 fopen_ok = (output_stream != NULL);
/* Only a successful fopen counts as a created file.  */
940 files_created += fopen_ok;
941 sigprocmask (SIG_SETMASK, &oldset, NULL);
945 error (0, fopen_errno, "%s", output_filename);
951 /* If requested, delete all the files we have created. This function
952 must be called only from critical sections.
   IN_SIGNAL_HANDLER, when true, suppresses the error report for a
   failed unlink.  */
955 delete_all_files (bool in_signal_handler)
/* Visit every file number below FILES_CREATED.  */
962 for (i = 0; i < files_created; i++)
/* make_filename rebuilds the name of file I in its shared buffer.  */
964 const char *name = make_filename (i);
965 if (unlink (name) != 0 && !in_signal_handler)
966 error (0, errno, "%s", name);
972 /* Close the current output file and print the count
973 of bytes written to it (BYTES_WRITTEN), unless SUPPRESS_COUNT is set.
   An output file that received no bytes is removed instead when
   ELIDE_EMPTY_FILES is set.  */
976 close_output_file (void)
/* A write error recorded on the stream is reported here, at close.  */
980 if (ferror (output_stream))
982 error (0, 0, _("write error for %s"), quote (output_filename));
983 output_stream = NULL;
/* fclose can fail as well; report it with errno.  */
986 if (fclose (output_stream) != 0)
988 error (0, errno, "%s", output_filename);
989 output_stream = NULL;
992 if (bytes_written == 0 && elide_empty_files)
998 /* Remove the output file in a critical section, to avoid races. */
999 sigprocmask (SIG_BLOCK, &caught_signals, &oldset);
1000 unlink_ok = (unlink (output_filename) == 0);
/* errno is saved right after unlink, before the calls that follow.  */
1001 unlink_errno = errno;
/* A successfully removed file no longer counts as created.  */
1002 files_created -= unlink_ok;
1003 sigprocmask (SIG_SETMASK, &oldset, NULL);
1006 error (0, unlink_errno, "%s", output_filename);
1010 if (!suppress_count)
1012 char buf[INT_BUFSIZE_BOUND (uintmax_t)];
1013 fprintf (stdout, "%s\n", umaxtostr (bytes_written, buf));
1016 output_stream = NULL;
1020 /* Save line LINE to the output file and
1021 increment the byte count for the current file.
   The return value of fwrite is not examined here; a failed write is
   noticed later through the ferror check in close_output_file.  */
1024 save_line_to_file (const struct cstring *line)
1026 fwrite (line->str, sizeof (char), line->len, output_stream);
/* NOTE(review): the count advances by the full line length even if
   fwrite wrote fewer bytes.  */
1027 bytes_written += line->len;
1030 /* Return a new, initialized control record. */
1032 static struct control *
1033 new_control_record (void)
1035 static size_t control_allocated = 0; /* Total space allocated. */
1038 if (control_used == control_allocated)
1039 controls = X2NREALLOC (controls, &control_allocated);
1040 p = &controls[control_used++];
1043 p->repeat_forever = false;
1044 p->lines_required = 0;
1049 /* Check if there is a numeric offset after a regular expression.
1050 STR is the entire command line argument.
1051 P is the control record for this regular expression.
1052 NUM is the numeric part of STR. */
1055 check_for_offset (struct control *p, const char *str, const char *num)
1057 if (xstrtoimax (num, NULL, 10, &p->offset, "") != LONGINT_OK)
1058 error (EXIT_FAILURE, 0, _("%s: integer expected after delimiter"), str);
1061 /* Given that the first character of command line arg STR is '{',
1062 make sure that the rest of the string is a valid repeat count
1063 and store its value in P.
1064 ARGNUM is the ARGV index of STR.
   A lone `*' between the braces sets P->repeat_forever; anything else
   must parse as an unsigned decimal integer.  Either failure exits
   with EXIT_FAILURE.  */
1067 parse_repeat_count (int argnum, struct control *p, char *str)
/* END points at the last character of STR.  */
1072 end = str + strlen (str) - 1;
1074 error (EXIT_FAILURE, 0, _("%s: `}' is required in repeat count"), str);
/* True when exactly one character sits between the braces and it is `*'.  */
1077 if (str+1 == end-1 && *(str+1) == '*')
1078 p->repeat_forever = true;
/* NOTE(review): the "%s}" in the message below suggests the closing
   brace has been cut off the string at this point by a statement not
   visible in this extract -- confirm.  */
1081 if (xstrtoumax (str + 1, NULL, 10, &val, "") != LONGINT_OK)
1083 error (EXIT_FAILURE, 0,
1084 _("%s}: integer required between `{' and `}'"),
1085 global_argv[argnum]);
1093 /* Extract the regular expression from STR and check for a numeric offset.
1094 STR should start with the regexp delimiter character.
1095 Return a new control record for the regular expression.
1096 ARGNUM is the ARGV index of STR.
1097 Unless IGNORE is true, mark these lines for output. */
1099 static struct control *
1100 extract_regexp (int argnum, bool ignore, char const *str)
1102 size_t len; /* Number of bytes in this regexp. */
1104 char const *closing_delim;
1108 closing_delim = strrchr (str + 1, delim);
1109 if (closing_delim == NULL)
1110 error (EXIT_FAILURE, 0,
1111 _("%s: closing delimiter `%c' missing"), str, delim);
1113 len = closing_delim - str - 1;
1114 p = new_control_record ();
1119 p->re_compiled.buffer = NULL;
1120 p->re_compiled.allocated = 0;
1121 p->re_compiled.fastmap = xmalloc (UCHAR_MAX + 1);
1122 p->re_compiled.translate = NULL;
1124 RE_SYNTAX_POSIX_BASIC & ~RE_CONTEXT_INVALID_DUP & ~RE_NO_EMPTY_RANGES;
1125 err = re_compile_pattern (str + 1, len, &p->re_compiled);
1128 error (0, 0, _("%s: invalid regular expression: %s"), str, err);
1132 if (closing_delim[1])
1133 check_for_offset (p, str, closing_delim + 1);
1138 /* Extract the break patterns from args START through ARGC - 1 of ARGV.
1139 After each pattern, check if the next argument is a repeat count. */
1142 parse_patterns (int argc, int start, char **argv)
1144 int i; /* Index into ARGV. */
1145 struct control *p; /* New control record created. */
1147 static uintmax_t last_val = 0;
1149 for (i = start; i < argc; i++)
1151 if (*argv[i] == '/' || *argv[i] == '%')
1153 p = extract_regexp (i, *argv[i] == '%', argv[i]);
1157 p = new_control_record ();
1160 if (xstrtoumax (argv[i], NULL, 10, &val, "") != LONGINT_OK)
1161 error (EXIT_FAILURE, 0, _("%s: invalid pattern"), argv[i]);
1163 error (EXIT_FAILURE, 0,
1164 _("%s: line number must be greater than zero"),
1168 char buf[INT_BUFSIZE_BOUND (uintmax_t)];
1169 error (EXIT_FAILURE, 0,
1170 _("line number %s is smaller than preceding line number, %s"),
1171 quote (argv[i]), umaxtostr (last_val, buf));
1174 if (val == last_val)
1176 _("warning: line number %s is the same as preceding line number"),
1181 p->lines_required = val;
1184 if (i + 1 < argc && *argv[i + 1] == '{')
1186 /* We have a repeat count. */
1188 parse_repeat_count (i, p, argv[i]);
1194 get_format_flags (char **format_ptr)
1196 unsigned int count = 0;
1198 for (; **format_ptr; (*format_ptr)++)
1200 switch (**format_ptr)
1211 count |= 2; /* Allow for 0x prefix preceding an `x' conversion. */
/* Parse an optional decimal field width at *FORMAT_PTR.  When a digit is
   present, xstrtoul is given FORMAT_PTR as its end pointer, so
   *FORMAT_PTR is moved by the conversion; with no digit the width is 0.
   An unparsable width exits with EXIT_FAILURE.  The value returned is
   never smaller than the number of octal digits needed for an
   unsigned int.  */
1222 get_format_width (char **format_ptr)
1224 unsigned long int val = 0;
1226 if (ISDIGIT (**format_ptr)
1227 && (xstrtoul (*format_ptr, format_ptr, 10, &val, NULL) != LONGINT_OK
1229 error (EXIT_FAILURE, 0, _("invalid format width"));
1231 /* Allow for enough octal digits to represent the value of UINT_MAX,
1232 even if the field width is less than that. */
/* (bits + 2) / 3 is the bit count divided by three, rounded up.  */
1233 return MAX (val, (sizeof (unsigned int) * CHAR_BIT + 2) / 3);
1237 get_format_prec (char **format_ptr)
1239 if (**format_ptr != '.')
1243 if (! ISDIGIT (**format_ptr))
1247 unsigned long int val;
1248 if (xstrtoul (*format_ptr, format_ptr, 10, &val, NULL) != LONGINT_OK
1250 error (EXIT_FAILURE, 0, _("invalid format precision"));
1256 get_format_conv_type (char **format_ptr)
1258 unsigned char ch = *(*format_ptr)++;
1271 error (EXIT_FAILURE, 0, _("missing conversion specifier in suffix"));
1276 error (EXIT_FAILURE, 0,
1277 _("invalid conversion specifier in suffix: %c"), ch);
1279 error (EXIT_FAILURE, 0,
1280 _("invalid conversion specifier in suffix: \\%.3o"), ch);
1285 max_out (char *format)
1287 size_t out_count = 0;
1288 bool percent = false;
1292 if (*format++ != '%')
1294 else if (*format == '%')
1302 error (EXIT_FAILURE, 0,
1303 _("too many %% conversion specifications in suffix"));
1305 out_count += get_format_flags (&format);
1307 size_t width = get_format_width (&format);
1308 size_t prec = get_format_prec (&format);
1310 out_count += MAX (width, prec);
1312 get_format_conv_type (&format);
1317 error (EXIT_FAILURE, 0,
1318 _("missing %% conversion specification in suffix"));
1324 main (int argc, char **argv)
1327 unsigned long int val;
1329 initialize_main (&argc, &argv);
1330 set_program_name (argv[0]);
1331 setlocale (LC_ALL, "");
1332 bindtextdomain (PACKAGE, LOCALEDIR);
1333 textdomain (PACKAGE);
1335 atexit (close_stdout);
1340 suppress_count = false;
1341 remove_files = true;
1342 prefix = DEFAULT_PREFIX;
1344 while ((optc = getopt_long (argc, argv, "f:b:kn:sqz", longopts, NULL)) != -1)
1356 remove_files = false;
1360 if (xstrtoul (optarg, NULL, 10, &val, "") != LONGINT_OK
1362 error (EXIT_FAILURE, 0, _("%s: invalid number"), optarg);
1368 suppress_count = true;
1372 elide_empty_files = true;
1375 case_GETOPT_HELP_CHAR;
1377 case_GETOPT_VERSION_CHAR (PROGRAM_NAME, AUTHORS);
1380 usage (EXIT_FAILURE);
1383 if (argc - optind < 2)
1386 error (0, 0, _("missing operand"));
1388 error (0, 0, _("missing operand after %s"), quote (argv[argc - 1]));
1389 usage (EXIT_FAILURE);
1393 filename_space = xmalloc (strlen (prefix) + max_out (suffix) + 2);
1395 filename_space = xmalloc (strlen (prefix) + digits + 2);
1397 set_input_file (argv[optind++]);
1399 parse_patterns (argc, optind, argv);
1403 static int const sig[] =
1405 /* The usual suspects. */
1406 SIGALRM, SIGHUP, SIGINT, SIGPIPE, SIGQUIT, SIGTERM,
1423 enum { nsigs = sizeof sig / sizeof sig[0] };
1426 struct sigaction act;
1428 sigemptyset (&caught_signals);
1429 for (i = 0; i < nsigs; i++)
1431 sigaction (sig[i], NULL, &act);
1432 if (act.sa_handler != SIG_IGN)
1433 sigaddset (&caught_signals, sig[i]);
1436 act.sa_handler = interrupt_handler;
1437 act.sa_mask = caught_signals;
1440 for (i = 0; i < nsigs; i++)
1441 if (sigismember (&caught_signals, sig[i]))
1442 sigaction (sig[i], &act, NULL);
1444 for (i = 0; i < nsigs; i++)
1445 if (signal (sig[i], SIG_IGN) != SIG_IGN)
1447 signal (sig[i], interrupt_handler);
1448 siginterrupt (sig[i], 1);
1455 if (close (STDIN_FILENO) != 0)
1457 error (0, errno, _("read error"));
1461 exit (EXIT_SUCCESS);
1467 if (status != EXIT_SUCCESS)
1468 fprintf (stderr, _("Try `%s --help' for more information.\n"),
1473 Usage: %s [OPTION]... FILE PATTERN...\n\
1477 Output pieces of FILE separated by PATTERN(s) to files `xx00', `xx01', ...,\n\
1478 and output byte counts of each piece to standard output.\n\
1482 Mandatory arguments to long options are mandatory for short options too.\n\
1485 -b, --suffix-format=FORMAT use sprintf FORMAT instead of %02d\n\
1486 -f, --prefix=PREFIX use PREFIX instead of `xx'\n\
1487 -k, --keep-files do not remove output files on errors\n\
1490 -n, --digits=DIGITS use specified number of digits instead of 2\n\
1491 -s, --quiet, --silent do not print counts of output file sizes\n\
1492 -z, --elide-empty-files remove empty output files\n\
1494 fputs (HELP_OPTION_DESCRIPTION, stdout);
1495 fputs (VERSION_OPTION_DESCRIPTION, stdout);
1498 Read standard input if FILE is -. Each PATTERN may be:\n\
1502 INTEGER copy up to but not including specified line number\n\
1503 /REGEXP/[OFFSET] copy up to but not including a matching line\n\
1504 %REGEXP%[OFFSET] skip to, but not including a matching line\n\
1505 {INTEGER} repeat the previous pattern specified number of times\n\
1506 {*} repeat the previous pattern as many times as possible\n\
1508 A line OFFSET is a required `+' or `-' followed by a positive integer.\n\
1510 emit_bug_reporting_address ();