1 /* csplit - split a file into sections determined by context lines
2 Copyright (C) 91, 1995-2005 Free Software Foundation, Inc.
4 This program is free software; you can redistribute it and/or modify
5 it under the terms of the GNU General Public License as published by
6 the Free Software Foundation; either version 2, or (at your option)
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU General Public License for more details.
14 You should have received a copy of the GNU General Public License
15 along with this program; if not, write to the Free Software Foundation,
16 Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. */
18 /* Written by Stuart Kemp, cpsrk@groper.jcu.edu.au.
19 Modified by David MacKenzie, djm@gnu.ai.mit.edu. */
24 #include <sys/types.h>
32 #include "fd-reopen.h"
35 #include "safe-read.h"
39 /* Use SA_NOCLDSTOP as a proxy for whether the sigaction machinery is
42 # define SA_NOCLDSTOP 0
43 # define sigprocmask(How, Set, Oset) /* empty */
45 # if ! HAVE_SIGINTERRUPT
46 # define siginterrupt(sig, flag) /* empty */
50 /* The official name of this program (e.g., no `g' prefix). */
51 #define PROGRAM_NAME "csplit"
53 #define AUTHORS "Stuart Kemp", "David MacKenzie"
55 /* Increment size of area for control records. */
58 /* The default prefix for output file names. */
59 #define DEFAULT_PREFIX "xx"
61 /* A compiled pattern arg. */
64 char *regexpr; /* Non-compiled regular expression. */
65 struct re_pattern_buffer re_compiled; /* Compiled regular expression. */
66 intmax_t offset; /* Offset from regexp to split at. */
67 uintmax_t lines_required; /* Number of lines required. */
68 uintmax_t repeat; /* Repeat count. */
69 int argnum; /* ARGV index. */
70 bool repeat_forever; /* True if `*' used as a repeat count. */
71 bool ignore; /* If true, produce no output (for regexp). */
74 /* Initial size of data area in buffers. */
75 #define START_SIZE 8191
77 /* Increment size for data area. */
78 #define INCR_SIZE 2048
80 /* Number of lines kept in each node in line list. */
84 /* Some small values to test the algorithms. */
85 # define START_SIZE 200
90 /* A string with a length count. */
97 /* Pointers to the beginnings of lines in the buffer area.
98 These structures are linked together if needed. */
101 size_t used; /* Number of offsets used in this struct. */
102 size_t insert_index; /* Next offset to use when inserting line. */
103 size_t retrieve_index; /* Next index to use when retrieving line. */
104 struct cstring starts[CTRL_SIZE]; /* Lines in the data area. */
105 struct line *next; /* Next in linked list. */
108 /* The structure to hold the input lines.
109 Contains a pointer to the data area and a list containing
110 pointers to the individual lines. */
113 size_t bytes_alloc; /* Size of the buffer area. */
114 size_t bytes_used; /* Bytes used in the buffer area. */
115 uintmax_t start_line; /* First line number in this buffer. */
116 uintmax_t first_available; /* First line that can be retrieved. */
117 size_t num_lines; /* Number of complete lines in this buffer. */
118 char *buffer; /* Data area. */
119 struct line *line_start; /* Head of list of pointers to lines. */
120 struct line *curr_line; /* The line start record currently in use. */
121 struct buffer_record *next;
124 static void close_output_file (void);
125 static void create_output_file (void);
126 static void delete_all_files (bool);
127 static void save_line_to_file (const struct cstring *line);
128 void usage (int status);
130 /* The name this program was run with. */
133 /* Start of buffer list. */
134 static struct buffer_record *head = NULL;
136 /* Partially read line. */
137 static char *hold_area = NULL;
139 /* Number of bytes in `hold_area'. */
140 static size_t hold_count = 0;
142 /* Number of the last line in the buffers. */
143 static uintmax_t last_line_number = 0;
145 /* Number of the line currently being examined. */
146 static uintmax_t current_line = 0;
148 /* If true, we have read EOF. */
149 static bool have_read_eof = false;
151 /* Name of output files. */
152 static char * volatile filename_space = NULL;
154 /* Prefix part of output file names. */
155 static char * volatile prefix = NULL;
157 /* Suffix part of output file names. */
158 static char * volatile suffix = NULL;
160 /* Number of digits to use in output file names. */
161 static int volatile digits = 2;
163 /* Number of files created so far. */
164 static unsigned int volatile files_created = 0;
166 /* Number of bytes written to current file. */
167 static uintmax_t bytes_written;
169 /* Output file pointer. */
170 static FILE *output_stream = NULL;
172 /* Output file name. */
173 static char *output_filename = NULL;
175 /* Perhaps it would be cleaner to pass arg values instead of indexes. */
176 static char **global_argv;
178 /* If true, do not print the count of bytes in each output file. */
179 static bool suppress_count;
181 /* If true, remove output files on error. */
182 static bool volatile remove_files;
184 /* If true, remove all output files which have a zero length. */
185 static bool elide_empty_files;
187 /* The compiled pattern arguments, which determine how to split
189 static struct control *controls;
191 /* Number of elements in `controls'. */
192 static size_t control_used;
194 /* The set of signals that are caught. */
195 static sigset_t caught_signals;
197 static struct option const longopts[] =
199 {"digits", required_argument, NULL, 'n'},
200 {"quiet", no_argument, NULL, 'q'},
201 {"silent", no_argument, NULL, 's'},
202 {"keep-files", no_argument, NULL, 'k'},
203 {"elide-empty-files", no_argument, NULL, 'z'},
204 {"prefix", required_argument, NULL, 'f'},
205 {"suffix-format", required_argument, NULL, 'b'},
206 {GETOPT_HELP_OPTION_DECL},
207 {GETOPT_VERSION_OPTION_DECL},
211 /* Optionally remove files created so far; then exit.
212 Called when an error is detected. */
219 close_output_file ();
221 sigprocmask (SIG_BLOCK, &caught_signals, &oldset);
222 delete_all_files (false);
223 sigprocmask (SIG_SETMASK, &oldset, NULL);
226 static void cleanup_fatal (void) ATTRIBUTE_NORETURN;
237 error (0, 0, "%s", _("memory exhausted"));
242 interrupt_handler (int sig)
245 signal (sig, SIG_IGN);
247 delete_all_files (true);
249 signal (sig, SIG_DFL);
253 /* Keep track of NUM bytes of a partial line in buffer START.
254 These bytes will be retrieved later when another large buffer is read.
255 It is not necessary to create a new buffer for these bytes; instead,
256 we keep a pointer to the existing buffer. This buffer *is* on the
257 free list, and when the next buffer is obtained from this list
258 (even if it is this one), these bytes will be placed at the
259 start of the new buffer. */
262 save_to_hold_area (char *start, size_t num)
268 /* Read up to MAX_N_BYTES bytes from the input stream into DEST.
269 Return the number of bytes read. */
272 read_input (char *dest, size_t max_n_bytes)
276 if (max_n_bytes == 0)
279 bytes_read = safe_read (STDIN_FILENO, dest, max_n_bytes);
282 have_read_eof = true;
284 if (bytes_read == SAFE_READ_ERROR)
286 error (0, errno, _("read error"));
293 /* Initialize existing line record P. */
296 clear_line_control (struct line *p)
300 p->retrieve_index = 0;
303 /* Return a new, initialized line record. */
306 new_line_control (void)
308 struct line *p = xmalloc (sizeof *p);
311 clear_line_control (p);
316 /* Record LINE_START, which is the address of the start of a line
317 of length LINE_LEN in the large buffer, in the lines buffer of B. */
320 keep_new_line (struct buffer_record *b, char *line_start, size_t line_len)
324 /* If there is no existing area to keep line info, get some. */
325 if (b->line_start == NULL)
326 b->line_start = b->curr_line = new_line_control ();
328 /* If existing area for lines is full, get more. */
329 if (b->curr_line->used == CTRL_SIZE)
331 b->curr_line->next = new_line_control ();
332 b->curr_line = b->curr_line->next;
337 /* Record the start of the line, and update counters. */
338 l->starts[l->insert_index].str = line_start;
339 l->starts[l->insert_index].len = line_len;
344 /* Scan the buffer in B for newline characters
345 and record the line start locations and lengths in B.
346 Return the number of lines found in this buffer.
348 There may be an incomplete line at the end of the buffer;
349 a pointer is kept to this area, which will be used when
350 the next buffer is filled. */
353 record_line_starts (struct buffer_record *b)
355 char *line_start; /* Start of current line. */
356 char *line_end; /* End of each line found. */
357 size_t bytes_left; /* Length of incomplete last line. */
358 size_t lines; /* Number of lines found. */
359 size_t line_length; /* Length of each line found. */
361 if (b->bytes_used == 0)
365 line_start = b->buffer;
366 bytes_left = b->bytes_used;
370 line_end = memchr (line_start, '\n', bytes_left);
371 if (line_end == NULL)
373 line_length = line_end - line_start + 1;
374 keep_new_line (b, line_start, line_length);
375 bytes_left -= line_length;
376 line_start = line_end + 1;
380 /* Check for an incomplete last line. */
385 keep_new_line (b, line_start, bytes_left);
389 save_to_hold_area (line_start, bytes_left);
392 b->num_lines = lines;
393 b->first_available = b->start_line = last_line_number + 1;
394 last_line_number += lines;
399 /* Return a new buffer with room to store SIZE bytes, plus
400 an extra byte for safety. */
402 static struct buffer_record *
403 create_new_buffer (size_t size)
405 struct buffer_record *new_buffer = xmalloc (sizeof *new_buffer);
407 new_buffer->buffer = xmalloc (size + 1);
409 new_buffer->bytes_alloc = size;
410 new_buffer->line_start = new_buffer->curr_line = NULL;
415 /* Return a newly created buffer of at least MIN_SIZE bytes. The size
416 requested is START_SIZE, grown by a multiple of INCR_SIZE if MIN_SIZE is larger. */
418 static struct buffer_record *
419 get_new_buffer (size_t min_size)
421 struct buffer_record *new_buffer; /* Buffer to return. */
422 size_t alloc_size; /* Actual size that will be requested. */
424 alloc_size = START_SIZE;
425 if (alloc_size < min_size)
427 size_t s = min_size - alloc_size + INCR_SIZE - 1;
428 alloc_size += s - s % INCR_SIZE;
431 new_buffer = create_new_buffer (alloc_size);
433 new_buffer->num_lines = 0;
434 new_buffer->bytes_used = 0;
435 new_buffer->start_line = new_buffer->first_available = last_line_number + 1;
436 new_buffer->next = NULL;
442 free_buffer (struct buffer_record *buf)
447 /* Append buffer BUF to the linked list of buffers that contain
448 some data yet to be processed. */
451 save_buffer (struct buffer_record *buf)
453 struct buffer_record *p;
456 buf->curr_line = buf->line_start;
462 for (p = head; p->next; p = p->next)
468 /* Fill a buffer of input.
470 Set the initial size of the buffer to a default.
471 Fill the buffer (from the hold area and input stream)
472 and find the individual lines.
473 If no lines are found (the buffer is too small to hold the next line),
474 release the current buffer (whose contents would have been put in the
475 hold area) and repeat the process with another large buffer until at least
476 one entire line has been read.
478 Return true if a new buffer was obtained, otherwise false
479 (in which case end-of-file must have been encountered). */
484 struct buffer_record *b;
485 size_t bytes_wanted = START_SIZE; /* Minimum buffer size. */
486 size_t bytes_avail; /* Size of new buffer created. */
487 size_t lines_found; /* Number of lines in this new buffer. */
488 char *p; /* Place to load into buffer. */
493 /* We must make the buffer at least as large as the amount of data
494 in the partial line left over from the last call. */
495 if (bytes_wanted < hold_count)
496 bytes_wanted = hold_count;
500 b = get_new_buffer (bytes_wanted);
501 bytes_avail = b->bytes_alloc; /* Size of buffer returned. */
504 /* First check the `holding' area for a partial line. */
508 memcpy (p, hold_area, hold_count);
510 b->bytes_used += hold_count;
511 bytes_avail -= hold_count;
515 b->bytes_used += read_input (p, bytes_avail);
517 lines_found = record_line_starts (b);
518 bytes_wanted = b->bytes_alloc * 2;
522 while (!lines_found && !have_read_eof);
527 return lines_found != 0;
530 /* Return the line number of the first line that has not yet been retrieved. */
533 get_first_line_in_buffer (void)
535 if (head == NULL && !load_buffer ())
536 error (EXIT_FAILURE, errno, _("input disappeared"));
538 return head->first_available;
541 /* Return a pointer to the logical first line in the buffer and make the
542 next line the logical first line.
543 Return NULL if there is no more input. */
545 static struct cstring *
548 /* If non-NULL, this is the buffer for which the previous call
549 returned the final line. So now, presuming that line has been
550 processed, we can free the buffer and reset this pointer. */
551 static struct buffer_record *prev_buf = NULL;
553 struct cstring *line; /* Return value. */
554 struct line *l; /* For convenience. */
558 free_buffer (prev_buf);
562 if (head == NULL && !load_buffer ())
565 if (current_line < head->first_available)
566 current_line = head->first_available;
568 ++(head->first_available);
572 line = &l->starts[l->retrieve_index];
574 /* Advance index to next line. */
575 if (++l->retrieve_index == l->used)
577 /* Go on to the next line record. */
578 head->curr_line = l->next;
579 if (head->curr_line == NULL || head->curr_line->used == 0)
581 /* Go on to the next data block.
582 but first record the current one so we can free it
583 once the line we're returning has been processed. */
592 /* Search the buffers for line LINENUM, reading more input if necessary.
593 Return a pointer to the line, or NULL if it is not found in the file. */
595 static struct cstring *
596 find_line (uintmax_t linenum)
598 struct buffer_record *b;
600 if (head == NULL && !load_buffer ())
603 if (linenum < head->start_line)
608 if (linenum < b->start_line + b->num_lines)
610 /* The line is in this buffer. */
612 size_t offset; /* How far into the buffer the line is. */
615 offset = linenum - b->start_line;
616 /* Find the control record. */
617 while (offset >= CTRL_SIZE)
622 return &l->starts[offset];
624 if (b->next == NULL && !load_buffer ())
626 b = b->next; /* Try the next data block. */
630 /* Return true if no more lines are available for input. */
635 return find_line (current_line + 1) == NULL;
638 /* Open NAME as standard input. */
641 set_input_file (const char *name)
643 if (! STREQ (name, "-") && fd_reopen (STDIN_FILENO, name, O_RDONLY, 0) < 0)
644 error (EXIT_FAILURE, errno, _("cannot open %s for reading"), quote (name));
647 /* Write all lines from the beginning of the buffer up to, but
648 not including, line LAST_LINE, to the current output file.
649 If IGNORE is true, do not output lines selected here.
650 ARGNUM is the index in ARGV of the current pattern. */
653 write_to_file (uintmax_t last_line, bool ignore, int argnum)
655 struct cstring *line;
656 uintmax_t first_line; /* First available input line. */
657 uintmax_t lines; /* Number of lines to output. */
660 first_line = get_first_line_in_buffer ();
662 if (first_line > last_line)
664 error (0, 0, _("%s: line number out of range"), global_argv[argnum]);
668 lines = last_line - first_line;
670 for (i = 0; i < lines; i++)
672 line = remove_line ();
675 error (0, 0, _("%s: line number out of range"), global_argv[argnum]);
679 save_line_to_file (line);
683 /* Output any lines left after all regexps have been processed. */
686 dump_rest_of_file (void)
688 struct cstring *line;
690 while ((line = remove_line ()) != NULL)
691 save_line_to_file (line);
694 /* Handle an attempt to read beyond EOF under the control of record P,
695 on iteration REPETITION if nonzero. */
697 static void handle_line_error (const struct control *, uintmax_t)
700 handle_line_error (const struct control *p, uintmax_t repetition)
702 char buf[INT_BUFSIZE_BOUND (uintmax_t)];
704 fprintf (stderr, _("%s: %s: line number out of range"),
705 program_name, quote (umaxtostr (p->lines_required, buf)));
707 fprintf (stderr, _(" on repetition %s\n"), umaxtostr (repetition, buf));
709 fprintf (stderr, "\n");
714 /* Determine the line number that marks the end of this file,
715 then get those lines and save them to the output file.
716 P is the control record.
717 REPETITION is the repetition number. */
720 process_line_count (const struct control *p, uintmax_t repetition)
723 uintmax_t last_line_to_save = p->lines_required * (repetition + 1);
724 struct cstring *line;
726 create_output_file ();
728 linenum = get_first_line_in_buffer ();
730 while (linenum++ < last_line_to_save)
732 line = remove_line ();
734 handle_line_error (p, repetition);
735 save_line_to_file (line);
738 close_output_file ();
740 /* Ensure that the line number specified is not 1 greater than
741 the number of lines in the file. */
742 if (no_more_lines ())
743 handle_line_error (p, repetition);
746 static void regexp_error (struct control *, uintmax_t, bool) ATTRIBUTE_NORETURN;
748 regexp_error (struct control *p, uintmax_t repetition, bool ignore)
750 fprintf (stderr, _("%s: %s: match not found"),
751 program_name, quote (global_argv[p->argnum]));
755 char buf[INT_BUFSIZE_BOUND (uintmax_t)];
756 fprintf (stderr, _(" on repetition %s\n"), umaxtostr (repetition, buf));
759 fprintf (stderr, "\n");
763 dump_rest_of_file ();
764 close_output_file ();
769 /* Read the input until a line matches the regexp in P, outputting
770 it unless P->IGNORE is true.
771 REPETITION is this repeat-count; 0 means the first time. */
774 process_regexp (struct control *p, uintmax_t repetition)
776 struct cstring *line; /* From input file. */
777 size_t line_len; /* To make "$" in regexps work. */
778 uintmax_t break_line; /* First line number of next file. */
779 bool ignore = p->ignore; /* If true, skip this section. */
783 create_output_file ();
785 /* If there is no offset for the regular expression, or
786 it is positive, then it is not necessary to buffer the lines. */
792 line = find_line (++current_line);
795 if (p->repeat_forever)
799 dump_rest_of_file ();
800 close_output_file ();
805 regexp_error (p, repetition, ignore);
807 line_len = line->len;
808 if (line->str[line_len - 1] == '\n')
810 ret = re_search (&p->re_compiled, line->str, line_len,
814 error (0, 0, _("error in regular expression search"));
819 line = remove_line ();
821 save_line_to_file (line);
829 /* Buffer the lines. */
832 line = find_line (++current_line);
835 if (p->repeat_forever)
839 dump_rest_of_file ();
840 close_output_file ();
845 regexp_error (p, repetition, ignore);
847 line_len = line->len;
848 if (line->str[line_len - 1] == '\n')
850 ret = re_search (&p->re_compiled, line->str, line_len,
854 error (0, 0, _("error in regular expression search"));
862 /* Account for any offset from this regexp. */
863 break_line = current_line + p->offset;
865 write_to_file (break_line, ignore, p->argnum);
868 close_output_file ();
871 current_line = break_line;
874 /* Split the input file according to the control records we have built. */
881 for (i = 0; i < control_used; i++)
884 if (controls[i].regexpr)
886 for (j = 0; (controls[i].repeat_forever
887 || j <= controls[i].repeat); j++)
888 process_regexp (&controls[i], j);
892 for (j = 0; (controls[i].repeat_forever
893 || j <= controls[i].repeat); j++)
894 process_line_count (&controls[i], j);
898 create_output_file ();
899 dump_rest_of_file ();
900 close_output_file ();
903 /* Return the name of output file number NUM.
905 This function is called from a signal handler, so it should invoke
906 only reentrant functions that are async-signal-safe. POSIX does
907 not guarantee this for the functions called below, but we don't
908 know of any hosts where this implementation isn't safe. */
911 make_filename (unsigned int num)
913 strcpy (filename_space, prefix);
915 sprintf (filename_space + strlen (prefix), suffix, num);
917 sprintf (filename_space + strlen (prefix), "%0*u", digits, num);
918 return filename_space;
921 /* Create the next output file. */
924 create_output_file (void)
930 output_filename = make_filename (files_created);
932 /* Create the output file in a critical section, to avoid races. */
933 sigprocmask (SIG_BLOCK, &caught_signals, &oldset);
934 output_stream = fopen (output_filename, "w");
935 fopen_ok = (output_stream != NULL);
937 files_created += fopen_ok;
938 sigprocmask (SIG_SETMASK, &oldset, NULL);
942 error (0, fopen_errno, "%s", output_filename);
948 /* If requested, delete all the files we have created. This function
949 must be called only from critical sections. */
952 delete_all_files (bool in_signal_handler)
959 for (i = 0; i < files_created; i++)
961 const char *name = make_filename (i);
962 if (unlink (name) != 0 && !in_signal_handler)
963 error (0, errno, "%s", name);
969 /* Close the current output file and print the count
970 of characters in this file. */
973 close_output_file (void)
977 if (ferror (output_stream))
979 error (0, 0, _("write error for %s"), quote (output_filename));
980 output_stream = NULL;
983 if (fclose (output_stream) != 0)
985 error (0, errno, "%s", output_filename);
986 output_stream = NULL;
989 if (bytes_written == 0 && elide_empty_files)
995 /* Remove the output file in a critical section, to avoid races. */
996 sigprocmask (SIG_BLOCK, &caught_signals, &oldset);
997 unlink_ok = (unlink (output_filename) == 0);
998 unlink_errno = errno;
999 files_created -= unlink_ok;
1000 sigprocmask (SIG_SETMASK, &oldset, NULL);
1003 error (0, unlink_errno, "%s", output_filename);
1007 if (!suppress_count)
1009 char buf[INT_BUFSIZE_BOUND (uintmax_t)];
1010 fprintf (stdout, "%s\n", umaxtostr (bytes_written, buf));
1013 output_stream = NULL;
1017 /* Save line LINE to the output file and
1018 increment the character count for the current file. */
1021 save_line_to_file (const struct cstring *line)
1023 fwrite (line->str, sizeof (char), line->len, output_stream);
1024 bytes_written += line->len;
1027 /* Return a new, initialized control record. */
1029 static struct control *
1030 new_control_record (void)
1032 static size_t control_allocated = 0; /* Total space allocated. */
1035 if (control_used == control_allocated)
1036 controls = X2NREALLOC (controls, &control_allocated);
1037 p = &controls[control_used++];
1040 p->repeat_forever = false;
1041 p->lines_required = 0;
1046 /* Check if there is a numeric offset after a regular expression.
1047 STR is the entire command line argument.
1048 P is the control record for this regular expression.
1049 NUM is the numeric part of STR. */
1052 check_for_offset (struct control *p, const char *str, const char *num)
1054 if (xstrtoimax (num, NULL, 10, &p->offset, "") != LONGINT_OK)
1055 error (EXIT_FAILURE, 0, _("%s: integer expected after delimiter"), str);
1058 /* Given that the first character of command line arg STR is '{',
1059 make sure that the rest of the string is a valid repeat count
1060 and store its value in P.
1061 ARGNUM is the ARGV index of STR. */
1064 parse_repeat_count (int argnum, struct control *p, char *str)
1069 end = str + strlen (str) - 1;
1071 error (EXIT_FAILURE, 0, _("%s: `}' is required in repeat count"), str);
1074 if (str+1 == end-1 && *(str+1) == '*')
1075 p->repeat_forever = true;
1078 if (xstrtoumax (str + 1, NULL, 10, &val, "") != LONGINT_OK)
1080 error (EXIT_FAILURE, 0,
1081 _("%s}: integer required between `{' and `}'"),
1082 global_argv[argnum]);
1090 /* Extract the regular expression from STR and check for a numeric offset.
1091 STR should start with the regexp delimiter character.
1092 Return a new control record for the regular expression.
1093 ARGNUM is the ARGV index of STR.
1094 Unless IGNORE is true, mark these lines for output. */
1096 static struct control *
1097 extract_regexp (int argnum, bool ignore, char *str)
1099 size_t len; /* Number of bytes in this regexp. */
1101 char *closing_delim;
1105 closing_delim = strrchr (str + 1, delim);
1106 if (closing_delim == NULL)
1107 error (EXIT_FAILURE, 0,
1108 _("%s: closing delimiter `%c' missing"), str, delim);
1110 len = closing_delim - str - 1;
1111 p = new_control_record ();
1115 p->regexpr = xmalloc (len + 1);
1116 strncpy (p->regexpr, str + 1, len);
1117 p->re_compiled.allocated = len * 2;
1118 p->re_compiled.buffer = xmalloc (p->re_compiled.allocated);
1119 p->re_compiled.fastmap = xmalloc (1 << CHAR_BIT);
1120 p->re_compiled.translate = NULL;
1121 err = re_compile_pattern (p->regexpr, len, &p->re_compiled);
1124 error (0, 0, _("%s: invalid regular expression: %s"), str, err);
1128 if (closing_delim[1])
1129 check_for_offset (p, str, closing_delim + 1);
1134 /* Extract the break patterns from args START through ARGC - 1 of ARGV.
1135 After each pattern, check if the next argument is a repeat count. */
1138 parse_patterns (int argc, int start, char **argv)
1140 int i; /* Index into ARGV. */
1141 struct control *p; /* New control record created. */
1143 static uintmax_t last_val = 0;
1145 for (i = start; i < argc; i++)
1147 if (*argv[i] == '/' || *argv[i] == '%')
1149 p = extract_regexp (i, *argv[i] == '%', argv[i]);
1153 p = new_control_record ();
1156 if (xstrtoumax (argv[i], NULL, 10, &val, "") != LONGINT_OK)
1157 error (EXIT_FAILURE, 0, _("%s: invalid pattern"), argv[i]);
1159 error (EXIT_FAILURE, 0,
1160 _("%s: line number must be greater than zero"),
1164 char buf[INT_BUFSIZE_BOUND (uintmax_t)];
1165 error (EXIT_FAILURE, 0,
1166 _("line number %s is smaller than preceding line number, %s"),
1167 quote (argv[i]), umaxtostr (last_val, buf));
1170 if (val == last_val)
1172 _("warning: line number %s is the same as preceding line number"),
1177 p->lines_required = val;
1180 if (i + 1 < argc && *argv[i + 1] == '{')
1182 /* We have a repeat count. */
1184 parse_repeat_count (i, p, argv[i]);
1190 get_format_flags (char **format_ptr)
1192 unsigned int count = 0;
1194 for (; **format_ptr; (*format_ptr)++)
1196 switch (**format_ptr)
1207 count |= 2; /* Allow for 0x prefix preceding an `x' conversion. */
1218 get_format_width (char **format_ptr)
1220 unsigned long int val = 0;
1222 if (ISDIGIT (**format_ptr)
1223 && (xstrtoul (*format_ptr, format_ptr, 10, &val, NULL) != LONGINT_OK
1225 error (EXIT_FAILURE, 0, _("invalid format width"));
1227 /* Allow for enough octal digits to represent the value of UINT_MAX,
1228 even if the field width is less than that. */
1229 return MAX (val, (sizeof (unsigned int) * CHAR_BIT + 2) / 3);
1233 get_format_prec (char **format_ptr)
1235 if (**format_ptr != '.')
1239 if (! ISDIGIT (**format_ptr))
1243 unsigned long int val;
1244 if (xstrtoul (*format_ptr, format_ptr, 10, &val, NULL) != LONGINT_OK
1246 error (EXIT_FAILURE, 0, _("invalid format precision"));
1252 get_format_conv_type (char **format_ptr)
1254 unsigned char ch = *(*format_ptr)++;
1267 error (EXIT_FAILURE, 0, _("missing conversion specifier in suffix"));
1272 error (EXIT_FAILURE, 0,
1273 _("invalid conversion specifier in suffix: %c"), ch);
1275 error (EXIT_FAILURE, 0,
1276 _("invalid conversion specifier in suffix: \\%.3o"), ch);
1281 max_out (char *format)
1283 size_t out_count = 0;
1284 bool percent = false;
1288 if (*format++ != '%')
1290 else if (*format == '%')
1298 error (EXIT_FAILURE, 0,
1299 _("too many %% conversion specifications in suffix"));
1301 out_count += get_format_flags (&format);
1303 size_t width = get_format_width (&format);
1304 size_t prec = get_format_prec (&format);
1306 out_count += MAX (width, prec);
1308 get_format_conv_type (&format);
1313 error (EXIT_FAILURE, 0,
1314 _("missing %% conversion specification in suffix"));
1320 main (int argc, char **argv)
1323 unsigned long int val;
1325 initialize_main (&argc, &argv);
1326 program_name = argv[0];
1327 setlocale (LC_ALL, "");
1328 bindtextdomain (PACKAGE, LOCALEDIR);
1329 textdomain (PACKAGE);
1331 atexit (close_stdout);
1336 suppress_count = false;
1337 remove_files = true;
1338 prefix = DEFAULT_PREFIX;
1340 while ((optc = getopt_long (argc, argv, "f:b:kn:sqz", longopts, NULL)) != -1)
1352 remove_files = false;
1356 if (xstrtoul (optarg, NULL, 10, &val, "") != LONGINT_OK
1358 error (EXIT_FAILURE, 0, _("%s: invalid number"), optarg);
1364 suppress_count = true;
1368 elide_empty_files = true;
1371 case_GETOPT_HELP_CHAR;
1373 case_GETOPT_VERSION_CHAR (PROGRAM_NAME, AUTHORS);
1376 usage (EXIT_FAILURE);
1379 if (argc - optind < 2)
1382 error (0, 0, _("missing operand"));
1384 error (0, 0, _("missing operand after %s"), quote (argv[argc - 1]));
1385 usage (EXIT_FAILURE);
1389 filename_space = xmalloc (strlen (prefix) + max_out (suffix) + 2);
1391 filename_space = xmalloc (strlen (prefix) + digits + 2);
1393 set_input_file (argv[optind++]);
1395 parse_patterns (argc, optind, argv);
1399 static int const sig[] = { SIGHUP, SIGINT, SIGQUIT, SIGTERM };
1400 enum { nsigs = sizeof sig / sizeof sig[0] };
1403 struct sigaction act;
1405 sigemptyset (&caught_signals);
1406 for (i = 0; i < nsigs; i++)
1408 sigaction (sig[i], NULL, &act);
1409 if (act.sa_handler != SIG_IGN)
1410 sigaddset (&caught_signals, sig[i]);
1413 act.sa_handler = interrupt_handler;
1414 act.sa_mask = caught_signals;
1417 for (i = 0; i < nsigs; i++)
1418 if (sigismember (&caught_signals, sig[i]))
1419 sigaction (sig[i], &act, NULL);
1421 for (i = 0; i < nsigs; i++)
1422 if (signal (sig[i], SIG_IGN) != SIG_IGN)
1424 signal (sig[i], interrupt_handler);
1425 siginterrupt (sig[i], 1);
1432 if (close (STDIN_FILENO) != 0)
1434 error (0, errno, _("read error"));
1438 exit (EXIT_SUCCESS);
1444 if (status != EXIT_SUCCESS)
1445 fprintf (stderr, _("Try `%s --help' for more information.\n"),
1450 Usage: %s [OPTION]... FILE PATTERN...\n\
1454 Output pieces of FILE separated by PATTERN(s) to files `xx00', `xx01', ...,\n\
1455 and output byte counts of each piece to standard output.\n\
1459 Mandatory arguments to long options are mandatory for short options too.\n\
1462 -b, --suffix-format=FORMAT use sprintf FORMAT instead of %02d\n\
1463 -f, --prefix=PREFIX use PREFIX instead of `xx'\n\
1464 -k, --keep-files do not remove output files on errors\n\
1467 -n, --digits=DIGITS use specified number of digits instead of 2\n\
1468 -s, --quiet, --silent do not print counts of output file sizes\n\
1469 -z, --elide-empty-files remove empty output files\n\
1471 fputs (HELP_OPTION_DESCRIPTION, stdout);
1472 fputs (VERSION_OPTION_DESCRIPTION, stdout);
1475 Read standard input if FILE is -. Each PATTERN may be:\n\
1479 INTEGER copy up to but not including specified line number\n\
1480 /REGEXP/[OFFSET] copy up to but not including a matching line\n\
1481 %REGEXP%[OFFSET] skip to, but not including a matching line\n\
1482 {INTEGER} repeat the previous pattern specified number of times\n\
1483 {*} repeat the previous pattern as many times as possible\n\
1485 A line OFFSET is a required `+' or `-' followed by a positive integer.\n\
1487 printf (_("\nReport bugs to <%s>.\n"), PACKAGE_BUGREPORT);