1 /* csplit - split a file into sections determined by context lines
2 Copyright (C) 91, 1995-2005 Free Software Foundation, Inc.
4 This program is free software; you can redistribute it and/or modify
5 it under the terms of the GNU General Public License as published by
6 the Free Software Foundation; either version 2, or (at your option) any later version.
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU General Public License for more details.
14 You should have received a copy of the GNU General Public License
15 along with this program; if not, write to the Free Software Foundation,
16 Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. */
18 /* Written by Stuart Kemp, cpsrk@groper.jcu.edu.au.
19 Modified by David MacKenzie, djm@gnu.ai.mit.edu. */
25 #include <sys/types.h>
33 #include "fd-reopen.h"
36 #include "safe-read.h"
40 /* Use SA_NOCLDSTOP as a proxy for whether the sigaction machinery is present. */
43 # define SA_NOCLDSTOP 0
44 # define sigprocmask(How, Set, Oset) /* empty */
46 # if ! HAVE_SIGINTERRUPT
47 # define siginterrupt(sig, flag) /* empty */
51 /* The official name of this program (e.g., no `g' prefix). */
52 #define PROGRAM_NAME "csplit"
54 #define AUTHORS "Stuart Kemp", "David MacKenzie"
56 /* Increment size of area for control records. */
59 /* The default prefix for output file names. */
60 #define DEFAULT_PREFIX "xx"
62 /* A compiled pattern arg. */
65 char *regexpr; /* Non-compiled regular expression. */
66 struct re_pattern_buffer re_compiled; /* Compiled regular expression. */
67 intmax_t offset; /* Offset from regexp to split at. */
68 uintmax_t lines_required; /* Number of lines required. */
69 uintmax_t repeat; /* Repeat count. */
70 int argnum; /* ARGV index. */
71 bool repeat_forever; /* True if `*' used as a repeat count. */
72 bool ignore; /* If true, produce no output (for regexp). */
75 /* Initial size of data area in buffers. */
76 #define START_SIZE 8191
78 /* Increment size for data area. */
79 #define INCR_SIZE 2048
81 /* Number of lines kept in each node in line list. */
85 /* Some small values to test the algorithms. */
86 # define START_SIZE 200
91 /* A string with a length count. */
98 /* Pointers to the beginnings of lines in the buffer area.
99 These structures are linked together if needed. */
102 size_t used; /* Number of offsets used in this struct. */
103 size_t insert_index; /* Next offset to use when inserting line. */
104 size_t retrieve_index; /* Next index to use when retrieving line. */
105 struct cstring starts[CTRL_SIZE]; /* Lines in the data area. */
106 struct line *next; /* Next in linked list. */
109 /* The structure to hold the input lines.
110 Contains a pointer to the data area and a list containing
111 pointers to the individual lines. */
114 size_t bytes_alloc; /* Size of the buffer area. */
115 size_t bytes_used; /* Bytes used in the buffer area. */
116 uintmax_t start_line; /* First line number in this buffer. */
117 uintmax_t first_available; /* First line that can be retrieved. */
118 size_t num_lines; /* Number of complete lines in this buffer. */
119 char *buffer; /* Data area. */
120 struct line *line_start; /* Head of list of pointers to lines. */
121 struct line *curr_line; /* The line start record currently in use. */
122 struct buffer_record *next;
125 static void close_output_file (void);
126 static void create_output_file (void);
127 static void delete_all_files (bool);
128 static void save_line_to_file (const struct cstring *line);
129 void usage (int status);
131 /* The name this program was run with. */
134 /* Start of buffer list. */
135 static struct buffer_record *head = NULL;
137 /* Partially read line. */
138 static char *hold_area = NULL;
140 /* Number of bytes in `hold_area'. */
141 static size_t hold_count = 0;
143 /* Number of the last line in the buffers. */
144 static uintmax_t last_line_number = 0;
146 /* Number of the line currently being examined. */
147 static uintmax_t current_line = 0;
149 /* If true, we have read EOF. */
150 static bool have_read_eof = false;
152 /* Name of output files. */
153 static char * volatile filename_space = NULL;
155 /* Prefix part of output file names. */
156 static char * volatile prefix = NULL;
158 /* Suffix part of output file names. */
159 static char * volatile suffix = NULL;
161 /* Number of digits to use in output file names. */
162 static int volatile digits = 2;
164 /* Number of files created so far. */
165 static unsigned int volatile files_created = 0;
167 /* Number of bytes written to current file. */
168 static uintmax_t bytes_written;
170 /* Output file pointer. */
171 static FILE *output_stream = NULL;
173 /* Output file name. */
174 static char *output_filename = NULL;
176 /* Perhaps it would be cleaner to pass arg values instead of indexes. */
177 static char **global_argv;
179 /* If true, do not print the count of bytes in each output file. */
180 static bool suppress_count;
182 /* If true, remove output files on error. */
183 static bool volatile remove_files;
185 /* If true, remove all output files which have a zero length. */
186 static bool elide_empty_files;
188 /* The compiled pattern arguments, which determine how to split the input file. */
190 static struct control *controls;
192 /* Number of elements in `controls'. */
193 static size_t control_used;
195 /* The set of signals that are caught. */
196 static sigset_t caught_signals;
198 static struct option const longopts[] =
200 {"digits", required_argument, NULL, 'n'},
201 {"quiet", no_argument, NULL, 'q'},
202 {"silent", no_argument, NULL, 's'},
203 {"keep-files", no_argument, NULL, 'k'},
204 {"elide-empty-files", no_argument, NULL, 'z'},
205 {"prefix", required_argument, NULL, 'f'},
206 {"suffix-format", required_argument, NULL, 'b'},
207 {GETOPT_HELP_OPTION_DECL},
208 {GETOPT_VERSION_OPTION_DECL},
212 /* Optionally remove files created so far; then exit.
213 Called when an error detected. */
220 close_output_file ();
222 sigprocmask (SIG_BLOCK, &caught_signals, &oldset);
223 delete_all_files (false);
224 sigprocmask (SIG_SETMASK, &oldset, NULL);
227 static void cleanup_fatal (void) ATTRIBUTE_NORETURN;
238 error (0, 0, "%s", _("memory exhausted"));
243 interrupt_handler (int sig)
246 signal (sig, SIG_IGN);
248 delete_all_files (true);
250 signal (sig, SIG_DFL);
254 /* Keep track of NUM bytes of a partial line in buffer START.
255 These bytes will be retrieved later when another large buffer is read.
256 It is not necessary to create a new buffer for these bytes; instead,
257 we keep a pointer to the existing buffer. This buffer *is* on the
258 free list, and when the next buffer is obtained from this list
259 (even if it is this one), these bytes will be placed at the
260 start of the new buffer. */
263 save_to_hold_area (char *start, size_t num)
269 /* Read up to MAX_N_BYTES bytes from the input stream into DEST.
270 Return the number of bytes read. */
273 read_input (char *dest, size_t max_n_bytes)
277 if (max_n_bytes == 0)
280 bytes_read = safe_read (STDIN_FILENO, dest, max_n_bytes);
283 have_read_eof = true;
285 if (bytes_read == SAFE_READ_ERROR)
287 error (0, errno, _("read error"));
294 /* Initialize existing line record P. */
297 clear_line_control (struct line *p)
301 p->retrieve_index = 0;
304 /* Return a new, initialized line record. */
307 new_line_control (void)
309 struct line *p = xmalloc (sizeof *p);
312 clear_line_control (p);
317 /* Record LINE_START, which is the address of the start of a line
318 of length LINE_LEN in the large buffer, in the lines buffer of B. */
321 keep_new_line (struct buffer_record *b, char *line_start, size_t line_len)
325 /* If there is no existing area to keep line info, get some. */
326 if (b->line_start == NULL)
327 b->line_start = b->curr_line = new_line_control ();
329 /* If existing area for lines is full, get more. */
330 if (b->curr_line->used == CTRL_SIZE)
332 b->curr_line->next = new_line_control ();
333 b->curr_line = b->curr_line->next;
338 /* Record the start of the line, and update counters. */
339 l->starts[l->insert_index].str = line_start;
340 l->starts[l->insert_index].len = line_len;
345 /* Scan the buffer in B for newline characters
346 and record the line start locations and lengths in B.
347 Return the number of lines found in this buffer.
349 There may be an incomplete line at the end of the buffer;
350 a pointer is kept to this area, which will be used when
351 the next buffer is filled. */
354 record_line_starts (struct buffer_record *b)
356 char *line_start; /* Start of current line. */
357 char *line_end; /* End of each line found. */
358 size_t bytes_left; /* Length of incomplete last line. */
359 size_t lines; /* Number of lines found. */
360 size_t line_length; /* Length of each line found. */
362 if (b->bytes_used == 0)
366 line_start = b->buffer;
367 bytes_left = b->bytes_used;
371 line_end = memchr (line_start, '\n', bytes_left);
372 if (line_end == NULL)
374 line_length = line_end - line_start + 1;
375 keep_new_line (b, line_start, line_length);
376 bytes_left -= line_length;
377 line_start = line_end + 1;
381 /* Check for an incomplete last line. */
386 keep_new_line (b, line_start, bytes_left);
390 save_to_hold_area (line_start, bytes_left);
393 b->num_lines = lines;
394 b->first_available = b->start_line = last_line_number + 1;
395 last_line_number += lines;
400 /* Return a new buffer with room to store SIZE bytes, plus
401 an extra byte for safety. */
403 static struct buffer_record *
404 create_new_buffer (size_t size)
406 struct buffer_record *new_buffer = xmalloc (sizeof *new_buffer);
/* The data area is SIZE + 1 bytes long, but only SIZE is recorded as
   usable in bytes_alloc, leaving the final byte spare. */
408 new_buffer->buffer = xmalloc (size + 1);
410 new_buffer->bytes_alloc = size;
/* No line records yet: keep_new_line creates the first one when it
   finds line_start == NULL. */
411 new_buffer->line_start = new_buffer->curr_line = NULL;
416 /* Return a new buffer of at least MIN_SIZE bytes. Its size is START_SIZE,
417 grown in multiples of INCR_SIZE when MIN_SIZE is larger. */
419 static struct buffer_record *
420 get_new_buffer (size_t min_size)
422 struct buffer_record *new_buffer; /* Buffer to return. */
423 size_t alloc_size; /* Actual size that will be requested. */
/* Start from the default size.  When MIN_SIZE is larger, add the
   shortfall rounded up to a whole multiple of INCR_SIZE: S is the
   shortfall biased by INCR_SIZE - 1, and S - S % INCR_SIZE truncates
   it back down to a multiple. */
425 alloc_size = START_SIZE;
426 if (alloc_size < min_size)
428 size_t s = min_size - alloc_size + INCR_SIZE - 1;
429 alloc_size += s - s % INCR_SIZE;
432 new_buffer = create_new_buffer (alloc_size);
/* The buffer starts out empty and unlinked; its first line number
   follows the last line number already recorded in the buffers. */
434 new_buffer->num_lines = 0;
435 new_buffer->bytes_used = 0;
436 new_buffer->start_line = new_buffer->first_available = last_line_number + 1;
437 new_buffer->next = NULL;
443 free_buffer (struct buffer_record *buf)
448 /* Append buffer BUF to the linked list of buffers that contain
449 some data yet to be processed. */
452 save_buffer (struct buffer_record *buf)
454 struct buffer_record *p;
457 buf->curr_line = buf->line_start;
463 for (p = head; p->next; p = p->next)
469 /* Fill a buffer of input.
471 Set the initial size of the buffer to a default.
472 Fill the buffer (from the hold area and input stream)
473 and find the individual lines.
474 If no lines are found (the buffer is too small to hold the next line),
475 release the current buffer (whose contents would have been put in the
476 hold area) and repeat the process with another large buffer until at least
477 one entire line has been read.
479 Return true if a new buffer was obtained, otherwise false
480 (in which case end-of-file must have been encountered). */
485 struct buffer_record *b;
486 size_t bytes_wanted = START_SIZE; /* Minimum buffer size. */
487 size_t bytes_avail; /* Size of new buffer created. */
488 size_t lines_found; /* Number of lines in this new buffer. */
489 char *p; /* Place to load into buffer. */
494 /* We must make the buffer at least as large as the amount of data
495 in the partial line left over from the last call. */
496 if (bytes_wanted < hold_count)
497 bytes_wanted = hold_count;
501 b = get_new_buffer (bytes_wanted);
502 bytes_avail = b->bytes_alloc; /* Size of buffer returned. */
505 /* First check the `holding' area for a partial line. */
509 memcpy (p, hold_area, hold_count);
511 b->bytes_used += hold_count;
512 bytes_avail -= hold_count;
516 b->bytes_used += read_input (p, bytes_avail);
518 lines_found = record_line_starts (b);
519 bytes_wanted = b->bytes_alloc * 2;
523 while (!lines_found && !have_read_eof);
528 return lines_found != 0;
531 /* Return the line number of the first line that has not yet been retrieved. */
534 get_first_line_in_buffer (void)
/* With no buffer queued, try to read one; if load_buffer reports that
   none could be obtained, exit with a diagnostic. */
536 if (head == NULL && !load_buffer ())
537 error (EXIT_FAILURE, errno, _("input disappeared"));
/* HEAD is the oldest queued buffer; its first_available field is the
   number of the next line to be handed out. */
539 return head->first_available;
542 /* Return a pointer to the logical first line in the buffer and make the
543 next line the logical first line.
544 Return NULL if there is no more input. */
546 static struct cstring *
549 /* If non-NULL, this is the buffer for which the previous call
550 returned the final line. So now, presuming that line has been
551 processed, we can free the buffer and reset this pointer. */
552 static struct buffer_record *prev_buf = NULL;
554 struct cstring *line; /* Return value. */
555 struct line *l; /* For convenience. */
559 free_buffer (prev_buf);
563 if (head == NULL && !load_buffer ())
566 if (current_line < head->first_available)
567 current_line = head->first_available;
569 ++(head->first_available);
573 line = &l->starts[l->retrieve_index];
575 /* Advance index to next line. */
576 if (++l->retrieve_index == l->used)
578 /* Go on to the next line record. */
579 head->curr_line = l->next;
580 if (head->curr_line == NULL || head->curr_line->used == 0)
582 /* Go on to the next data block,
583 but first record the current one so we can free it
584 once the line we're returning has been processed. */
593 /* Search the buffers for line LINENUM, reading more input if necessary.
594 Return a pointer to the line, or NULL if it is not found in the file. */
596 static struct cstring *
597 find_line (uintmax_t linenum)
599 struct buffer_record *b;
601 if (head == NULL && !load_buffer ())
604 if (linenum < head->start_line)
609 if (linenum < b->start_line + b->num_lines)
611 /* The line is in this buffer. */
613 size_t offset; /* How far into the buffer the line is. */
616 offset = linenum - b->start_line;
617 /* Find the control record. */
618 while (offset >= CTRL_SIZE)
623 return &l->starts[offset];
625 if (b->next == NULL && !load_buffer ())
627 b = b->next; /* Try the next data block. */
631 /* Return true if no more lines are available for input. */
636 return find_line (current_line + 1) == NULL;
639 /* Open NAME as standard input. */
642 set_input_file (const char *name)
/* "-" keeps the existing standard input.  Any other NAME is opened
   read-only onto STDIN_FILENO; failing to open it is fatal. */
644 if (! STREQ (name, "-") && fd_reopen (STDIN_FILENO, name, O_RDONLY, 0) < 0)
645 error (EXIT_FAILURE, errno, _("cannot open %s for reading"), quote (name));
648 /* Write all lines from the beginning of the buffer up to, but
649 not including, line LAST_LINE, to the current output file.
650 If IGNORE is true, do not output lines selected here.
651 ARGNUM is the index in ARGV of the current pattern. */
654 write_to_file (uintmax_t last_line, bool ignore, int argnum)
656 struct cstring *line;
657 uintmax_t first_line; /* First available input line. */
658 uintmax_t lines; /* Number of lines to output. */
661 first_line = get_first_line_in_buffer ();
663 if (first_line > last_line)
665 error (0, 0, _("%s: line number out of range"), global_argv[argnum]);
669 lines = last_line - first_line;
671 for (i = 0; i < lines; i++)
673 line = remove_line ();
676 error (0, 0, _("%s: line number out of range"), global_argv[argnum]);
680 save_line_to_file (line);
684 /* Output any lines left after all regexps have been processed. */
687 dump_rest_of_file (void)
689 struct cstring *line;
/* Drain the input: every line handed back by remove_line is written
   to the current output file, until remove_line returns NULL. */
691 while ((line = remove_line ()) != NULL)
692 save_line_to_file (line);
695 /* Handle an attempt to read beyond EOF under the control of record P,
696 on iteration REPETITION if nonzero. */
698 static void handle_line_error (const struct control *, uintmax_t)
701 handle_line_error (const struct control *p, uintmax_t repetition)
703 char buf[INT_BUFSIZE_BOUND (uintmax_t)];
705 fprintf (stderr, _("%s: %s: line number out of range"),
706 program_name, quote (umaxtostr (p->lines_required, buf)));
708 fprintf (stderr, _(" on repetition %s\n"), umaxtostr (repetition, buf));
710 fprintf (stderr, "\n");
715 /* Determine the line number that marks the end of this file,
716 then get those lines and save them to the output file.
717 P is the control record.
718 REPETITION is the repetition number. */
721 process_line_count (const struct control *p, uintmax_t repetition)
/* Line counts are cumulative across repetitions: on zero-based
   repetition R this section stops just before line
   lines_required * (R + 1). */
724 uintmax_t last_line_to_save = p->lines_required * (repetition + 1);
725 struct cstring *line;
727 create_output_file ();
729 linenum = get_first_line_in_buffer ();
/* Copy lines from the first unread one up to, but not including,
   last_line_to_save. */
731 while (linenum++ < last_line_to_save)
733 line = remove_line ();
/* NOTE(review): the guard on this call is not visible in this listing;
   presumably it runs only when remove_line returned NULL -- confirm. */
735 handle_line_error (p, repetition);
736 save_line_to_file (line);
739 close_output_file ();
741 /* Ensure that the line number specified is not 1 greater than
742 the number of lines in the file. */
743 if (no_more_lines ())
744 handle_line_error (p, repetition);
747 static void regexp_error (struct control *, uintmax_t, bool) ATTRIBUTE_NORETURN;
749 regexp_error (struct control *p, uintmax_t repetition, bool ignore)
751 fprintf (stderr, _("%s: %s: match not found"),
752 program_name, quote (global_argv[p->argnum]));
756 char buf[INT_BUFSIZE_BOUND (uintmax_t)];
757 fprintf (stderr, _(" on repetition %s\n"), umaxtostr (repetition, buf));
760 fprintf (stderr, "\n");
764 dump_rest_of_file ();
765 close_output_file ();
770 /* Read the input until a line matches the regexp in P, outputting
771 it unless P->IGNORE is true.
772 REPETITION is this repeat-count; 0 means the first time. */
775 process_regexp (struct control *p, uintmax_t repetition)
777 struct cstring *line; /* From input file. */
778 size_t line_len; /* To make "$" in regexps work. */
779 uintmax_t break_line; /* First line number of next file. */
780 bool ignore = p->ignore; /* If true, skip this section. */
784 create_output_file ();
786 /* If there is no offset for the regular expression, or
787 it is positive, then it is not necessary to buffer the lines. */
793 line = find_line (++current_line);
796 if (p->repeat_forever)
800 dump_rest_of_file ();
801 close_output_file ();
806 regexp_error (p, repetition, ignore);
808 line_len = line->len;
809 if (line->str[line_len - 1] == '\n')
811 ret = re_search (&p->re_compiled, line->str, line_len,
815 error (0, 0, _("error in regular expression search"));
820 line = remove_line ();
822 save_line_to_file (line);
830 /* Buffer the lines. */
833 line = find_line (++current_line);
836 if (p->repeat_forever)
840 dump_rest_of_file ();
841 close_output_file ();
846 regexp_error (p, repetition, ignore);
848 line_len = line->len;
849 if (line->str[line_len - 1] == '\n')
851 ret = re_search (&p->re_compiled, line->str, line_len,
855 error (0, 0, _("error in regular expression search"));
863 /* Account for any offset from this regexp. */
864 break_line = current_line + p->offset;
866 write_to_file (break_line, ignore, p->argnum);
869 close_output_file ();
872 current_line = break_line;
875 /* Split the input file according to the control records we have built. */
882 for (i = 0; i < control_used; i++)
885 if (controls[i].regexpr)
887 for (j = 0; (controls[i].repeat_forever
888 || j <= controls[i].repeat); j++)
889 process_regexp (&controls[i], j);
893 for (j = 0; (controls[i].repeat_forever
894 || j <= controls[i].repeat); j++)
895 process_line_count (&controls[i], j);
899 create_output_file ();
900 dump_rest_of_file ();
901 close_output_file ();
904 /* Return the name of output file number NUM.
906 This function is called from a signal handler, so it should invoke
907 only reentrant functions that are async-signal-safe. POSIX does
908 not guarantee this for the functions called below, but we don't
909 know of any hosts where this implementation isn't safe. */
912 make_filename (unsigned int num)
/* The name is built in the shared filename_space buffer: the prefix
   first, then NUM formatted immediately after it. */
914 strcpy (filename_space, prefix);
/* Here SUFFIX itself is the sprintf format applied to NUM.
   NOTE(review): the condition choosing between the two sprintf calls is
   not visible in this listing; presumably this one is used when a
   suffix format was supplied -- confirm. */
916 sprintf (filename_space + strlen (prefix), suffix, num);
/* Here NUM is printed zero-padded, with DIGITS as the field width. */
918 sprintf (filename_space + strlen (prefix), "%0*u", digits, num);
/* The returned pointer is filename_space, so the next call overwrites
   the name. */
919 return filename_space;
922 /* Create the next output file. */
925 create_output_file (void)
931 output_filename = make_filename (files_created);
933 /* Create the output file in a critical section, to avoid races. */
934 sigprocmask (SIG_BLOCK, &caught_signals, &oldset);
935 output_stream = fopen (output_filename, "w");
936 fopen_ok = (output_stream != NULL);
938 files_created += fopen_ok;
939 sigprocmask (SIG_SETMASK, &oldset, NULL);
943 error (0, fopen_errno, "%s", output_filename);
949 /* If requested, delete all the files we have created. This function
950 must be called only from critical sections. */
953 delete_all_files (bool in_signal_handler)
/* Output files are numbered 0 .. files_created - 1; regenerate each
   name and unlink it. */
960 for (i = 0; i < files_created; i++)
/* NAME points at the shared buffer that make_filename rewrites on
   every call. */
962 const char *name = make_filename (i);
/* An unlink failure is reported only when not running inside the
   signal handler. */
963 if (unlink (name) != 0 && !in_signal_handler)
964 error (0, errno, "%s", name);
970 /* Close the current output file and print the count
971 of characters in this file. */
974 close_output_file (void)
978 if (ferror (output_stream))
980 error (0, 0, _("write error for %s"), quote (output_filename));
981 output_stream = NULL;
984 if (fclose (output_stream) != 0)
986 error (0, errno, "%s", output_filename);
987 output_stream = NULL;
990 if (bytes_written == 0 && elide_empty_files)
996 /* Remove the output file in a critical section, to avoid races. */
997 sigprocmask (SIG_BLOCK, &caught_signals, &oldset);
998 unlink_ok = (unlink (output_filename) == 0);
999 unlink_errno = errno;
1000 files_created -= unlink_ok;
1001 sigprocmask (SIG_SETMASK, &oldset, NULL);
1004 error (0, unlink_errno, "%s", output_filename);
1008 if (!suppress_count)
1010 char buf[INT_BUFSIZE_BOUND (uintmax_t)];
1011 fprintf (stdout, "%s\n", umaxtostr (bytes_written, buf));
1014 output_stream = NULL;
1018 /* Save line LINE to the output file and
1019 increment the character count for the current file. */
1022 save_line_to_file (const struct cstring *line)
/* The line is written by explicit length (line->len bytes from
   line->str).  fwrite's return value is not examined here;
   close_output_file checks ferror on the stream. */
1024 fwrite (line->str, sizeof (char), line->len, output_stream);
/* The count is advanced by the full line length regardless of how
   many bytes fwrite reported. */
1025 bytes_written += line->len;
1028 /* Return a new, initialized control record. */
1030 static struct control *
1031 new_control_record (void)
1033 static size_t control_allocated = 0; /* Total space allocated. */
1036 if (control_used == control_allocated)
1037 controls = x2nrealloc (controls, &control_allocated, sizeof *controls);
1038 p = &controls[control_used++];
1041 p->repeat_forever = false;
1042 p->lines_required = 0;
1047 /* Check if there is a numeric offset after a regular expression.
1048 STR is the entire command line argument.
1049 P is the control record for this regular expression.
1050 NUM is the numeric part of STR. */
1053 check_for_offset (struct control *p, const char *str, const char *num)
/* Parse NUM as a base-10 signed integer straight into P->offset.
   Anything other than LONGINT_OK is fatal; the diagnostic quotes the
   whole argument STR rather than just NUM. */
1055 if (xstrtoimax (num, NULL, 10, &p->offset, "") != LONGINT_OK)
1056 error (EXIT_FAILURE, 0, _("%s: integer expected after delimiter"), str);
1059 /* Given that the first character of command line arg STR is '{',
1060 make sure that the rest of the string is a valid repeat count
1061 and store its value in P.
1062 ARGNUM is the ARGV index of STR. */
1065 parse_repeat_count (int argnum, struct control *p, char *str)
1070 end = str + strlen (str) - 1;
1072 error (EXIT_FAILURE, 0, _("%s: `}' is required in repeat count"), str);
1075 if (str+1 == end-1 && *(str+1) == '*')
1076 p->repeat_forever = true;
1079 if (xstrtoumax (str + 1, NULL, 10, &val, "") != LONGINT_OK)
1081 error (EXIT_FAILURE, 0,
1082 _("%s}: integer required between `{' and `}'"),
1083 global_argv[argnum]);
1091 /* Extract the regular expression from STR and check for a numeric offset.
1092 STR should start with the regexp delimiter character.
1093 Return a new control record for the regular expression.
1094 ARGNUM is the ARGV index of STR.
1095 Unless IGNORE is true, mark these lines for output. */
1097 static struct control *
1098 extract_regexp (int argnum, bool ignore, char *str)
1100 size_t len; /* Number of bytes in this regexp. */
1102 char *closing_delim;
1106 closing_delim = strrchr (str + 1, delim);
1107 if (closing_delim == NULL)
1108 error (EXIT_FAILURE, 0,
1109 _("%s: closing delimiter `%c' missing"), str, delim);
1111 len = closing_delim - str - 1;
1112 p = new_control_record ();
1116 p->regexpr = xmalloc (len + 1);
1117 strncpy (p->regexpr, str + 1, len);
1118 p->re_compiled.allocated = len * 2;
1119 p->re_compiled.buffer = xmalloc (p->re_compiled.allocated);
1120 p->re_compiled.fastmap = xmalloc (1 << CHAR_BIT);
1121 p->re_compiled.translate = NULL;
1122 err = re_compile_pattern (p->regexpr, len, &p->re_compiled);
1125 error (0, 0, _("%s: invalid regular expression: %s"), str, err);
1129 if (closing_delim[1])
1130 check_for_offset (p, str, closing_delim + 1);
1135 /* Extract the break patterns from args START through ARGC - 1 of ARGV.
1136 After each pattern, check if the next argument is a repeat count. */
1139 parse_patterns (int argc, int start, char **argv)
1141 int i; /* Index into ARGV. */
1142 struct control *p; /* New control record created. */
1144 static uintmax_t last_val = 0;
1146 for (i = start; i < argc; i++)
1148 if (*argv[i] == '/' || *argv[i] == '%')
1150 p = extract_regexp (i, *argv[i] == '%', argv[i]);
1154 p = new_control_record ();
1157 if (xstrtoumax (argv[i], NULL, 10, &val, "") != LONGINT_OK)
1158 error (EXIT_FAILURE, 0, _("%s: invalid pattern"), argv[i]);
1160 error (EXIT_FAILURE, 0,
1161 _("%s: line number must be greater than zero"),
1165 char buf[INT_BUFSIZE_BOUND (uintmax_t)];
1166 error (EXIT_FAILURE, 0,
1167 _("line number %s is smaller than preceding line number, %s"),
1168 quote (argv[i]), umaxtostr (last_val, buf));
1171 if (val == last_val)
1173 _("warning: line number %s is the same as preceding line number"),
1178 p->lines_required = val;
1181 if (i + 1 < argc && *argv[i + 1] == '{')
1183 /* We have a repeat count. */
1185 parse_repeat_count (i, p, argv[i]);
1191 get_format_flags (char **format_ptr)
1193 unsigned int count = 0;
1195 for (; **format_ptr; (*format_ptr)++)
1197 switch (**format_ptr)
1208 count |= 2; /* Allow for 0x prefix preceding an `x' conversion. */
1219 get_format_width (char **format_ptr)
1221 unsigned long int val = 0;
1223 if (ISDIGIT (**format_ptr)
1224 && (xstrtoul (*format_ptr, format_ptr, 10, &val, NULL) != LONGINT_OK
1226 error (EXIT_FAILURE, 0, _("invalid format width"));
1228 /* Allow for enough octal digits to represent the value of UINT_MAX,
1229 even if the field width is less than that. */
1230 return MAX (val, (sizeof (unsigned int) * CHAR_BIT + 2) / 3);
1234 get_format_prec (char **format_ptr)
1236 if (**format_ptr != '.')
1240 if (! ISDIGIT (**format_ptr))
1244 unsigned long int val;
1245 if (xstrtoul (*format_ptr, format_ptr, 10, &val, NULL) != LONGINT_OK
1247 error (EXIT_FAILURE, 0, _("invalid format precision"));
1253 get_format_conv_type (char **format_ptr)
1255 unsigned char ch = *(*format_ptr)++;
1268 error (EXIT_FAILURE, 0, _("missing conversion specifier in suffix"));
1273 error (EXIT_FAILURE, 0,
1274 _("invalid conversion specifier in suffix: %c"), ch);
1276 error (EXIT_FAILURE, 0,
1277 _("invalid conversion specifier in suffix: \\%.3o"), ch);
1282 max_out (char *format)
1284 size_t out_count = 0;
1285 bool percent = false;
1289 if (*format++ != '%')
1291 else if (*format == '%')
1299 error (EXIT_FAILURE, 0,
1300 _("too many %% conversion specifications in suffix"));
1302 out_count += get_format_flags (&format);
1304 size_t width = get_format_width (&format);
1305 size_t prec = get_format_prec (&format);
1307 out_count += MAX (width, prec);
1309 get_format_conv_type (&format);
1314 error (EXIT_FAILURE, 0,
1315 _("missing %% conversion specification in suffix"));
1321 main (int argc, char **argv)
1324 unsigned long int val;
1326 initialize_main (&argc, &argv);
1327 program_name = argv[0];
1328 setlocale (LC_ALL, "");
1329 bindtextdomain (PACKAGE, LOCALEDIR);
1330 textdomain (PACKAGE);
1332 atexit (close_stdout);
1337 suppress_count = false;
1338 remove_files = true;
1339 prefix = DEFAULT_PREFIX;
1341 while ((optc = getopt_long (argc, argv, "f:b:kn:sqz", longopts, NULL)) != -1)
1353 remove_files = false;
1357 if (xstrtoul (optarg, NULL, 10, &val, "") != LONGINT_OK
1359 error (EXIT_FAILURE, 0, _("%s: invalid number"), optarg);
1365 suppress_count = true;
1369 elide_empty_files = true;
1372 case_GETOPT_HELP_CHAR;
1374 case_GETOPT_VERSION_CHAR (PROGRAM_NAME, AUTHORS);
1377 usage (EXIT_FAILURE);
1380 if (argc - optind < 2)
1383 error (0, 0, _("missing operand"));
1385 error (0, 0, _("missing operand after %s"), quote (argv[argc - 1]));
1386 usage (EXIT_FAILURE);
1390 filename_space = xmalloc (strlen (prefix) + max_out (suffix) + 2);
1392 filename_space = xmalloc (strlen (prefix) + digits + 2);
1394 set_input_file (argv[optind++]);
1396 parse_patterns (argc, optind, argv);
1400 static int const sig[] = { SIGHUP, SIGINT, SIGQUIT, SIGTERM };
1401 enum { nsigs = sizeof sig / sizeof sig[0] };
1404 struct sigaction act;
1406 sigemptyset (&caught_signals);
1407 for (i = 0; i < nsigs; i++)
1409 sigaction (sig[i], NULL, &act);
1410 if (act.sa_handler != SIG_IGN)
1411 sigaddset (&caught_signals, sig[i]);
1414 act.sa_handler = interrupt_handler;
1415 act.sa_mask = caught_signals;
1418 for (i = 0; i < nsigs; i++)
1419 if (sigismember (&caught_signals, sig[i]))
1420 sigaction (sig[i], &act, NULL);
1422 for (i = 0; i < nsigs; i++)
1423 if (signal (sig[i], SIG_IGN) != SIG_IGN)
1425 signal (sig[i], interrupt_handler);
1426 siginterrupt (sig[i], 1);
1433 if (close (STDIN_FILENO) != 0)
1435 error (0, errno, _("read error"));
1439 exit (EXIT_SUCCESS);
1445 if (status != EXIT_SUCCESS)
1446 fprintf (stderr, _("Try `%s --help' for more information.\n"),
1451 Usage: %s [OPTION]... FILE PATTERN...\n\
1455 Output pieces of FILE separated by PATTERN(s) to files `xx00', `xx01', ...,\n\
1456 and output byte counts of each piece to standard output.\n\
1460 Mandatory arguments to long options are mandatory for short options too.\n\
1463 -b, --suffix-format=FORMAT use sprintf FORMAT instead of %02d\n\
1464 -f, --prefix=PREFIX use PREFIX instead of `xx'\n\
1465 -k, --keep-files do not remove output files on errors\n\
1468 -n, --digits=DIGITS use specified number of digits instead of 2\n\
1469 -s, --quiet, --silent do not print counts of output file sizes\n\
1470 -z, --elide-empty-files remove empty output files\n\
1472 fputs (HELP_OPTION_DESCRIPTION, stdout);
1473 fputs (VERSION_OPTION_DESCRIPTION, stdout);
1476 Read standard input if FILE is -. Each PATTERN may be:\n\
1480 INTEGER copy up to but not including specified line number\n\
1481 /REGEXP/[OFFSET] copy up to but not including a matching line\n\
1482 %REGEXP%[OFFSET] skip to, but not including a matching line\n\
1483 {INTEGER} repeat the previous pattern specified number of times\n\
1484 {*} repeat the previous pattern as many times as possible\n\
1486 A line OFFSET is a required `+' or `-' followed by a positive integer.\n\
1488 printf (_("\nReport bugs to <%s>.\n"), PACKAGE_BUGREPORT);