1 /* csplit - split a file into sections determined by context lines
2 Copyright (C) 91, 1995-2005 Free Software Foundation, Inc.
4 This program is free software; you can redistribute it and/or modify
5 it under the terms of the GNU General Public License as published by
6 the Free Software Foundation; either version 2, or (at your option)
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU General Public License for more details.
14 You should have received a copy of the GNU General Public License
15 along with this program; if not, write to the Free Software Foundation,
16 Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. */
18 /* Written by Stuart Kemp, cpsrk@groper.jcu.edu.au.
19 Modified by David MacKenzie, djm@gnu.ai.mit.edu. */
25 #include <sys/types.h>
35 #include "safe-read.h"
36 #include "stdio-safer.h"
39 /* Use SA_NOCLDSTOP as a proxy for whether the sigaction machinery is
42 # define SA_NOCLDSTOP 0
43 # define sigprocmask(How, Set, Oset) /* empty */
45 # if ! HAVE_SIGINTERRUPT
46 # define siginterrupt(sig, flag) /* empty */
50 /* The official name of this program (e.g., no `g' prefix). */
51 #define PROGRAM_NAME "csplit"
53 #define AUTHORS "Stuart Kemp", "David MacKenzie"
55 /* Increment size of area for control records. */
58 /* The default prefix for output file names. */
59 #define DEFAULT_PREFIX "xx"
61 /* A compiled pattern arg. */
64 char *regexpr; /* Non-compiled regular expression. */
65 struct re_pattern_buffer re_compiled; /* Compiled regular expression. */
66 intmax_t offset; /* Offset from regexp to split at. */
67 uintmax_t lines_required; /* Number of lines required. */
68 uintmax_t repeat; /* Repeat count. */
69 int argnum; /* ARGV index. */
70 bool repeat_forever; /* True if `*' used as a repeat count. */
71 bool ignore; /* If true, produce no output (for regexp). */
74 /* Initial size of data area in buffers. */
75 #define START_SIZE 8191
77 /* Increment size for data area. */
78 #define INCR_SIZE 2048
80 /* Number of lines kept in each node in line list. */
84 /* Some small values to test the algorithms. */
85 # define START_SIZE 200
90 /* A string with a length count. */
97 /* Pointers to the beginnings of lines in the buffer area.
98 These structures are linked together if needed. */
101 size_t used; /* Number of offsets used in this struct. */
102 size_t insert_index; /* Next offset to use when inserting line. */
103 size_t retrieve_index; /* Next index to use when retrieving line. */
104 struct cstring starts[CTRL_SIZE]; /* Lines in the data area. */
105 struct line *next; /* Next in linked list. */
108 /* The structure to hold the input lines.
109 Contains a pointer to the data area and a list containing
110 pointers to the individual lines. */
113 size_t bytes_alloc; /* Size of the buffer area. */
114 size_t bytes_used; /* Bytes used in the buffer area. */
115 uintmax_t start_line; /* First line number in this buffer. */
116 uintmax_t first_available; /* First line that can be retrieved. */
117 size_t num_lines; /* Number of complete lines in this buffer. */
118 char *buffer; /* Data area. */
119 struct line *line_start; /* Head of list of pointers to lines. */
120 struct line *curr_line; /* The line start record currently in use. */
121 struct buffer_record *next;
124 static void close_output_file (void);
125 static void create_output_file (void);
126 static void delete_all_files (bool);
127 static void save_line_to_file (const struct cstring *line);
128 void usage (int status);
130 /* The name this program was run with. */
133 /* Input file descriptor. */
134 static int input_desc;
136 /* Start of buffer list. */
137 static struct buffer_record *head = NULL;
139 /* Partially read line. */
140 static char *hold_area = NULL;
142 /* Number of bytes in `hold_area'. */
143 static size_t hold_count = 0;
145 /* Number of the last line in the buffers. */
146 static uintmax_t last_line_number = 0;
148 /* Number of the line currently being examined. */
149 static uintmax_t current_line = 0;
151 /* If true, we have read EOF. */
152 static bool have_read_eof = false;
154 /* Name of output files. */
155 static char * volatile filename_space = NULL;
157 /* Prefix part of output file names. */
158 static char * volatile prefix = NULL;
160 /* Suffix part of output file names. */
161 static char * volatile suffix = NULL;
163 /* Number of digits to use in output file names. */
164 static int volatile digits = 2;
166 /* Number of files created so far. */
167 static unsigned int volatile files_created = 0;
169 /* Number of bytes written to current file. */
170 static uintmax_t bytes_written;
172 /* Output file pointer. */
173 static FILE *output_stream = NULL;
175 /* Output file name. */
176 static char *output_filename = NULL;
178 /* Perhaps it would be cleaner to pass arg values instead of indexes. */
179 static char **global_argv;
181 /* If true, do not print the count of bytes in each output file. */
182 static bool suppress_count;
184 /* If true, remove output files on error. */
185 static bool volatile remove_files;
187 /* If true, remove all output files which have a zero length. */
188 static bool elide_empty_files;
190 /* The compiled pattern arguments, which determine how to split
192 static struct control *controls;
194 /* Number of elements in `controls'. */
195 static size_t control_used;
197 /* The set of signals that are caught. */
198 static sigset_t caught_signals;
200 static struct option const longopts[] =
202 {"digits", required_argument, NULL, 'n'},
203 {"quiet", no_argument, NULL, 'q'},
204 {"silent", no_argument, NULL, 's'},
205 {"keep-files", no_argument, NULL, 'k'},
206 {"elide-empty-files", no_argument, NULL, 'z'},
207 {"prefix", required_argument, NULL, 'f'},
208 {"suffix-format", required_argument, NULL, 'b'},
209 {GETOPT_HELP_OPTION_DECL},
210 {GETOPT_VERSION_OPTION_DECL},
214 /* Optionally remove files created so far; then exit.
215 Called when an error is detected. */
222 close_output_file ();
224 sigprocmask (SIG_BLOCK, &caught_signals, &oldset);
225 delete_all_files (false);
226 sigprocmask (SIG_SETMASK, &oldset, NULL);
229 static void cleanup_fatal (void) ATTRIBUTE_NORETURN;
240 error (0, 0, "%s", _("memory exhausted"));
245 interrupt_handler (int sig)
248 signal (sig, SIG_IGN);
250 delete_all_files (true);
252 signal (sig, SIG_DFL);
256 /* Keep track of NUM bytes of a partial line in buffer START.
257 These bytes will be retrieved later when another large buffer is read.
258 It is not necessary to create a new buffer for these bytes; instead,
259 we keep a pointer to the existing buffer. This buffer *is* on the
260 free list, and when the next buffer is obtained from this list
261 (even if it is this one), these bytes will be placed at the
262 start of the new buffer. */
265 save_to_hold_area (char *start, size_t num)
271 /* Read up to MAX_N_BYTES bytes from the input stream into DEST.
272 Return the number of bytes read. */
275 read_input (char *dest, size_t max_n_bytes)
279 if (max_n_bytes == 0)
282 bytes_read = safe_read (input_desc, dest, max_n_bytes);
285 have_read_eof = true;
287 if (bytes_read == SAFE_READ_ERROR)
289 error (0, errno, _("read error"));
296 /* Initialize existing line record P. */
299 clear_line_control (struct line *p)
303 p->retrieve_index = 0;
306 /* Return a new, initialized line record. */
309 new_line_control (void)
311 struct line *p = xmalloc (sizeof *p);
314 clear_line_control (p);
319 /* Record LINE_START, which is the address of the start of a line
320 of length LINE_LEN in the large buffer, in the lines buffer of B. */
323 keep_new_line (struct buffer_record *b, char *line_start, size_t line_len)
327 /* If there is no existing area to keep line info, get some. */
328 if (b->line_start == NULL)
329 b->line_start = b->curr_line = new_line_control ();
331 /* If existing area for lines is full, get more. */
332 if (b->curr_line->used == CTRL_SIZE)
334 b->curr_line->next = new_line_control ();
335 b->curr_line = b->curr_line->next;
340 /* Record the start of the line, and update counters. */
341 l->starts[l->insert_index].str = line_start;
342 l->starts[l->insert_index].len = line_len;
347 /* Scan the buffer in B for newline characters
348 and record the line start locations and lengths in B.
349 Return the number of lines found in this buffer.
351 There may be an incomplete line at the end of the buffer;
352 a pointer is kept to this area, which will be used when
353 the next buffer is filled. */
356 record_line_starts (struct buffer_record *b)
358 char *line_start; /* Start of current line. */
359 char *line_end; /* End of each line found. */
360 size_t bytes_left; /* Length of incomplete last line. */
361 size_t lines; /* Number of lines found. */
362 size_t line_length; /* Length of each line found. */
364 if (b->bytes_used == 0)
368 line_start = b->buffer;
369 bytes_left = b->bytes_used;
373 line_end = memchr (line_start, '\n', bytes_left);
374 if (line_end == NULL)
376 line_length = line_end - line_start + 1;
377 keep_new_line (b, line_start, line_length);
378 bytes_left -= line_length;
379 line_start = line_end + 1;
383 /* Check for an incomplete last line. */
388 keep_new_line (b, line_start, bytes_left);
392 save_to_hold_area (line_start, bytes_left);
395 b->num_lines = lines;
396 b->first_available = b->start_line = last_line_number + 1;
397 last_line_number += lines;
402 /* Return a new buffer with room to store SIZE bytes, plus
403 an extra byte for safety. */
405 static struct buffer_record *
406 create_new_buffer (size_t size)
408 struct buffer_record *new_buffer = xmalloc (sizeof *new_buffer);
410 new_buffer->buffer = xmalloc (size + 1);
412 new_buffer->bytes_alloc = size;
413 new_buffer->line_start = new_buffer->curr_line = NULL;
418 /* Return a new buffer of at least MIN_SIZE bytes. The buffer is newly
419 created: START_SIZE bytes, grown in INCR_SIZE steps to fit MIN_SIZE. */
421 static struct buffer_record *
422 get_new_buffer (size_t min_size)
424 struct buffer_record *new_buffer; /* Buffer to return. */
425 size_t alloc_size; /* Actual size that will be requested. */
427 alloc_size = START_SIZE;
428 if (alloc_size < min_size)
430 size_t s = min_size - alloc_size + INCR_SIZE - 1;
431 alloc_size += s - s % INCR_SIZE;
434 new_buffer = create_new_buffer (alloc_size);
436 new_buffer->num_lines = 0;
437 new_buffer->bytes_used = 0;
438 new_buffer->start_line = new_buffer->first_available = last_line_number + 1;
439 new_buffer->next = NULL;
445 free_buffer (struct buffer_record *buf)
450 /* Append buffer BUF to the linked list of buffers that contain
451 some data yet to be processed. */
454 save_buffer (struct buffer_record *buf)
456 struct buffer_record *p;
459 buf->curr_line = buf->line_start;
465 for (p = head; p->next; p = p->next)
471 /* Fill a buffer of input.
473 Set the initial size of the buffer to a default.
474 Fill the buffer (from the hold area and input stream)
475 and find the individual lines.
476 If no lines are found (the buffer is too small to hold the next line),
477 release the current buffer (whose contents would have been put in the
478 hold area) and repeat the process with another large buffer until at least
479 one entire line has been read.
481 Return true if a new buffer was obtained, otherwise false
482 (in which case end-of-file must have been encountered). */
487 struct buffer_record *b;
488 size_t bytes_wanted = START_SIZE; /* Minimum buffer size. */
489 size_t bytes_avail; /* Size of new buffer created. */
490 size_t lines_found; /* Number of lines in this new buffer. */
491 char *p; /* Place to load into buffer. */
496 /* We must make the buffer at least as large as the amount of data
497 in the partial line left over from the last call. */
498 if (bytes_wanted < hold_count)
499 bytes_wanted = hold_count;
503 b = get_new_buffer (bytes_wanted);
504 bytes_avail = b->bytes_alloc; /* Size of buffer returned. */
507 /* First check the `holding' area for a partial line. */
511 memcpy (p, hold_area, hold_count);
513 b->bytes_used += hold_count;
514 bytes_avail -= hold_count;
518 b->bytes_used += read_input (p, bytes_avail);
520 lines_found = record_line_starts (b);
521 bytes_wanted = b->bytes_alloc * 2;
525 while (!lines_found && !have_read_eof);
530 return lines_found != 0;
533 /* Return the line number of the first line that has not yet been retrieved. */
536 get_first_line_in_buffer (void)
538 if (head == NULL && !load_buffer ())
539 error (EXIT_FAILURE, errno, _("input disappeared"));
541 return head->first_available;
544 /* Return a pointer to the logical first line in the buffer and make the
545 next line the logical first line.
546 Return NULL if there is no more input. */
548 static struct cstring *
551 /* If non-NULL, this is the buffer for which the previous call
552 returned the final line. So now, presuming that line has been
553 processed, we can free the buffer and reset this pointer. */
554 static struct buffer_record *prev_buf = NULL;
556 struct cstring *line; /* Return value. */
557 struct line *l; /* For convenience. */
561 free_buffer (prev_buf);
565 if (head == NULL && !load_buffer ())
568 if (current_line < head->first_available)
569 current_line = head->first_available;
571 ++(head->first_available);
575 line = &l->starts[l->retrieve_index];
577 /* Advance index to next line. */
578 if (++l->retrieve_index == l->used)
580 /* Go on to the next line record. */
581 head->curr_line = l->next;
582 if (head->curr_line == NULL || head->curr_line->used == 0)
584 /* Go on to the next data block,
585 but first record the current one so we can free it
586 once the line we're returning has been processed. */
595 /* Search the buffers for line LINENUM, reading more input if necessary.
596 Return a pointer to the line, or NULL if it is not found in the file. */
598 static struct cstring *
599 find_line (uintmax_t linenum)
601 struct buffer_record *b;
603 if (head == NULL && !load_buffer ())
606 if (linenum < head->start_line)
611 if (linenum < b->start_line + b->num_lines)
613 /* The line is in this buffer. */
615 size_t offset; /* How far into the buffer the line is. */
618 offset = linenum - b->start_line;
619 /* Find the control record. */
620 while (offset >= CTRL_SIZE)
625 return &l->starts[offset];
627 if (b->next == NULL && !load_buffer ())
629 b = b->next; /* Try the next data block. */
633 /* Return true if no more lines are available for input. */
638 return find_line (current_line + 1) == NULL;
641 /* Set the name of the input file to NAME and open it. */
644 set_input_file (const char *name)
646 if (STREQ (name, "-"))
647 input_desc = STDIN_FILENO;
650 input_desc = open (name, O_RDONLY);
652 error (EXIT_FAILURE, errno, "%s", name);
656 /* Write all lines from the beginning of the buffer up to, but
657 not including, line LAST_LINE, to the current output file.
658 If IGNORE is true, do not output lines selected here.
659 ARGNUM is the index in ARGV of the current pattern. */
662 write_to_file (uintmax_t last_line, bool ignore, int argnum)
664 struct cstring *line;
665 uintmax_t first_line; /* First available input line. */
666 uintmax_t lines; /* Number of lines to output. */
669 first_line = get_first_line_in_buffer ();
671 if (first_line > last_line)
673 error (0, 0, _("%s: line number out of range"), global_argv[argnum]);
677 lines = last_line - first_line;
679 for (i = 0; i < lines; i++)
681 line = remove_line ();
684 error (0, 0, _("%s: line number out of range"), global_argv[argnum]);
688 save_line_to_file (line);
692 /* Output any lines left after all regexps have been processed. */
695 dump_rest_of_file (void)
697 struct cstring *line;
699 while ((line = remove_line ()) != NULL)
700 save_line_to_file (line);
703 /* Handle an attempt to read beyond EOF under the control of record P,
704 on iteration REPETITION if nonzero. */
706 static void handle_line_error (const struct control *, uintmax_t)
709 handle_line_error (const struct control *p, uintmax_t repetition)
711 char buf[INT_BUFSIZE_BOUND (uintmax_t)];
713 fprintf (stderr, _("%s: %s: line number out of range"),
714 program_name, quote (umaxtostr (p->lines_required, buf)));
716 fprintf (stderr, _(" on repetition %s\n"), umaxtostr (repetition, buf));
718 fprintf (stderr, "\n");
723 /* Determine the line number that marks the end of this file,
724 then get those lines and save them to the output file.
725 P is the control record.
726 REPETITION is the repetition number. */
729 process_line_count (const struct control *p, uintmax_t repetition)
732 uintmax_t last_line_to_save = p->lines_required * (repetition + 1);
733 struct cstring *line;
735 create_output_file ();
737 linenum = get_first_line_in_buffer ();
739 while (linenum++ < last_line_to_save)
741 line = remove_line ();
743 handle_line_error (p, repetition);
744 save_line_to_file (line);
747 close_output_file ();
749 /* Ensure that the line number specified is not 1 greater than
750 the number of lines in the file. */
751 if (no_more_lines ())
752 handle_line_error (p, repetition);
755 static void regexp_error (struct control *, uintmax_t, bool) ATTRIBUTE_NORETURN;
757 regexp_error (struct control *p, uintmax_t repetition, bool ignore)
759 fprintf (stderr, _("%s: %s: match not found"),
760 program_name, quote (global_argv[p->argnum]));
764 char buf[INT_BUFSIZE_BOUND (uintmax_t)];
765 fprintf (stderr, _(" on repetition %s\n"), umaxtostr (repetition, buf));
768 fprintf (stderr, "\n");
772 dump_rest_of_file ();
773 close_output_file ();
778 /* Read the input until a line matches the regexp in P, outputting
779 it unless P->IGNORE is true.
780 REPETITION is this repeat-count; 0 means the first time. */
783 process_regexp (struct control *p, uintmax_t repetition)
785 struct cstring *line; /* From input file. */
786 size_t line_len; /* To make "$" in regexps work. */
787 uintmax_t break_line; /* First line number of next file. */
788 bool ignore = p->ignore; /* If true, skip this section. */
792 create_output_file ();
794 /* If there is no offset for the regular expression, or
795 it is positive, then it is not necessary to buffer the lines. */
801 line = find_line (++current_line);
804 if (p->repeat_forever)
808 dump_rest_of_file ();
809 close_output_file ();
814 regexp_error (p, repetition, ignore);
816 line_len = line->len;
817 if (line->str[line_len - 1] == '\n')
819 ret = re_search (&p->re_compiled, line->str, line_len,
823 error (0, 0, _("error in regular expression search"));
828 line = remove_line ();
830 save_line_to_file (line);
838 /* Buffer the lines. */
841 line = find_line (++current_line);
844 if (p->repeat_forever)
848 dump_rest_of_file ();
849 close_output_file ();
854 regexp_error (p, repetition, ignore);
856 line_len = line->len;
857 if (line->str[line_len - 1] == '\n')
859 ret = re_search (&p->re_compiled, line->str, line_len,
863 error (0, 0, _("error in regular expression search"));
871 /* Account for any offset from this regexp. */
872 break_line = current_line + p->offset;
874 write_to_file (break_line, ignore, p->argnum);
877 close_output_file ();
880 current_line = break_line;
883 /* Split the input file according to the control records we have built. */
890 for (i = 0; i < control_used; i++)
893 if (controls[i].regexpr)
895 for (j = 0; (controls[i].repeat_forever
896 || j <= controls[i].repeat); j++)
897 process_regexp (&controls[i], j);
901 for (j = 0; (controls[i].repeat_forever
902 || j <= controls[i].repeat); j++)
903 process_line_count (&controls[i], j);
907 create_output_file ();
908 dump_rest_of_file ();
909 close_output_file ();
912 /* Return the name of output file number NUM.
914 This function is called from a signal handler, so it should invoke
915 only reentrant functions that are async-signal-safe. POSIX does
916 not guarantee this for the functions called below, but we don't
917 know of any hosts where this implementation isn't safe. */
920 make_filename (unsigned int num)
922 strcpy (filename_space, prefix);
924 sprintf (filename_space + strlen (prefix), suffix, num);
926 sprintf (filename_space + strlen (prefix), "%0*u", digits, num);
927 return filename_space;
930 /* Create the next output file. */
933 create_output_file (void)
939 output_filename = make_filename (files_created);
941 /* Create the output file in a critical section, to avoid races. */
942 sigprocmask (SIG_BLOCK, &caught_signals, &oldset);
943 output_stream = fopen_safer (output_filename, "w");
944 fopen_ok = (output_stream != NULL);
946 files_created += fopen_ok;
947 sigprocmask (SIG_SETMASK, &oldset, NULL);
951 error (0, fopen_errno, "%s", output_filename);
957 /* If requested, delete all the files we have created. This function
958 must be called only from critical sections. */
961 delete_all_files (bool in_signal_handler)
968 for (i = 0; i < files_created; i++)
970 const char *name = make_filename (i);
971 if (unlink (name) != 0 && !in_signal_handler)
972 error (0, errno, "%s", name);
978 /* Close the current output file and print the count
979 of characters in this file. */
982 close_output_file (void)
986 if (ferror (output_stream))
988 error (0, 0, _("write error for %s"), quote (output_filename));
989 output_stream = NULL;
992 if (fclose (output_stream) != 0)
994 error (0, errno, "%s", output_filename);
995 output_stream = NULL;
998 if (bytes_written == 0 && elide_empty_files)
1004 /* Remove the output file in a critical section, to avoid races. */
1005 sigprocmask (SIG_BLOCK, &caught_signals, &oldset);
1006 unlink_ok = (unlink (output_filename) == 0);
1007 unlink_errno = errno;
1008 files_created -= unlink_ok;
1009 sigprocmask (SIG_SETMASK, &oldset, NULL);
1012 error (0, unlink_errno, "%s", output_filename);
1016 if (!suppress_count)
1018 char buf[INT_BUFSIZE_BOUND (uintmax_t)];
1019 fprintf (stdout, "%s\n", umaxtostr (bytes_written, buf));
1022 output_stream = NULL;
1026 /* Save line LINE to the output file and
1027 increment the character count for the current file. */
1030 save_line_to_file (const struct cstring *line)
1032 fwrite (line->str, sizeof (char), line->len, output_stream);
1033 bytes_written += line->len;
1036 /* Return a new, initialized control record. */
1038 static struct control *
1039 new_control_record (void)
1041 static size_t control_allocated = 0; /* Total space allocated. */
1044 if (control_used == control_allocated)
1045 controls = x2nrealloc (controls, &control_allocated, sizeof *controls);
1046 p = &controls[control_used++];
1049 p->repeat_forever = false;
1050 p->lines_required = 0;
1055 /* Check if there is a numeric offset after a regular expression.
1056 STR is the entire command line argument.
1057 P is the control record for this regular expression.
1058 NUM is the numeric part of STR. */
1061 check_for_offset (struct control *p, const char *str, const char *num)
1063 if (xstrtoimax (num, NULL, 10, &p->offset, "") != LONGINT_OK)
1064 error (EXIT_FAILURE, 0, _("%s: integer expected after delimiter"), str);
1067 /* Given that the first character of command line arg STR is '{',
1068 make sure that the rest of the string is a valid repeat count
1069 and store its value in P.
1070 ARGNUM is the ARGV index of STR. */
1073 parse_repeat_count (int argnum, struct control *p, char *str)
1078 end = str + strlen (str) - 1;
1080 error (EXIT_FAILURE, 0, _("%s: `}' is required in repeat count"), str);
1083 if (str+1 == end-1 && *(str+1) == '*')
1084 p->repeat_forever = true;
1087 if (xstrtoumax (str + 1, NULL, 10, &val, "") != LONGINT_OK)
1089 error (EXIT_FAILURE, 0,
1090 _("%s}: integer required between `{' and `}'"),
1091 global_argv[argnum]);
1099 /* Extract the regular expression from STR and check for a numeric offset.
1100 STR should start with the regexp delimiter character.
1101 Return a new control record for the regular expression.
1102 ARGNUM is the ARGV index of STR.
1103 Unless IGNORE is true, mark these lines for output. */
1105 static struct control *
1106 extract_regexp (int argnum, bool ignore, char *str)
1108 size_t len; /* Number of bytes in this regexp. */
1110 char *closing_delim;
1114 closing_delim = strrchr (str + 1, delim);
1115 if (closing_delim == NULL)
1116 error (EXIT_FAILURE, 0,
1117 _("%s: closing delimiter `%c' missing"), str, delim);
1119 len = closing_delim - str - 1;
1120 p = new_control_record ();
1124 p->regexpr = xmalloc (len + 1);
1125 strncpy (p->regexpr, str + 1, len);
1126 p->re_compiled.allocated = len * 2;
1127 p->re_compiled.buffer = xmalloc (p->re_compiled.allocated);
1128 p->re_compiled.fastmap = xmalloc (1 << CHAR_BIT);
1129 p->re_compiled.translate = NULL;
1130 err = re_compile_pattern (p->regexpr, len, &p->re_compiled);
1133 error (0, 0, _("%s: invalid regular expression: %s"), str, err);
1137 if (closing_delim[1])
1138 check_for_offset (p, str, closing_delim + 1);
1143 /* Extract the break patterns from args START through ARGC - 1 of ARGV.
1144 After each pattern, check if the next argument is a repeat count. */
1147 parse_patterns (int argc, int start, char **argv)
1149 int i; /* Index into ARGV. */
1150 struct control *p; /* New control record created. */
1152 static uintmax_t last_val = 0;
1154 for (i = start; i < argc; i++)
1156 if (*argv[i] == '/' || *argv[i] == '%')
1158 p = extract_regexp (i, *argv[i] == '%', argv[i]);
1162 p = new_control_record ();
1165 if (xstrtoumax (argv[i], NULL, 10, &val, "") != LONGINT_OK)
1166 error (EXIT_FAILURE, 0, _("%s: invalid pattern"), argv[i]);
1168 error (EXIT_FAILURE, 0,
1169 _("%s: line number must be greater than zero"),
1173 char buf[INT_BUFSIZE_BOUND (uintmax_t)];
1174 error (EXIT_FAILURE, 0,
1175 _("line number %s is smaller than preceding line number, %s"),
1176 quote (argv[i]), umaxtostr (last_val, buf));
1179 if (val == last_val)
1181 _("warning: line number %s is the same as preceding line number"),
1186 p->lines_required = val;
1189 if (i + 1 < argc && *argv[i + 1] == '{')
1191 /* We have a repeat count. */
1193 parse_repeat_count (i, p, argv[i]);
1199 get_format_flags (char **format_ptr)
1201 unsigned int count = 0;
1203 for (; **format_ptr; (*format_ptr)++)
1205 switch (**format_ptr)
1216 count |= 2; /* Allow for 0x prefix preceding an `x' conversion. */
1227 get_format_width (char **format_ptr)
1229 unsigned long int val = 0;
1231 if (ISDIGIT (**format_ptr)
1232 && (xstrtoul (*format_ptr, format_ptr, 10, &val, NULL) != LONGINT_OK
1234 error (EXIT_FAILURE, 0, _("invalid format width"));
1236 /* Allow for enough octal digits to represent the value of UINT_MAX,
1237 even if the field width is less than that. */
1238 return MAX (val, (sizeof (unsigned int) * CHAR_BIT + 2) / 3);
1242 get_format_prec (char **format_ptr)
1244 if (**format_ptr != '.')
1248 if (! ISDIGIT (**format_ptr))
1252 unsigned long int val;
1253 if (xstrtoul (*format_ptr, format_ptr, 10, &val, NULL) != LONGINT_OK
1255 error (EXIT_FAILURE, 0, _("invalid format precision"));
1261 get_format_conv_type (char **format_ptr)
1263 unsigned char ch = *(*format_ptr)++;
1276 error (EXIT_FAILURE, 0, _("missing conversion specifier in suffix"));
1281 error (EXIT_FAILURE, 0,
1282 _("invalid conversion specifier in suffix: %c"), ch);
1284 error (EXIT_FAILURE, 0,
1285 _("invalid conversion specifier in suffix: \\%.3o"), ch);
1290 max_out (char *format)
1292 size_t out_count = 0;
1293 bool percent = false;
1297 if (*format++ != '%')
1299 else if (*format == '%')
1307 error (EXIT_FAILURE, 0,
1308 _("too many %% conversion specifications in suffix"));
1310 out_count += get_format_flags (&format);
1312 size_t width = get_format_width (&format);
1313 size_t prec = get_format_prec (&format);
1315 out_count += MAX (width, prec);
1317 get_format_conv_type (&format);
1322 error (EXIT_FAILURE, 0,
1323 _("missing %% conversion specification in suffix"));
1329 main (int argc, char **argv)
1332 unsigned long int val;
1334 initialize_main (&argc, &argv);
1335 program_name = argv[0];
1336 setlocale (LC_ALL, "");
1337 bindtextdomain (PACKAGE, LOCALEDIR);
1338 textdomain (PACKAGE);
1340 atexit (close_stdout);
1345 suppress_count = false;
1346 remove_files = true;
1347 prefix = DEFAULT_PREFIX;
1349 while ((optc = getopt_long (argc, argv, "f:b:kn:sqz", longopts, NULL)) != -1)
1361 remove_files = false;
1365 if (xstrtoul (optarg, NULL, 10, &val, "") != LONGINT_OK
1367 error (EXIT_FAILURE, 0, _("%s: invalid number"), optarg);
1373 suppress_count = true;
1377 elide_empty_files = true;
1380 case_GETOPT_HELP_CHAR;
1382 case_GETOPT_VERSION_CHAR (PROGRAM_NAME, AUTHORS);
1385 usage (EXIT_FAILURE);
1388 if (argc - optind < 2)
1391 error (0, 0, _("missing operand"));
1393 error (0, 0, _("missing operand after %s"), quote (argv[argc - 1]));
1394 usage (EXIT_FAILURE);
1398 filename_space = xmalloc (strlen (prefix) + max_out (suffix) + 2);
1400 filename_space = xmalloc (strlen (prefix) + digits + 2);
1402 set_input_file (argv[optind++]);
1404 parse_patterns (argc, optind, argv);
1408 static int const sig[] = { SIGHUP, SIGINT, SIGQUIT, SIGTERM };
1409 enum { nsigs = sizeof sig / sizeof sig[0] };
1412 struct sigaction act;
1414 sigemptyset (&caught_signals);
1415 for (i = 0; i < nsigs; i++)
1417 sigaction (sig[i], NULL, &act);
1418 if (act.sa_handler != SIG_IGN)
1419 sigaddset (&caught_signals, sig[i]);
1422 act.sa_handler = interrupt_handler;
1423 act.sa_mask = caught_signals;
1426 for (i = 0; i < nsigs; i++)
1427 if (sigismember (&caught_signals, sig[i]))
1428 sigaction (sig[i], &act, NULL);
1430 for (i = 0; i < nsigs; i++)
1431 if (signal (sig[i], SIG_IGN) != SIG_IGN)
1433 signal (sig[i], interrupt_handler);
1434 siginterrupt (sig[i], 1);
1441 if (close (input_desc) < 0)
1443 error (0, errno, _("read error"));
1447 exit (EXIT_SUCCESS);
1453 if (status != EXIT_SUCCESS)
1454 fprintf (stderr, _("Try `%s --help' for more information.\n"),
1459 Usage: %s [OPTION]... FILE PATTERN...\n\
1463 Output pieces of FILE separated by PATTERN(s) to files `xx00', `xx01', ...,\n\
1464 and output byte counts of each piece to standard output.\n\
1468 Mandatory arguments to long options are mandatory for short options too.\n\
1471 -b, --suffix-format=FORMAT use sprintf FORMAT instead of %02d\n\
1472 -f, --prefix=PREFIX use PREFIX instead of `xx'\n\
1473 -k, --keep-files do not remove output files on errors\n\
1476 -n, --digits=DIGITS use specified number of digits instead of 2\n\
1477 -s, --quiet, --silent do not print counts of output file sizes\n\
1478 -z, --elide-empty-files remove empty output files\n\
1480 fputs (HELP_OPTION_DESCRIPTION, stdout);
1481 fputs (VERSION_OPTION_DESCRIPTION, stdout);
1484 Read standard input if FILE is -. Each PATTERN may be:\n\
1488 INTEGER copy up to but not including specified line number\n\
1489 /REGEXP/[OFFSET] copy up to but not including a matching line\n\
1490 %REGEXP%[OFFSET] skip to, but not including a matching line\n\
1491 {INTEGER} repeat the previous pattern specified number of times\n\
1492 {*} repeat the previous pattern as many times as possible\n\
1494 A line OFFSET is a required `+' or `-' followed by a positive integer.\n\
1496 printf (_("\nReport bugs to <%s>.\n"), PACKAGE_BUGREPORT);