1 /* csplit - split a file into sections determined by context lines
2 Copyright (C) 91, 1995-2008 Free Software Foundation, Inc.
4 This program is free software: you can redistribute it and/or modify
5 it under the terms of the GNU General Public License as published by
6 the Free Software Foundation, either version 3 of the License, or
7 (at your option) any later version.
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU General Public License for more details.
14 You should have received a copy of the GNU General Public License
15 along with this program. If not, see <http://www.gnu.org/licenses/>. */
17 /* Written by Stuart Kemp, cpsrk@groper.jcu.edu.au.
18 Modified by David MacKenzie, djm@gnu.ai.mit.edu. */
23 #include <sys/types.h>
31 #include "fd-reopen.h"
34 #include "safe-read.h"
38 /* Use SA_NOCLDSTOP as a proxy for whether the sigaction machinery is available. */
41 # define SA_NOCLDSTOP 0
42 # define sigprocmask(How, Set, Oset) /* empty */
44 # if ! HAVE_SIGINTERRUPT
45 # define siginterrupt(sig, flag) /* empty */
49 /* The official name of this program (e.g., no `g' prefix). */
50 #define PROGRAM_NAME "csplit"
53 proper_name ("Stuart Kemp"), \
54 proper_name ("David MacKenzie")
56 /* Increment size of area for control records. */
59 /* The default prefix for output file names. */
60 #define DEFAULT_PREFIX "xx"
62 /* A compiled pattern arg. */
65 intmax_t offset; /* Offset from regexp to split at. */
66 uintmax_t lines_required; /* Number of lines required. */
67 uintmax_t repeat; /* Repeat count. */
68 int argnum; /* ARGV index. */
69 bool repeat_forever; /* True if `*' used as a repeat count. */
70 bool ignore; /* If true, produce no output (for regexp). */
71 bool regexpr; /* True if regular expression was used. */
72 struct re_pattern_buffer re_compiled; /* Compiled regular expression. */
75 /* Initial size of data area in buffers. */
76 #define START_SIZE 8191
78 /* Increment size for data area. */
79 #define INCR_SIZE 2048
81 /* Number of lines kept in each node in line list. */
85 /* Some small values to test the algorithms. */
86 # define START_SIZE 200
91 /* A string with a length count. */
98 /* Pointers to the beginnings of lines in the buffer area.
99 These structures are linked together if needed. */
102 size_t used; /* Number of offsets used in this struct. */
103 size_t insert_index; /* Next offset to use when inserting line. */
104 size_t retrieve_index; /* Next index to use when retrieving line. */
105 struct cstring starts[CTRL_SIZE]; /* Lines in the data area. */
106 struct line *next; /* Next in linked list. */
109 /* The structure to hold the input lines.
110 Contains a pointer to the data area and a list containing
111 pointers to the individual lines. */
114 size_t bytes_alloc; /* Size of the buffer area. */
115 size_t bytes_used; /* Bytes used in the buffer area. */
116 uintmax_t start_line; /* First line number in this buffer. */
117 uintmax_t first_available; /* First line that can be retrieved. */
118 size_t num_lines; /* Number of complete lines in this buffer. */
119 char *buffer; /* Data area. */
120 struct line *line_start; /* Head of list of pointers to lines. */
121 struct line *curr_line; /* The line start record currently in use. */
122 struct buffer_record *next;
125 static void close_output_file (void);
126 static void create_output_file (void);
127 static void delete_all_files (bool);
128 static void save_line_to_file (const struct cstring *line);
129 void usage (int status);
131 /* The name this program was run with. */
132 char const *program_name;
134 /* Start of buffer list. */
135 static struct buffer_record *head = NULL;
137 /* Partially read line. */
138 static char *hold_area = NULL;
140 /* Number of bytes in `hold_area'. */
141 static size_t hold_count = 0;
143 /* Number of the last line in the buffers. */
144 static uintmax_t last_line_number = 0;
146 /* Number of the line currently being examined. */
147 static uintmax_t current_line = 0;
149 /* If true, we have read EOF. */
150 static bool have_read_eof = false;
152 /* Name of output files. */
153 static char *volatile filename_space = NULL;
155 /* Prefix part of output file names. */
156 static char const *volatile prefix = NULL;
158 /* Suffix part of output file names. */
159 static char *volatile suffix = NULL;
161 /* Number of digits to use in output file names. */
162 static int volatile digits = 2;
164 /* Number of files created so far. */
165 static unsigned int volatile files_created = 0;
167 /* Number of bytes written to current file. */
168 static uintmax_t bytes_written;
170 /* Output file pointer. */
171 static FILE *output_stream = NULL;
173 /* Output file name. */
174 static char *output_filename = NULL;
176 /* Perhaps it would be cleaner to pass arg values instead of indexes. */
177 static char **global_argv;
179 /* If true, do not print the count of bytes in each output file. */
180 static bool suppress_count;
182 /* If true, remove output files on error. */
183 static bool volatile remove_files;
185 /* If true, remove all output files which have a zero length. */
186 static bool elide_empty_files;
188 /* The compiled pattern arguments, which determine how to split the input file. */
190 static struct control *controls;
192 /* Number of elements in `controls'. */
193 static size_t control_used;
195 /* The set of signals that are caught. */
196 static sigset_t caught_signals;
198 static struct option const longopts[] =
200 {"digits", required_argument, NULL, 'n'},
201 {"quiet", no_argument, NULL, 'q'},
202 {"silent", no_argument, NULL, 's'},
203 {"keep-files", no_argument, NULL, 'k'},
204 {"elide-empty-files", no_argument, NULL, 'z'},
205 {"prefix", required_argument, NULL, 'f'},
206 {"suffix-format", required_argument, NULL, 'b'},
207 {GETOPT_HELP_OPTION_DECL},
208 {GETOPT_VERSION_OPTION_DECL},
212 /* Optionally remove files created so far; then exit.
213 Called when an error detected. */
220 close_output_file ();
222 sigprocmask (SIG_BLOCK, &caught_signals, &oldset);
223 delete_all_files (false);
224 sigprocmask (SIG_SETMASK, &oldset, NULL);
227 static void cleanup_fatal (void) ATTRIBUTE_NORETURN;
238 error (0, 0, "%s", _("memory exhausted"));
243 interrupt_handler (int sig)
246 signal (sig, SIG_IGN);
248 delete_all_files (true);
250 signal (sig, SIG_DFL);
254 /* Keep track of NUM bytes of a partial line in buffer START.
255 These bytes will be retrieved later when another large buffer is read. */
258 save_to_hold_area (char *start, size_t num)
265 /* Read up to MAX_N_BYTES bytes from the input stream into DEST.
266 Return the number of bytes read. */
269 read_input (char *dest, size_t max_n_bytes)
273 if (max_n_bytes == 0)
276 bytes_read = safe_read (STDIN_FILENO, dest, max_n_bytes);
279 have_read_eof = true;
281 if (bytes_read == SAFE_READ_ERROR)
283 error (0, errno, _("read error"));
290 /* Initialize existing line record P. */
293 clear_line_control (struct line *p)
297 p->retrieve_index = 0;
300 /* Return a new, initialized line record. */
303 new_line_control (void)
305 struct line *p = xmalloc (sizeof *p);
308 clear_line_control (p);
313 /* Record LINE_START, which is the address of the start of a line
314 of length LINE_LEN in the large buffer, in the lines buffer of B. */
317 keep_new_line (struct buffer_record *b, char *line_start, size_t line_len)
321 /* If there is no existing area to keep line info, get some. */
322 if (b->line_start == NULL)
323 b->line_start = b->curr_line = new_line_control ();
325 /* If existing area for lines is full, get more. */
326 if (b->curr_line->used == CTRL_SIZE)
328 b->curr_line->next = new_line_control ();
329 b->curr_line = b->curr_line->next;
334 /* Record the start of the line, and update counters. */
335 l->starts[l->insert_index].str = line_start;
336 l->starts[l->insert_index].len = line_len;
341 /* Scan the buffer in B for newline characters
342 and record the line start locations and lengths in B.
343 Return the number of lines found in this buffer.
345 There may be an incomplete line at the end of the buffer;
346 a pointer is kept to this area, which will be used when
347 the next buffer is filled. */
350 record_line_starts (struct buffer_record *b)
352 char *line_start; /* Start of current line. */
353 char *line_end; /* End of each line found. */
354 size_t bytes_left; /* Length of incomplete last line. */
355 size_t lines; /* Number of lines found. */
356 size_t line_length; /* Length of each line found. */
358 if (b->bytes_used == 0)
362 line_start = b->buffer;
363 bytes_left = b->bytes_used;
367 line_end = memchr (line_start, '\n', bytes_left);
368 if (line_end == NULL)
370 line_length = line_end - line_start + 1;
371 keep_new_line (b, line_start, line_length);
372 bytes_left -= line_length;
373 line_start = line_end + 1;
377 /* Check for an incomplete last line. */
382 keep_new_line (b, line_start, bytes_left);
386 save_to_hold_area (xmemdup (line_start, bytes_left), bytes_left);
389 b->num_lines = lines;
390 b->first_available = b->start_line = last_line_number + 1;
391 last_line_number += lines;
396 /* Return a new buffer with room to store SIZE bytes, plus
397 an extra byte for safety. */
399 static struct buffer_record *
400 create_new_buffer (size_t size)
402 struct buffer_record *new_buffer = xmalloc (sizeof *new_buffer);
404 new_buffer->buffer = xmalloc (size + 1);
406 new_buffer->bytes_alloc = size;
407 new_buffer->line_start = new_buffer->curr_line = NULL;
412 /* Return a new buffer of at least MIN_SIZE bytes. The allocation is START_SIZE,
413 enlarged by the smallest multiple of INCR_SIZE that makes MIN_SIZE fit. */
415 static struct buffer_record *
416 get_new_buffer (size_t min_size)
418 struct buffer_record *new_buffer; /* Buffer to return. */
419 size_t alloc_size; /* Actual size that will be requested. */
421 alloc_size = START_SIZE;
422 if (alloc_size < min_size)
424 size_t s = min_size - alloc_size + INCR_SIZE - 1;
425 alloc_size += s - s % INCR_SIZE;
428 new_buffer = create_new_buffer (alloc_size);
430 new_buffer->num_lines = 0;
431 new_buffer->bytes_used = 0;
432 new_buffer->start_line = new_buffer->first_available = last_line_number + 1;
433 new_buffer->next = NULL;
439 free_buffer (struct buffer_record *buf)
445 /* Append buffer BUF to the linked list of buffers that contain
446 some data yet to be processed. */
449 save_buffer (struct buffer_record *buf)
451 struct buffer_record *p;
454 buf->curr_line = buf->line_start;
460 for (p = head; p->next; p = p->next)
466 /* Fill a buffer of input.
468 Set the initial size of the buffer to a default.
469 Fill the buffer (from the hold area and input stream)
470 and find the individual lines.
471 If no lines are found (the buffer is too small to hold the next line),
472 release the current buffer (whose contents would have been put in the
473 hold area) and repeat the process with another large buffer until at least
474 one entire line has been read.
476 Return true if a new buffer was obtained, otherwise false
477 (in which case end-of-file must have been encountered). */
482 struct buffer_record *b;
483 size_t bytes_wanted = START_SIZE; /* Minimum buffer size. */
484 size_t bytes_avail; /* Size of new buffer created. */
485 size_t lines_found; /* Number of lines in this new buffer. */
486 char *p; /* Place to load into buffer. */
491 /* We must make the buffer at least as large as the amount of data
492 in the partial line left over from the last call. */
493 if (bytes_wanted < hold_count)
494 bytes_wanted = hold_count;
498 b = get_new_buffer (bytes_wanted);
499 bytes_avail = b->bytes_alloc; /* Size of buffer returned. */
502 /* First check the `holding' area for a partial line. */
505 memcpy (p, hold_area, hold_count);
507 b->bytes_used += hold_count;
508 bytes_avail -= hold_count;
512 b->bytes_used += read_input (p, bytes_avail);
514 lines_found = record_line_starts (b);
518 if (lines_found || have_read_eof)
521 if (xalloc_oversized (2, b->bytes_alloc))
523 bytes_wanted = 2 * b->bytes_alloc;
533 return lines_found != 0;
536 /* Return the line number of the first line that has not yet been retrieved. */
539 get_first_line_in_buffer (void)
541 if (head == NULL && !load_buffer ())
542 error (EXIT_FAILURE, errno, _("input disappeared"));
544 return head->first_available;
547 /* Return a pointer to the logical first line in the buffer and make the
548 next line the logical first line.
549 Return NULL if there is no more input. */
551 static struct cstring *
554 /* If non-NULL, this is the buffer for which the previous call
555 returned the final line. So now, presuming that line has been
556 processed, we can free the buffer and reset this pointer. */
557 static struct buffer_record *prev_buf = NULL;
559 struct cstring *line; /* Return value. */
560 struct line *l; /* For convenience. */
564 free_buffer (prev_buf);
568 if (head == NULL && !load_buffer ())
571 if (current_line < head->first_available)
572 current_line = head->first_available;
574 ++(head->first_available);
578 line = &l->starts[l->retrieve_index];
580 /* Advance index to next line. */
581 if (++l->retrieve_index == l->used)
583 /* Go on to the next line record. */
584 head->curr_line = l->next;
585 if (head->curr_line == NULL || head->curr_line->used == 0)
587 /* Go on to the next data block,
588 but first record the current one so we can free it
589 once the line we're returning has been processed. */
598 /* Search the buffers for line LINENUM, reading more input if necessary.
599 Return a pointer to the line, or NULL if it is not found in the file. */
601 static struct cstring *
602 find_line (uintmax_t linenum)
604 struct buffer_record *b;
606 if (head == NULL && !load_buffer ())
609 if (linenum < head->start_line)
614 if (linenum < b->start_line + b->num_lines)
616 /* The line is in this buffer. */
618 size_t offset; /* How far into the buffer the line is. */
621 offset = linenum - b->start_line;
622 /* Find the control record. */
623 while (offset >= CTRL_SIZE)
628 return &l->starts[offset];
630 if (b->next == NULL && !load_buffer ())
632 b = b->next; /* Try the next data block. */
636 /* Return true if no more lines are available for input. */
641 return find_line (current_line + 1) == NULL;
644 /* Open NAME as standard input. */
647 set_input_file (const char *name)
649 if (! STREQ (name, "-") && fd_reopen (STDIN_FILENO, name, O_RDONLY, 0) < 0)
650 error (EXIT_FAILURE, errno, _("cannot open %s for reading"), quote (name));
653 /* Write all lines from the beginning of the buffer up to, but
654 not including, line LAST_LINE, to the current output file.
655 If IGNORE is true, do not output lines selected here.
656 ARGNUM is the index in ARGV of the current pattern. */
659 write_to_file (uintmax_t last_line, bool ignore, int argnum)
661 struct cstring *line;
662 uintmax_t first_line; /* First available input line. */
663 uintmax_t lines; /* Number of lines to output. */
666 first_line = get_first_line_in_buffer ();
668 if (first_line > last_line)
670 error (0, 0, _("%s: line number out of range"), global_argv[argnum]);
674 lines = last_line - first_line;
676 for (i = 0; i < lines; i++)
678 line = remove_line ();
681 error (0, 0, _("%s: line number out of range"), global_argv[argnum]);
685 save_line_to_file (line);
689 /* Output any lines left after all regexps have been processed. */
692 dump_rest_of_file (void)
694 struct cstring *line;
696 while ((line = remove_line ()) != NULL)
697 save_line_to_file (line);
700 /* Handle an attempt to read beyond EOF under the control of record P,
701 on iteration REPETITION if nonzero. */
703 static void handle_line_error (const struct control *, uintmax_t)
706 handle_line_error (const struct control *p, uintmax_t repetition)
708 char buf[INT_BUFSIZE_BOUND (uintmax_t)];
710 fprintf (stderr, _("%s: %s: line number out of range"),
711 program_name, quote (umaxtostr (p->lines_required, buf)));
713 fprintf (stderr, _(" on repetition %s\n"), umaxtostr (repetition, buf));
715 fprintf (stderr, "\n");
720 /* Determine the line number that marks the end of this file,
721 then get those lines and save them to the output file.
722 P is the control record.
723 REPETITION is the repetition number. */
726 process_line_count (const struct control *p, uintmax_t repetition)
729 uintmax_t last_line_to_save = p->lines_required * (repetition + 1);
730 struct cstring *line;
732 create_output_file ();
734 linenum = get_first_line_in_buffer ();
736 while (linenum++ < last_line_to_save)
738 line = remove_line ();
740 handle_line_error (p, repetition);
741 save_line_to_file (line);
744 close_output_file ();
746 /* Ensure that the line number specified is not 1 greater than
747 the number of lines in the file. */
748 if (no_more_lines ())
749 handle_line_error (p, repetition);
752 static void regexp_error (struct control *, uintmax_t, bool) ATTRIBUTE_NORETURN;
754 regexp_error (struct control *p, uintmax_t repetition, bool ignore)
756 fprintf (stderr, _("%s: %s: match not found"),
757 program_name, quote (global_argv[p->argnum]));
761 char buf[INT_BUFSIZE_BOUND (uintmax_t)];
762 fprintf (stderr, _(" on repetition %s\n"), umaxtostr (repetition, buf));
765 fprintf (stderr, "\n");
769 dump_rest_of_file ();
770 close_output_file ();
775 /* Read the input until a line matches the regexp in P, outputting
776 it unless P->IGNORE is true.
777 REPETITION is this repeat-count; 0 means the first time. */
780 process_regexp (struct control *p, uintmax_t repetition)
782 struct cstring *line; /* From input file. */
783 size_t line_len; /* To make "$" in regexps work. */
784 uintmax_t break_line; /* First line number of next file. */
785 bool ignore = p->ignore; /* If true, skip this section. */
789 create_output_file ();
791 /* If there is no offset for the regular expression, or
792 it is positive, then it is not necessary to buffer the lines. */
798 line = find_line (++current_line);
801 if (p->repeat_forever)
805 dump_rest_of_file ();
806 close_output_file ();
811 regexp_error (p, repetition, ignore);
813 line_len = line->len;
814 if (line->str[line_len - 1] == '\n')
816 ret = re_search (&p->re_compiled, line->str, line_len,
820 error (0, 0, _("error in regular expression search"));
825 line = remove_line ();
827 save_line_to_file (line);
835 /* Buffer the lines. */
838 line = find_line (++current_line);
841 if (p->repeat_forever)
845 dump_rest_of_file ();
846 close_output_file ();
851 regexp_error (p, repetition, ignore);
853 line_len = line->len;
854 if (line->str[line_len - 1] == '\n')
856 ret = re_search (&p->re_compiled, line->str, line_len,
860 error (0, 0, _("error in regular expression search"));
868 /* Account for any offset from this regexp. */
869 break_line = current_line + p->offset;
871 write_to_file (break_line, ignore, p->argnum);
874 close_output_file ();
877 current_line = break_line;
880 /* Split the input file according to the control records we have built. */
887 for (i = 0; i < control_used; i++)
890 if (controls[i].regexpr)
892 for (j = 0; (controls[i].repeat_forever
893 || j <= controls[i].repeat); j++)
894 process_regexp (&controls[i], j);
898 for (j = 0; (controls[i].repeat_forever
899 || j <= controls[i].repeat); j++)
900 process_line_count (&controls[i], j);
904 create_output_file ();
905 dump_rest_of_file ();
906 close_output_file ();
909 /* Return the name of output file number NUM.
911 This function is called from a signal handler, so it should invoke
912 only reentrant functions that are async-signal-safe. POSIX does
913 not guarantee this for the functions called below, but we don't
914 know of any hosts where this implementation isn't safe. */
917 make_filename (unsigned int num)
919 strcpy (filename_space, prefix);
921 sprintf (filename_space + strlen (prefix), suffix, num);
923 sprintf (filename_space + strlen (prefix), "%0*u", digits, num);
924 return filename_space;
927 /* Create the next output file. */
930 create_output_file (void)
936 output_filename = make_filename (files_created);
938 /* Create the output file in a critical section, to avoid races. */
939 sigprocmask (SIG_BLOCK, &caught_signals, &oldset);
940 output_stream = fopen (output_filename, "w");
941 fopen_ok = (output_stream != NULL);
943 files_created += fopen_ok;
944 sigprocmask (SIG_SETMASK, &oldset, NULL);
948 error (0, fopen_errno, "%s", output_filename);
954 /* If requested, delete all the files we have created. This function
955 must be called only from critical sections. */
958 delete_all_files (bool in_signal_handler)
965 for (i = 0; i < files_created; i++)
967 const char *name = make_filename (i);
968 if (unlink (name) != 0 && !in_signal_handler)
969 error (0, errno, "%s", name);
975 /* Close the current output file and print the count
976 of characters in this file. */
979 close_output_file (void)
983 if (ferror (output_stream))
985 error (0, 0, _("write error for %s"), quote (output_filename));
986 output_stream = NULL;
989 if (fclose (output_stream) != 0)
991 error (0, errno, "%s", output_filename);
992 output_stream = NULL;
995 if (bytes_written == 0 && elide_empty_files)
1001 /* Remove the output file in a critical section, to avoid races. */
1002 sigprocmask (SIG_BLOCK, &caught_signals, &oldset);
1003 unlink_ok = (unlink (output_filename) == 0);
1004 unlink_errno = errno;
1005 files_created -= unlink_ok;
1006 sigprocmask (SIG_SETMASK, &oldset, NULL);
1009 error (0, unlink_errno, "%s", output_filename);
1013 if (!suppress_count)
1015 char buf[INT_BUFSIZE_BOUND (uintmax_t)];
1016 fprintf (stdout, "%s\n", umaxtostr (bytes_written, buf));
1019 output_stream = NULL;
1023 /* Save line LINE to the output file and
1024 increment the character count for the current file. */
1027 save_line_to_file (const struct cstring *line)
1029 fwrite (line->str, sizeof (char), line->len, output_stream);
1030 bytes_written += line->len;
1033 /* Return a new, initialized control record. */
1035 static struct control *
1036 new_control_record (void)
1038 static size_t control_allocated = 0; /* Total space allocated. */
1041 if (control_used == control_allocated)
1042 controls = X2NREALLOC (controls, &control_allocated);
1043 p = &controls[control_used++];
1046 p->repeat_forever = false;
1047 p->lines_required = 0;
1052 /* Check if there is a numeric offset after a regular expression.
1053 STR is the entire command line argument.
1054 P is the control record for this regular expression.
1055 NUM is the numeric part of STR. */
1058 check_for_offset (struct control *p, const char *str, const char *num)
1060 if (xstrtoimax (num, NULL, 10, &p->offset, "") != LONGINT_OK)
1061 error (EXIT_FAILURE, 0, _("%s: integer expected after delimiter"), str);
1064 /* Given that the first character of command line arg STR is '{',
1065 make sure that the rest of the string is a valid repeat count
1066 and store its value in P.
1067 ARGNUM is the ARGV index of STR. */
1070 parse_repeat_count (int argnum, struct control *p, char *str)
1075 end = str + strlen (str) - 1;
1077 error (EXIT_FAILURE, 0, _("%s: `}' is required in repeat count"), str);
1080 if (str+1 == end-1 && *(str+1) == '*')
1081 p->repeat_forever = true;
1084 if (xstrtoumax (str + 1, NULL, 10, &val, "") != LONGINT_OK)
1086 error (EXIT_FAILURE, 0,
1087 _("%s}: integer required between `{' and `}'"),
1088 global_argv[argnum]);
1096 /* Extract the regular expression from STR and check for a numeric offset.
1097 STR should start with the regexp delimiter character.
1098 Return a new control record for the regular expression.
1099 ARGNUM is the ARGV index of STR.
1100 Unless IGNORE is true, mark these lines for output. */
1102 static struct control *
1103 extract_regexp (int argnum, bool ignore, char const *str)
1105 size_t len; /* Number of bytes in this regexp. */
1107 char const *closing_delim;
1111 closing_delim = strrchr (str + 1, delim);
1112 if (closing_delim == NULL)
1113 error (EXIT_FAILURE, 0,
1114 _("%s: closing delimiter `%c' missing"), str, delim);
1116 len = closing_delim - str - 1;
1117 p = new_control_record ();
1122 p->re_compiled.buffer = NULL;
1123 p->re_compiled.allocated = 0;
1124 p->re_compiled.fastmap = xmalloc (UCHAR_MAX + 1);
1125 p->re_compiled.translate = NULL;
1127 RE_SYNTAX_POSIX_BASIC & ~RE_CONTEXT_INVALID_DUP & ~RE_NO_EMPTY_RANGES;
1128 err = re_compile_pattern (str + 1, len, &p->re_compiled);
1131 error (0, 0, _("%s: invalid regular expression: %s"), str, err);
1135 if (closing_delim[1])
1136 check_for_offset (p, str, closing_delim + 1);
1141 /* Extract the break patterns from args START through ARGC - 1 of ARGV.
1142 After each pattern, check if the next argument is a repeat count. */
1145 parse_patterns (int argc, int start, char **argv)
1147 int i; /* Index into ARGV. */
1148 struct control *p; /* New control record created. */
1150 static uintmax_t last_val = 0;
1152 for (i = start; i < argc; i++)
1154 if (*argv[i] == '/' || *argv[i] == '%')
1156 p = extract_regexp (i, *argv[i] == '%', argv[i]);
1160 p = new_control_record ();
1163 if (xstrtoumax (argv[i], NULL, 10, &val, "") != LONGINT_OK)
1164 error (EXIT_FAILURE, 0, _("%s: invalid pattern"), argv[i]);
1166 error (EXIT_FAILURE, 0,
1167 _("%s: line number must be greater than zero"),
1171 char buf[INT_BUFSIZE_BOUND (uintmax_t)];
1172 error (EXIT_FAILURE, 0,
1173 _("line number %s is smaller than preceding line number, %s"),
1174 quote (argv[i]), umaxtostr (last_val, buf));
1177 if (val == last_val)
1179 _("warning: line number %s is the same as preceding line number"),
1184 p->lines_required = val;
1187 if (i + 1 < argc && *argv[i + 1] == '{')
1189 /* We have a repeat count. */
1191 parse_repeat_count (i, p, argv[i]);
1197 get_format_flags (char **format_ptr)
1199 unsigned int count = 0;
1201 for (; **format_ptr; (*format_ptr)++)
1203 switch (**format_ptr)
1214 count |= 2; /* Allow for 0x prefix preceding an `x' conversion. */
1225 get_format_width (char **format_ptr)
1227 unsigned long int val = 0;
1229 if (ISDIGIT (**format_ptr)
1230 && (xstrtoul (*format_ptr, format_ptr, 10, &val, NULL) != LONGINT_OK
1232 error (EXIT_FAILURE, 0, _("invalid format width"));
1234 /* Allow for enough octal digits to represent the value of UINT_MAX,
1235 even if the field width is less than that. */
1236 return MAX (val, (sizeof (unsigned int) * CHAR_BIT + 2) / 3);
1240 get_format_prec (char **format_ptr)
1242 if (**format_ptr != '.')
1246 if (! ISDIGIT (**format_ptr))
1250 unsigned long int val;
1251 if (xstrtoul (*format_ptr, format_ptr, 10, &val, NULL) != LONGINT_OK
1253 error (EXIT_FAILURE, 0, _("invalid format precision"));
1259 get_format_conv_type (char **format_ptr)
1261 unsigned char ch = *(*format_ptr)++;
1274 error (EXIT_FAILURE, 0, _("missing conversion specifier in suffix"));
1279 error (EXIT_FAILURE, 0,
1280 _("invalid conversion specifier in suffix: %c"), ch);
1282 error (EXIT_FAILURE, 0,
1283 _("invalid conversion specifier in suffix: \\%.3o"), ch);
1288 max_out (char *format)
1290 size_t out_count = 0;
1291 bool percent = false;
1295 if (*format++ != '%')
1297 else if (*format == '%')
1305 error (EXIT_FAILURE, 0,
1306 _("too many %% conversion specifications in suffix"));
1308 out_count += get_format_flags (&format);
1310 size_t width = get_format_width (&format);
1311 size_t prec = get_format_prec (&format);
1313 out_count += MAX (width, prec);
1315 get_format_conv_type (&format);
1320 error (EXIT_FAILURE, 0,
1321 _("missing %% conversion specification in suffix"));
1327 main (int argc, char **argv)
1330 unsigned long int val;
1332 initialize_main (&argc, &argv);
1333 program_name = argv[0];
1334 setlocale (LC_ALL, "");
1335 bindtextdomain (PACKAGE, LOCALEDIR);
1336 textdomain (PACKAGE);
1338 atexit (close_stdout);
1343 suppress_count = false;
1344 remove_files = true;
1345 prefix = DEFAULT_PREFIX;
1347 while ((optc = getopt_long (argc, argv, "f:b:kn:sqz", longopts, NULL)) != -1)
1359 remove_files = false;
1363 if (xstrtoul (optarg, NULL, 10, &val, "") != LONGINT_OK
1365 error (EXIT_FAILURE, 0, _("%s: invalid number"), optarg);
1371 suppress_count = true;
1375 elide_empty_files = true;
1378 case_GETOPT_HELP_CHAR;
1380 case_GETOPT_VERSION_CHAR (PROGRAM_NAME, AUTHORS);
1383 usage (EXIT_FAILURE);
1386 if (argc - optind < 2)
1389 error (0, 0, _("missing operand"));
1391 error (0, 0, _("missing operand after %s"), quote (argv[argc - 1]));
1392 usage (EXIT_FAILURE);
1396 filename_space = xmalloc (strlen (prefix) + max_out (suffix) + 2);
1398 filename_space = xmalloc (strlen (prefix) + digits + 2);
1400 set_input_file (argv[optind++]);
1402 parse_patterns (argc, optind, argv);
1406 static int const sig[] =
1408 /* The usual suspects. */
1409 SIGALRM, SIGHUP, SIGINT, SIGPIPE, SIGQUIT, SIGTERM,
1426 enum { nsigs = sizeof sig / sizeof sig[0] };
1429 struct sigaction act;
1431 sigemptyset (&caught_signals);
1432 for (i = 0; i < nsigs; i++)
1434 sigaction (sig[i], NULL, &act);
1435 if (act.sa_handler != SIG_IGN)
1436 sigaddset (&caught_signals, sig[i]);
1439 act.sa_handler = interrupt_handler;
1440 act.sa_mask = caught_signals;
1443 for (i = 0; i < nsigs; i++)
1444 if (sigismember (&caught_signals, sig[i]))
1445 sigaction (sig[i], &act, NULL);
1447 for (i = 0; i < nsigs; i++)
1448 if (signal (sig[i], SIG_IGN) != SIG_IGN)
1450 signal (sig[i], interrupt_handler);
1451 siginterrupt (sig[i], 1);
1458 if (close (STDIN_FILENO) != 0)
1460 error (0, errno, _("read error"));
1464 exit (EXIT_SUCCESS);
1470 if (status != EXIT_SUCCESS)
1471 fprintf (stderr, _("Try `%s --help' for more information.\n"),
1476 Usage: %s [OPTION]... FILE PATTERN...\n\
1480 Output pieces of FILE separated by PATTERN(s) to files `xx00', `xx01', ...,\n\
1481 and output byte counts of each piece to standard output.\n\
1485 Mandatory arguments to long options are mandatory for short options too.\n\
1488 -b, --suffix-format=FORMAT use sprintf FORMAT instead of %02d\n\
1489 -f, --prefix=PREFIX use PREFIX instead of `xx'\n\
1490 -k, --keep-files do not remove output files on errors\n\
1493 -n, --digits=DIGITS use specified number of digits instead of 2\n\
1494 -s, --quiet, --silent do not print counts of output file sizes\n\
1495 -z, --elide-empty-files remove empty output files\n\
1497 fputs (HELP_OPTION_DESCRIPTION, stdout);
1498 fputs (VERSION_OPTION_DESCRIPTION, stdout);
1501 Read standard input if FILE is -. Each PATTERN may be:\n\
1505 INTEGER copy up to but not including specified line number\n\
1506 /REGEXP/[OFFSET] copy up to but not including a matching line\n\
1507 %REGEXP%[OFFSET] skip to, but not including a matching line\n\
1508 {INTEGER} repeat the previous pattern specified number of times\n\
1509 {*} repeat the previous pattern as many times as possible\n\
1511 A line OFFSET is a required `+' or `-' followed by a positive integer.\n\
1513 emit_bug_reporting_address ();