1 /* csplit - split a file into sections determined by context lines
2 Copyright (C) 91, 1995-1999 Free Software Foundation, Inc.
4 This program is free software; you can redistribute it and/or modify
5 it under the terms of the GNU General Public License as published by
6 the Free Software Foundation; either version 2, or (at your option) any later version.
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU General Public License for more details.
14 You should have received a copy of the GNU General Public License
15 along with this program; if not, write to the Free Software Foundation,
16 Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */
18 /* Written by Stuart Kemp, cpsrk@groper.jcu.edu.au.
19 Modified by David MacKenzie, djm@gnu.ai.mit.edu. */
26 #include <sys/types.h>
34 #include "long-options.h"
35 #include "safe-read.h"
39 /* The official name of this program (e.g., no `g' prefix). */
40 #define PROGRAM_NAME "csplit"
47 # define MAX(a,b) (((a) > (b)) ? (a) : (b))
55 /* Increment size of area for control records. */
58 /* The default prefix for output file names. */
59 #define DEFAULT_PREFIX "xx"
63 /* A compiled pattern arg. */
66 char *regexpr; /* Non-compiled regular expression. */
67 struct re_pattern_buffer re_compiled; /* Compiled regular expression. */
68 int offset; /* Offset from regexp to split at. */
69 int lines_required; /* Number of lines required. */
70 unsigned int repeat; /* Repeat count. */
71 int repeat_forever; /* Non-zero if `*' used as a repeat count. */
72 int argnum; /* ARGV index. */
73 boolean ignore; /* If true, produce no output (for regexp). */
76 /* Initial size of data area in buffers. */
77 #define START_SIZE 8191
79 /* Increment size for data area. */
80 #define INCR_SIZE 2048
82 /* Number of lines kept in each node in line list. */
86 /* Some small values to test the algorithms. */
87 # define START_SIZE 200
92 /* A string with a length count. */
99 /* Pointers to the beginnings of lines in the buffer area.
100 These structures are linked together if needed. */
103 unsigned used; /* Number of offsets used in this struct. */
104 unsigned insert_index; /* Next offset to use when inserting line. */
105 unsigned retrieve_index; /* Next index to use when retrieving line. */
106 struct cstring starts[CTRL_SIZE]; /* Lines in the data area. */
107 struct line *next; /* Next in linked list. */
110 /* The structure to hold the input lines.
111 Contains a pointer to the data area and a list containing
112 pointers to the individual lines. */
115 unsigned bytes_alloc; /* Size of the buffer area. */
116 unsigned bytes_used; /* Bytes used in the buffer area. */
117 unsigned start_line; /* First line number in this buffer. */
118 unsigned first_available; /* First line that can be retrieved. */
119 unsigned num_lines; /* Number of complete lines in this buffer. */
120 char *buffer; /* Data area. */
121 struct line *line_start; /* Head of list of pointers to lines. */
122 struct line *curr_line; /* The line start record currently in use. */
123 struct buffer_record *next;
126 static void close_output_file PARAMS ((void));
127 static void create_output_file PARAMS ((void));
128 static void delete_all_files PARAMS ((void));
129 static void save_line_to_file PARAMS ((const struct cstring *line));
130 void usage PARAMS ((int status));
132 /* The name this program was run with. */
135 /* Convert the number of 8-bit bytes of a binary representation to
136 the number of characters required to represent the same quantity
137 as an unsigned octal. For example, a 32-bit (4-byte) quantity may
138 require a field width as wide as 11 characters. */
139 static const unsigned int bytes_to_octal_digits[] =
140 {0, 3, 6, 8, 11, 14, 16, 19, 22, 25, 27, 30, 32, 35, 38, 41, 43};
142 /* Input file descriptor. */
143 static int input_desc = 0;
145 /* List of available buffers. */
146 static struct buffer_record *free_list = NULL;
148 /* Start of buffer list. */
149 static struct buffer_record *head = NULL;
151 /* Partially read line. */
152 static char *hold_area = NULL;
154 /* Number of chars in `hold_area'. */
155 static unsigned hold_count = 0;
157 /* Number of the last line in the buffers. */
158 static unsigned last_line_number = 0;
160 /* Number of the line currently being examined. */
161 static unsigned current_line = 0;
163 /* If TRUE, we have read EOF. */
164 static boolean have_read_eof = FALSE;
166 /* Name of output files. */
167 static char *filename_space = NULL;
169 /* Prefix part of output file names. */
170 static char *prefix = NULL;
172 /* Suffix part of output file names. */
173 static char *suffix = NULL;
175 /* Number of digits to use in output file names. */
176 static int digits = 2;
178 /* Number of files created so far. */
179 static unsigned int files_created = 0;
181 /* Number of bytes written to current file. */
182 static unsigned int bytes_written;
184 /* Output file pointer. */
185 static FILE *output_stream = NULL;
187 /* Output file name. */
188 static char *output_filename = NULL;
190 /* Perhaps it would be cleaner to pass arg values instead of indexes. */
191 static char **global_argv;
193 /* If TRUE, do not print the count of bytes in each output file. */
194 static boolean suppress_count;
196 /* If TRUE, remove output files on error. */
197 static boolean remove_files;
199 /* If TRUE, remove all output files which have a zero length. */
200 static boolean elide_empty_files;
202 /* The compiled pattern arguments, which determine how to split the input file. */
204 static struct control *controls;
206 /* Number of elements in `controls'. */
207 static unsigned int control_used;
209 static struct option const longopts[] =
211 {"digits", required_argument, NULL, 'n'},
212 {"quiet", no_argument, NULL, 'q'},
213 {"silent", no_argument, NULL, 's'},
214 {"keep-files", no_argument, NULL, 'k'},
215 {"elide-empty-files", no_argument, NULL, 'z'},
216 {"prefix", required_argument, NULL, 'f'},
217 {"suffix-format", required_argument, NULL, 'b'},
221 /* Optionally remove files created so far; then exit.
222 Called when an error detected. */
228 close_output_file ();
242 interrupt_handler (int sig)
245 struct sigaction sigact;
247 sigact.sa_handler = SIG_DFL;
248 sigemptyset (&sigact.sa_mask);
250 sigaction (sig, &sigact, NULL);
251 #else /* !SA_INTERRUPT */
252 signal (sig, SIG_DFL);
253 #endif /* SA_INTERRUPT */
255 kill (getpid (), sig);
258 /* Keep track of NUM chars of a partial line in buffer START.
259 These chars will be retrieved later when another large buffer is read.
260 It is not necessary to create a new buffer for these chars; instead,
261 we keep a pointer to the existing buffer. This buffer *is* on the
262 free list, and when the next buffer is obtained from this list
263 (even if it is this one), these chars will be placed at the
264 start of the new buffer. */
267 save_to_hold_area (char *start, unsigned int num)
273 /* Read up to MAX_N_BYTES chars from the input stream into DEST.
274 Return the number of chars read. */
277 read_input (char *dest, unsigned int max_n_bytes)
281 if (max_n_bytes == 0)
284 bytes_read = safe_read (input_desc, dest, max_n_bytes);
287 have_read_eof = TRUE;
291 error (0, errno, _("read error"));
298 /* Initialize existing line record P. */
301 clear_line_control (struct line *p)
305 p->retrieve_index = 0;
308 /* Initialize all line records in B. */
311 clear_all_line_control (struct buffer_record *b)
315 for (l = b->line_start; l; l = l->next)
316 clear_line_control (l);
319 /* Return a new, initialized line record. */
322 new_line_control (void)
326 p = (struct line *) xmalloc (sizeof (struct line));
329 clear_line_control (p);
334 /* Record LINE_START, which is the address of the start of a line
335 of length LINE_LEN in the large buffer, in the lines buffer of B. */
338 keep_new_line (struct buffer_record *b, char *line_start, int line_len)
342 /* If there is no existing area to keep line info, get some. */
343 if (b->line_start == NULL)
344 b->line_start = b->curr_line = new_line_control ();
346 /* If existing area for lines is full, get more. */
347 if (b->curr_line->used == CTRL_SIZE)
349 b->curr_line->next = new_line_control ();
350 b->curr_line = b->curr_line->next;
355 /* Record the start of the line, and update counters. */
356 l->starts[l->insert_index].str = line_start;
357 l->starts[l->insert_index].len = line_len;
362 /* Scan the buffer in B for newline characters
363 and record the line start locations and lengths in B.
364 Return the number of lines found in this buffer.
366 There may be an incomplete line at the end of the buffer;
367 a pointer is kept to this area, which will be used when
368 the next buffer is filled. */
371 record_line_starts (struct buffer_record *b)
373 char *line_start; /* Start of current line. */
374 char *line_end; /* End of each line found. */
375 unsigned int bytes_left; /* Length of incomplete last line. */
376 unsigned int lines; /* Number of lines found. */
377 unsigned int line_length; /* Length of each line found. */
379 if (b->bytes_used == 0)
383 line_start = b->buffer;
384 bytes_left = b->bytes_used;
388 line_end = memchr (line_start, '\n', bytes_left);
389 if (line_end == NULL)
391 line_length = line_end - line_start + 1;
392 keep_new_line (b, line_start, line_length);
393 bytes_left -= line_length;
394 line_start = line_end + 1;
398 /* Check for an incomplete last line. */
403 keep_new_line (b, line_start, bytes_left);
407 save_to_hold_area (line_start, bytes_left);
410 b->num_lines = lines;
411 b->first_available = b->start_line = last_line_number + 1;
412 last_line_number += lines;
417 /* Return a new buffer with room to store SIZE bytes, plus
418 an extra byte for safety. */
420 static struct buffer_record *
421 create_new_buffer (unsigned int size)
423 struct buffer_record *new_buffer;
425 new_buffer = (struct buffer_record *)
426 xmalloc (sizeof (struct buffer_record));
428 new_buffer->buffer = (char *) xmalloc (size + 1);
430 new_buffer->bytes_alloc = size;
431 new_buffer->line_start = new_buffer->curr_line = NULL;
436 /* Return a new buffer of at least MINSIZE bytes. If a buffer of at
437 least that size is currently free, use it, otherwise create a new one. */
439 static struct buffer_record *
440 get_new_buffer (unsigned int min_size)
442 struct buffer_record *p, *q;
443 struct buffer_record *new_buffer; /* Buffer to return. */
444 unsigned int alloc_size; /* Actual size that will be requested. */
446 alloc_size = START_SIZE;
447 while (min_size > alloc_size)
448 alloc_size += INCR_SIZE;
450 if (free_list == NULL)
451 new_buffer = create_new_buffer (alloc_size);
454 /* Use first-fit to find a buffer. */
455 p = new_buffer = NULL;
460 if (q->bytes_alloc >= min_size)
473 new_buffer = (q ? q : create_new_buffer (alloc_size));
475 new_buffer->curr_line = new_buffer->line_start;
476 clear_all_line_control (new_buffer);
479 new_buffer->num_lines = 0;
480 new_buffer->bytes_used = 0;
481 new_buffer->start_line = new_buffer->first_available = last_line_number + 1;
482 new_buffer->next = NULL;
487 /* Add buffer BUF to the list of free buffers. */
490 free_buffer (struct buffer_record *buf)
492 buf->next = free_list;
496 /* Append buffer BUF to the linked list of buffers that contain
497 some data yet to be processed. */
500 save_buffer (struct buffer_record *buf)
502 struct buffer_record *p;
505 buf->curr_line = buf->line_start;
511 for (p = head; p->next; p = p->next)
517 /* Fill a buffer of input.
519 Set the initial size of the buffer to a default.
520 Fill the buffer (from the hold area and input stream)
521 and find the individual lines.
522 If no lines are found (the buffer is too small to hold the next line),
523 release the current buffer (whose contents would have been put in the
524 hold area) and repeat the process with another large buffer until at least
525 one entire line has been read.
527 Return TRUE if a new buffer was obtained, otherwise false
528 (in which case end-of-file must have been encountered). */
533 struct buffer_record *b;
534 unsigned int bytes_wanted = START_SIZE; /* Minimum buffer size. */
535 unsigned int bytes_avail; /* Size of new buffer created. */
536 unsigned int lines_found; /* Number of lines in this new buffer. */
537 char *p; /* Place to load into buffer. */
542 /* We must make the buffer at least as large as the amount of data
543 in the partial line left over from the last call. */
544 if (bytes_wanted < hold_count)
545 bytes_wanted = hold_count;
549 b = get_new_buffer (bytes_wanted);
550 bytes_avail = b->bytes_alloc; /* Size of buffer returned. */
553 /* First check the `holding' area for a partial line. */
557 memcpy (p, hold_area, hold_count);
559 b->bytes_used += hold_count;
560 bytes_avail -= hold_count;
564 b->bytes_used += (unsigned int) read_input (p, bytes_avail);
566 lines_found = record_line_starts (b);
567 bytes_wanted = b->bytes_alloc * 2;
571 while (!lines_found && !have_read_eof);
576 return lines_found != 0;
579 /* Return the line number of the first line that has not yet been retrieved. */
582 get_first_line_in_buffer (void)
584 if (head == NULL && !load_buffer ())
585 error (EXIT_FAILURE, errno, _("input disappeared"));
587 return head->first_available;
590 /* Return a pointer to the logical first line in the buffer and make the
591 next line the logical first line.
592 Return NULL if there is no more input. */
594 static struct cstring *
597 struct cstring *line; /* Return value. */
598 struct line *l; /* For convenience. */
600 if (head == NULL && !load_buffer ())
603 if (current_line < head->first_available)
604 current_line = head->first_available;
606 ++(head->first_available);
610 line = &l->starts[l->retrieve_index];
612 /* Advance index to next line. */
613 if (++l->retrieve_index == l->used)
615 /* Go on to the next line record. */
616 head->curr_line = l->next;
617 if (head->curr_line == NULL || head->curr_line->used == 0)
619 /* Go on to the next data block. */
620 struct buffer_record *b = head;
629 /* Search the buffers for line LINENUM, reading more input if necessary.
630 Return a pointer to the line, or NULL if it is not found in the file. */
632 static struct cstring *
633 find_line (unsigned int linenum)
635 struct buffer_record *b;
637 if (head == NULL && !load_buffer ())
640 if (linenum < head->start_line)
645 if (linenum < b->start_line + b->num_lines)
647 /* The line is in this buffer. */
649 unsigned int offset; /* How far into the buffer the line is. */
652 offset = linenum - b->start_line;
653 /* Find the control record. */
654 while (offset >= CTRL_SIZE)
659 return &l->starts[offset];
661 if (b->next == NULL && !load_buffer ())
663 b = b->next; /* Try the next data block. */
667 /* Return TRUE if no more lines are available for input. */
672 return (find_line (current_line + 1) == NULL) ? TRUE : FALSE;
675 /* Set the name of the input file to NAME and open it. */
678 set_input_file (const char *name)
680 if (STREQ (name, "-"))
684 input_desc = open (name, O_RDONLY);
686 error (EXIT_FAILURE, errno, "%s", name);
690 /* Write all lines from the beginning of the buffer up to, but
691 not including, line LAST_LINE, to the current output file.
692 If IGNORE is TRUE, do not output lines selected here.
693 ARGNUM is the index in ARGV of the current pattern. */
696 write_to_file (unsigned int last_line, boolean ignore, int argnum)
698 struct cstring *line;
699 unsigned int first_line; /* First available input line. */
700 unsigned int lines; /* Number of lines to output. */
703 first_line = get_first_line_in_buffer ();
705 if (first_line > last_line)
707 error (0, 0, _("%s: line number out of range"), global_argv[argnum]);
711 lines = last_line - first_line;
713 for (i = 0; i < lines; i++)
715 line = remove_line ();
718 error (0, 0, _("%s: line number out of range"), global_argv[argnum]);
722 save_line_to_file (line);
726 /* Output any lines left after all regexps have been processed. */
729 dump_rest_of_file (void)
731 struct cstring *line;
733 while ((line = remove_line ()) != NULL)
734 save_line_to_file (line);
737 /* Handle an attempt to read beyond EOF under the control of record P,
738 on iteration REPETITION if nonzero. */
741 handle_line_error (const struct control *p, int repetition)
743 fprintf (stderr, _("%s: `%d': line number out of range"),
744 program_name, p->lines_required);
746 fprintf (stderr, _(" on repetition %d\n"), repetition);
748 fprintf (stderr, "\n");
753 /* Determine the line number that marks the end of this file,
754 then get those lines and save them to the output file.
755 P is the control record.
756 REPETITION is the repetition number. */
759 process_line_count (const struct control *p, int repetition)
761 unsigned int linenum;
762 unsigned int last_line_to_save = p->lines_required * (repetition + 1);
763 struct cstring *line;
765 create_output_file ();
767 linenum = get_first_line_in_buffer ();
769 /* Initially, I wanted to assert linenum < last_line_to_save, but that
770 condition is false for the valid command: echo | csplit - 1 '{*}'.
771 So, relax it just a little. */
772 assert ((linenum == 1 && last_line_to_save == 1)
773 || linenum < last_line_to_save);
775 while (linenum++ < last_line_to_save)
777 line = remove_line ();
779 handle_line_error (p, repetition);
780 save_line_to_file (line);
783 close_output_file ();
785 /* Ensure that the line number specified is not 1 greater than
786 the number of lines in the file. */
787 if (no_more_lines ())
788 handle_line_error (p, repetition);
792 regexp_error (struct control *p, int repetition, boolean ignore)
794 fprintf (stderr, _("%s: `%s': match not found"),
795 program_name, global_argv[p->argnum]);
798 fprintf (stderr, _(" on repetition %d\n"), repetition);
800 fprintf (stderr, "\n");
804 dump_rest_of_file ();
805 close_output_file ();
810 /* Read the input until a line matches the regexp in P, outputting
811 it unless P->IGNORE is TRUE.
812 REPETITION is this repeat-count; 0 means the first time. */
815 process_regexp (struct control *p, int repetition)
817 struct cstring *line; /* From input file. */
818 unsigned int line_len; /* To make "$" in regexps work. */
819 unsigned int break_line; /* First line number of next file. */
820 boolean ignore = p->ignore; /* If TRUE, skip this section. */
824 create_output_file ();
826 /* If there is no offset for the regular expression, or
827 it is positive, then it is not necessary to buffer the lines. */
833 line = find_line (++current_line);
836 if (p->repeat_forever)
840 dump_rest_of_file ();
841 close_output_file ();
846 regexp_error (p, repetition, ignore);
848 line_len = line->len;
849 if (line->str[line_len - 1] == '\n')
851 ret = re_search (&p->re_compiled, line->str, line_len,
852 0, line_len, (struct re_registers *) 0);
855 error (0, 0, _("error in regular expression search"));
860 line = remove_line ();
862 save_line_to_file (line);
870 /* Buffer the lines. */
873 line = find_line (++current_line);
876 if (p->repeat_forever)
880 dump_rest_of_file ();
881 close_output_file ();
886 regexp_error (p, repetition, ignore);
888 line_len = line->len;
889 if (line->str[line_len - 1] == '\n')
891 ret = re_search (&p->re_compiled, line->str, line_len,
892 0, line_len, (struct re_registers *) 0);
895 error (0, 0, _("error in regular expression search"));
903 /* Account for any offset from this regexp. */
904 break_line = current_line + p->offset;
906 write_to_file (break_line, ignore, p->argnum);
909 close_output_file ();
912 current_line = break_line;
915 /* Split the input file according to the control records we have built. */
922 for (i = 0; i < control_used; i++)
924 if (controls[i].regexpr)
926 for (j = 0; (controls[i].repeat_forever
927 || j <= controls[i].repeat); j++)
928 process_regexp (&controls[i], j);
932 for (j = 0; (controls[i].repeat_forever
933 || j <= controls[i].repeat); j++)
934 process_line_count (&controls[i], j);
938 create_output_file ();
939 dump_rest_of_file ();
940 close_output_file ();
943 /* Return the name of output file number NUM. */
946 make_filename (unsigned int num)
948 strcpy (filename_space, prefix);
950 sprintf (filename_space+strlen(prefix), suffix, num);
952 sprintf (filename_space+strlen(prefix), "%0*d", digits, num);
953 return filename_space;
956 /* Create the next output file. */
959 create_output_file (void)
961 output_filename = make_filename (files_created);
962 output_stream = fopen (output_filename, "w");
963 if (output_stream == NULL)
965 error (0, errno, "%s", output_filename);
972 /* Delete all the files we have created. */
975 delete_all_files (void)
980 for (i = 0; i < files_created; i++)
982 name = make_filename (i);
984 error (0, errno, "%s", name);
988 /* Close the current output file and print the count
989 of characters in this file. */
992 close_output_file (void)
996 if (ferror (output_stream) || fclose (output_stream) == EOF)
998 error (0, errno, _("write error for `%s'"), output_filename);
999 output_stream = NULL;
1002 if (bytes_written == 0 && elide_empty_files)
1004 if (unlink (output_filename))
1005 error (0, errno, "%s", output_filename);
1010 /* FIXME: if we write to stdout here, we have to close stdout
1011 and check for errors. */
1012 if (!suppress_count)
1013 fprintf (stdout, "%d\n", bytes_written);
1015 output_stream = NULL;
1019 /* Save line LINE to the output file and
1020 increment the character count for the current file. */
1023 save_line_to_file (const struct cstring *line)
1025 fwrite (line->str, sizeof (char), line->len, output_stream);
1026 bytes_written += line->len;
1029 /* Return a new, initialized control record. */
1031 static struct control *
1032 new_control_record (void)
1034 static unsigned control_allocated = 0; /* Total space allocated. */
1037 if (control_allocated == 0)
1039 control_allocated = ALLOC_SIZE;
1040 controls = (struct control *)
1041 xmalloc (sizeof (struct control) * control_allocated);
1043 else if (control_used == control_allocated)
1045 control_allocated += ALLOC_SIZE;
1046 controls = (struct control *)
1047 xrealloc ((char *) controls,
1048 sizeof (struct control) * control_allocated);
1050 p = &controls[control_used++];
1053 p->repeat_forever = 0;
1054 p->lines_required = 0;
1059 /* Check if there is a numeric offset after a regular expression.
1060 STR is the entire command line argument.
1061 P is the control record for this regular expression.
1062 NUM is the numeric part of STR. */
1065 check_for_offset (struct control *p, const char *str, const char *num)
1069 if (*num != '-' && *num != '+')
1070 error (EXIT_FAILURE, 0, _("%s: `+' or `-' expected after delimeter"), str);
1072 if (xstrtoul (num + 1, NULL, 10, &val, "") != LONGINT_OK
1074 error (EXIT_FAILURE, 0, _("%s: integer expected after `%c'"), str, *num);
1075 p->offset = (unsigned int) val;
1078 p->offset = -p->offset;
1081 /* Given that the first character of command line arg STR is '{',
1082 make sure that the rest of the string is a valid repeat count
1083 and store its value in P.
1084 ARGNUM is the ARGV index of STR. */
1087 parse_repeat_count (int argnum, struct control *p, char *str)
1092 end = str + strlen (str) - 1;
1094 error (EXIT_FAILURE, 0, _("%s: `}' is required in repeat count"), str);
1097 if (str+1 == end-1 && *(str+1) == '*')
1098 p->repeat_forever = 1;
1101 if (xstrtoul (str + 1, NULL, 10, &val, "") != LONGINT_OK
1104 error (EXIT_FAILURE, 0,
1105 _("%s}: integer required between `{' and `}'"),
1106 global_argv[argnum]);
1108 p->repeat = (unsigned int) val;
1114 /* Extract the regular expression from STR and check for a numeric offset.
1115 STR should start with the regexp delimiter character.
1116 Return a new control record for the regular expression.
1117 ARGNUM is the ARGV index of STR.
1118 Unless IGNORE is TRUE, mark these lines for output. */
1120 static struct control *
1121 extract_regexp (int argnum, boolean ignore, char *str)
1123 int len; /* Number of chars in this regexp. */
1125 char *closing_delim;
1129 closing_delim = strrchr (str + 1, delim);
1130 if (closing_delim == NULL)
1131 error (EXIT_FAILURE, 0,
1132 _("%s: closing delimeter `%c' missing"), str, delim);
1134 len = closing_delim - str - 1;
1135 p = new_control_record ();
1139 p->regexpr = (char *) xmalloc ((unsigned) (len + 1));
1140 strncpy (p->regexpr, str + 1, len);
1141 p->re_compiled.allocated = len * 2;
1142 p->re_compiled.buffer = (unsigned char *) xmalloc (p->re_compiled.allocated);
1143 p->re_compiled.fastmap = xmalloc (256);
1144 p->re_compiled.translate = 0;
1145 err = re_compile_pattern (p->regexpr, len, &p->re_compiled);
1148 error (0, 0, _("%s: invalid regular expression: %s"), str, err);
1152 if (closing_delim[1])
1153 check_for_offset (p, str, closing_delim + 1);
1158 /* Extract the break patterns from args START through ARGC - 1 of ARGV.
1159 After each pattern, check if the next argument is a repeat count. */
1162 parse_patterns (int argc, int start, char **argv)
1164 int i; /* Index into ARGV. */
1165 struct control *p; /* New control record created. */
1167 static unsigned long last_val = 0;
1169 for (i = start; i < argc; i++)
1171 if (*argv[i] == '/' || *argv[i] == '%')
1173 p = extract_regexp (i, *argv[i] == '%', argv[i]);
1177 p = new_control_record ();
1180 if (xstrtoul (argv[i], NULL, 10, &val, "") != LONGINT_OK
1182 error (EXIT_FAILURE, 0, _("%s: invalid pattern"), argv[i]);
1184 error (EXIT_FAILURE, 0,
1185 _("%s: line number must be greater than zero"),
1188 error (EXIT_FAILURE, 0,
1189 _("line number `%s' is smaller than preceding line number, %lu"),
1192 if (val == last_val)
1194 _("warning: line number `%s' is the same as preceding line number"),
1198 p->lines_required = (int) val;
1201 if (i + 1 < argc && *argv[i + 1] == '{')
1203 /* We have a repeat count. */
1205 parse_repeat_count (i, p, argv[i]);
1211 get_format_flags (char **format_ptr)
1215 for (; **format_ptr; (*format_ptr)++)
1217 switch (**format_ptr)
1228 count += 2; /* Allow for 0x prefix preceding an `x' conversion. */
1239 get_format_width (char **format_ptr)
1245 start = *format_ptr;
1246 for (; ISDIGIT (**format_ptr); (*format_ptr)++)
1249 ch_save = **format_ptr;
1250 **format_ptr = '\0';
1251 /* In the case where no minimum field width is explicitly specified,
1252 allow for enough octal digits to represent the value of LONG_MAX. */
1253 count = ((*format_ptr == start)
1254 ? bytes_to_octal_digits[sizeof (long)]
1256 **format_ptr = ch_save;
1261 get_format_prec (char **format_ptr)
1268 if (**format_ptr != '.')
1272 if (**format_ptr == '-' || **format_ptr == '+')
1274 is_negative = (**format_ptr == '-');
1282 start = *format_ptr;
1283 for (; ISDIGIT (**format_ptr); (*format_ptr)++)
1286 /* ANSI 4.9.6.1 says that if the precision is negative, it's as good as not present. */
1289 start = *format_ptr;
1291 ch_save = **format_ptr;
1292 **format_ptr = '\0';
1293 count = (*format_ptr == start) ? 11 : atoi (start);
1294 **format_ptr = ch_save;
1300 get_format_conv_type (char **format_ptr)
1302 int ch = *((*format_ptr)++);
1315 error (EXIT_FAILURE, 0, _("missing conversion specifier in suffix"));
1320 error (EXIT_FAILURE, 0,
1321 _("invalid conversion specifier in suffix: %c"), ch);
1323 error (EXIT_FAILURE, 0,
1324 _("invalid conversion specifier in suffix: \\%.3o"), ch);
1329 max_out (char *format)
1331 unsigned out_count = 0;
1332 unsigned percents = 0;
1343 out_count += get_format_flags (&format);
1345 int width = get_format_width (&format);
1346 int prec = get_format_prec (&format);
1348 out_count += MAX (width, prec);
1350 get_format_conv_type (&format);
1355 error (EXIT_FAILURE, 0,
1356 _("missing %% conversion specification in suffix"));
1357 else if (percents > 1)
1358 error (EXIT_FAILURE, 0,
1359 _("too many %% conversion specifications in suffix"));
1365 main (int argc, char **argv)
1370 struct sigaction oldact, newact;
1373 program_name = argv[0];
1374 setlocale (LC_ALL, "");
1375 bindtextdomain (PACKAGE, LOCALEDIR);
1376 textdomain (PACKAGE);
1378 parse_long_options (argc, argv, "csplit", GNU_PACKAGE, VERSION,
1379 "Stuart Kemp and David MacKenzie", usage);
1384 suppress_count = FALSE;
1385 remove_files = TRUE;
1386 prefix = DEFAULT_PREFIX;
1388 /* Change the way xmalloc and xrealloc fail. */
1389 xalloc_fail_func = cleanup;
1392 newact.sa_handler = interrupt_handler;
1393 sigemptyset (&newact.sa_mask);
1394 newact.sa_flags = 0;
1396 sigaction (SIGHUP, NULL, &oldact);
1397 if (oldact.sa_handler != SIG_IGN)
1398 sigaction (SIGHUP, &newact, NULL);
1400 sigaction (SIGINT, NULL, &oldact);
1401 if (oldact.sa_handler != SIG_IGN)
1402 sigaction (SIGINT, &newact, NULL);
1404 sigaction (SIGQUIT, NULL, &oldact);
1405 if (oldact.sa_handler != SIG_IGN)
1406 sigaction (SIGQUIT, &newact, NULL);
1408 sigaction (SIGTERM, NULL, &oldact);
1409 if (oldact.sa_handler != SIG_IGN)
1410 sigaction (SIGTERM, &newact, NULL);
1411 #else /* not SA_INTERRUPT */
1412 if (signal (SIGHUP, SIG_IGN) != SIG_IGN)
1413 signal (SIGHUP, interrupt_handler);
1414 if (signal (SIGINT, SIG_IGN) != SIG_IGN)
1415 signal (SIGINT, interrupt_handler);
1416 if (signal (SIGQUIT, SIG_IGN) != SIG_IGN)
1417 signal (SIGQUIT, interrupt_handler);
1418 if (signal (SIGTERM, SIG_IGN) != SIG_IGN)
1419 signal (SIGTERM, interrupt_handler);
1420 #endif /* not SA_INTERRUPT */
1422 while ((optc = getopt_long (argc, argv, "f:b:kn:sqz", longopts, NULL)) != -1)
1437 remove_files = FALSE;
1441 if (xstrtoul (optarg, NULL, 10, &val, "") != LONGINT_OK
1443 error (EXIT_FAILURE, 0, _("%s: invalid number"), optarg);
1449 suppress_count = TRUE;
1453 elide_empty_files = TRUE;
1460 if (argc - optind < 2)
1462 error (0, 0, _("too few arguments"));
1467 filename_space = (char *) xmalloc (strlen (prefix) + max_out (suffix) + 2);
1469 filename_space = (char *) xmalloc (strlen (prefix) + digits + 2);
1471 set_input_file (argv[optind++]);
1473 parse_patterns (argc, optind, argv);
1477 if (close (input_desc) < 0)
1479 error (0, errno, _("read error"));
1483 if (!suppress_count && (ferror (stdout) || fclose (stdout) == EOF))
1484 error (EXIT_FAILURE, errno, _("write error"));
1486 exit (EXIT_SUCCESS);
1493 fprintf (stderr, _("Try `%s --help' for more information.\n"),
1498 Usage: %s [OPTION]... FILE PATTERN...\n\
1502 Output pieces of FILE separated by PATTERN(s) to files `xx00', `xx01', ...,\n\
1503 and output byte counts of each piece to standard output.\n\
1505 -b, --suffix-format=FORMAT use sprintf FORMAT instead of %%d\n\
1506 -f, --prefix=PREFIX use PREFIX instead of `xx'\n\
1507 -k, --keep-files do not remove output files on errors\n\
1508 -n, --digits=DIGITS use specified number of digits instead of 2\n\
1509 -s, --quiet, --silent do not print counts of output file sizes\n\
1510 -z, --elide-empty-files remove empty output files\n\
1511 --help display this help and exit\n\
1512 --version output version information and exit\n\
1514 Read standard input if FILE is -. Each PATTERN may be:\n\
1516 INTEGER copy up to but not including specified line number\n\
1517 /REGEXP/[OFFSET] copy up to but not including a matching line\n\
1518 %%REGEXP%%[OFFSET] skip to, but not including a matching line\n\
1519 {INTEGER} repeat the previous pattern specified number of times\n\
1520 {*} repeat the previous pattern as many times as possible\n\
1522 A line OFFSET is a required `+' or `-' followed by a positive integer.\n\
1524 puts (_("\nReport bugs to <bug-textutils@gnu.org>."));
1526 exit (status == 0 ? EXIT_SUCCESS : EXIT_FAILURE);