1 /* csplit - split a file into sections determined by context lines
2 Copyright (C) 91, 95, 96, 1997, 1998 Free Software Foundation, Inc.
4 This program is free software; you can redistribute it and/or modify
5 it under the terms of the GNU General Public License as published by
6 the Free Software Foundation; either version 2, or (at your option)
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU General Public License for more details.
14 You should have received a copy of the GNU General Public License
15 along with this program; if not, write to the Free Software Foundation,
16 Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */
18 /* Written by Stuart Kemp, cpsrk@groper.jcu.edu.au.
19 Modified by David MacKenzie, djm@gnu.ai.mit.edu. */
25 /* Disable assertions. Some systems have broken assert macros. */
30 #include <sys/types.h>
44 #include "safe-read.h"
51 # define MAX(a,b) (((a) > (b)) ? (a) : (b))
59 /* Increment size of area for control records. */
62 /* The default prefix for output file names. */
63 #define DEFAULT_PREFIX "xx"
67 /* A compiled pattern arg. */
70 char *regexpr; /* Non-compiled regular expression. */
71 struct re_pattern_buffer re_compiled; /* Compiled regular expression. */
72 int offset; /* Offset from regexp to split at. */
73 int lines_required; /* Number of lines required. */
74 unsigned int repeat; /* Repeat count. */
75 int repeat_forever; /* Non-zero if `*' used as a repeat count. */
76 int argnum; /* ARGV index. */
77 boolean ignore; /* If true, produce no output (for regexp). */
80 /* Initial size of data area in buffers. */
81 #define START_SIZE 8191
83 /* Increment size for data area. */
84 #define INCR_SIZE 2048
86 /* Number of lines kept in each node in line list. */
90 /* Some small values to test the algorithms. */
91 # define START_SIZE 200
96 /* A string with a length count. */
103 /* Pointers to the beginnings of lines in the buffer area.
104 These structures are linked together if needed. */
107 unsigned used; /* Number of offsets used in this struct. */
108 unsigned insert_index; /* Next offset to use when inserting line. */
109 unsigned retrieve_index; /* Next index to use when retrieving line. */
110 struct cstring starts[CTRL_SIZE]; /* Lines in the data area. */
111 struct line *next; /* Next in linked list. */
114 /* The structure to hold the input lines.
115 Contains a pointer to the data area and a list containing
116 pointers to the individual lines. */
119 unsigned bytes_alloc; /* Size of the buffer area. */
120 unsigned bytes_used; /* Bytes used in the buffer area. */
121 unsigned start_line; /* First line number in this buffer. */
122 unsigned first_available; /* First line that can be retrieved. */
123 unsigned num_lines; /* Number of complete lines in this buffer. */
124 char *buffer; /* Data area. */
125 struct line *line_start; /* Head of list of pointers to lines. */
126 struct line *curr_line; /* The line start record currently in use. */
127 struct buffer_record *next;
130 static void close_output_file PARAMS ((void));
131 static void create_output_file PARAMS ((void));
132 static void delete_all_files PARAMS ((void));
133 static void save_line_to_file PARAMS ((const struct cstring *line));
134 static void usage PARAMS ((int status));
136 /* The name this program was run with. */
139 /* Convert the number of 8-bit bytes of a binary representation to
140 the number of characters required to represent the same quantity
141 as an unsigned octal. For example, a 32-bit (4-byte) quantity may
142 require a field width as wide as 11 characters. */
143 static const unsigned int bytes_to_octal_digits[] =
144 {0, 3, 6, 8, 11, 14, 16, 19, 22, 25, 27, 30, 32, 35, 38, 41, 43};
146 /* Input file descriptor. */
147 static int input_desc = 0;
149 /* List of available buffers. */
150 static struct buffer_record *free_list = NULL;
152 /* Start of buffer list. */
153 static struct buffer_record *head = NULL;
155 /* Partially read line. */
156 static char *hold_area = NULL;
158 /* Number of chars in `hold_area'. */
159 static unsigned hold_count = 0;
161 /* Number of the last line in the buffers. */
162 static unsigned last_line_number = 0;
164 /* Number of the line currently being examined. */
165 static unsigned current_line = 0;
167 /* If TRUE, we have read EOF. */
168 static boolean have_read_eof = FALSE;
170 /* Name of output files. */
171 static char *filename_space = NULL;
173 /* Prefix part of output file names. */
174 static char *prefix = NULL;
176 /* Suffix part of output file names. */
177 static char *suffix = NULL;
179 /* Number of digits to use in output file names. */
180 static int digits = 2;
182 /* Number of files created so far. */
183 static unsigned int files_created = 0;
185 /* Number of bytes written to current file. */
186 static unsigned int bytes_written;
188 /* Output file pointer. */
189 static FILE *output_stream = NULL;
191 /* Output file name. */
192 static char *output_filename = NULL;
194 /* Perhaps it would be cleaner to pass arg values instead of indexes. */
195 static char **global_argv;
197 /* If TRUE, do not print the count of bytes in each output file. */
198 static boolean suppress_count;
200 /* If TRUE, remove output files on error. */
201 static boolean remove_files;
203 /* If TRUE, remove all output files which have a zero length. */
204 static boolean elide_empty_files;
206 /* The compiled pattern arguments, which determine how to split the input file. */
208 static struct control *controls;
210 /* Number of elements in `controls'. */
211 static unsigned int control_used;
213 /* If nonzero, display usage information and exit. */
214 static int show_help;
216 /* If nonzero, print the version on standard output then exit. */
217 static int show_version;
219 static struct option const longopts[] =
221 {"digits", required_argument, NULL, 'n'},
222 {"quiet", no_argument, NULL, 'q'},
223 {"silent", no_argument, NULL, 's'},
224 {"keep-files", no_argument, NULL, 'k'},
225 {"elide-empty-files", no_argument, NULL, 'z'},
226 {"prefix", required_argument, NULL, 'f'},
227 {"suffix-format", required_argument, NULL, 'b'},
228 {"help", no_argument, &show_help, 1},
229 {"version", no_argument, &show_version, 1},
233 /* Optionally remove files created so far; then exit.
234 Called when an error is detected. */
240 close_output_file ();
254 interrupt_handler (int sig)
257 struct sigaction sigact;
259 sigact.sa_handler = SIG_DFL;
260 sigemptyset (&sigact.sa_mask);
262 sigaction (sig, &sigact, NULL);
263 #else /* !SA_INTERRUPT */
264 signal (sig, SIG_DFL);
265 #endif /* SA_INTERRUPT */
267 kill (getpid (), sig);
270 /* Keep track of NUM chars of a partial line in buffer START.
271 These chars will be retrieved later when another large buffer is read.
272 It is not necessary to create a new buffer for these chars; instead,
273 we keep a pointer to the existing buffer. This buffer *is* on the
274 free list, and when the next buffer is obtained from this list
275 (even if it is this one), these chars will be placed at the
276 start of the new buffer. */
279 save_to_hold_area (char *start, unsigned int num)
285 /* Read up to MAX_N_BYTES chars from the input stream into DEST.
286 Return the number of chars read. */
289 read_input (char *dest, unsigned int max_n_bytes)
293 if (max_n_bytes == 0)
296 bytes_read = safe_read (input_desc, dest, max_n_bytes);
299 have_read_eof = TRUE;
303 error (0, errno, _("read error"));
310 /* Initialize existing line record P. */
313 clear_line_control (struct line *p)
317 p->retrieve_index = 0;
320 /* Initialize all line records in B. */
323 clear_all_line_control (struct buffer_record *b)
327 for (l = b->line_start; l; l = l->next)
328 clear_line_control (l);
331 /* Return a new, initialized line record. */
334 new_line_control (void)
338 p = (struct line *) xmalloc (sizeof (struct line));
341 clear_line_control (p);
346 /* Record LINE_START, which is the address of the start of a line
347 of length LINE_LEN in the large buffer, in the lines buffer of B. */
350 keep_new_line (struct buffer_record *b, char *line_start, int line_len)
354 /* If there is no existing area to keep line info, get some. */
355 if (b->line_start == NULL)
356 b->line_start = b->curr_line = new_line_control ();
358 /* If existing area for lines is full, get more. */
359 if (b->curr_line->used == CTRL_SIZE)
361 b->curr_line->next = new_line_control ();
362 b->curr_line = b->curr_line->next;
367 /* Record the start of the line, and update counters. */
368 l->starts[l->insert_index].str = line_start;
369 l->starts[l->insert_index].len = line_len;
374 /* Scan the buffer in B for newline characters
375 and record the line start locations and lengths in B.
376 Return the number of lines found in this buffer.
378 There may be an incomplete line at the end of the buffer;
379 a pointer is kept to this area, which will be used when
380 the next buffer is filled. */
383 record_line_starts (struct buffer_record *b)
385 char *line_start; /* Start of current line. */
386 char *line_end; /* End of each line found. */
387 unsigned int bytes_left; /* Length of incomplete last line. */
388 unsigned int lines; /* Number of lines found. */
389 unsigned int line_length; /* Length of each line found. */
391 if (b->bytes_used == 0)
395 line_start = b->buffer;
396 bytes_left = b->bytes_used;
400 line_end = memchr (line_start, '\n', bytes_left);
401 if (line_end == NULL)
403 line_length = line_end - line_start + 1;
404 keep_new_line (b, line_start, line_length);
405 bytes_left -= line_length;
406 line_start = line_end + 1;
410 /* Check for an incomplete last line. */
415 keep_new_line (b, line_start, bytes_left);
419 save_to_hold_area (line_start, bytes_left);
422 b->num_lines = lines;
423 b->first_available = b->start_line = last_line_number + 1;
424 last_line_number += lines;
429 /* Return a new buffer with room to store SIZE bytes, plus
430 an extra byte for safety. */
432 static struct buffer_record *
433 create_new_buffer (unsigned int size)
435 struct buffer_record *new_buffer;
437 new_buffer = (struct buffer_record *)
438 xmalloc (sizeof (struct buffer_record));
440 new_buffer->buffer = (char *) xmalloc (size + 1);
442 new_buffer->bytes_alloc = size;
443 new_buffer->line_start = new_buffer->curr_line = NULL;
448 /* Return a new buffer of at least MIN_SIZE bytes. If a buffer of at
449 least that size is currently free, use it, otherwise create a new one. */
451 static struct buffer_record *
452 get_new_buffer (unsigned int min_size)
454 struct buffer_record *p, *q;
455 struct buffer_record *new_buffer; /* Buffer to return. */
456 unsigned int alloc_size; /* Actual size that will be requested. */
458 alloc_size = START_SIZE;
459 while (min_size > alloc_size)
460 alloc_size += INCR_SIZE;
462 if (free_list == NULL)
463 new_buffer = create_new_buffer (alloc_size);
466 /* Use first-fit to find a buffer. */
467 p = new_buffer = NULL;
472 if (q->bytes_alloc >= min_size)
485 new_buffer = (q ? q : create_new_buffer (alloc_size));
487 new_buffer->curr_line = new_buffer->line_start;
488 clear_all_line_control (new_buffer);
491 new_buffer->num_lines = 0;
492 new_buffer->bytes_used = 0;
493 new_buffer->start_line = new_buffer->first_available = last_line_number + 1;
494 new_buffer->next = NULL;
499 /* Add buffer BUF to the list of free buffers. */
502 free_buffer (struct buffer_record *buf)
504 buf->next = free_list;
508 /* Append buffer BUF to the linked list of buffers that contain
509 some data yet to be processed. */
512 save_buffer (struct buffer_record *buf)
514 struct buffer_record *p;
517 buf->curr_line = buf->line_start;
523 for (p = head; p->next; p = p->next)
529 /* Fill a buffer of input.
531 Set the initial size of the buffer to a default.
532 Fill the buffer (from the hold area and input stream)
533 and find the individual lines.
534 If no lines are found (the buffer is too small to hold the next line),
535 release the current buffer (whose contents would have been put in the
536 hold area) and repeat the process with another large buffer until at least
537 one entire line has been read.
539 Return TRUE if a new buffer was obtained, otherwise false
540 (in which case end-of-file must have been encountered). */
545 struct buffer_record *b;
546 unsigned int bytes_wanted = START_SIZE; /* Minimum buffer size. */
547 unsigned int bytes_avail; /* Size of new buffer created. */
548 unsigned int lines_found; /* Number of lines in this new buffer. */
549 char *p; /* Place to load into buffer. */
554 /* We must make the buffer at least as large as the amount of data
555 in the partial line left over from the last call. */
556 if (bytes_wanted < hold_count)
557 bytes_wanted = hold_count;
561 b = get_new_buffer (bytes_wanted);
562 bytes_avail = b->bytes_alloc; /* Size of buffer returned. */
565 /* First check the `holding' area for a partial line. */
569 memcpy (p, hold_area, hold_count);
571 b->bytes_used += hold_count;
572 bytes_avail -= hold_count;
576 b->bytes_used += (unsigned int) read_input (p, bytes_avail);
578 lines_found = record_line_starts (b);
579 bytes_wanted = b->bytes_alloc * 2;
583 while (!lines_found && !have_read_eof);
588 return lines_found != 0;
591 /* Return the line number of the first line that has not yet been retrieved. */
594 get_first_line_in_buffer (void)
596 if (head == NULL && !load_buffer ())
597 error (EXIT_FAILURE, errno, _("input disappeared"));
599 return head->first_available;
602 /* Return a pointer to the logical first line in the buffer and make the
603 next line the logical first line.
604 Return NULL if there is no more input. */
606 static struct cstring *
609 struct cstring *line; /* Return value. */
610 struct line *l; /* For convenience. */
612 if (head == NULL && !load_buffer ())
615 if (current_line < head->first_available)
616 current_line = head->first_available;
618 ++(head->first_available);
622 line = &l->starts[l->retrieve_index];
624 /* Advance index to next line. */
625 if (++l->retrieve_index == l->used)
627 /* Go on to the next line record. */
628 head->curr_line = l->next;
629 if (head->curr_line == NULL || head->curr_line->used == 0)
631 /* Go on to the next data block. */
632 struct buffer_record *b = head;
641 /* Search the buffers for line LINENUM, reading more input if necessary.
642 Return a pointer to the line, or NULL if it is not found in the file. */
644 static struct cstring *
645 find_line (unsigned int linenum)
647 struct buffer_record *b;
649 if (head == NULL && !load_buffer ())
652 if (linenum < head->start_line)
657 if (linenum < b->start_line + b->num_lines)
659 /* The line is in this buffer. */
661 unsigned int offset; /* How far into the buffer the line is. */
664 offset = linenum - b->start_line;
665 /* Find the control record. */
666 while (offset >= CTRL_SIZE)
671 return &l->starts[offset];
673 if (b->next == NULL && !load_buffer ())
675 b = b->next; /* Try the next data block. */
679 /* Return TRUE if no more lines are available for input. */
684 return (find_line (current_line + 1) == NULL) ? TRUE : FALSE;
687 /* Set the name of the input file to NAME and open it. */
690 set_input_file (const char *name)
692 if (STREQ (name, "-"))
696 input_desc = open (name, O_RDONLY);
698 error (EXIT_FAILURE, errno, "%s", name);
702 /* Write all lines from the beginning of the buffer up to, but
703 not including, line LAST_LINE, to the current output file.
704 If IGNORE is TRUE, do not output lines selected here.
705 ARGNUM is the index in ARGV of the current pattern. */
708 write_to_file (unsigned int last_line, boolean ignore, int argnum)
710 struct cstring *line;
711 unsigned int first_line; /* First available input line. */
712 unsigned int lines; /* Number of lines to output. */
715 first_line = get_first_line_in_buffer ();
717 if (first_line > last_line)
719 error (0, 0, _("%s: line number out of range"), global_argv[argnum]);
723 lines = last_line - first_line;
725 for (i = 0; i < lines; i++)
727 line = remove_line ();
730 error (0, 0, _("%s: line number out of range"), global_argv[argnum]);
734 save_line_to_file (line);
738 /* Output any lines left after all regexps have been processed. */
741 dump_rest_of_file (void)
743 struct cstring *line;
745 while ((line = remove_line ()) != NULL)
746 save_line_to_file (line);
749 /* Handle an attempt to read beyond EOF under the control of record P,
750 on iteration REPETITION if nonzero. */
753 handle_line_error (const struct control *p, int repetition)
755 fprintf (stderr, _("%s: `%d': line number out of range"),
756 program_name, p->lines_required);
758 fprintf (stderr, _(" on repetition %d\n"), repetition);
760 fprintf (stderr, "\n");
765 /* Determine the line number that marks the end of this file,
766 then get those lines and save them to the output file.
767 P is the control record.
768 REPETITION is the repetition number. */
771 process_line_count (const struct control *p, int repetition)
773 unsigned int linenum;
774 unsigned int last_line_to_save = p->lines_required * (repetition + 1);
775 struct cstring *line;
777 create_output_file ();
779 linenum = get_first_line_in_buffer ();
781 /* Initially, I wanted to assert linenum < last_line_to_save, but that
782 condition is false for the valid command: echo | csplit - 1 '{*}'.
783 So, relax it just a little. */
784 assert ((linenum == 1 && last_line_to_save == 1)
785 || linenum < last_line_to_save);
787 while (linenum++ < last_line_to_save)
789 line = remove_line ();
791 handle_line_error (p, repetition);
792 save_line_to_file (line);
795 close_output_file ();
797 /* Ensure that the line number specified is not 1 greater than
798 the number of lines in the file. */
799 if (no_more_lines ())
800 handle_line_error (p, repetition);
804 regexp_error (struct control *p, int repetition, boolean ignore)
806 fprintf (stderr, _("%s: `%s': match not found"),
807 program_name, global_argv[p->argnum]);
810 fprintf (stderr, _(" on repetition %d\n"), repetition);
812 fprintf (stderr, "\n");
816 dump_rest_of_file ();
817 close_output_file ();
822 /* Read the input until a line matches the regexp in P, outputting
823 it unless P->IGNORE is TRUE.
824 REPETITION is this repeat-count; 0 means the first time. */
827 process_regexp (struct control *p, int repetition)
829 struct cstring *line; /* From input file. */
830 unsigned int line_len; /* To make "$" in regexps work. */
831 unsigned int break_line; /* First line number of next file. */
832 boolean ignore = p->ignore; /* If TRUE, skip this section. */
836 create_output_file ();
838 /* If there is no offset for the regular expression, or
839 it is positive, then it is not necessary to buffer the lines. */
845 line = find_line (++current_line);
848 if (p->repeat_forever)
852 dump_rest_of_file ();
853 close_output_file ();
858 regexp_error (p, repetition, ignore);
860 line_len = line->len;
861 if (line->str[line_len - 1] == '\n')
863 ret = re_search (&p->re_compiled, line->str, line_len,
864 0, line_len, (struct re_registers *) 0);
867 error (0, 0, _("error in regular expression search"));
872 line = remove_line ();
874 save_line_to_file (line);
882 /* Buffer the lines. */
885 line = find_line (++current_line);
888 if (p->repeat_forever)
892 dump_rest_of_file ();
893 close_output_file ();
898 regexp_error (p, repetition, ignore);
900 line_len = line->len;
901 if (line->str[line_len - 1] == '\n')
903 ret = re_search (&p->re_compiled, line->str, line_len,
904 0, line_len, (struct re_registers *) 0);
907 error (0, 0, _("error in regular expression search"));
915 /* Account for any offset from this regexp. */
916 break_line = current_line + p->offset;
918 write_to_file (break_line, ignore, p->argnum);
921 close_output_file ();
924 current_line = break_line;
927 /* Split the input file according to the control records we have built. */
934 for (i = 0; i < control_used; i++)
936 if (controls[i].regexpr)
938 for (j = 0; (controls[i].repeat_forever
939 || j <= controls[i].repeat); j++)
940 process_regexp (&controls[i], j);
944 for (j = 0; (controls[i].repeat_forever
945 || j <= controls[i].repeat); j++)
946 process_line_count (&controls[i], j);
950 create_output_file ();
951 dump_rest_of_file ();
952 close_output_file ();
955 /* Return the name of output file number NUM. */
958 make_filename (unsigned int num)
960 strcpy (filename_space, prefix);
962 sprintf (filename_space+strlen(prefix), suffix, num);
964 sprintf (filename_space+strlen(prefix), "%0*d", digits, num);
965 return filename_space;
968 /* Create the next output file. */
971 create_output_file (void)
973 output_filename = make_filename (files_created);
974 output_stream = fopen (output_filename, "w");
975 if (output_stream == NULL)
977 error (0, errno, "%s", output_filename);
984 /* Delete all the files we have created. */
987 delete_all_files (void)
992 for (i = 0; i < files_created; i++)
994 name = make_filename (i);
996 error (0, errno, "%s", name);
1000 /* Close the current output file and print the count
1001 of characters in this file. */
1004 close_output_file (void)
1008 if (ferror (output_stream) || fclose (output_stream) == EOF)
1010 error (0, errno, _("write error for `%s'"), output_filename);
1011 output_stream = NULL;
1014 if (bytes_written == 0 && elide_empty_files)
1016 if (unlink (output_filename))
1017 error (0, errno, "%s", output_filename);
1022 /* FIXME: if we write to stdout here, we have to close stdout
1023 and check for errors. */
1024 if (!suppress_count)
1025 fprintf (stdout, "%d\n", bytes_written);
1027 output_stream = NULL;
1031 /* Save line LINE to the output file and
1032 increment the character count for the current file. */
1035 save_line_to_file (const struct cstring *line)
1037 fwrite (line->str, sizeof (char), line->len, output_stream);
1038 bytes_written += line->len;
1041 /* Return a new, initialized control record. */
1043 static struct control *
1044 new_control_record (void)
1046 static unsigned control_allocated = 0; /* Total space allocated. */
1049 if (control_allocated == 0)
1051 control_allocated = ALLOC_SIZE;
1052 controls = (struct control *)
1053 xmalloc (sizeof (struct control) * control_allocated);
1055 else if (control_used == control_allocated)
1057 control_allocated += ALLOC_SIZE;
1058 controls = (struct control *)
1059 xrealloc ((char *) controls,
1060 sizeof (struct control) * control_allocated);
1062 p = &controls[control_used++];
1065 p->repeat_forever = 0;
1066 p->lines_required = 0;
1071 /* Check if there is a numeric offset after a regular expression.
1072 STR is the entire command line argument.
1073 P is the control record for this regular expression.
1074 NUM is the numeric part of STR. */
1077 check_for_offset (struct control *p, const char *str, const char *num)
1081 if (*num != '-' && *num != '+')
1082 error (EXIT_FAILURE, 0, _("%s: `+' or `-' expected after delimeter"), str);
1084 if (xstrtoul (num + 1, NULL, 10, &val, "") != LONGINT_OK
1086 error (EXIT_FAILURE, 0, _("%s: integer expected after `%c'"), str, *num);
1087 p->offset = (unsigned int) val;
1090 p->offset = -p->offset;
1093 /* Given that the first character of command line arg STR is '{',
1094 make sure that the rest of the string is a valid repeat count
1095 and store its value in P.
1096 ARGNUM is the ARGV index of STR. */
1099 parse_repeat_count (int argnum, struct control *p, char *str)
1104 end = str + strlen (str) - 1;
1106 error (EXIT_FAILURE, 0, _("%s: `}' is required in repeat count"), str);
1109 if (str+1 == end-1 && *(str+1) == '*')
1110 p->repeat_forever = 1;
1113 if (xstrtoul (str + 1, NULL, 10, &val, "") != LONGINT_OK
1116 error (EXIT_FAILURE, 0,
1117 _("%s}: integer required between `{' and `}'"),
1118 global_argv[argnum]);
1120 p->repeat = (unsigned int) val;
1126 /* Extract the regular expression from STR and check for a numeric offset.
1127 STR should start with the regexp delimiter character.
1128 Return a new control record for the regular expression.
1129 ARGNUM is the ARGV index of STR.
1130 Unless IGNORE is TRUE, mark these lines for output. */
1132 static struct control *
1133 extract_regexp (int argnum, boolean ignore, char *str)
1135 int len; /* Number of chars in this regexp. */
1137 char *closing_delim;
1141 closing_delim = strrchr (str + 1, delim);
1142 if (closing_delim == NULL)
1143 error (EXIT_FAILURE, 0,
1144 _("%s: closing delimeter `%c' missing"), str, delim);
1146 len = closing_delim - str - 1;
1147 p = new_control_record ();
1151 p->regexpr = (char *) xmalloc ((unsigned) (len + 1));
1152 strncpy (p->regexpr, str + 1, len);
1153 p->re_compiled.allocated = len * 2;
1154 p->re_compiled.buffer = (unsigned char *) xmalloc (p->re_compiled.allocated);
1155 p->re_compiled.fastmap = xmalloc (256);
1156 p->re_compiled.translate = 0;
1158 p->re_compiled.syntax_parens = 0;
1160 err = re_compile_pattern (p->regexpr, len, &p->re_compiled);
1163 error (0, 0, _("%s: invalid regular expression: %s"), str, err);
1167 if (closing_delim[1])
1168 check_for_offset (p, str, closing_delim + 1);
1173 /* Extract the break patterns from args START through ARGC - 1 of ARGV.
1174 After each pattern, check if the next argument is a repeat count. */
1177 parse_patterns (int argc, int start, char **argv)
1179 int i; /* Index into ARGV. */
1180 struct control *p; /* New control record created. */
1182 static unsigned long last_val = 0;
1184 for (i = start; i < argc; i++)
1186 if (*argv[i] == '/' || *argv[i] == '%')
1188 p = extract_regexp (i, *argv[i] == '%', argv[i]);
1192 p = new_control_record ();
1195 if (xstrtoul (argv[i], NULL, 10, &val, "") != LONGINT_OK
1197 error (EXIT_FAILURE, 0, _("%s: invalid pattern"), argv[i]);
1199 error (EXIT_FAILURE, 0,
1200 _("%s: line number must be greater than zero"),
1203 error (EXIT_FAILURE, 0,
1204 _("line number `%s' is smaller than preceding line number, %lu"),
1207 if (val == last_val)
1209 _("warning: line number `%s' is the same as preceding line number"),
1213 p->lines_required = (int) val;
1216 if (i + 1 < argc && *argv[i + 1] == '{')
1218 /* We have a repeat count. */
1220 parse_repeat_count (i, p, argv[i]);
1226 get_format_flags (char **format_ptr)
1230 for (; **format_ptr; (*format_ptr)++)
1232 switch (**format_ptr)
1243 count += 2; /* Allow for 0x prefix preceding an `x' conversion. */
1254 get_format_width (char **format_ptr)
1260 start = *format_ptr;
1261 for (; ISDIGIT (**format_ptr); (*format_ptr)++)
1264 ch_save = **format_ptr;
1265 **format_ptr = '\0';
1266 /* In the case where no minimum field width is explicitly specified,
1267 allow for enough octal digits to represent the value of LONG_MAX. */
1268 count = ((*format_ptr == start)
1269 ? bytes_to_octal_digits[sizeof (long)]
1271 **format_ptr = ch_save;
1276 get_format_prec (char **format_ptr)
1283 if (**format_ptr != '.')
1287 if (**format_ptr == '-' || **format_ptr == '+')
1289 is_negative = (**format_ptr == '-');
1297 start = *format_ptr;
1298 for (; ISDIGIT (**format_ptr); (*format_ptr)++)
1301 /* ANSI 4.9.6.1 says that if the precision is negative, it's as good as not present. */
1304 start = *format_ptr;
1306 ch_save = **format_ptr;
1307 **format_ptr = '\0';
1308 count = (*format_ptr == start) ? 11 : atoi (start);
1309 **format_ptr = ch_save;
1315 get_format_conv_type (char **format_ptr)
1317 int ch = *((*format_ptr)++);
1330 error (EXIT_FAILURE, 0, _("missing conversion specifier in suffix"));
1335 error (EXIT_FAILURE, 0,
1336 _("invalid conversion specifier in suffix: %c"), ch);
1338 error (EXIT_FAILURE, 0,
1339 _("invalid conversion specifier in suffix: \\%.3o"), ch);
1344 max_out (char *format)
1346 unsigned out_count = 0;
1347 unsigned percents = 0;
1358 out_count += get_format_flags (&format);
1360 int width = get_format_width (&format);
1361 int prec = get_format_prec (&format);
1363 out_count += MAX (width, prec);
1365 get_format_conv_type (&format);
1370 error (EXIT_FAILURE, 0,
1371 _("missing %% conversion specification in suffix"));
1372 else if (percents > 1)
1373 error (EXIT_FAILURE, 0,
1374 _("too many %% conversion specifications in suffix"));
1380 main (int argc, char **argv)
1385 struct sigaction oldact, newact;
1388 program_name = argv[0];
1389 setlocale (LC_ALL, "");
1390 bindtextdomain (PACKAGE, LOCALEDIR);
1391 textdomain (PACKAGE);
1396 suppress_count = FALSE;
1397 remove_files = TRUE;
1398 prefix = DEFAULT_PREFIX;
1400 /* Change the way xmalloc and xrealloc fail. */
1401 xalloc_fail_func = cleanup;
1404 newact.sa_handler = interrupt_handler;
1405 sigemptyset (&newact.sa_mask);
1406 newact.sa_flags = 0;
1408 sigaction (SIGHUP, NULL, &oldact);
1409 if (oldact.sa_handler != SIG_IGN)
1410 sigaction (SIGHUP, &newact, NULL);
1412 sigaction (SIGINT, NULL, &oldact);
1413 if (oldact.sa_handler != SIG_IGN)
1414 sigaction (SIGINT, &newact, NULL);
1416 sigaction (SIGQUIT, NULL, &oldact);
1417 if (oldact.sa_handler != SIG_IGN)
1418 sigaction (SIGQUIT, &newact, NULL);
1420 sigaction (SIGTERM, NULL, &oldact);
1421 if (oldact.sa_handler != SIG_IGN)
1422 sigaction (SIGTERM, &newact, NULL);
1423 #else /* not SA_INTERRUPT */
1424 if (signal (SIGHUP, SIG_IGN) != SIG_IGN)
1425 signal (SIGHUP, interrupt_handler);
1426 if (signal (SIGINT, SIG_IGN) != SIG_IGN)
1427 signal (SIGINT, interrupt_handler);
1428 if (signal (SIGQUIT, SIG_IGN) != SIG_IGN)
1429 signal (SIGQUIT, interrupt_handler);
1430 if (signal (SIGTERM, SIG_IGN) != SIG_IGN)
1431 signal (SIGTERM, interrupt_handler);
1432 #endif /* not SA_INTERRUPT */
1434 while ((optc = getopt_long (argc, argv, "f:b:kn:sqz", longopts, NULL)) != -1)
1449 remove_files = FALSE;
1453 if (xstrtoul (optarg, NULL, 10, &val, "") != LONGINT_OK
1455 error (EXIT_FAILURE, 0, _("%s: invalid number"), optarg);
1461 suppress_count = TRUE;
1465 elide_empty_files = TRUE;
1474 printf ("csplit (%s) %s\n", GNU_PACKAGE, VERSION);
1475 exit (EXIT_SUCCESS);
1481 if (argc - optind < 2)
1483 error (0, 0, _("too few arguments"));
1488 filename_space = (char *) xmalloc (strlen (prefix) + max_out (suffix) + 2);
1490 filename_space = (char *) xmalloc (strlen (prefix) + digits + 2);
1492 set_input_file (argv[optind++]);
1494 parse_patterns (argc, optind, argv);
1498 if (close (input_desc) < 0)
1500 error (0, errno, _("read error"));
1504 if (!suppress_count && (ferror (stdout) || fclose (stdout) == EOF))
1505 error (EXIT_FAILURE, errno, _("write error"));
1507 exit (EXIT_SUCCESS);
1514 fprintf (stderr, _("Try `%s --help' for more information.\n"),
1519 Usage: %s [OPTION]... FILE PATTERN...\n\
1523 Output pieces of FILE separated by PATTERN(s) to files `xx01', `xx02', ...,\n\
1524 and output byte counts of each piece to standard output.\n\
1526 -b, --suffix-format=FORMAT use sprintf FORMAT instead of %%d\n\
1527 -f, --prefix=PREFIX use PREFIX instead of `xx'\n\
1528 -k, --keep-files do not remove output files on errors\n\
1529 -n, --digits=DIGITS use specified number of digits instead of 2\n\
1530 -s, --quiet, --silent do not print counts of output file sizes\n\
1531 -z, --elide-empty-files remove empty output files\n\
1532 --help display this help and exit\n\
1533 --version output version information and exit\n\
1535 Read standard input if FILE is -. Each PATTERN may be:\n\
1537 INTEGER copy up to but not including specified line number\n\
1538 /REGEXP/[OFFSET] copy up to but not including a matching line\n\
1539 %%REGEXP%%[OFFSET] skip to, but not including a matching line\n\
1540 {INTEGER} repeat the previous pattern specified number of times\n\
1541 {*} repeat the previous pattern as many times as possible\n\
1543 A line OFFSET is a required `+' or `-' followed by a positive integer.\n\
1545 puts (_("\nReport bugs to <textutils-bugs@gnu.org>."));
1547 exit (status == 0 ? EXIT_SUCCESS : EXIT_FAILURE);