1 /* csplit - split a file into sections determined by context lines
2 Copyright (C) 91, 95, 96, 1997, 1998 Free Software Foundation, Inc.
4 This program is free software; you can redistribute it and/or modify
5 it under the terms of the GNU General Public License as published by
6 the Free Software Foundation; either version 2, or (at your option) any later version.
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU General Public License for more details.
14 You should have received a copy of the GNU General Public License
15 along with this program; if not, write to the Free Software Foundation,
16 Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */
18 /* Written by Stuart Kemp, cpsrk@groper.jcu.edu.au.
19 Modified by David MacKenzie, djm@gnu.ai.mit.edu. */
26 #include <sys/types.h>
40 #include "safe-read.h"
47 # define MAX(a,b) (((a) > (b)) ? (a) : (b))
55 /* Increment size of area for control records. */
58 /* The default prefix for output file names. */
59 #define DEFAULT_PREFIX "xx"
63 /* A compiled pattern arg. */
66 char *regexpr; /* Non-compiled regular expression. */
67 struct re_pattern_buffer re_compiled; /* Compiled regular expression. */
68 int offset; /* Offset from regexp to split at. */
69 int lines_required; /* Number of lines required. */
70 unsigned int repeat; /* Repeat count. */
71 int repeat_forever; /* Non-zero if `*' used as a repeat count. */
72 int argnum; /* ARGV index. */
73 boolean ignore; /* If true, produce no output (for regexp). */
76 /* Initial size of data area in buffers. */
77 #define START_SIZE 8191
79 /* Increment size for data area. */
80 #define INCR_SIZE 2048
82 /* Number of lines kept in each node in line list. */
86 /* Some small values to test the algorithms. */
87 # define START_SIZE 200
92 /* A string with a length count. */
99 /* Pointers to the beginnings of lines in the buffer area.
100 These structures are linked together if needed. */
103 unsigned used; /* Number of offsets used in this struct. */
104 unsigned insert_index; /* Next offset to use when inserting line. */
105 unsigned retrieve_index; /* Next index to use when retrieving line. */
106 struct cstring starts[CTRL_SIZE]; /* Lines in the data area. */
107 struct line *next; /* Next in linked list. */
110 /* The structure to hold the input lines.
111 Contains a pointer to the data area and a list containing
112 pointers to the individual lines. */
115 unsigned bytes_alloc; /* Size of the buffer area. */
116 unsigned bytes_used; /* Bytes used in the buffer area. */
117 unsigned start_line; /* First line number in this buffer. */
118 unsigned first_available; /* First line that can be retrieved. */
119 unsigned num_lines; /* Number of complete lines in this buffer. */
120 char *buffer; /* Data area. */
121 struct line *line_start; /* Head of list of pointers to lines. */
122 struct line *curr_line; /* The line start record currently in use. */
123 struct buffer_record *next;
126 static void close_output_file PARAMS ((void));
127 static void create_output_file PARAMS ((void));
128 static void delete_all_files PARAMS ((void));
129 static void save_line_to_file PARAMS ((const struct cstring *line));
130 static void usage PARAMS ((int status));
132 /* The name this program was run with. */
135 /* Convert the number of 8-bit bytes of a binary representation to
136 the number of characters required to represent the same quantity
137 as an unsigned octal. For example, a 32-bit (4-byte) quantity may
138 require a field width as wide as 11 characters. */
139 static const unsigned int bytes_to_octal_digits[] =
140 {0, 3, 6, 8, 11, 14, 16, 19, 22, 25, 27, 30, 32, 35, 38, 41, 43};
142 /* Input file descriptor. */
143 static int input_desc = 0;
145 /* List of available buffers. */
146 static struct buffer_record *free_list = NULL;
148 /* Start of buffer list. */
149 static struct buffer_record *head = NULL;
151 /* Partially read line. */
152 static char *hold_area = NULL;
154 /* Number of chars in `hold_area'. */
155 static unsigned hold_count = 0;
157 /* Number of the last line in the buffers. */
158 static unsigned last_line_number = 0;
160 /* Number of the line currently being examined. */
161 static unsigned current_line = 0;
163 /* If TRUE, we have read EOF. */
164 static boolean have_read_eof = FALSE;
166 /* Name of output files. */
167 static char *filename_space = NULL;
169 /* Prefix part of output file names. */
170 static char *prefix = NULL;
172 /* Suffix part of output file names. */
173 static char *suffix = NULL;
175 /* Number of digits to use in output file names. */
176 static int digits = 2;
178 /* Number of files created so far. */
179 static unsigned int files_created = 0;
181 /* Number of bytes written to current file. */
182 static unsigned int bytes_written;
184 /* Output file pointer. */
185 static FILE *output_stream = NULL;
187 /* Output file name. */
188 static char *output_filename = NULL;
190 /* Perhaps it would be cleaner to pass arg values instead of indexes. */
191 static char **global_argv;
193 /* If TRUE, do not print the count of bytes in each output file. */
194 static boolean suppress_count;
196 /* If TRUE, remove output files on error. */
197 static boolean remove_files;
199 /* If TRUE, remove all output files which have a zero length. */
200 static boolean elide_empty_files;
202 /* The compiled pattern arguments, which determine how to split the input file. */
204 static struct control *controls;
206 /* Number of elements in `controls'. */
207 static unsigned int control_used;
209 /* If nonzero, display usage information and exit. */
210 static int show_help;
212 /* If nonzero, print the version on standard output then exit. */
213 static int show_version;
215 static struct option const longopts[] =
217 {"digits", required_argument, NULL, 'n'},
218 {"quiet", no_argument, NULL, 'q'},
219 {"silent", no_argument, NULL, 's'},
220 {"keep-files", no_argument, NULL, 'k'},
221 {"elide-empty-files", no_argument, NULL, 'z'},
222 {"prefix", required_argument, NULL, 'f'},
223 {"suffix-format", required_argument, NULL, 'b'},
224 {"help", no_argument, &show_help, 1},
225 {"version", no_argument, &show_version, 1},
229 /* Optionally remove files created so far; then exit.
230 Called when an error is detected. */
236 close_output_file ();
250 interrupt_handler (int sig)
253 struct sigaction sigact;
255 sigact.sa_handler = SIG_DFL;
256 sigemptyset (&sigact.sa_mask);
258 sigaction (sig, &sigact, NULL);
259 #else /* !SA_INTERRUPT */
260 signal (sig, SIG_DFL);
261 #endif /* SA_INTERRUPT */
263 kill (getpid (), sig);
266 /* Keep track of NUM chars of a partial line in buffer START.
267 These chars will be retrieved later when another large buffer is read.
268 It is not necessary to create a new buffer for these chars; instead,
269 we keep a pointer to the existing buffer. This buffer *is* on the
270 free list, and when the next buffer is obtained from this list
271 (even if it is this one), these chars will be placed at the
272 start of the new buffer. */
275 save_to_hold_area (char *start, unsigned int num)
281 /* Read up to MAX_N_BYTES chars from the input stream into DEST.
282 Return the number of chars read. */
285 read_input (char *dest, unsigned int max_n_bytes)
289 if (max_n_bytes == 0)
292 bytes_read = safe_read (input_desc, dest, max_n_bytes);
295 have_read_eof = TRUE;
299 error (0, errno, _("read error"));
306 /* Initialize existing line record P. */
309 clear_line_control (struct line *p)
313 p->retrieve_index = 0;
316 /* Initialize all line records in B. */
319 clear_all_line_control (struct buffer_record *b)
323 for (l = b->line_start; l; l = l->next)
324 clear_line_control (l);
327 /* Return a new, initialized line record. */
330 new_line_control (void)
334 p = (struct line *) xmalloc (sizeof (struct line));
337 clear_line_control (p);
342 /* Record LINE_START, which is the address of the start of a line
343 of length LINE_LEN in the large buffer, in the lines buffer of B. */
346 keep_new_line (struct buffer_record *b, char *line_start, int line_len)
350 /* If there is no existing area to keep line info, get some. */
351 if (b->line_start == NULL)
352 b->line_start = b->curr_line = new_line_control ();
354 /* If existing area for lines is full, get more. */
355 if (b->curr_line->used == CTRL_SIZE)
357 b->curr_line->next = new_line_control ();
358 b->curr_line = b->curr_line->next;
363 /* Record the start of the line, and update counters. */
364 l->starts[l->insert_index].str = line_start;
365 l->starts[l->insert_index].len = line_len;
370 /* Scan the buffer in B for newline characters
371 and record the line start locations and lengths in B.
372 Return the number of lines found in this buffer.
374 There may be an incomplete line at the end of the buffer;
375 a pointer is kept to this area, which will be used when
376 the next buffer is filled. */
379 record_line_starts (struct buffer_record *b)
381 char *line_start; /* Start of current line. */
382 char *line_end; /* End of each line found. */
383 unsigned int bytes_left; /* Length of incomplete last line. */
384 unsigned int lines; /* Number of lines found. */
385 unsigned int line_length; /* Length of each line found. */
387 if (b->bytes_used == 0)
391 line_start = b->buffer;
392 bytes_left = b->bytes_used;
396 line_end = memchr (line_start, '\n', bytes_left);
397 if (line_end == NULL)
399 line_length = line_end - line_start + 1;
400 keep_new_line (b, line_start, line_length);
401 bytes_left -= line_length;
402 line_start = line_end + 1;
406 /* Check for an incomplete last line. */
411 keep_new_line (b, line_start, bytes_left);
415 save_to_hold_area (line_start, bytes_left);
418 b->num_lines = lines;
419 b->first_available = b->start_line = last_line_number + 1;
420 last_line_number += lines;
425 /* Return a new buffer with room to store SIZE bytes, plus
426 an extra byte for safety. */
428 static struct buffer_record *
429 create_new_buffer (unsigned int size)
431 struct buffer_record *new_buffer;
433 new_buffer = (struct buffer_record *)
434 xmalloc (sizeof (struct buffer_record));
436 new_buffer->buffer = (char *) xmalloc (size + 1);
438 new_buffer->bytes_alloc = size;
439 new_buffer->line_start = new_buffer->curr_line = NULL;
444 /* Return a new buffer of at least MIN_SIZE bytes. If a buffer of at
445 least that size is currently free, use it, otherwise create a new one. */
447 static struct buffer_record *
448 get_new_buffer (unsigned int min_size)
450 struct buffer_record *p, *q;
451 struct buffer_record *new_buffer; /* Buffer to return. */
452 unsigned int alloc_size; /* Actual size that will be requested. */
454 alloc_size = START_SIZE;
455 while (min_size > alloc_size)
456 alloc_size += INCR_SIZE;
458 if (free_list == NULL)
459 new_buffer = create_new_buffer (alloc_size);
462 /* Use first-fit to find a buffer. */
463 p = new_buffer = NULL;
468 if (q->bytes_alloc >= min_size)
481 new_buffer = (q ? q : create_new_buffer (alloc_size));
483 new_buffer->curr_line = new_buffer->line_start;
484 clear_all_line_control (new_buffer);
487 new_buffer->num_lines = 0;
488 new_buffer->bytes_used = 0;
489 new_buffer->start_line = new_buffer->first_available = last_line_number + 1;
490 new_buffer->next = NULL;
495 /* Add buffer BUF to the list of free buffers. */
498 free_buffer (struct buffer_record *buf)
500 buf->next = free_list;
504 /* Append buffer BUF to the linked list of buffers that contain
505 some data yet to be processed. */
508 save_buffer (struct buffer_record *buf)
510 struct buffer_record *p;
513 buf->curr_line = buf->line_start;
519 for (p = head; p->next; p = p->next)
525 /* Fill a buffer of input.
527 Set the initial size of the buffer to a default.
528 Fill the buffer (from the hold area and input stream)
529 and find the individual lines.
530 If no lines are found (the buffer is too small to hold the next line),
531 release the current buffer (whose contents would have been put in the
532 hold area) and repeat the process with another large buffer until at least
533 one entire line has been read.
535 Return TRUE if a new buffer was obtained, otherwise false
536 (in which case end-of-file must have been encountered). */
541 struct buffer_record *b;
542 unsigned int bytes_wanted = START_SIZE; /* Minimum buffer size. */
543 unsigned int bytes_avail; /* Size of new buffer created. */
544 unsigned int lines_found; /* Number of lines in this new buffer. */
545 char *p; /* Place to load into buffer. */
550 /* We must make the buffer at least as large as the amount of data
551 in the partial line left over from the last call. */
552 if (bytes_wanted < hold_count)
553 bytes_wanted = hold_count;
557 b = get_new_buffer (bytes_wanted);
558 bytes_avail = b->bytes_alloc; /* Size of buffer returned. */
561 /* First check the `holding' area for a partial line. */
565 memcpy (p, hold_area, hold_count);
567 b->bytes_used += hold_count;
568 bytes_avail -= hold_count;
572 b->bytes_used += (unsigned int) read_input (p, bytes_avail);
574 lines_found = record_line_starts (b);
575 bytes_wanted = b->bytes_alloc * 2;
579 while (!lines_found && !have_read_eof);
584 return lines_found != 0;
587 /* Return the line number of the first line that has not yet been retrieved. */
590 get_first_line_in_buffer (void)
592 if (head == NULL && !load_buffer ())
593 error (EXIT_FAILURE, errno, _("input disappeared"));
595 return head->first_available;
598 /* Return a pointer to the logical first line in the buffer and make the
599 next line the logical first line.
600 Return NULL if there is no more input. */
602 static struct cstring *
605 struct cstring *line; /* Return value. */
606 struct line *l; /* For convenience. */
608 if (head == NULL && !load_buffer ())
611 if (current_line < head->first_available)
612 current_line = head->first_available;
614 ++(head->first_available);
618 line = &l->starts[l->retrieve_index];
620 /* Advance index to next line. */
621 if (++l->retrieve_index == l->used)
623 /* Go on to the next line record. */
624 head->curr_line = l->next;
625 if (head->curr_line == NULL || head->curr_line->used == 0)
627 /* Go on to the next data block. */
628 struct buffer_record *b = head;
637 /* Search the buffers for line LINENUM, reading more input if necessary.
638 Return a pointer to the line, or NULL if it is not found in the file. */
640 static struct cstring *
641 find_line (unsigned int linenum)
643 struct buffer_record *b;
645 if (head == NULL && !load_buffer ())
648 if (linenum < head->start_line)
653 if (linenum < b->start_line + b->num_lines)
655 /* The line is in this buffer. */
657 unsigned int offset; /* How far into the buffer the line is. */
660 offset = linenum - b->start_line;
661 /* Find the control record. */
662 while (offset >= CTRL_SIZE)
667 return &l->starts[offset];
669 if (b->next == NULL && !load_buffer ())
671 b = b->next; /* Try the next data block. */
675 /* Return TRUE if there are no more lines available for input. */
680 return (find_line (current_line + 1) == NULL) ? TRUE : FALSE;
683 /* Set the name of the input file to NAME and open it. */
686 set_input_file (const char *name)
688 if (STREQ (name, "-"))
692 input_desc = open (name, O_RDONLY);
694 error (EXIT_FAILURE, errno, "%s", name);
698 /* Write all lines from the beginning of the buffer up to, but
699 not including, line LAST_LINE, to the current output file.
700 If IGNORE is TRUE, do not output lines selected here.
701 ARGNUM is the index in ARGV of the current pattern. */
704 write_to_file (unsigned int last_line, boolean ignore, int argnum)
706 struct cstring *line;
707 unsigned int first_line; /* First available input line. */
708 unsigned int lines; /* Number of lines to output. */
711 first_line = get_first_line_in_buffer ();
713 if (first_line > last_line)
715 error (0, 0, _("%s: line number out of range"), global_argv[argnum]);
719 lines = last_line - first_line;
721 for (i = 0; i < lines; i++)
723 line = remove_line ();
726 error (0, 0, _("%s: line number out of range"), global_argv[argnum]);
730 save_line_to_file (line);
734 /* Output any lines left after all regexps have been processed. */
737 dump_rest_of_file (void)
739 struct cstring *line;
741 while ((line = remove_line ()) != NULL)
742 save_line_to_file (line);
745 /* Handle an attempt to read beyond EOF under the control of record P,
746 on iteration REPETITION if nonzero. */
749 handle_line_error (const struct control *p, int repetition)
751 fprintf (stderr, _("%s: `%d': line number out of range"),
752 program_name, p->lines_required);
754 fprintf (stderr, _(" on repetition %d\n"), repetition);
756 fprintf (stderr, "\n");
761 /* Determine the line number that marks the end of this file,
762 then get those lines and save them to the output file.
763 P is the control record.
764 REPETITION is the repetition number. */
767 process_line_count (const struct control *p, int repetition)
769 unsigned int linenum;
770 unsigned int last_line_to_save = p->lines_required * (repetition + 1);
771 struct cstring *line;
773 create_output_file ();
775 linenum = get_first_line_in_buffer ();
777 /* Initially, I wanted to assert linenum < last_line_to_save, but that
778 condition is false for the valid command: echo | csplit - 1 '{*}'.
779 So, relax it just a little. */
780 assert ((linenum == 1 && last_line_to_save == 1)
781 || linenum < last_line_to_save);
783 while (linenum++ < last_line_to_save)
785 line = remove_line ();
787 handle_line_error (p, repetition);
788 save_line_to_file (line);
791 close_output_file ();
793 /* Ensure that the line number specified is not 1 greater than
794 the number of lines in the file. */
795 if (no_more_lines ())
796 handle_line_error (p, repetition);
800 regexp_error (struct control *p, int repetition, boolean ignore)
802 fprintf (stderr, _("%s: `%s': match not found"),
803 program_name, global_argv[p->argnum]);
806 fprintf (stderr, _(" on repetition %d\n"), repetition);
808 fprintf (stderr, "\n");
812 dump_rest_of_file ();
813 close_output_file ();
818 /* Read the input until a line matches the regexp in P, outputting
819 it unless P->IGNORE is TRUE.
820 REPETITION is this repeat-count; 0 means the first time. */
823 process_regexp (struct control *p, int repetition)
825 struct cstring *line; /* From input file. */
826 unsigned int line_len; /* To make "$" in regexps work. */
827 unsigned int break_line; /* First line number of next file. */
828 boolean ignore = p->ignore; /* If TRUE, skip this section. */
832 create_output_file ();
834 /* If there is no offset for the regular expression, or
835 it is positive, then it is not necessary to buffer the lines. */
841 line = find_line (++current_line);
844 if (p->repeat_forever)
848 dump_rest_of_file ();
849 close_output_file ();
854 regexp_error (p, repetition, ignore);
856 line_len = line->len;
857 if (line->str[line_len - 1] == '\n')
859 ret = re_search (&p->re_compiled, line->str, line_len,
860 0, line_len, (struct re_registers *) 0);
863 error (0, 0, _("error in regular expression search"));
868 line = remove_line ();
870 save_line_to_file (line);
878 /* Buffer the lines. */
881 line = find_line (++current_line);
884 if (p->repeat_forever)
888 dump_rest_of_file ();
889 close_output_file ();
894 regexp_error (p, repetition, ignore);
896 line_len = line->len;
897 if (line->str[line_len - 1] == '\n')
899 ret = re_search (&p->re_compiled, line->str, line_len,
900 0, line_len, (struct re_registers *) 0);
903 error (0, 0, _("error in regular expression search"));
911 /* Account for any offset from this regexp. */
912 break_line = current_line + p->offset;
914 write_to_file (break_line, ignore, p->argnum);
917 close_output_file ();
920 current_line = break_line;
923 /* Split the input file according to the control records we have built. */
930 for (i = 0; i < control_used; i++)
932 if (controls[i].regexpr)
934 for (j = 0; (controls[i].repeat_forever
935 || j <= controls[i].repeat); j++)
936 process_regexp (&controls[i], j);
940 for (j = 0; (controls[i].repeat_forever
941 || j <= controls[i].repeat); j++)
942 process_line_count (&controls[i], j);
946 create_output_file ();
947 dump_rest_of_file ();
948 close_output_file ();
951 /* Return the name of output file number NUM. */
954 make_filename (unsigned int num)
956 strcpy (filename_space, prefix);
958 sprintf (filename_space+strlen(prefix), suffix, num);
960 sprintf (filename_space+strlen(prefix), "%0*d", digits, num);
961 return filename_space;
964 /* Create the next output file. */
967 create_output_file (void)
969 output_filename = make_filename (files_created);
970 output_stream = fopen (output_filename, "w");
971 if (output_stream == NULL)
973 error (0, errno, "%s", output_filename);
980 /* Delete all the files we have created. */
983 delete_all_files (void)
988 for (i = 0; i < files_created; i++)
990 name = make_filename (i);
992 error (0, errno, "%s", name);
996 /* Close the current output file and print the count
997 of characters in this file. */
1000 close_output_file (void)
1004 if (ferror (output_stream) || fclose (output_stream) == EOF)
1006 error (0, errno, _("write error for `%s'"), output_filename);
1007 output_stream = NULL;
1010 if (bytes_written == 0 && elide_empty_files)
1012 if (unlink (output_filename))
1013 error (0, errno, "%s", output_filename);
1018 /* FIXME: if we write to stdout here, we have to close stdout
1019 and check for errors. */
1020 if (!suppress_count)
1021 fprintf (stdout, "%d\n", bytes_written);
1023 output_stream = NULL;
1027 /* Save line LINE to the output file and
1028 increment the character count for the current file. */
1031 save_line_to_file (const struct cstring *line)
1033 fwrite (line->str, sizeof (char), line->len, output_stream);
1034 bytes_written += line->len;
1037 /* Return a new, initialized control record. */
1039 static struct control *
1040 new_control_record (void)
1042 static unsigned control_allocated = 0; /* Total space allocated. */
1045 if (control_allocated == 0)
1047 control_allocated = ALLOC_SIZE;
1048 controls = (struct control *)
1049 xmalloc (sizeof (struct control) * control_allocated);
1051 else if (control_used == control_allocated)
1053 control_allocated += ALLOC_SIZE;
1054 controls = (struct control *)
1055 xrealloc ((char *) controls,
1056 sizeof (struct control) * control_allocated);
1058 p = &controls[control_used++];
1061 p->repeat_forever = 0;
1062 p->lines_required = 0;
1067 /* Check if there is a numeric offset after a regular expression.
1068 STR is the entire command line argument.
1069 P is the control record for this regular expression.
1070 NUM is the numeric part of STR. */
1073 check_for_offset (struct control *p, const char *str, const char *num)
1077 if (*num != '-' && *num != '+')
1078 error (EXIT_FAILURE, 0, _("%s: `+' or `-' expected after delimeter"), str);
1080 if (xstrtoul (num + 1, NULL, 10, &val, "") != LONGINT_OK
1082 error (EXIT_FAILURE, 0, _("%s: integer expected after `%c'"), str, *num);
1083 p->offset = (unsigned int) val;
1086 p->offset = -p->offset;
1089 /* Given that the first character of command line arg STR is '{',
1090 make sure that the rest of the string is a valid repeat count
1091 and store its value in P.
1092 ARGNUM is the ARGV index of STR. */
1095 parse_repeat_count (int argnum, struct control *p, char *str)
1100 end = str + strlen (str) - 1;
1102 error (EXIT_FAILURE, 0, _("%s: `}' is required in repeat count"), str);
1105 if (str+1 == end-1 && *(str+1) == '*')
1106 p->repeat_forever = 1;
1109 if (xstrtoul (str + 1, NULL, 10, &val, "") != LONGINT_OK
1112 error (EXIT_FAILURE, 0,
1113 _("%s}: integer required between `{' and `}'"),
1114 global_argv[argnum]);
1116 p->repeat = (unsigned int) val;
1122 /* Extract the regular expression from STR and check for a numeric offset.
1123 STR should start with the regexp delimiter character.
1124 Return a new control record for the regular expression.
1125 ARGNUM is the ARGV index of STR.
1126 Unless IGNORE is TRUE, mark these lines for output. */
1128 static struct control *
1129 extract_regexp (int argnum, boolean ignore, char *str)
1131 int len; /* Number of chars in this regexp. */
1133 char *closing_delim;
1137 closing_delim = strrchr (str + 1, delim);
1138 if (closing_delim == NULL)
1139 error (EXIT_FAILURE, 0,
1140 _("%s: closing delimeter `%c' missing"), str, delim);
1142 len = closing_delim - str - 1;
1143 p = new_control_record ();
1147 p->regexpr = (char *) xmalloc ((unsigned) (len + 1));
1148 strncpy (p->regexpr, str + 1, len);
1149 p->re_compiled.allocated = len * 2;
1150 p->re_compiled.buffer = (unsigned char *) xmalloc (p->re_compiled.allocated);
1151 p->re_compiled.fastmap = xmalloc (256);
1152 p->re_compiled.translate = 0;
1154 p->re_compiled.syntax_parens = 0;
1156 err = re_compile_pattern (p->regexpr, len, &p->re_compiled);
1159 error (0, 0, _("%s: invalid regular expression: %s"), str, err);
1163 if (closing_delim[1])
1164 check_for_offset (p, str, closing_delim + 1);
1169 /* Extract the break patterns from args START through ARGC - 1 of ARGV.
1170 After each pattern, check if the next argument is a repeat count. */
1173 parse_patterns (int argc, int start, char **argv)
1175 int i; /* Index into ARGV. */
1176 struct control *p; /* New control record created. */
1178 static unsigned long last_val = 0;
1180 for (i = start; i < argc; i++)
1182 if (*argv[i] == '/' || *argv[i] == '%')
1184 p = extract_regexp (i, *argv[i] == '%', argv[i]);
1188 p = new_control_record ();
1191 if (xstrtoul (argv[i], NULL, 10, &val, "") != LONGINT_OK
1193 error (EXIT_FAILURE, 0, _("%s: invalid pattern"), argv[i]);
1195 error (EXIT_FAILURE, 0,
1196 _("%s: line number must be greater than zero"),
1199 error (EXIT_FAILURE, 0,
1200 _("line number `%s' is smaller than preceding line number, %lu"),
1203 if (val == last_val)
1205 _("warning: line number `%s' is the same as preceding line number"),
1209 p->lines_required = (int) val;
1212 if (i + 1 < argc && *argv[i + 1] == '{')
1214 /* We have a repeat count. */
1216 parse_repeat_count (i, p, argv[i]);
1222 get_format_flags (char **format_ptr)
1226 for (; **format_ptr; (*format_ptr)++)
1228 switch (**format_ptr)
1239 count += 2; /* Allow for 0x prefix preceding an `x' conversion. */
1250 get_format_width (char **format_ptr)
1256 start = *format_ptr;
1257 for (; ISDIGIT (**format_ptr); (*format_ptr)++)
1260 ch_save = **format_ptr;
1261 **format_ptr = '\0';
1262 /* In the case where no minimum field width is explicitly specified,
1263 allow for enough octal digits to represent the value of LONG_MAX. */
1264 count = ((*format_ptr == start)
1265 ? bytes_to_octal_digits[sizeof (long)]
1267 **format_ptr = ch_save;
1272 get_format_prec (char **format_ptr)
1279 if (**format_ptr != '.')
1283 if (**format_ptr == '-' || **format_ptr == '+')
1285 is_negative = (**format_ptr == '-');
1293 start = *format_ptr;
1294 for (; ISDIGIT (**format_ptr); (*format_ptr)++)
1297 /* ANSI 4.9.6.1 says that if the precision is negative, it's as good as not supplied. */
1300 start = *format_ptr;
1302 ch_save = **format_ptr;
1303 **format_ptr = '\0';
1304 count = (*format_ptr == start) ? 11 : atoi (start);
1305 **format_ptr = ch_save;
1311 get_format_conv_type (char **format_ptr)
1313 int ch = *((*format_ptr)++);
1326 error (EXIT_FAILURE, 0, _("missing conversion specifier in suffix"));
1331 error (EXIT_FAILURE, 0,
1332 _("invalid conversion specifier in suffix: %c"), ch);
1334 error (EXIT_FAILURE, 0,
1335 _("invalid conversion specifier in suffix: \\%.3o"), ch);
1340 max_out (char *format)
1342 unsigned out_count = 0;
1343 unsigned percents = 0;
1354 out_count += get_format_flags (&format);
1356 int width = get_format_width (&format);
1357 int prec = get_format_prec (&format);
1359 out_count += MAX (width, prec);
1361 get_format_conv_type (&format);
1366 error (EXIT_FAILURE, 0,
1367 _("missing %% conversion specification in suffix"));
1368 else if (percents > 1)
1369 error (EXIT_FAILURE, 0,
1370 _("too many %% conversion specifications in suffix"));
1376 main (int argc, char **argv)
1381 struct sigaction oldact, newact;
1384 program_name = argv[0];
1385 setlocale (LC_ALL, "");
1386 bindtextdomain (PACKAGE, LOCALEDIR);
1387 textdomain (PACKAGE);
1392 suppress_count = FALSE;
1393 remove_files = TRUE;
1394 prefix = DEFAULT_PREFIX;
1396 /* Change the way xmalloc and xrealloc fail. */
1397 xalloc_fail_func = cleanup;
1400 newact.sa_handler = interrupt_handler;
1401 sigemptyset (&newact.sa_mask);
1402 newact.sa_flags = 0;
1404 sigaction (SIGHUP, NULL, &oldact);
1405 if (oldact.sa_handler != SIG_IGN)
1406 sigaction (SIGHUP, &newact, NULL);
1408 sigaction (SIGINT, NULL, &oldact);
1409 if (oldact.sa_handler != SIG_IGN)
1410 sigaction (SIGINT, &newact, NULL);
1412 sigaction (SIGQUIT, NULL, &oldact);
1413 if (oldact.sa_handler != SIG_IGN)
1414 sigaction (SIGQUIT, &newact, NULL);
1416 sigaction (SIGTERM, NULL, &oldact);
1417 if (oldact.sa_handler != SIG_IGN)
1418 sigaction (SIGTERM, &newact, NULL);
1419 #else /* not SA_INTERRUPT */
1420 if (signal (SIGHUP, SIG_IGN) != SIG_IGN)
1421 signal (SIGHUP, interrupt_handler);
1422 if (signal (SIGINT, SIG_IGN) != SIG_IGN)
1423 signal (SIGINT, interrupt_handler);
1424 if (signal (SIGQUIT, SIG_IGN) != SIG_IGN)
1425 signal (SIGQUIT, interrupt_handler);
1426 if (signal (SIGTERM, SIG_IGN) != SIG_IGN)
1427 signal (SIGTERM, interrupt_handler);
1428 #endif /* not SA_INTERRUPT */
1430 while ((optc = getopt_long (argc, argv, "f:b:kn:sqz", longopts, NULL)) != -1)
1445 remove_files = FALSE;
1449 if (xstrtoul (optarg, NULL, 10, &val, "") != LONGINT_OK
1451 error (EXIT_FAILURE, 0, _("%s: invalid number"), optarg);
1457 suppress_count = TRUE;
1461 elide_empty_files = TRUE;
1470 printf ("csplit (%s) %s\n", GNU_PACKAGE, VERSION);
1471 exit (EXIT_SUCCESS);
1477 if (argc - optind < 2)
1479 error (0, 0, _("too few arguments"));
1484 filename_space = (char *) xmalloc (strlen (prefix) + max_out (suffix) + 2);
1486 filename_space = (char *) xmalloc (strlen (prefix) + digits + 2);
1488 set_input_file (argv[optind++]);
1490 parse_patterns (argc, optind, argv);
1494 if (close (input_desc) < 0)
1496 error (0, errno, _("read error"));
1500 if (!suppress_count && (ferror (stdout) || fclose (stdout) == EOF))
1501 error (EXIT_FAILURE, errno, _("write error"));
1503 exit (EXIT_SUCCESS);
1510 fprintf (stderr, _("Try `%s --help' for more information.\n"),
1515 Usage: %s [OPTION]... FILE PATTERN...\n\
1519 Output pieces of FILE separated by PATTERN(s) to files `xx01', `xx02', ...,\n\
1520 and output byte counts of each piece to standard output.\n\
1522 -b, --suffix-format=FORMAT use sprintf FORMAT instead of %%d\n\
1523 -f, --prefix=PREFIX use PREFIX instead of `xx'\n\
1524 -k, --keep-files do not remove output files on errors\n\
1525 -n, --digits=DIGITS use specified number of digits instead of 2\n\
1526 -s, --quiet, --silent do not print counts of output file sizes\n\
1527 -z, --elide-empty-files remove empty output files\n\
1528 --help display this help and exit\n\
1529 --version output version information and exit\n\
1531 Read standard input if FILE is -. Each PATTERN may be:\n\
1533 INTEGER copy up to but not including specified line number\n\
1534 /REGEXP/[OFFSET] copy up to but not including a matching line\n\
1535 %%REGEXP%%[OFFSET] skip to, but not including a matching line\n\
1536 {INTEGER} repeat the previous pattern specified number of times\n\
1537 {*} repeat the previous pattern as many times as possible\n\
1539 A line OFFSET is a required `+' or `-' followed by a positive integer.\n\
1541 puts (_("\nReport bugs to <textutils-bugs@gnu.org>."));
1543 exit (status == 0 ? EXIT_SUCCESS : EXIT_FAILURE);