1 /* csplit - split a file into sections determined by context lines
2 Copyright (C) 91, 95, 96, 1997, 1998 Free Software Foundation, Inc.
4 This program is free software; you can redistribute it and/or modify
5 it under the terms of the GNU General Public License as published by
6 the Free Software Foundation; either version 2, or (at your option) any later version.
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU General Public License for more details.
14 You should have received a copy of the GNU General Public License
15 along with this program; if not, write to the Free Software Foundation,
16 Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */
18 /* Written by Stuart Kemp, cpsrk@groper.jcu.edu.au.
19 Modified by David MacKenzie, djm@gnu.ai.mit.edu. */
25 /* Disable assertions. Some systems have broken assert macros. */
30 #include <sys/types.h>
50 # define MAX(a,b) (((a) > (b)) ? (a) : (b))
58 /* Increment size of area for control records. */
61 /* The default prefix for output file names. */
62 #define DEFAULT_PREFIX "xx"
66 /* A compiled pattern arg. */
69 char *regexpr; /* Non-compiled regular expression. */
70 struct re_pattern_buffer re_compiled; /* Compiled regular expression. */
71 int offset; /* Offset from regexp to split at. */
72 int lines_required; /* Number of lines required. */
73 unsigned int repeat; /* Repeat count. */
74 int repeat_forever; /* Non-zero if `*' used as a repeat count. */
75 int argnum; /* ARGV index. */
76 boolean ignore; /* If true, produce no output (for regexp). */
79 /* Initial size of data area in buffers. */
80 #define START_SIZE 8191
82 /* Increment size for data area. */
83 #define INCR_SIZE 2048
85 /* Number of lines kept in each node in line list. */
89 /* Some small values to test the algorithms. */
90 # define START_SIZE 200
95 /* A string with a length count. */
102 /* Pointers to the beginnings of lines in the buffer area.
103 These structures are linked together if needed. */
106 unsigned used; /* Number of offsets used in this struct. */
107 unsigned insert_index; /* Next offset to use when inserting line. */
108 unsigned retrieve_index; /* Next index to use when retrieving line. */
109 struct cstring starts[CTRL_SIZE]; /* Lines in the data area. */
110 struct line *next; /* Next in linked list. */
113 /* The structure to hold the input lines.
114 Contains a pointer to the data area and a list containing
115 pointers to the individual lines. */
118 unsigned bytes_alloc; /* Size of the buffer area. */
119 unsigned bytes_used; /* Bytes used in the buffer area. */
120 unsigned start_line; /* First line number in this buffer. */
121 unsigned first_available; /* First line that can be retrieved. */
122 unsigned num_lines; /* Number of complete lines in this buffer. */
123 char *buffer; /* Data area. */
124 struct line *line_start; /* Head of list of pointers to lines. */
125 struct line *curr_line; /* The line start record currently in use. */
126 struct buffer_record *next;
131 static void close_output_file PARAMS ((void));
132 static void create_output_file PARAMS ((void));
133 static void delete_all_files PARAMS ((void));
134 static void save_line_to_file PARAMS ((const struct cstring *line));
135 static void usage PARAMS ((int status));
137 /* The name this program was run with. */
140 /* Convert the number of 8-bit bytes of a binary representation to
141 the number of characters required to represent the same quantity
142 as an unsigned octal. For example, a 32-bit (4-byte) quantity may
143 require a field width as wide as 11 characters. */
144 static const unsigned int bytes_to_octal_digits[] =
145 {0, 3, 6, 8, 11, 14, 16, 19, 22, 25, 27, 30, 32, 35, 38, 41, 43};
147 /* Input file descriptor. */
148 static int input_desc = 0;
150 /* List of available buffers. */
151 static struct buffer_record *free_list = NULL;
153 /* Start of buffer list. */
154 static struct buffer_record *head = NULL;
156 /* Partially read line. */
157 static char *hold_area = NULL;
159 /* Number of chars in `hold_area'. */
160 static unsigned hold_count = 0;
162 /* Number of the last line in the buffers. */
163 static unsigned last_line_number = 0;
165 /* Number of the line currently being examined. */
166 static unsigned current_line = 0;
168 /* If TRUE, we have read EOF. */
169 static boolean have_read_eof = FALSE;
171 /* Name of output files. */
172 static char *filename_space = NULL;
174 /* Prefix part of output file names. */
175 static char *prefix = NULL;
177 /* Suffix part of output file names. */
178 static char *suffix = NULL;
180 /* Number of digits to use in output file names. */
181 static int digits = 2;
183 /* Number of files created so far. */
184 static unsigned int files_created = 0;
186 /* Number of bytes written to current file. */
187 static unsigned int bytes_written;
189 /* Output file pointer. */
190 static FILE *output_stream = NULL;
192 /* Output file name. */
193 static char *output_filename = NULL;
195 /* Perhaps it would be cleaner to pass arg values instead of indexes. */
196 static char **global_argv;
198 /* If TRUE, do not print the count of bytes in each output file. */
199 static boolean suppress_count;
201 /* If TRUE, remove output files on error. */
202 static boolean remove_files;
204 /* If TRUE, remove all output files which have a zero length. */
205 static boolean elide_empty_files;
207 /* The compiled pattern arguments, which determine how to split the input file. */
209 static struct control *controls;
211 /* Number of elements in `controls'. */
212 static unsigned int control_used;
214 /* If nonzero, display usage information and exit. */
215 static int show_help;
217 /* If nonzero, print the version on standard output then exit. */
218 static int show_version;
220 static struct option const longopts[] =
222 {"digits", required_argument, NULL, 'n'},
223 {"quiet", no_argument, NULL, 'q'},
224 {"silent", no_argument, NULL, 's'},
225 {"keep-files", no_argument, NULL, 'k'},
226 {"elide-empty-files", no_argument, NULL, 'z'},
227 {"prefix", required_argument, NULL, 'f'},
228 {"suffix-format", required_argument, NULL, 'b'},
229 {"help", no_argument, &show_help, 1},
230 {"version", no_argument, &show_version, 1},
234 /* Optionally remove files created so far; then exit.
235 Called when an error is detected. */
241 close_output_file ();
255 interrupt_handler (int sig)
258 struct sigaction sigact;
260 sigact.sa_handler = SIG_DFL;
261 sigemptyset (&sigact.sa_mask);
263 sigaction (sig, &sigact, NULL);
264 #else /* !SA_INTERRUPT */
265 signal (sig, SIG_DFL);
266 #endif /* SA_INTERRUPT */
268 kill (getpid (), sig);
271 /* Keep track of NUM chars of a partial line in buffer START.
272 These chars will be retrieved later when another large buffer is read.
273 It is not necessary to create a new buffer for these chars; instead,
274 we keep a pointer to the existing buffer. This buffer *is* on the
275 free list, and when the next buffer is obtained from this list
276 (even if it is this one), these chars will be placed at the
277 start of the new buffer. */
280 save_to_hold_area (char *start, unsigned int num)
286 /* Read up to MAX_N_BYTES chars from the input stream into DEST.
287 Return the number of chars read. */
290 read_input (char *dest, unsigned int max_n_bytes)
294 if (max_n_bytes == 0)
297 bytes_read = safe_read (input_desc, dest, max_n_bytes);
300 have_read_eof = TRUE;
304 error (0, errno, _("read error"));
311 /* Initialize existing line record P. */
314 clear_line_control (struct line *p)
318 p->retrieve_index = 0;
321 /* Initialize all line records in B. */
324 clear_all_line_control (struct buffer_record *b)
328 for (l = b->line_start; l; l = l->next)
329 clear_line_control (l);
332 /* Return a new, initialized line record. */
335 new_line_control (void)
339 p = (struct line *) xmalloc (sizeof (struct line));
342 clear_line_control (p);
347 /* Record LINE_START, which is the address of the start of a line
348 of length LINE_LEN in the large buffer, in the lines buffer of B. */
351 keep_new_line (struct buffer_record *b, char *line_start, int line_len)
355 /* If there is no existing area to keep line info, get some. */
356 if (b->line_start == NULL)
357 b->line_start = b->curr_line = new_line_control ();
359 /* If existing area for lines is full, get more. */
360 if (b->curr_line->used == CTRL_SIZE)
362 b->curr_line->next = new_line_control ();
363 b->curr_line = b->curr_line->next;
368 /* Record the start of the line, and update counters. */
369 l->starts[l->insert_index].str = line_start;
370 l->starts[l->insert_index].len = line_len;
375 /* Scan the buffer in B for newline characters
376 and record the line start locations and lengths in B.
377 Return the number of lines found in this buffer.
379 There may be an incomplete line at the end of the buffer;
380 a pointer is kept to this area, which will be used when
381 the next buffer is filled. */
384 record_line_starts (struct buffer_record *b)
386 char *line_start; /* Start of current line. */
387 char *line_end; /* End of each line found. */
388 unsigned int bytes_left; /* Length of incomplete last line. */
389 unsigned int lines; /* Number of lines found. */
390 unsigned int line_length; /* Length of each line found. */
392 if (b->bytes_used == 0)
396 line_start = b->buffer;
397 bytes_left = b->bytes_used;
401 line_end = memchr (line_start, '\n', bytes_left);
402 if (line_end == NULL)
404 line_length = line_end - line_start + 1;
405 keep_new_line (b, line_start, line_length);
406 bytes_left -= line_length;
407 line_start = line_end + 1;
411 /* Check for an incomplete last line. */
416 keep_new_line (b, line_start, bytes_left);
420 save_to_hold_area (line_start, bytes_left);
423 b->num_lines = lines;
424 b->first_available = b->start_line = last_line_number + 1;
425 last_line_number += lines;
430 /* Return a new buffer with room to store SIZE bytes, plus
431 an extra byte for safety. */
433 static struct buffer_record *
434 create_new_buffer (unsigned int size)
436 struct buffer_record *new_buffer;
438 new_buffer = (struct buffer_record *)
439 xmalloc (sizeof (struct buffer_record));
441 new_buffer->buffer = (char *) xmalloc (size + 1);
443 new_buffer->bytes_alloc = size;
444 new_buffer->line_start = new_buffer->curr_line = NULL;
449 /* Return a new buffer of at least MIN_SIZE bytes. If a buffer of at
450 least that size is currently free, use it, otherwise create a new one. */
452 static struct buffer_record *
453 get_new_buffer (unsigned int min_size)
455 struct buffer_record *p, *q;
456 struct buffer_record *new_buffer; /* Buffer to return. */
457 unsigned int alloc_size; /* Actual size that will be requested. */
459 alloc_size = START_SIZE;
460 while (min_size > alloc_size)
461 alloc_size += INCR_SIZE;
463 if (free_list == NULL)
464 new_buffer = create_new_buffer (alloc_size);
467 /* Use first-fit to find a buffer. */
468 p = new_buffer = NULL;
473 if (q->bytes_alloc >= min_size)
486 new_buffer = (q ? q : create_new_buffer (alloc_size));
488 new_buffer->curr_line = new_buffer->line_start;
489 clear_all_line_control (new_buffer);
492 new_buffer->num_lines = 0;
493 new_buffer->bytes_used = 0;
494 new_buffer->start_line = new_buffer->first_available = last_line_number + 1;
495 new_buffer->next = NULL;
500 /* Add buffer BUF to the list of free buffers. */
503 free_buffer (struct buffer_record *buf)
505 buf->next = free_list;
509 /* Append buffer BUF to the linked list of buffers that contain
510 some data yet to be processed. */
513 save_buffer (struct buffer_record *buf)
515 struct buffer_record *p;
518 buf->curr_line = buf->line_start;
524 for (p = head; p->next; p = p->next)
530 /* Fill a buffer of input.
532 Set the initial size of the buffer to a default.
533 Fill the buffer (from the hold area and input stream)
534 and find the individual lines.
535 If no lines are found (the buffer is too small to hold the next line),
536 release the current buffer (whose contents would have been put in the
537 hold area) and repeat the process with another large buffer until at least
538 one entire line has been read.
540 Return TRUE if a new buffer was obtained, otherwise false
541 (in which case end-of-file must have been encountered). */
546 struct buffer_record *b;
547 unsigned int bytes_wanted = START_SIZE; /* Minimum buffer size. */
548 unsigned int bytes_avail; /* Size of new buffer created. */
549 unsigned int lines_found; /* Number of lines in this new buffer. */
550 char *p; /* Place to load into buffer. */
555 /* We must make the buffer at least as large as the amount of data
556 in the partial line left over from the last call. */
557 if (bytes_wanted < hold_count)
558 bytes_wanted = hold_count;
562 b = get_new_buffer (bytes_wanted);
563 bytes_avail = b->bytes_alloc; /* Size of buffer returned. */
566 /* First check the `holding' area for a partial line. */
570 memcpy (p, hold_area, hold_count);
572 b->bytes_used += hold_count;
573 bytes_avail -= hold_count;
577 b->bytes_used += (unsigned int) read_input (p, bytes_avail);
579 lines_found = record_line_starts (b);
580 bytes_wanted = b->bytes_alloc * 2;
584 while (!lines_found && !have_read_eof);
589 return lines_found != 0;
592 /* Return the line number of the first line that has not yet been retrieved. */
595 get_first_line_in_buffer (void)
597 if (head == NULL && !load_buffer ())
598 error (EXIT_FAILURE, errno, _("input disappeared"));
600 return head->first_available;
603 /* Return a pointer to the logical first line in the buffer and make the
604 next line the logical first line.
605 Return NULL if there is no more input. */
607 static struct cstring *
610 struct cstring *line; /* Return value. */
611 struct line *l; /* For convenience. */
613 if (head == NULL && !load_buffer ())
616 if (current_line < head->first_available)
617 current_line = head->first_available;
619 ++(head->first_available);
623 line = &l->starts[l->retrieve_index];
625 /* Advance index to next line. */
626 if (++l->retrieve_index == l->used)
628 /* Go on to the next line record. */
629 head->curr_line = l->next;
630 if (head->curr_line == NULL || head->curr_line->used == 0)
632 /* Go on to the next data block. */
633 struct buffer_record *b = head;
642 /* Search the buffers for line LINENUM, reading more input if necessary.
643 Return a pointer to the line, or NULL if it is not found in the file. */
645 static struct cstring *
646 find_line (unsigned int linenum)
648 struct buffer_record *b;
650 if (head == NULL && !load_buffer ())
653 if (linenum < head->start_line)
658 if (linenum < b->start_line + b->num_lines)
660 /* The line is in this buffer. */
662 unsigned int offset; /* How far into the buffer the line is. */
665 offset = linenum - b->start_line;
666 /* Find the control record. */
667 while (offset >= CTRL_SIZE)
672 return &l->starts[offset];
674 if (b->next == NULL && !load_buffer ())
676 b = b->next; /* Try the next data block. */
680 /* Return TRUE if no more lines are available for input. */
685 return (find_line (current_line + 1) == NULL) ? TRUE : FALSE;
688 /* Set the name of the input file to NAME and open it. */
691 set_input_file (const char *name)
693 if (!strcmp (name, "-"))
697 input_desc = open (name, O_RDONLY);
699 error (EXIT_FAILURE, errno, "%s", name);
703 /* Write all lines from the beginning of the buffer up to, but
704 not including, line LAST_LINE, to the current output file.
705 If IGNORE is TRUE, do not output lines selected here.
706 ARGNUM is the index in ARGV of the current pattern. */
709 write_to_file (unsigned int last_line, boolean ignore, int argnum)
711 struct cstring *line;
712 unsigned int first_line; /* First available input line. */
713 unsigned int lines; /* Number of lines to output. */
716 first_line = get_first_line_in_buffer ();
718 if (first_line > last_line)
720 error (0, 0, _("%s: line number out of range"), global_argv[argnum]);
724 lines = last_line - first_line;
726 for (i = 0; i < lines; i++)
728 line = remove_line ();
731 error (0, 0, _("%s: line number out of range"), global_argv[argnum]);
735 save_line_to_file (line);
739 /* Output any lines left after all regexps have been processed. */
742 dump_rest_of_file (void)
744 struct cstring *line;
746 while ((line = remove_line ()) != NULL)
747 save_line_to_file (line);
750 /* Handle an attempt to read beyond EOF under the control of record P,
751 on iteration REPETITION if nonzero. */
754 handle_line_error (const struct control *p, int repetition)
756 fprintf (stderr, _("%s: `%d': line number out of range"),
757 program_name, p->lines_required);
759 fprintf (stderr, _(" on repetition %d\n"), repetition);
761 fprintf (stderr, "\n");
766 /* Determine the line number that marks the end of this file,
767 then get those lines and save them to the output file.
768 P is the control record.
769 REPETITION is the repetition number. */
772 process_line_count (const struct control *p, int repetition)
774 unsigned int linenum;
775 unsigned int last_line_to_save = p->lines_required * (repetition + 1);
776 struct cstring *line;
778 create_output_file ();
780 linenum = get_first_line_in_buffer ();
782 /* Initially, I wanted to assert linenum < last_line_to_save, but that
783 condition is false for the valid command: echo | csplit - 1 '{*}'.
784 So, relax it just a little. */
785 assert ((linenum == 1 && last_line_to_save == 1)
786 || linenum < last_line_to_save);
788 while (linenum++ < last_line_to_save)
790 line = remove_line ();
792 handle_line_error (p, repetition);
793 save_line_to_file (line);
796 close_output_file ();
798 /* Ensure that the line number specified is not 1 greater than
799 the number of lines in the file. */
800 if (no_more_lines ())
801 handle_line_error (p, repetition);
805 regexp_error (struct control *p, int repetition, boolean ignore)
807 fprintf (stderr, _("%s: `%s': match not found"),
808 program_name, global_argv[p->argnum]);
811 fprintf (stderr, _(" on repetition %d\n"), repetition);
813 fprintf (stderr, "\n");
817 dump_rest_of_file ();
818 close_output_file ();
823 /* Read the input until a line matches the regexp in P, outputting
824 it unless P->IGNORE is TRUE.
825 REPETITION is this repeat-count; 0 means the first time. */
828 process_regexp (struct control *p, int repetition)
830 struct cstring *line; /* From input file. */
831 unsigned int line_len; /* To make "$" in regexps work. */
832 unsigned int break_line; /* First line number of next file. */
833 boolean ignore = p->ignore; /* If TRUE, skip this section. */
837 create_output_file ();
839 /* If there is no offset for the regular expression, or
840 it is positive, then it is not necessary to buffer the lines. */
846 line = find_line (++current_line);
849 if (p->repeat_forever)
853 dump_rest_of_file ();
854 close_output_file ();
859 regexp_error (p, repetition, ignore);
861 line_len = line->len;
862 if (line->str[line_len - 1] == '\n')
864 ret = re_search (&p->re_compiled, line->str, line_len,
865 0, line_len, (struct re_registers *) 0);
868 error (0, 0, _("error in regular expression search"));
873 line = remove_line ();
875 save_line_to_file (line);
883 /* Buffer the lines. */
886 line = find_line (++current_line);
889 if (p->repeat_forever)
893 dump_rest_of_file ();
894 close_output_file ();
899 regexp_error (p, repetition, ignore);
901 line_len = line->len;
902 if (line->str[line_len - 1] == '\n')
904 ret = re_search (&p->re_compiled, line->str, line_len,
905 0, line_len, (struct re_registers *) 0);
908 error (0, 0, _("error in regular expression search"));
916 /* Account for any offset from this regexp. */
917 break_line = current_line + p->offset;
919 write_to_file (break_line, ignore, p->argnum);
922 close_output_file ();
925 current_line = break_line;
928 /* Split the input file according to the control records we have built. */
935 for (i = 0; i < control_used; i++)
937 if (controls[i].regexpr)
939 for (j = 0; (controls[i].repeat_forever
940 || j <= controls[i].repeat); j++)
941 process_regexp (&controls[i], j);
945 for (j = 0; (controls[i].repeat_forever
946 || j <= controls[i].repeat); j++)
947 process_line_count (&controls[i], j);
951 create_output_file ();
952 dump_rest_of_file ();
953 close_output_file ();
956 /* Return the name of output file number NUM. */
959 make_filename (unsigned int num)
961 strcpy (filename_space, prefix);
963 sprintf (filename_space+strlen(prefix), suffix, num);
965 sprintf (filename_space+strlen(prefix), "%0*d", digits, num);
966 return filename_space;
969 /* Create the next output file. */
972 create_output_file (void)
974 output_filename = make_filename (files_created);
975 output_stream = fopen (output_filename, "w");
976 if (output_stream == NULL)
978 error (0, errno, "%s", output_filename);
985 /* Delete all the files we have created. */
988 delete_all_files (void)
993 for (i = 0; i < files_created; i++)
995 name = make_filename (i);
997 error (0, errno, "%s", name);
1001 /* Close the current output file and print the count
1002 of characters in this file. */
1005 close_output_file (void)
1009 if (ferror (output_stream) || fclose (output_stream) == EOF)
1011 error (0, errno, _("write error for `%s'"), output_filename);
1012 output_stream = NULL;
1015 if (bytes_written == 0 && elide_empty_files)
1017 if (unlink (output_filename))
1018 error (0, errno, "%s", output_filename);
1023 /* FIXME: if we write to stdout here, we have to close stdout
1024 and check for errors. */
1025 if (!suppress_count)
1026 fprintf (stdout, "%d\n", bytes_written);
1028 output_stream = NULL;
1032 /* Save line LINE to the output file and
1033 increment the character count for the current file. */
1036 save_line_to_file (const struct cstring *line)
1038 fwrite (line->str, sizeof (char), line->len, output_stream);
1039 bytes_written += line->len;
1042 /* Return a new, initialized control record. */
1044 static struct control *
1045 new_control_record (void)
1047 static unsigned control_allocated = 0; /* Total space allocated. */
1050 if (control_allocated == 0)
1052 control_allocated = ALLOC_SIZE;
1053 controls = (struct control *)
1054 xmalloc (sizeof (struct control) * control_allocated);
1056 else if (control_used == control_allocated)
1058 control_allocated += ALLOC_SIZE;
1059 controls = (struct control *)
1060 xrealloc ((char *) controls,
1061 sizeof (struct control) * control_allocated);
1063 p = &controls[control_used++];
1066 p->repeat_forever = 0;
1067 p->lines_required = 0;
1072 /* Check if there is a numeric offset after a regular expression.
1073 STR is the entire command line argument.
1074 P is the control record for this regular expression.
1075 NUM is the numeric part of STR. */
1078 check_for_offset (struct control *p, const char *str, const char *num)
1082 if (*num != '-' && *num != '+')
1083 error (EXIT_FAILURE, 0, _("%s: `+' or `-' expected after delimeter"), str);
1085 if (xstrtoul (num + 1, NULL, 10, &val, "") != LONGINT_OK
1087 error (EXIT_FAILURE, 0, _("%s: integer expected after `%c'"), str, *num);
1088 p->offset = (unsigned int) val;
1091 p->offset = -p->offset;
1094 /* Given that the first character of command line arg STR is '{',
1095 make sure that the rest of the string is a valid repeat count
1096 and store its value in P.
1097 ARGNUM is the ARGV index of STR. */
1100 parse_repeat_count (int argnum, struct control *p, char *str)
1105 end = str + strlen (str) - 1;
1107 error (EXIT_FAILURE, 0, _("%s: `}' is required in repeat count"), str);
1110 if (str+1 == end-1 && *(str+1) == '*')
1111 p->repeat_forever = 1;
1114 if (xstrtoul (str + 1, NULL, 10, &val, "") != LONGINT_OK
1117 error (EXIT_FAILURE, 0,
1118 _("%s}: integer required between `{' and `}'"),
1119 global_argv[argnum]);
1121 p->repeat = (unsigned int) val;
1127 /* Extract the regular expression from STR and check for a numeric offset.
1128 STR should start with the regexp delimiter character.
1129 Return a new control record for the regular expression.
1130 ARGNUM is the ARGV index of STR.
1131 Unless IGNORE is TRUE, mark these lines for output. */
1133 static struct control *
1134 extract_regexp (int argnum, boolean ignore, char *str)
1136 int len; /* Number of chars in this regexp. */
1138 char *closing_delim;
1142 closing_delim = strrchr (str + 1, delim);
1143 if (closing_delim == NULL)
1144 error (EXIT_FAILURE, 0,
1145 _("%s: closing delimeter `%c' missing"), str, delim);
1147 len = closing_delim - str - 1;
1148 p = new_control_record ();
1152 p->regexpr = (char *) xmalloc ((unsigned) (len + 1));
1153 strncpy (p->regexpr, str + 1, len);
1154 p->re_compiled.allocated = len * 2;
1155 p->re_compiled.buffer = (unsigned char *) xmalloc (p->re_compiled.allocated);
1156 p->re_compiled.fastmap = xmalloc (256);
1157 p->re_compiled.translate = 0;
1159 p->re_compiled.syntax_parens = 0;
1161 err = re_compile_pattern (p->regexpr, len, &p->re_compiled);
1164 error (0, 0, _("%s: invalid regular expression: %s"), str, err);
1168 if (closing_delim[1])
1169 check_for_offset (p, str, closing_delim + 1);
1174 /* Extract the break patterns from args START through ARGC - 1 of ARGV.
1175 After each pattern, check if the next argument is a repeat count. */
1178 parse_patterns (int argc, int start, char **argv)
1180 int i; /* Index into ARGV. */
1181 struct control *p; /* New control record created. */
1183 static unsigned long last_val = 0;
1185 for (i = start; i < argc; i++)
1187 if (*argv[i] == '/' || *argv[i] == '%')
1189 p = extract_regexp (i, *argv[i] == '%', argv[i]);
1193 p = new_control_record ();
1196 if (xstrtoul (argv[i], NULL, 10, &val, "") != LONGINT_OK
1198 error (EXIT_FAILURE, 0, _("%s: invalid pattern"), argv[i]);
1200 error (EXIT_FAILURE, 0,
1201 _("%s: line number must be greater than zero"),
1204 error (EXIT_FAILURE, 0,
1205 _("line number `%s' is smaller than preceding line number, %lu"),
1208 if (val == last_val)
1210 _("warning: line number `%s' is the same as preceding line number"),
1214 p->lines_required = (int) val;
1217 if (i + 1 < argc && *argv[i + 1] == '{')
1219 /* We have a repeat count. */
1221 parse_repeat_count (i, p, argv[i]);
1227 get_format_flags (char **format_ptr)
1231 for (; **format_ptr; (*format_ptr)++)
1233 switch (**format_ptr)
1244 count += 2; /* Allow for 0x prefix preceding an `x' conversion. */
1255 get_format_width (char **format_ptr)
1261 start = *format_ptr;
1262 for (; ISDIGIT (**format_ptr); (*format_ptr)++)
1265 ch_save = **format_ptr;
1266 **format_ptr = '\0';
1267 /* In the case where no minimum field width is explicitly specified,
1268 allow for enough octal digits to represent the value of LONG_MAX. */
1269 count = ((*format_ptr == start)
1270 ? bytes_to_octal_digits[sizeof (long)]
1272 **format_ptr = ch_save;
1277 get_format_prec (char **format_ptr)
1284 if (**format_ptr != '.')
1288 if (**format_ptr == '-' || **format_ptr == '+')
1290 is_negative = (**format_ptr == '-');
1298 start = *format_ptr;
1299 for (; ISDIGIT (**format_ptr); (*format_ptr)++)
1302 /* ANSI 4.9.6.1 says that if the precision is negative, it's as good as not present. */
1305 start = *format_ptr;
1307 ch_save = **format_ptr;
1308 **format_ptr = '\0';
1309 count = (*format_ptr == start) ? 11 : atoi (start);
1310 **format_ptr = ch_save;
1316 get_format_conv_type (char **format_ptr)
1318 int ch = *((*format_ptr)++);
1331 error (EXIT_FAILURE, 0, _("missing conversion specifier in suffix"));
1336 error (EXIT_FAILURE, 0,
1337 _("invalid conversion specifier in suffix: %c"), ch);
1339 error (EXIT_FAILURE, 0,
1340 _("invalid conversion specifier in suffix: \\%.3o"), ch);
1345 max_out (char *format)
1347 unsigned out_count = 0;
1348 unsigned percents = 0;
1359 out_count += get_format_flags (&format);
1361 int width = get_format_width (&format);
1362 int prec = get_format_prec (&format);
1364 out_count += MAX (width, prec);
1366 get_format_conv_type (&format);
1371 error (EXIT_FAILURE, 0,
1372 _("missing %% conversion specification in suffix"));
1373 else if (percents > 1)
1374 error (EXIT_FAILURE, 0,
1375 _("too many %% conversion specifications in suffix"));
1381 main (int argc, char **argv)
1386 struct sigaction oldact, newact;
1389 program_name = argv[0];
1390 setlocale (LC_ALL, "");
1391 bindtextdomain (PACKAGE, LOCALEDIR);
1392 textdomain (PACKAGE);
1397 suppress_count = FALSE;
1398 remove_files = TRUE;
1399 prefix = DEFAULT_PREFIX;
1401 /* Change the way xmalloc and xrealloc fail. */
1402 xalloc_fail_func = cleanup;
1405 newact.sa_handler = interrupt_handler;
1406 sigemptyset (&newact.sa_mask);
1407 newact.sa_flags = 0;
1409 sigaction (SIGHUP, NULL, &oldact);
1410 if (oldact.sa_handler != SIG_IGN)
1411 sigaction (SIGHUP, &newact, NULL);
1413 sigaction (SIGINT, NULL, &oldact);
1414 if (oldact.sa_handler != SIG_IGN)
1415 sigaction (SIGINT, &newact, NULL);
1417 sigaction (SIGQUIT, NULL, &oldact);
1418 if (oldact.sa_handler != SIG_IGN)
1419 sigaction (SIGQUIT, &newact, NULL);
1421 sigaction (SIGTERM, NULL, &oldact);
1422 if (oldact.sa_handler != SIG_IGN)
1423 sigaction (SIGTERM, &newact, NULL);
1424 #else /* not SA_INTERRUPT */
1425 if (signal (SIGHUP, SIG_IGN) != SIG_IGN)
1426 signal (SIGHUP, interrupt_handler);
1427 if (signal (SIGINT, SIG_IGN) != SIG_IGN)
1428 signal (SIGINT, interrupt_handler);
1429 if (signal (SIGQUIT, SIG_IGN) != SIG_IGN)
1430 signal (SIGQUIT, interrupt_handler);
1431 if (signal (SIGTERM, SIG_IGN) != SIG_IGN)
1432 signal (SIGTERM, interrupt_handler);
1433 #endif /* not SA_INTERRUPT */
1435 while ((optc = getopt_long (argc, argv, "f:b:kn:sqz", longopts, NULL)) != -1)
1450 remove_files = FALSE;
1454 if (xstrtoul (optarg, NULL, 10, &val, "") != LONGINT_OK
1456 error (EXIT_FAILURE, 0, _("%s: invalid number"), optarg);
1462 suppress_count = TRUE;
1466 elide_empty_files = TRUE;
1475 printf ("csplit (%s) %s\n", GNU_PACKAGE, VERSION);
1476 exit (EXIT_SUCCESS);
1482 if (argc - optind < 2)
1484 error (0, 0, _("too few arguments"));
1489 filename_space = (char *) xmalloc (strlen (prefix) + max_out (suffix) + 2);
1491 filename_space = (char *) xmalloc (strlen (prefix) + digits + 2);
1493 set_input_file (argv[optind++]);
1495 parse_patterns (argc, optind, argv);
1499 if (close (input_desc) < 0)
1501 error (0, errno, _("read error"));
1505 if (!suppress_count && (ferror (stdout) || fclose (stdout) == EOF))
1506 error (EXIT_FAILURE, errno, _("write error"));
1508 exit (EXIT_SUCCESS);
1515 fprintf (stderr, _("Try `%s --help' for more information.\n"),
1520 Usage: %s [OPTION]... FILE PATTERN...\n\
1524 Output pieces of FILE separated by PATTERN(s) to files `xx01', `xx02', ...,\n\
1525 and output byte counts of each piece to standard output.\n\
1527 -b, --suffix-format=FORMAT use sprintf FORMAT instead of %%d\n\
1528 -f, --prefix=PREFIX use PREFIX instead of `xx'\n\
1529 -k, --keep-files do not remove output files on errors\n\
1530 -n, --digits=DIGITS use specified number of digits instead of 2\n\
1531 -s, --quiet, --silent do not print counts of output file sizes\n\
1532 -z, --elide-empty-files remove empty output files\n\
1533 --help display this help and exit\n\
1534 --version output version information and exit\n\
1536 Read standard input if FILE is -. Each PATTERN may be:\n\
1538 INTEGER copy up to but not including specified line number\n\
1539 /REGEXP/[OFFSET] copy up to but not including a matching line\n\
1540 %%REGEXP%%[OFFSET] skip to, but not including a matching line\n\
1541 {INTEGER} repeat the previous pattern specified number of times\n\
1542 {*} repeat the previous pattern as many times as possible\n\
1544 A line OFFSET is a required `+' or `-' followed by a positive integer.\n\
1546 puts (_("\nReport bugs to <textutils-bugs@gnu.org>."));
1548 exit (status == 0 ? EXIT_SUCCESS : EXIT_FAILURE);