1 /* csplit - split a file into sections determined by context lines
2 Copyright (C) 91, 95, 1996 Free Software Foundation, Inc.
4 This program is free software; you can redistribute it and/or modify
5 it under the terms of the GNU General Public License as published by
6 the Free Software Foundation; either version 2, or (at your option)
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU General Public License for more details.
14 You should have received a copy of the GNU General Public License
15 along with this program; if not, write to the Free Software
16 Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */
18 /* Written by Stuart Kemp, cpsrk@groper.jcu.edu.au.
19 Modified by David MacKenzie, djm@gnu.ai.mit.edu. */
25 /* Disable assertions. Some systems have broken assert macros. */
30 #include <sys/types.h>
34 #endif /* HAVE_LIMITS_H */
37 # define UINT_MAX ((unsigned int) ~(unsigned int) 0)
41 # define INT_MAX ((int) (UINT_MAX >> 1))
61 #define MAX(a,b) (((a) > (b)) ? (a) : (b))
69 /* Increment size of area for control records. */
72 /* The default prefix for output file names. */
73 #define DEFAULT_PREFIX "xx"
77 /* A compiled pattern arg. */
80 char *regexpr; /* Non-compiled regular expression. */
81 struct re_pattern_buffer re_compiled; /* Compiled regular expression. */
82 int offset; /* Offset from regexp to split at. */
83 int lines_required; /* Number of lines required. */
84 unsigned int repeat; /* Repeat count. */
85 int repeat_forever; /* Non-zero if `*' used as a repeat count. */
86 int argnum; /* ARGV index. */
87 boolean ignore; /* If true, produce no output (for regexp). */
90 /* Initial size of data area in buffers. */
91 #define START_SIZE 8191
93 /* Increment size for data area. */
94 #define INCR_SIZE 2048
96 /* Number of lines kept in each node in line list. */
100 /* Some small values to test the algorithms. */
101 #define START_SIZE 200
106 /* A string with a length count. */
113 /* Pointers to the beginnings of lines in the buffer area.
114 These structures are linked together if needed. */
117 unsigned used; /* Number of offsets used in this struct. */
118 unsigned insert_index; /* Next offset to use when inserting line. */
119 unsigned retrieve_index; /* Next index to use when retrieving line. */
120 struct cstring starts[CTRL_SIZE]; /* Lines in the data area. */
121 struct line *next; /* Next in linked list. */
124 /* The structure to hold the input lines.
125 Contains a pointer to the data area and a list containing
126 pointers to the individual lines. */
129 unsigned bytes_alloc; /* Size of the buffer area. */
130 unsigned bytes_used; /* Bytes used in the buffer area. */
131 unsigned start_line; /* First line number in this buffer. */
132 unsigned first_available; /* First line that can be retrieved. */
133 unsigned num_lines; /* Number of complete lines in this buffer. */
134 char *buffer; /* Data area. */
135 struct line *line_start; /* Head of list of pointers to lines. */
136 struct line *curr_line; /* The line start record currently in use. */
137 struct buffer_record *next;
142 static void close_output_file __P ((void));
143 static void create_output_file __P ((void));
144 static void delete_all_files __P ((void));
145 static void save_line_to_file __P ((const struct cstring *line));
146 static void usage __P ((int status));
148 /* The name this program was run with. */
151 /* Convert the number of 8-bit bytes of a binary representation to
152 the number of characters required to represent the same quantity
153 as an unsigned octal. For example, a 32-bit (4-byte) quantity may
154 require a field width as wide as 11 characters. */
155 static const unsigned int bytes_to_octal_digits[] =
156 {0, 3, 6, 8, 11, 14, 16, 19, 22, 25, 27, 30, 32, 35, 38, 41, 43};
158 /* Input file descriptor. */
159 static int input_desc = 0;
161 /* List of available buffers. */
162 static struct buffer_record *free_list = NULL;
164 /* Start of buffer list. */
165 static struct buffer_record *head = NULL;
167 /* Partially read line. */
168 static char *hold_area = NULL;
170 /* Number of chars in `hold_area'. */
171 static unsigned hold_count = 0;
173 /* Number of the last line in the buffers. */
174 static unsigned last_line_number = 0;
176 /* Number of the line currently being examined. */
177 static unsigned current_line = 0;
179 /* If TRUE, we have read EOF. */
180 static boolean have_read_eof = FALSE;
182 /* Name of output files. */
183 static char *filename_space = NULL;
185 /* Prefix part of output file names. */
186 static char *prefix = NULL;
188 /* Suffix part of output file names. */
189 static char *suffix = NULL;
191 /* Number of digits to use in output file names. */
192 static int digits = 2;
194 /* Number of files created so far. */
195 static unsigned int files_created = 0;
197 /* Number of bytes written to current file. */
198 static unsigned int bytes_written;
200 /* Output file pointer. */
201 static FILE *output_stream = NULL;
203 /* Output file name. */
204 static char *output_filename = NULL;
206 /* Perhaps it would be cleaner to pass arg values instead of indexes. */
207 static char **global_argv;
209 /* If TRUE, do not print the count of bytes in each output file. */
210 static boolean suppress_count;
212 /* If TRUE, remove output files on error. */
213 static boolean remove_files;
215 /* If TRUE, remove all output files which have a zero length. */
216 static boolean elide_empty_files;
218 /* The compiled pattern arguments, which determine how to split
220 static struct control *controls;
222 /* Number of elements in `controls'. */
223 static unsigned int control_used;
225 /* If nonzero, display usage information and exit. */
226 static int show_help;
228 /* If nonzero, print the version on standard output then exit. */
229 static int show_version;
231 static struct option const longopts[] =
233 {"digits", required_argument, NULL, 'n'},
234 {"quiet", no_argument, NULL, 'q'},
235 {"silent", no_argument, NULL, 's'},
236 {"keep-files", no_argument, NULL, 'k'},
237 {"elide-empty-files", no_argument, NULL, 'z'},
238 {"prefix", required_argument, NULL, 'f'},
239 {"suffix-format", required_argument, NULL, 'b'},
240 {"help", no_argument, &show_help, 1},
241 {"version", no_argument, &show_version, 1},
245 /* Optionally remove files created so far; then exit.
246 Called when an error is detected. */
252 close_output_file ();
266 interrupt_handler (int sig)
269 struct sigaction sigact;
271 sigact.sa_handler = SIG_DFL;
272 sigemptyset (&sigact.sa_mask);
274 sigaction (sig, &sigact, NULL);
275 #else /* !SA_INTERRUPT */
276 signal (sig, SIG_DFL);
277 #endif /* SA_INTERRUPT */
279 kill (getpid (), sig);
282 /* Allocate N bytes of memory dynamically, with error checking. */
285 xmalloc (unsigned int n)
292 error (0, 0, _("virtual memory exhausted"));
298 /* Change the size of an allocated block of memory P to N bytes,
300 If P is NULL, run xmalloc.
301 If N is 0, run free and return NULL. */
304 xrealloc (char *p, unsigned int n)
316 error (0, 0, _("virtual memory exhausted"));
322 /* Keep track of NUM chars of a partial line in buffer START.
323 These chars will be retrieved later when another large buffer is read.
324 It is not necessary to create a new buffer for these chars; instead,
325 we keep a pointer to the existing buffer. This buffer *is* on the
326 free list, and when the next buffer is obtained from this list
327 (even if it is this one), these chars will be placed at the
328 start of the new buffer. */
331 save_to_hold_area (char *start, unsigned int num)
337 /* Read up to MAX_N_BYTES chars from the input stream into DEST.
338 Return the number of chars read. */
341 read_input (char *dest, unsigned int max_n_bytes)
345 if (max_n_bytes == 0)
348 bytes_read = safe_read (input_desc, dest, max_n_bytes);
351 have_read_eof = TRUE;
355 error (0, errno, _("read error"));
362 /* Initialize existing line record P. */
365 clear_line_control (struct line *p)
369 p->retrieve_index = 0;
372 /* Initialize all line records in B. */
375 clear_all_line_control (struct buffer_record *b)
379 for (l = b->line_start; l; l = l->next)
380 clear_line_control (l);
383 /* Return a new, initialized line record. */
386 new_line_control (void)
390 p = (struct line *) xmalloc (sizeof (struct line));
393 clear_line_control (p);
398 /* Record LINE_START, which is the address of the start of a line
399 of length LINE_LEN in the large buffer, in the lines buffer of B. */
402 keep_new_line (struct buffer_record *b, char *line_start, int line_len)
406 /* If there is no existing area to keep line info, get some. */
407 if (b->line_start == NULL)
408 b->line_start = b->curr_line = new_line_control ();
410 /* If existing area for lines is full, get more. */
411 if (b->curr_line->used == CTRL_SIZE)
413 b->curr_line->next = new_line_control ();
414 b->curr_line = b->curr_line->next;
419 /* Record the start of the line, and update counters. */
420 l->starts[l->insert_index].str = line_start;
421 l->starts[l->insert_index].len = line_len;
426 /* Scan the buffer in B for newline characters
427 and record the line start locations and lengths in B.
428 Return the number of lines found in this buffer.
430 There may be an incomplete line at the end of the buffer;
431 a pointer is kept to this area, which will be used when
432 the next buffer is filled. */
435 record_line_starts (struct buffer_record *b)
437 char *line_start; /* Start of current line. */
438 char *line_end; /* End of each line found. */
439 unsigned int bytes_left; /* Length of incomplete last line. */
440 unsigned int lines; /* Number of lines found. */
441 unsigned int line_length; /* Length of each line found. */
443 if (b->bytes_used == 0)
447 line_start = b->buffer;
448 bytes_left = b->bytes_used;
452 line_end = memchr (line_start, '\n', bytes_left);
453 if (line_end == NULL)
455 line_length = line_end - line_start + 1;
456 keep_new_line (b, line_start, line_length);
457 bytes_left -= line_length;
458 line_start = line_end + 1;
462 /* Check for an incomplete last line. */
467 keep_new_line (b, line_start, bytes_left);
471 save_to_hold_area (line_start, bytes_left);
474 b->num_lines = lines;
475 b->first_available = b->start_line = last_line_number + 1;
476 last_line_number += lines;
481 /* Return a new buffer with room to store SIZE bytes, plus
482 an extra byte for safety. */
484 static struct buffer_record *
485 create_new_buffer (unsigned int size)
487 struct buffer_record *new_buffer;
489 new_buffer = (struct buffer_record *)
490 xmalloc (sizeof (struct buffer_record));
492 new_buffer->buffer = (char *) xmalloc (size + 1);
494 new_buffer->bytes_alloc = size;
495 new_buffer->line_start = new_buffer->curr_line = NULL;
500 /* Return a new buffer of at least MIN_SIZE bytes. If a buffer of at
501 least that size is currently free, use it, otherwise create a new one. */
503 static struct buffer_record *
504 get_new_buffer (unsigned int min_size)
506 struct buffer_record *p, *q;
507 struct buffer_record *new_buffer; /* Buffer to return. */
508 unsigned int alloc_size; /* Actual size that will be requested. */
510 alloc_size = START_SIZE;
511 while (min_size > alloc_size)
512 alloc_size += INCR_SIZE;
514 if (free_list == NULL)
515 new_buffer = create_new_buffer (alloc_size);
518 /* Use first-fit to find a buffer. */
519 p = new_buffer = NULL;
524 if (q->bytes_alloc >= min_size)
537 new_buffer = (q ? q : create_new_buffer (alloc_size));
539 new_buffer->curr_line = new_buffer->line_start;
540 clear_all_line_control (new_buffer);
543 new_buffer->num_lines = 0;
544 new_buffer->bytes_used = 0;
545 new_buffer->start_line = new_buffer->first_available = last_line_number + 1;
546 new_buffer->next = NULL;
551 /* Add buffer BUF to the list of free buffers. */
554 free_buffer (struct buffer_record *buf)
556 buf->next = free_list;
560 /* Append buffer BUF to the linked list of buffers that contain
561 some data yet to be processed. */
564 save_buffer (struct buffer_record *buf)
566 struct buffer_record *p;
569 buf->curr_line = buf->line_start;
575 for (p = head; p->next; p = p->next)
581 /* Fill a buffer of input.
583 Set the initial size of the buffer to a default.
584 Fill the buffer (from the hold area and input stream)
585 and find the individual lines.
586 If no lines are found (the buffer is too small to hold the next line),
587 release the current buffer (whose contents would have been put in the
588 hold area) and repeat the process with another large buffer until at least
589 one entire line has been read.
591 Return TRUE if a new buffer was obtained, otherwise false
592 (in which case end-of-file must have been encountered). */
597 struct buffer_record *b;
598 unsigned int bytes_wanted = START_SIZE; /* Minimum buffer size. */
599 unsigned int bytes_avail; /* Size of new buffer created. */
600 unsigned int lines_found; /* Number of lines in this new buffer. */
601 char *p; /* Place to load into buffer. */
606 /* We must make the buffer at least as large as the amount of data
607 in the partial line left over from the last call. */
608 if (bytes_wanted < hold_count)
609 bytes_wanted = hold_count;
613 b = get_new_buffer (bytes_wanted);
614 bytes_avail = b->bytes_alloc; /* Size of buffer returned. */
617 /* First check the `holding' area for a partial line. */
621 memcpy (p, hold_area, hold_count);
623 b->bytes_used += hold_count;
624 bytes_avail -= hold_count;
628 b->bytes_used += (unsigned int) read_input (p, bytes_avail);
630 lines_found = record_line_starts (b);
631 bytes_wanted = b->bytes_alloc * 2;
635 while (!lines_found && !have_read_eof);
640 return lines_found != 0;
643 /* Return the line number of the first line that has not yet been retrieved. */
646 get_first_line_in_buffer (void)
648 if (head == NULL && !load_buffer ())
649 error (EXIT_FAILURE, errno, _("input disappeared"));
651 return head->first_available;
654 /* Return a pointer to the logical first line in the buffer and make the
655 next line the logical first line.
656 Return NULL if there is no more input. */
658 static struct cstring *
661 struct cstring *line; /* Return value. */
662 struct line *l; /* For convenience. */
664 if (head == NULL && !load_buffer ())
667 if (current_line < head->first_available)
668 current_line = head->first_available;
670 ++(head->first_available);
674 line = &l->starts[l->retrieve_index];
676 /* Advance index to next line. */
677 if (++l->retrieve_index == l->used)
679 /* Go on to the next line record. */
680 head->curr_line = l->next;
681 if (head->curr_line == NULL || head->curr_line->used == 0)
683 /* Go on to the next data block. */
684 struct buffer_record *b = head;
693 /* Search the buffers for line LINENUM, reading more input if necessary.
694 Return a pointer to the line, or NULL if it is not found in the file. */
696 static struct cstring *
697 find_line (unsigned int linenum)
699 struct buffer_record *b;
701 if (head == NULL && !load_buffer ())
704 if (linenum < head->start_line)
709 if (linenum < b->start_line + b->num_lines)
711 /* The line is in this buffer. */
713 unsigned int offset; /* How far into the buffer the line is. */
716 offset = linenum - b->start_line;
717 /* Find the control record. */
718 while (offset >= CTRL_SIZE)
723 return &l->starts[offset];
725 if (b->next == NULL && !load_buffer ())
727 b = b->next; /* Try the next data block. */
731 /* Return TRUE if no more lines are available for input. */
736 return (find_line (current_line + 1) == NULL) ? TRUE : FALSE;
739 /* Set the name of the input file to NAME and open it. */
742 set_input_file (const char *name)
744 if (!strcmp (name, "-"))
748 input_desc = open (name, O_RDONLY);
750 error (EXIT_FAILURE, errno, "%s", name);
754 /* Write all lines from the beginning of the buffer up to, but
755 not including, line LAST_LINE, to the current output file.
756 If IGNORE is TRUE, do not output lines selected here.
757 ARGNUM is the index in ARGV of the current pattern. */
760 write_to_file (unsigned int last_line, boolean ignore, int argnum)
762 struct cstring *line;
763 unsigned int first_line; /* First available input line. */
764 unsigned int lines; /* Number of lines to output. */
767 first_line = get_first_line_in_buffer ();
769 if (first_line > last_line)
771 error (0, 0, _("%s: line number out of range"), global_argv[argnum]);
775 lines = last_line - first_line;
777 for (i = 0; i < lines; i++)
779 line = remove_line ();
782 error (0, 0, _("%s: line number out of range"), global_argv[argnum]);
786 save_line_to_file (line);
790 /* Output any lines left after all regexps have been processed. */
793 dump_rest_of_file (void)
795 struct cstring *line;
797 while ((line = remove_line ()) != NULL)
798 save_line_to_file (line);
801 /* Handle an attempt to read beyond EOF under the control of record P,
802 on iteration REPETITION if nonzero. */
805 handle_line_error (const struct control *p, int repetition)
807 fprintf (stderr, _("%s: `%d': line number out of range"),
808 program_name, p->lines_required);
810 fprintf (stderr, _(" on repetition %d\n"), repetition);
812 fprintf (stderr, "\n");
817 /* Determine the line number that marks the end of this file,
818 then get those lines and save them to the output file.
819 P is the control record.
820 REPETITION is the repetition number. */
823 process_line_count (const struct control *p, int repetition)
825 unsigned int linenum;
826 unsigned int last_line_to_save = p->lines_required * (repetition + 1);
827 struct cstring *line;
829 create_output_file ();
831 linenum = get_first_line_in_buffer ();
833 /* Initially, I wanted to assert linenum < last_line_to_save, but that
834 condition is false for the valid command: echo | csplit - 1 '{*}'.
835 So, relax it just a little. */
836 assert ((linenum == 1 && last_line_to_save == 1)
837 || linenum < last_line_to_save);
839 while (linenum++ < last_line_to_save)
841 line = remove_line ();
843 handle_line_error (p, repetition);
844 save_line_to_file (line);
847 close_output_file ();
849 /* Ensure that the line number specified is not 1 greater than
850 the number of lines in the file. */
851 if (no_more_lines ())
852 handle_line_error (p, repetition);
856 regexp_error (struct control *p, int repetition, boolean ignore)
858 fprintf (stderr, _("%s: `%s': match not found"),
859 program_name, global_argv[p->argnum]);
862 fprintf (stderr, _(" on repetition %d\n"), repetition);
864 fprintf (stderr, "\n");
868 dump_rest_of_file ();
869 close_output_file ();
874 /* Read the input until a line matches the regexp in P, outputting
875 it unless P->IGNORE is TRUE.
876 REPETITION is this repeat-count; 0 means the first time. */
879 process_regexp (struct control *p, int repetition)
881 struct cstring *line; /* From input file. */
882 unsigned int line_len; /* To make "$" in regexps work. */
883 unsigned int break_line; /* First line number of next file. */
884 boolean ignore = p->ignore; /* If TRUE, skip this section. */
888 create_output_file ();
890 /* If there is no offset for the regular expression, or
891 it is positive, then it is not necessary to buffer the lines. */
897 line = find_line (++current_line);
900 if (p->repeat_forever)
904 dump_rest_of_file ();
905 close_output_file ();
910 regexp_error (p, repetition, ignore);
912 line_len = line->len;
913 if (line->str[line_len - 1] == '\n')
915 ret = re_search (&p->re_compiled, line->str, line_len,
916 0, line_len, (struct re_registers *) 0);
919 error (0, 0, _("error in regular expression search"));
924 line = remove_line ();
926 save_line_to_file (line);
934 /* Buffer the lines. */
937 line = find_line (++current_line);
940 if (p->repeat_forever)
944 dump_rest_of_file ();
945 close_output_file ();
950 regexp_error (p, repetition, ignore);
952 line_len = line->len;
953 if (line->str[line_len - 1] == '\n')
955 ret = re_search (&p->re_compiled, line->str, line_len,
956 0, line_len, (struct re_registers *) 0);
959 error (0, 0, _("error in regular expression search"));
967 /* Account for any offset from this regexp. */
968 break_line = current_line + p->offset;
970 write_to_file (break_line, ignore, p->argnum);
973 close_output_file ();
975 current_line = break_line;
978 /* Split the input file according to the control records we have built. */
985 for (i = 0; i < control_used; i++)
987 if (controls[i].regexpr)
989 for (j = 0; (controls[i].repeat_forever
990 || j <= controls[i].repeat); j++)
991 process_regexp (&controls[i], j);
995 for (j = 0; (controls[i].repeat_forever
996 || j <= controls[i].repeat); j++)
997 process_line_count (&controls[i], j);
1001 create_output_file ();
1002 dump_rest_of_file ();
1003 close_output_file ();
1006 /* Return the name of output file number NUM. */
1009 make_filename (unsigned int num)
1011 strcpy (filename_space, prefix);
1013 sprintf (filename_space+strlen(prefix), suffix, num);
1015 sprintf (filename_space+strlen(prefix), "%0*d", digits, num);
1016 return filename_space;
1019 /* Create the next output file. */
1022 create_output_file (void)
1024 output_filename = make_filename (files_created);
1025 output_stream = fopen (output_filename, "w");
1026 if (output_stream == NULL)
1028 error (0, errno, "%s", output_filename);
1035 /* Delete all the files we have created. */
1038 delete_all_files (void)
1043 for (i = 0; i < files_created; i++)
1045 name = make_filename (i);
1047 error (0, errno, "%s", name);
1051 /* Close the current output file and print the count
1052 of characters in this file. */
1055 close_output_file (void)
1059 if (fclose (output_stream) == EOF)
1061 error (0, errno, _("write error for `%s'"), output_filename);
1062 output_stream = NULL;
1065 if (bytes_written == 0 && elide_empty_files)
1067 if (unlink (output_filename))
1068 error (0, errno, "%s", output_filename);
1072 if (!suppress_count)
1073 fprintf (stdout, "%d\n", bytes_written);
1074 output_stream = NULL;
1078 /* Save line LINE to the output file and
1079 increment the character count for the current file. */
1082 save_line_to_file (const struct cstring *line)
1084 fwrite (line->str, sizeof (char), line->len, output_stream);
1085 bytes_written += line->len;
1088 /* Return a new, initialized control record. */
1090 static struct control *
1091 new_control_record (void)
1093 static unsigned control_allocated = 0; /* Total space allocated. */
1096 if (control_allocated == 0)
1098 control_allocated = ALLOC_SIZE;
1099 controls = (struct control *)
1100 xmalloc (sizeof (struct control) * control_allocated);
1102 else if (control_used == control_allocated)
1104 control_allocated += ALLOC_SIZE;
1105 controls = (struct control *)
1106 xrealloc ((char *) controls,
1107 sizeof (struct control) * control_allocated);
1109 p = &controls[control_used++];
1112 p->repeat_forever = 0;
1113 p->lines_required = 0;
1118 /* Check if there is a numeric offset after a regular expression.
1119 STR is the entire command line argument.
1120 P is the control record for this regular expression.
1121 NUM is the numeric part of STR. */
1124 check_for_offset (struct control *p, const char *str, const char *num)
1128 if (*num != '-' && *num != '+')
1129 error (EXIT_FAILURE, 0, _("%s: `+' or `-' expected after delimeter"), str);
1131 if (xstrtoul (num + 1, NULL, 10, &val, NULL) != LONGINT_OK
1133 error (EXIT_FAILURE, 0, _("%s: integer expected after `%c'"), str, *num);
1134 p->offset = (unsigned int) val;
1137 p->offset = -p->offset;
1140 /* Given that the first character of command line arg STR is '{',
1141 make sure that the rest of the string is a valid repeat count
1142 and store its value in P.
1143 ARGNUM is the ARGV index of STR. */
1146 parse_repeat_count (int argnum, struct control *p, char *str)
1151 end = str + strlen (str) - 1;
1153 error (EXIT_FAILURE, 0, _("%s: `}' is required in repeat count"), str);
1156 if (str+1 == end-1 && *(str+1) == '*')
1157 p->repeat_forever = 1;
1160 if (xstrtoul (str + 1, NULL, 10, &val, NULL) != LONGINT_OK
1163 error (EXIT_FAILURE, 0,
1164 _("%s}: integer required between `{' and `}'"),
1165 global_argv[argnum]);
1167 p->repeat = (unsigned int) val;
1173 /* Extract the regular expression from STR and check for a numeric offset.
1174 STR should start with the regexp delimiter character.
1175 Return a new control record for the regular expression.
1176 ARGNUM is the ARGV index of STR.
1177 Unless IGNORE is TRUE, mark these lines for output. */
1179 static struct control *
1180 extract_regexp (int argnum, boolean ignore, char *str)
1182 int len; /* Number of chars in this regexp. */
1184 char *closing_delim;
1188 closing_delim = strrchr (str + 1, delim);
1189 if (closing_delim == NULL)
1190 error (EXIT_FAILURE, 0,
1191 _("%s: closing delimeter `%c' missing"), str, delim);
1193 len = closing_delim - str - 1;
1194 p = new_control_record ();
1198 p->regexpr = (char *) xmalloc ((unsigned) (len + 1));
1199 strncpy (p->regexpr, str + 1, len);
1200 p->re_compiled.allocated = len * 2;
1201 p->re_compiled.buffer = (unsigned char *) xmalloc (p->re_compiled.allocated);
1202 p->re_compiled.fastmap = xmalloc (256);
1203 p->re_compiled.translate = 0;
1205 p->re_compiled.syntax_parens = 0;
1207 err = re_compile_pattern (p->regexpr, len, &p->re_compiled);
1210 error (0, 0, _("%s: invalid regular expression: %s"), str, err);
1214 if (closing_delim[1])
1215 check_for_offset (p, str, closing_delim + 1);
1220 /* Extract the break patterns from args START through ARGC - 1 of ARGV.
1221 After each pattern, check if the next argument is a repeat count. */
1224 parse_patterns (int argc, int start, char **argv)
1226 int i; /* Index into ARGV. */
1227 struct control *p; /* New control record created. */
1229 static unsigned long last_val = 0;
1231 for (i = start; i < argc; i++)
1233 if (*argv[i] == '/' || *argv[i] == '%')
1235 p = extract_regexp (i, *argv[i] == '%', argv[i]);
1239 p = new_control_record ();
1242 if (xstrtoul (argv[i], NULL, 10, &val, NULL) != LONGINT_OK
1244 error (EXIT_FAILURE, 0, _("%s: invalid pattern"), argv[i]);
1246 error (EXIT_FAILURE, 0,
1247 _("%s: line number must be greater than zero"),
1250 error (EXIT_FAILURE, 0,
1251 _("line number `%s' is smaller than preceding line number, %lu"),
1254 if (val == last_val)
1256 _("warning: line number `%s' is the same as preceding line number"),
1260 p->lines_required = (int) val;
1263 if (i + 1 < argc && *argv[i + 1] == '{')
1265 /* We have a repeat count. */
1267 parse_repeat_count (i, p, argv[i]);
1273 get_format_flags (char **format_ptr)
1277 for (; **format_ptr; (*format_ptr)++)
1279 switch (**format_ptr)
1290 count += 2; /* Allow for 0x prefix preceding an `x' conversion. */
1301 get_format_width (char **format_ptr)
1307 start = *format_ptr;
1308 for (; **format_ptr; (*format_ptr)++)
1309 if (!ISDIGIT (**format_ptr))
1312 ch_save = **format_ptr;
1313 **format_ptr = '\0';
1314 /* In the case where no minimum field width is explicitly specified,
1315 allow for enough octal digits to represent the value of LONG_MAX. */
1316 count = ((*format_ptr == start)
1317 ? bytes_to_octal_digits[sizeof (long)]
1319 **format_ptr = ch_save;
1324 get_format_prec (char **format_ptr)
1331 if (**format_ptr != '.')
1335 if (**format_ptr == '-' || **format_ptr == '+')
1337 is_negative = (**format_ptr == '-');
1345 start = *format_ptr;
1346 for (; **format_ptr; (*format_ptr)++)
1347 if (!ISDIGIT (**format_ptr))
1350 /* ANSI 4.9.6.1 says that if the precision is negative, it's as good as
1353 start = *format_ptr;
1355 ch_save = **format_ptr;
1356 **format_ptr = '\0';
1357 count = (*format_ptr == start) ? 11 : atoi (start);
1358 **format_ptr = ch_save;
1364 get_format_conv_type (char **format_ptr)
1366 int ch = *((*format_ptr)++);
1379 error (EXIT_FAILURE, 0, _("missing conversion specifier in suffix"));
1384 error (EXIT_FAILURE, 0,
1385 _("invalid conversion specifier in suffix: %c"), ch);
1387 error (EXIT_FAILURE, 0,
1388 _("invalid conversion specifier in suffix: \\%.3o"), ch);
1393 max_out (char *format)
1395 unsigned out_count = 0;
1396 unsigned percents = 0;
1407 out_count += get_format_flags (&format);
1409 int width = get_format_width (&format);
1410 int prec = get_format_prec (&format);
1412 out_count += MAX (width, prec);
1414 get_format_conv_type (&format);
1419 error (EXIT_FAILURE, 0,
1420 _("missing %% conversion specification in suffix"));
1421 else if (percents > 1)
1422 error (EXIT_FAILURE, 0,
1423 _("too many %% conversion specifications in suffix"));
1429 main (int argc, char **argv)
1434 struct sigaction oldact, newact;
1437 program_name = argv[0];
1438 setlocale (LC_ALL, "");
1439 bindtextdomain (PACKAGE, LOCALEDIR);
1440 textdomain (PACKAGE);
1445 suppress_count = FALSE;
1446 remove_files = TRUE;
1447 prefix = DEFAULT_PREFIX;
1450 newact.sa_handler = interrupt_handler;
1451 sigemptyset (&newact.sa_mask);
1452 newact.sa_flags = 0;
1454 sigaction (SIGHUP, NULL, &oldact);
1455 if (oldact.sa_handler != SIG_IGN)
1456 sigaction (SIGHUP, &newact, NULL);
1458 sigaction (SIGINT, NULL, &oldact);
1459 if (oldact.sa_handler != SIG_IGN)
1460 sigaction (SIGINT, &newact, NULL);
1462 sigaction (SIGQUIT, NULL, &oldact);
1463 if (oldact.sa_handler != SIG_IGN)
1464 sigaction (SIGQUIT, &newact, NULL);
1466 sigaction (SIGTERM, NULL, &oldact);
1467 if (oldact.sa_handler != SIG_IGN)
1468 sigaction (SIGTERM, &newact, NULL);
1469 #else /* not SA_INTERRUPT */
1470 if (signal (SIGHUP, SIG_IGN) != SIG_IGN)
1471 signal (SIGHUP, interrupt_handler);
1472 if (signal (SIGINT, SIG_IGN) != SIG_IGN)
1473 signal (SIGINT, interrupt_handler);
1474 if (signal (SIGQUIT, SIG_IGN) != SIG_IGN)
1475 signal (SIGQUIT, interrupt_handler);
1476 if (signal (SIGTERM, SIG_IGN) != SIG_IGN)
1477 signal (SIGTERM, interrupt_handler);
1478 #endif /* not SA_INTERRUPT */
1480 while ((optc = getopt_long (argc, argv, "f:b:kn:sqz", longopts, (int *) 0))
1496 remove_files = FALSE;
1500 if (xstrtoul (optarg, NULL, 10, &val, NULL) != LONGINT_OK
1502 error (EXIT_FAILURE, 0, _("%s: invalid number"), optarg);
1508 suppress_count = TRUE;
1512 elide_empty_files = TRUE;
1521 printf ("csplit - %s\n", PACKAGE_VERSION);
1522 exit (EXIT_SUCCESS);
1528 if (argc - optind < 2)
1530 error (0, 0, _("too few arguments"));
1535 filename_space = (char *) xmalloc (strlen (prefix) + max_out (suffix) + 2);
1537 filename_space = (char *) xmalloc (strlen (prefix) + digits + 2);
1539 set_input_file (argv[optind++]);
1541 parse_patterns (argc, optind, argv);
1545 if (close (input_desc) < 0)
1547 error (0, errno, _("read error"));
1551 exit (EXIT_SUCCESS);
1558 fprintf (stderr, _("Try `%s --help' for more information.\n"),
1563 Usage: %s [OPTION]... FILE PATTERN...\n\
1567 Output pieces of FILE separated by PATTERN(s) to files `xx01', `xx02', ...,\n\
1568 and output byte counts of each piece to standard output.\n\
1570 -b, --suffix-format=FORMAT use sprintf FORMAT instead of %%d\n\
1571 -f, --prefix=PREFIX use PREFIX instead of `xx'\n\
1572 -k, --keep-files do not remove output files on errors\n\
1573 -n, --digits=DIGITS use specified number of digits instead of 2\n\
1574 -s, --quiet, --silent do not print counts of output file sizes\n\
1575 -z, --elide-empty-files remove empty output files\n\
1576 --help display this help and exit\n\
1577 --version output version information and exit\n\
1579 Read standard input if FILE is -. Each PATTERN may be:\n\
1581 INTEGER copy up to but not including specified line number\n\
1582 /REGEXP/[OFFSET] copy up to but not including a matching line\n\
1583 %%REGEXP%%[OFFSET] skip to, but not including a matching line\n\
1584 {INTEGER} repeat the previous pattern specified number of times\n\
1585 {*} repeat the previous pattern as many times as possible\n\
1587 A line OFFSET is a required `+' or `-' followed by a positive integer.\n\
1590 exit (status == 0 ? EXIT_SUCCESS : EXIT_FAILURE);