1 /* csplit - split a file into sections determined by context lines
2 Copyright (C) 91, 95, 1996 Free Software Foundation, Inc.
4 This program is free software; you can redistribute it and/or modify
5 it under the terms of the GNU General Public License as published by
6 the Free Software Foundation; either version 2, or (at your option) any later version.
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU General Public License for more details.
14 You should have received a copy of the GNU General Public License
15 along with this program; if not, write to the Free Software Foundation,
16 Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */
18 /* Written by Stuart Kemp, cpsrk@groper.jcu.edu.au.
19 Modified by David MacKenzie, djm@gnu.ai.mit.edu. */
25 /* Disable assertions. Some systems have broken assert macros. */
30 #include <sys/types.h>
34 #endif /* HAVE_LIMITS_H */
37 # define UINT_MAX ((unsigned int) ~(unsigned int) 0)
41 # define INT_MAX ((int) (UINT_MAX >> 1))
61 #define MAX(a,b) (((a) > (b)) ? (a) : (b))
69 /* Increment size of area for control records. */
72 /* The default prefix for output file names. */
73 #define DEFAULT_PREFIX "xx"
77 /* A compiled pattern arg. */
80 char *regexpr; /* Non-compiled regular expression. */
81 struct re_pattern_buffer re_compiled; /* Compiled regular expression. */
82 int offset; /* Offset from regexp to split at. */
83 int lines_required; /* Number of lines required. */
84 unsigned int repeat; /* Repeat count. */
85 int repeat_forever; /* Non-zero if `*' used as a repeat count. */
86 int argnum; /* ARGV index. */
87 boolean ignore; /* If true, produce no output (for regexp). */
90 /* Initial size of data area in buffers. */
91 #define START_SIZE 8191
93 /* Increment size for data area. */
94 #define INCR_SIZE 2048
96 /* Number of lines kept in each node in line list. */
100 /* Some small values to test the algorithms. */
101 #define START_SIZE 200
106 /* A string with a length count. */
113 /* Pointers to the beginnings of lines in the buffer area.
114 These structures are linked together if needed. */
117 unsigned used; /* Number of offsets used in this struct. */
118 unsigned insert_index; /* Next offset to use when inserting line. */
119 unsigned retrieve_index; /* Next index to use when retrieving line. */
120 struct cstring starts[CTRL_SIZE]; /* Lines in the data area. */
121 struct line *next; /* Next in linked list. */
124 /* The structure to hold the input lines.
125 Contains a pointer to the data area and a list containing
126 pointers to the individual lines. */
129 unsigned bytes_alloc; /* Size of the buffer area. */
130 unsigned bytes_used; /* Bytes used in the buffer area. */
131 unsigned start_line; /* First line number in this buffer. */
132 unsigned first_available; /* First line that can be retrieved. */
133 unsigned num_lines; /* Number of complete lines in this buffer. */
134 char *buffer; /* Data area. */
135 struct line *line_start; /* Head of list of pointers to lines. */
136 struct line *curr_line; /* The line start record currently in use. */
137 struct buffer_record *next;
142 static void close_output_file __P ((void));
143 static void create_output_file __P ((void));
144 static void delete_all_files __P ((void));
145 static void save_line_to_file __P ((const struct cstring *line));
146 static void usage __P ((int status));
148 /* The name this program was run with. */
151 /* Convert the number of 8-bit bytes of a binary representation to
152 the number of characters required to represent the same quantity
153 as an unsigned octal. For example, a 32-bit (4-byte) quantity may
154 require a field width as wide as 11 characters. */
155 static const unsigned int bytes_to_octal_digits[] =
156 {0, 3, 6, 8, 11, 14, 16, 19, 22, 25, 27, 30, 32, 35, 38, 41, 43};
158 /* Input file descriptor. */
159 static int input_desc = 0;
161 /* List of available buffers. */
162 static struct buffer_record *free_list = NULL;
164 /* Start of buffer list. */
165 static struct buffer_record *head = NULL;
167 /* Partially read line. */
168 static char *hold_area = NULL;
170 /* Number of chars in `hold_area'. */
171 static unsigned hold_count = 0;
173 /* Number of the last line in the buffers. */
174 static unsigned last_line_number = 0;
176 /* Number of the line currently being examined. */
177 static unsigned current_line = 0;
179 /* If TRUE, we have read EOF. */
180 static boolean have_read_eof = FALSE;
182 /* Name of output files. */
183 static char *filename_space = NULL;
185 /* Prefix part of output file names. */
186 static char *prefix = NULL;
188 /* Suffix part of output file names. */
189 static char *suffix = NULL;
191 /* Number of digits to use in output file names. */
192 static int digits = 2;
194 /* Number of files created so far. */
195 static unsigned int files_created = 0;
197 /* Number of bytes written to current file. */
198 static unsigned int bytes_written;
200 /* Output file pointer. */
201 static FILE *output_stream = NULL;
203 /* Output file name. */
204 static char *output_filename = NULL;
206 /* Perhaps it would be cleaner to pass arg values instead of indexes. */
207 static char **global_argv;
209 /* If TRUE, do not print the count of bytes in each output file. */
210 static boolean suppress_count;
212 /* If TRUE, remove output files on error. */
213 static boolean remove_files;
215 /* If TRUE, remove all output files which have a zero length. */
216 static boolean elide_empty_files;
218 /* The compiled pattern arguments, which determine how to split the input file. */
220 static struct control *controls;
222 /* Number of elements in `controls'. */
223 static unsigned int control_used;
225 /* If nonzero, display usage information and exit. */
226 static int show_help;
228 /* If nonzero, print the version on standard output then exit. */
229 static int show_version;
231 static struct option const longopts[] =
233 {"digits", required_argument, NULL, 'n'},
234 {"quiet", no_argument, NULL, 'q'},
235 {"silent", no_argument, NULL, 's'},
236 {"keep-files", no_argument, NULL, 'k'},
237 {"elide-empty-files", no_argument, NULL, 'z'},
238 {"prefix", required_argument, NULL, 'f'},
239 {"suffix-format", required_argument, NULL, 'b'},
240 {"help", no_argument, &show_help, 1},
241 {"version", no_argument, &show_version, 1},
245 /* Optionally remove files created so far; then exit.
246 Called when an error is detected. */
252 close_output_file ();
266 interrupt_handler (int sig)
269 struct sigaction sigact;
271 sigact.sa_handler = SIG_DFL;
272 sigemptyset (&sigact.sa_mask);
274 sigaction (sig, &sigact, NULL);
275 #else /* !SA_INTERRUPT */
276 signal (sig, SIG_DFL);
277 #endif /* SA_INTERRUPT */
279 kill (getpid (), sig);
282 /* Allocate N bytes of memory dynamically, with error checking. */
285 xmalloc (unsigned int n)
292 error (0, 0, _("virtual memory exhausted"));
298 /* Change the size of an allocated block of memory P to N bytes, with error checking.
300 If P is NULL, run xmalloc.
301 If N is 0, run free and return NULL. */
304 xrealloc (char *p, unsigned int n)
316 error (0, 0, _("virtual memory exhausted"));
322 /* Keep track of NUM chars of a partial line in buffer START.
323 These chars will be retrieved later when another large buffer is read.
324 It is not necessary to create a new buffer for these chars; instead,
325 we keep a pointer to the existing buffer. This buffer *is* on the
326 free list, and when the next buffer is obtained from this list
327 (even if it is this one), these chars will be placed at the
328 start of the new buffer. */
331 save_to_hold_area (char *start, unsigned int num)
337 /* Read up to MAX_N_BYTES chars from the input stream into DEST.
338 Return the number of chars read. */
341 read_input (char *dest, unsigned int max_n_bytes)
345 if (max_n_bytes == 0)
348 bytes_read = safe_read (input_desc, dest, max_n_bytes);
351 have_read_eof = TRUE;
355 error (0, errno, _("read error"));
362 /* Initialize existing line record P. */
365 clear_line_control (struct line *p)
369 p->retrieve_index = 0;
372 /* Initialize all line records in B. */
375 clear_all_line_control (struct buffer_record *b)
379 for (l = b->line_start; l; l = l->next)
380 clear_line_control (l);
383 /* Return a new, initialized line record. */
386 new_line_control (void)
390 p = (struct line *) xmalloc (sizeof (struct line));
393 clear_line_control (p);
398 /* Record LINE_START, which is the address of the start of a line
399 of length LINE_LEN in the large buffer, in the lines buffer of B. */
402 keep_new_line (struct buffer_record *b, char *line_start, int line_len)
406 /* If there is no existing area to keep line info, get some. */
407 if (b->line_start == NULL)
408 b->line_start = b->curr_line = new_line_control ();
410 /* If existing area for lines is full, get more. */
411 if (b->curr_line->used == CTRL_SIZE)
413 b->curr_line->next = new_line_control ();
414 b->curr_line = b->curr_line->next;
419 /* Record the start of the line, and update counters. */
420 l->starts[l->insert_index].str = line_start;
421 l->starts[l->insert_index].len = line_len;
426 /* Scan the buffer in B for newline characters
427 and record the line start locations and lengths in B.
428 Return the number of lines found in this buffer.
430 There may be an incomplete line at the end of the buffer;
431 a pointer is kept to this area, which will be used when
432 the next buffer is filled. */
435 record_line_starts (struct buffer_record *b)
437 char *line_start; /* Start of current line. */
438 char *line_end; /* End of each line found. */
439 unsigned int bytes_left; /* Length of incomplete last line. */
440 unsigned int lines; /* Number of lines found. */
441 unsigned int line_length; /* Length of each line found. */
443 if (b->bytes_used == 0)
447 line_start = b->buffer;
448 bytes_left = b->bytes_used;
452 line_end = memchr (line_start, '\n', bytes_left);
453 if (line_end == NULL)
455 line_length = line_end - line_start + 1;
456 keep_new_line (b, line_start, line_length);
457 bytes_left -= line_length;
458 line_start = line_end + 1;
462 /* Check for an incomplete last line. */
467 keep_new_line (b, line_start, bytes_left);
471 save_to_hold_area (line_start, bytes_left);
474 b->num_lines = lines;
475 b->first_available = b->start_line = last_line_number + 1;
476 last_line_number += lines;
481 /* Return a new buffer with room to store SIZE bytes, plus
482 an extra byte for safety. */
484 static struct buffer_record *
485 create_new_buffer (unsigned int size)
487 struct buffer_record *new_buffer;
489 new_buffer = (struct buffer_record *)
490 xmalloc (sizeof (struct buffer_record));
492 new_buffer->buffer = (char *) xmalloc (size + 1);
494 new_buffer->bytes_alloc = size;
495 new_buffer->line_start = new_buffer->curr_line = NULL;
500 /* Return a new buffer of at least MIN_SIZE bytes. If a buffer of at
501 least that size is currently free, use it, otherwise create a new one. */
503 static struct buffer_record *
504 get_new_buffer (unsigned int min_size)
506 struct buffer_record *p, *q;
507 struct buffer_record *new_buffer; /* Buffer to return. */
508 unsigned int alloc_size; /* Actual size that will be requested. */
510 alloc_size = START_SIZE;
511 while (min_size > alloc_size)
512 alloc_size += INCR_SIZE;
514 if (free_list == NULL)
515 new_buffer = create_new_buffer (alloc_size);
518 /* Use first-fit to find a buffer. */
519 p = new_buffer = NULL;
524 if (q->bytes_alloc >= min_size)
537 new_buffer = (q ? q : create_new_buffer (alloc_size));
539 new_buffer->curr_line = new_buffer->line_start;
540 clear_all_line_control (new_buffer);
543 new_buffer->num_lines = 0;
544 new_buffer->bytes_used = 0;
545 new_buffer->start_line = new_buffer->first_available = last_line_number + 1;
546 new_buffer->next = NULL;
551 /* Add buffer BUF to the list of free buffers. */
554 free_buffer (struct buffer_record *buf)
556 buf->next = free_list;
560 /* Append buffer BUF to the linked list of buffers that contain
561 some data yet to be processed. */
564 save_buffer (struct buffer_record *buf)
566 struct buffer_record *p;
569 buf->curr_line = buf->line_start;
575 for (p = head; p->next; p = p->next)
581 /* Fill a buffer of input.
583 Set the initial size of the buffer to a default.
584 Fill the buffer (from the hold area and input stream)
585 and find the individual lines.
586 If no lines are found (the buffer is too small to hold the next line),
587 release the current buffer (whose contents would have been put in the
588 hold area) and repeat the process with another large buffer until at least
589 one entire line has been read.
591 Return TRUE if a new buffer was obtained, otherwise false
592 (in which case end-of-file must have been encountered). */
597 struct buffer_record *b;
598 unsigned int bytes_wanted = START_SIZE; /* Minimum buffer size. */
599 unsigned int bytes_avail; /* Size of new buffer created. */
600 unsigned int lines_found; /* Number of lines in this new buffer. */
601 char *p; /* Place to load into buffer. */
606 /* We must make the buffer at least as large as the amount of data
607 in the partial line left over from the last call. */
608 if (bytes_wanted < hold_count)
609 bytes_wanted = hold_count;
613 b = get_new_buffer (bytes_wanted);
614 bytes_avail = b->bytes_alloc; /* Size of buffer returned. */
617 /* First check the `holding' area for a partial line. */
621 memcpy (p, hold_area, hold_count);
623 b->bytes_used += hold_count;
624 bytes_avail -= hold_count;
628 b->bytes_used += (unsigned int) read_input (p, bytes_avail);
630 lines_found = record_line_starts (b);
631 bytes_wanted = b->bytes_alloc * 2;
635 while (!lines_found && !have_read_eof);
640 return lines_found != 0;
643 /* Return the line number of the first line that has not yet been retrieved. */
646 get_first_line_in_buffer (void)
648 if (head == NULL && !load_buffer ())
649 error (EXIT_FAILURE, errno, _("input disappeared"));
651 return head->first_available;
654 /* Return a pointer to the logical first line in the buffer and make the
655 next line the logical first line.
656 Return NULL if there is no more input. */
658 static struct cstring *
661 struct cstring *line; /* Return value. */
662 struct line *l; /* For convenience. */
664 if (head == NULL && !load_buffer ())
667 if (current_line < head->first_available)
668 current_line = head->first_available;
670 ++(head->first_available);
674 line = &l->starts[l->retrieve_index];
676 /* Advance index to next line. */
677 if (++l->retrieve_index == l->used)
679 /* Go on to the next line record. */
680 head->curr_line = l->next;
681 if (head->curr_line == NULL || head->curr_line->used == 0)
683 /* Go on to the next data block. */
684 struct buffer_record *b = head;
693 /* Search the buffers for line LINENUM, reading more input if necessary.
694 Return a pointer to the line, or NULL if it is not found in the file. */
696 static struct cstring *
697 find_line (unsigned int linenum)
699 struct buffer_record *b;
701 if (head == NULL && !load_buffer ())
704 if (linenum < head->start_line)
709 if (linenum < b->start_line + b->num_lines)
711 /* The line is in this buffer. */
713 unsigned int offset; /* How far into the buffer the line is. */
716 offset = linenum - b->start_line;
717 /* Find the control record. */
718 while (offset >= CTRL_SIZE)
723 return &l->starts[offset];
725 if (b->next == NULL && !load_buffer ())
727 b = b->next; /* Try the next data block. */
731 /* Return TRUE if no more lines are available for input, i.e. the line after CURRENT_LINE cannot be found. */
736 return (find_line (current_line + 1) == NULL) ? TRUE : FALSE;
739 /* Set the name of the input file to NAME and open it. */
742 set_input_file (const char *name)
744 if (!strcmp (name, "-"))
748 input_desc = open (name, O_RDONLY);
750 error (EXIT_FAILURE, errno, "%s", name);
754 /* Write all lines from the beginning of the buffer up to, but
755 not including, line LAST_LINE, to the current output file.
756 If IGNORE is TRUE, do not output lines selected here.
757 ARGNUM is the index in ARGV of the current pattern. */
760 write_to_file (unsigned int last_line, boolean ignore, int argnum)
762 struct cstring *line;
763 unsigned int first_line; /* First available input line. */
764 unsigned int lines; /* Number of lines to output. */
767 first_line = get_first_line_in_buffer ();
769 if (first_line > last_line)
771 error (0, 0, _("%s: line number out of range"), global_argv[argnum]);
775 lines = last_line - first_line;
777 for (i = 0; i < lines; i++)
779 line = remove_line ();
782 error (0, 0, _("%s: line number out of range"), global_argv[argnum]);
786 save_line_to_file (line);
790 /* Output any lines left after all regexps have been processed. */
793 dump_rest_of_file (void)
795 struct cstring *line;
797 while ((line = remove_line ()) != NULL)
798 save_line_to_file (line);
801 /* Handle an attempt to read beyond EOF under the control of record P,
802 on iteration REPETITION if nonzero. */
805 handle_line_error (const struct control *p, int repetition)
807 fprintf (stderr, _("%s: `%d': line number out of range"),
808 program_name, p->lines_required);
810 fprintf (stderr, _(" on repetition %d\n"), repetition);
812 fprintf (stderr, "\n");
817 /* Determine the line number that marks the end of this file,
818 then get those lines and save them to the output file.
819 P is the control record.
820 REPETITION is the repetition number. */
823 process_line_count (const struct control *p, int repetition)
825 unsigned int linenum;
826 unsigned int last_line_to_save = p->lines_required * (repetition + 1);
827 struct cstring *line;
829 create_output_file ();
831 linenum = get_first_line_in_buffer ();
833 /* Initially, I wanted to assert linenum < last_line_to_save, but that
834 condition is false for the valid command: echo | csplit - 1 '{*}'.
835 So, relax it just a little. */
836 assert ((linenum == 1 && last_line_to_save == 1)
837 || linenum < last_line_to_save);
839 while (linenum++ < last_line_to_save)
841 line = remove_line ();
843 handle_line_error (p, repetition);
844 save_line_to_file (line);
847 close_output_file ();
849 /* Ensure that the line number specified is not 1 greater than
850 the number of lines in the file. */
851 if (no_more_lines ())
852 handle_line_error (p, repetition);
856 regexp_error (struct control *p, int repetition, boolean ignore)
858 fprintf (stderr, _("%s: `%s': match not found"),
859 program_name, global_argv[p->argnum]);
862 fprintf (stderr, _(" on repetition %d\n"), repetition);
864 fprintf (stderr, "\n");
868 dump_rest_of_file ();
869 close_output_file ();
874 /* Read the input until a line matches the regexp in P, outputting
875 it unless P->IGNORE is TRUE.
876 REPETITION is this repeat-count; 0 means the first time. */
879 process_regexp (struct control *p, int repetition)
881 struct cstring *line; /* From input file. */
882 unsigned int line_len; /* To make "$" in regexps work. */
883 unsigned int break_line; /* First line number of next file. */
884 boolean ignore = p->ignore; /* If TRUE, skip this section. */
888 create_output_file ();
890 /* If there is no offset for the regular expression, or
891 it is positive, then it is not necessary to buffer the lines. */
897 line = find_line (++current_line);
900 if (p->repeat_forever)
904 dump_rest_of_file ();
905 close_output_file ();
910 regexp_error (p, repetition, ignore);
912 line_len = line->len;
913 if (line->str[line_len - 1] == '\n')
915 ret = re_search (&p->re_compiled, line->str, line_len,
916 0, line_len, (struct re_registers *) 0);
919 error (0, 0, _("error in regular expression search"));
924 line = remove_line ();
926 save_line_to_file (line);
934 /* Buffer the lines. */
937 line = find_line (++current_line);
940 if (p->repeat_forever)
944 dump_rest_of_file ();
945 close_output_file ();
950 regexp_error (p, repetition, ignore);
952 line_len = line->len;
953 if (line->str[line_len - 1] == '\n')
955 ret = re_search (&p->re_compiled, line->str, line_len,
956 0, line_len, (struct re_registers *) 0);
959 error (0, 0, _("error in regular expression search"));
967 /* Account for any offset from this regexp. */
968 break_line = current_line + p->offset;
970 write_to_file (break_line, ignore, p->argnum);
973 close_output_file ();
976 current_line = break_line;
979 /* Split the input file according to the control records we have built. */
986 for (i = 0; i < control_used; i++)
988 if (controls[i].regexpr)
990 for (j = 0; (controls[i].repeat_forever
991 || j <= controls[i].repeat); j++)
992 process_regexp (&controls[i], j);
996 for (j = 0; (controls[i].repeat_forever
997 || j <= controls[i].repeat); j++)
998 process_line_count (&controls[i], j);
1002 create_output_file ();
1003 dump_rest_of_file ();
1004 close_output_file ();
1007 /* Return the name of output file number NUM. */
1010 make_filename (unsigned int num)
1012 strcpy (filename_space, prefix);
1014 sprintf (filename_space+strlen(prefix), suffix, num);
1016 sprintf (filename_space+strlen(prefix), "%0*d", digits, num);
1017 return filename_space;
1020 /* Create the next output file. */
1023 create_output_file (void)
1025 output_filename = make_filename (files_created);
1026 output_stream = fopen (output_filename, "w");
1027 if (output_stream == NULL)
1029 error (0, errno, "%s", output_filename);
1036 /* Delete all the files we have created. */
1039 delete_all_files (void)
1044 for (i = 0; i < files_created; i++)
1046 name = make_filename (i);
1048 error (0, errno, "%s", name);
1052 /* Close the current output file and print the count
1053 of characters in this file. */
1056 close_output_file (void)
1060 if (fclose (output_stream) == EOF)
1062 error (0, errno, _("write error for `%s'"), output_filename);
1063 output_stream = NULL;
1066 if (bytes_written == 0 && elide_empty_files)
1068 if (unlink (output_filename))
1069 error (0, errno, "%s", output_filename);
1073 if (!suppress_count)
1074 fprintf (stdout, "%d\n", bytes_written);
1075 output_stream = NULL;
1079 /* Save line LINE to the output file and
1080 increment the character count for the current file. */
1083 save_line_to_file (const struct cstring *line)
1085 fwrite (line->str, sizeof (char), line->len, output_stream);
1086 bytes_written += line->len;
1089 /* Return a new, initialized control record. */
1091 static struct control *
1092 new_control_record (void)
1094 static unsigned control_allocated = 0; /* Total space allocated. */
1097 if (control_allocated == 0)
1099 control_allocated = ALLOC_SIZE;
1100 controls = (struct control *)
1101 xmalloc (sizeof (struct control) * control_allocated);
1103 else if (control_used == control_allocated)
1105 control_allocated += ALLOC_SIZE;
1106 controls = (struct control *)
1107 xrealloc ((char *) controls,
1108 sizeof (struct control) * control_allocated);
1110 p = &controls[control_used++];
1113 p->repeat_forever = 0;
1114 p->lines_required = 0;
1119 /* Check if there is a numeric offset after a regular expression.
1120 STR is the entire command line argument.
1121 P is the control record for this regular expression.
1122 NUM is the numeric part of STR. */
1125 check_for_offset (struct control *p, const char *str, const char *num)
1129 if (*num != '-' && *num != '+')
1130 error (EXIT_FAILURE, 0, _("%s: `+' or `-' expected after delimeter"), str);
1132 if (xstrtoul (num + 1, NULL, 10, &val, "") != LONGINT_OK
1134 error (EXIT_FAILURE, 0, _("%s: integer expected after `%c'"), str, *num);
1135 p->offset = (unsigned int) val;
1138 p->offset = -p->offset;
1141 /* Given that the first character of command line arg STR is '{',
1142 make sure that the rest of the string is a valid repeat count
1143 and store its value in P.
1144 ARGNUM is the ARGV index of STR. */
1147 parse_repeat_count (int argnum, struct control *p, char *str)
1152 end = str + strlen (str) - 1;
1154 error (EXIT_FAILURE, 0, _("%s: `}' is required in repeat count"), str);
1157 if (str+1 == end-1 && *(str+1) == '*')
1158 p->repeat_forever = 1;
1161 if (xstrtoul (str + 1, NULL, 10, &val, "") != LONGINT_OK
1164 error (EXIT_FAILURE, 0,
1165 _("%s}: integer required between `{' and `}'"),
1166 global_argv[argnum]);
1168 p->repeat = (unsigned int) val;
1174 /* Extract the regular expression from STR and check for a numeric offset.
1175 STR should start with the regexp delimiter character.
1176 Return a new control record for the regular expression.
1177 ARGNUM is the ARGV index of STR.
1178 Unless IGNORE is TRUE, mark these lines for output. */
1180 static struct control *
1181 extract_regexp (int argnum, boolean ignore, char *str)
1183 int len; /* Number of chars in this regexp. */
1185 char *closing_delim;
1189 closing_delim = strrchr (str + 1, delim);
1190 if (closing_delim == NULL)
1191 error (EXIT_FAILURE, 0,
1192 _("%s: closing delimeter `%c' missing"), str, delim);
1194 len = closing_delim - str - 1;
1195 p = new_control_record ();
1199 p->regexpr = (char *) xmalloc ((unsigned) (len + 1));
1200 strncpy (p->regexpr, str + 1, len);
1201 p->re_compiled.allocated = len * 2;
1202 p->re_compiled.buffer = (unsigned char *) xmalloc (p->re_compiled.allocated);
1203 p->re_compiled.fastmap = xmalloc (256);
1204 p->re_compiled.translate = 0;
1206 p->re_compiled.syntax_parens = 0;
1208 err = re_compile_pattern (p->regexpr, len, &p->re_compiled);
1211 error (0, 0, _("%s: invalid regular expression: %s"), str, err);
1215 if (closing_delim[1])
1216 check_for_offset (p, str, closing_delim + 1);
1221 /* Extract the break patterns from args START through ARGC - 1 of ARGV.
1222 After each pattern, check if the next argument is a repeat count. */
1225 parse_patterns (int argc, int start, char **argv)
1227 int i; /* Index into ARGV. */
1228 struct control *p; /* New control record created. */
1230 static unsigned long last_val = 0;
1232 for (i = start; i < argc; i++)
1234 if (*argv[i] == '/' || *argv[i] == '%')
1236 p = extract_regexp (i, *argv[i] == '%', argv[i]);
1240 p = new_control_record ();
1243 if (xstrtoul (argv[i], NULL, 10, &val, "") != LONGINT_OK
1245 error (EXIT_FAILURE, 0, _("%s: invalid pattern"), argv[i]);
1247 error (EXIT_FAILURE, 0,
1248 _("%s: line number must be greater than zero"),
1251 error (EXIT_FAILURE, 0,
1252 _("line number `%s' is smaller than preceding line number, %lu"),
1255 if (val == last_val)
1257 _("warning: line number `%s' is the same as preceding line number"),
1261 p->lines_required = (int) val;
1264 if (i + 1 < argc && *argv[i + 1] == '{')
1266 /* We have a repeat count. */
1268 parse_repeat_count (i, p, argv[i]);
1274 get_format_flags (char **format_ptr)
1278 for (; **format_ptr; (*format_ptr)++)
1280 switch (**format_ptr)
1291 count += 2; /* Allow for 0x prefix preceding an `x' conversion. */
1302 get_format_width (char **format_ptr)
1308 start = *format_ptr;
1309 for (; ISDIGIT (**format_ptr); (*format_ptr)++)
1312 ch_save = **format_ptr;
1313 **format_ptr = '\0';
1314 /* In the case where no minimum field width is explicitly specified,
1315 allow for enough octal digits to represent the value of LONG_MAX. */
1316 count = ((*format_ptr == start)
1317 ? bytes_to_octal_digits[sizeof (long)]
1319 **format_ptr = ch_save;
1324 get_format_prec (char **format_ptr)
1331 if (**format_ptr != '.')
1335 if (**format_ptr == '-' || **format_ptr == '+')
1337 is_negative = (**format_ptr == '-');
1345 start = *format_ptr;
1346 for (; ISDIGIT (**format_ptr); (*format_ptr)++)
1349 /* ANSI 4.9.6.1 says that if the precision is negative, it's as good as not present. */
1352 start = *format_ptr;
1354 ch_save = **format_ptr;
1355 **format_ptr = '\0';
1356 count = (*format_ptr == start) ? 11 : atoi (start);
1357 **format_ptr = ch_save;
1363 get_format_conv_type (char **format_ptr)
1365 int ch = *((*format_ptr)++);
1378 error (EXIT_FAILURE, 0, _("missing conversion specifier in suffix"));
1383 error (EXIT_FAILURE, 0,
1384 _("invalid conversion specifier in suffix: %c"), ch);
1386 error (EXIT_FAILURE, 0,
1387 _("invalid conversion specifier in suffix: \\%.3o"), ch);
1392 max_out (char *format)
1394 unsigned out_count = 0;
1395 unsigned percents = 0;
1406 out_count += get_format_flags (&format);
1408 int width = get_format_width (&format);
1409 int prec = get_format_prec (&format);
1411 out_count += MAX (width, prec);
1413 get_format_conv_type (&format);
1418 error (EXIT_FAILURE, 0,
1419 _("missing %% conversion specification in suffix"));
1420 else if (percents > 1)
1421 error (EXIT_FAILURE, 0,
1422 _("too many %% conversion specifications in suffix"));
1428 main (int argc, char **argv)
1433 struct sigaction oldact, newact;
1436 program_name = argv[0];
1437 setlocale (LC_ALL, "");
1438 bindtextdomain (PACKAGE, LOCALEDIR);
1439 textdomain (PACKAGE);
1444 suppress_count = FALSE;
1445 remove_files = TRUE;
1446 prefix = DEFAULT_PREFIX;
1449 newact.sa_handler = interrupt_handler;
1450 sigemptyset (&newact.sa_mask);
1451 newact.sa_flags = 0;
1453 sigaction (SIGHUP, NULL, &oldact);
1454 if (oldact.sa_handler != SIG_IGN)
1455 sigaction (SIGHUP, &newact, NULL);
1457 sigaction (SIGINT, NULL, &oldact);
1458 if (oldact.sa_handler != SIG_IGN)
1459 sigaction (SIGINT, &newact, NULL);
1461 sigaction (SIGQUIT, NULL, &oldact);
1462 if (oldact.sa_handler != SIG_IGN)
1463 sigaction (SIGQUIT, &newact, NULL);
1465 sigaction (SIGTERM, NULL, &oldact);
1466 if (oldact.sa_handler != SIG_IGN)
1467 sigaction (SIGTERM, &newact, NULL);
1468 #else /* not SA_INTERRUPT */
1469 if (signal (SIGHUP, SIG_IGN) != SIG_IGN)
1470 signal (SIGHUP, interrupt_handler);
1471 if (signal (SIGINT, SIG_IGN) != SIG_IGN)
1472 signal (SIGINT, interrupt_handler);
1473 if (signal (SIGQUIT, SIG_IGN) != SIG_IGN)
1474 signal (SIGQUIT, interrupt_handler);
1475 if (signal (SIGTERM, SIG_IGN) != SIG_IGN)
1476 signal (SIGTERM, interrupt_handler);
1477 #endif /* not SA_INTERRUPT */
1479 while ((optc = getopt_long (argc, argv, "f:b:kn:sqz", longopts, NULL)) != -1)
1494 remove_files = FALSE;
1498 if (xstrtoul (optarg, NULL, 10, &val, "") != LONGINT_OK
1500 error (EXIT_FAILURE, 0, _("%s: invalid number"), optarg);
1506 suppress_count = TRUE;
1510 elide_empty_files = TRUE;
1519 printf ("csplit (%s) %s\n", GNU_PACKAGE, VERSION);
1520 exit (EXIT_SUCCESS);
1526 if (argc - optind < 2)
1528 error (0, 0, _("too few arguments"));
1533 filename_space = (char *) xmalloc (strlen (prefix) + max_out (suffix) + 2);
1535 filename_space = (char *) xmalloc (strlen (prefix) + digits + 2);
1537 set_input_file (argv[optind++]);
1539 parse_patterns (argc, optind, argv);
1543 if (close (input_desc) < 0)
1545 error (0, errno, _("read error"));
1549 exit (EXIT_SUCCESS);
1556 fprintf (stderr, _("Try `%s --help' for more information.\n"),
1561 Usage: %s [OPTION]... FILE PATTERN...\n\
1565 Output pieces of FILE separated by PATTERN(s) to files `xx01', `xx02', ...,\n\
1566 and output byte counts of each piece to standard output.\n\
1568 -b, --suffix-format=FORMAT use sprintf FORMAT instead of %%d\n\
1569 -f, --prefix=PREFIX use PREFIX instead of `xx'\n\
1570 -k, --keep-files do not remove output files on errors\n\
1571 -n, --digits=DIGITS use specified number of digits instead of 2\n\
1572 -s, --quiet, --silent do not print counts of output file sizes\n\
1573 -z, --elide-empty-files remove empty output files\n\
1574 --help display this help and exit\n\
1575 --version output version information and exit\n\
1577 Read standard input if FILE is -. Each PATTERN may be:\n\
1579 INTEGER copy up to but not including specified line number\n\
1580 /REGEXP/[OFFSET] copy up to but not including a matching line\n\
1581 %%REGEXP%%[OFFSET] skip to, but not including a matching line\n\
1582 {INTEGER} repeat the previous pattern specified number of times\n\
1583 {*} repeat the previous pattern as many times as possible\n\
1585 A line OFFSET is a required `+' or `-' followed by a positive integer.\n\
1587 puts (_("\nReport bugs to textutils-bugs@gnu.ai.mit.edu"));
1589 exit (status == 0 ? EXIT_SUCCESS : EXIT_FAILURE);