1 /* csplit - split a file into sections determined by context lines
2 Copyright (C) 91, 1995-1999 Free Software Foundation, Inc.
4 This program is free software; you can redistribute it and/or modify
5 it under the terms of the GNU General Public License as published by
6 the Free Software Foundation; either version 2, or (at your option)
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU General Public License for more details.
14 You should have received a copy of the GNU General Public License
15 along with this program; if not, write to the Free Software Foundation,
16 Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */
18 /* Written by Stuart Kemp, cpsrk@groper.jcu.edu.au.
19 Modified by David MacKenzie, djm@gnu.ai.mit.edu. */
26 #include <sys/types.h>
34 #include "safe-read.h"
38 /* The official name of this program (e.g., no `g' prefix). */
39 #define PROGRAM_NAME "csplit"
41 #define AUTHORS "Stuart Kemp and David MacKenzie"
48 # define MAX(a,b) (((a) > (b)) ? (a) : (b))
56 /* Increment size of area for control records. */
59 /* The default prefix for output file names. */
60 #define DEFAULT_PREFIX "xx"
64 /* A compiled pattern arg. */
67 char *regexpr; /* Non-compiled regular expression. */
68 struct re_pattern_buffer re_compiled; /* Compiled regular expression. */
69 int offset; /* Offset from regexp to split at. */
70 int lines_required; /* Number of lines required. */
71 unsigned int repeat; /* Repeat count. */
72 int repeat_forever; /* Non-zero if `*' used as a repeat count. */
73 int argnum; /* ARGV index. */
74 boolean ignore; /* If true, produce no output (for regexp). */
77 /* Initial size of data area in buffers. */
78 #define START_SIZE 8191
80 /* Increment size for data area. */
81 #define INCR_SIZE 2048
83 /* Number of lines kept in each node in line list. */
87 /* Some small values to test the algorithms. */
88 # define START_SIZE 200
93 /* A string with a length count. */
100 /* Pointers to the beginnings of lines in the buffer area.
101 These structures are linked together if needed. */
104 unsigned used; /* Number of offsets used in this struct. */
105 unsigned insert_index; /* Next offset to use when inserting line. */
106 unsigned retrieve_index; /* Next index to use when retrieving line. */
107 struct cstring starts[CTRL_SIZE]; /* Lines in the data area. */
108 struct line *next; /* Next in linked list. */
111 /* The structure to hold the input lines.
112 Contains a pointer to the data area and a list containing
113 pointers to the individual lines. */
116 unsigned bytes_alloc; /* Size of the buffer area. */
117 unsigned bytes_used; /* Bytes used in the buffer area. */
118 unsigned start_line; /* First line number in this buffer. */
119 unsigned first_available; /* First line that can be retrieved. */
120 unsigned num_lines; /* Number of complete lines in this buffer. */
121 char *buffer; /* Data area. */
122 struct line *line_start; /* Head of list of pointers to lines. */
123 struct line *curr_line; /* The line start record currently in use. */
124 struct buffer_record *next;
127 static void close_output_file PARAMS ((void));
128 static void create_output_file PARAMS ((void));
129 static void delete_all_files PARAMS ((void));
130 static void save_line_to_file PARAMS ((const struct cstring *line));
131 void usage PARAMS ((int status));
133 /* The name this program was run with. */
136 /* Convert the number of 8-bit bytes of a binary representation to
137 the number of characters required to represent the same quantity
138 as an unsigned octal. For example, a 32-bit (4-byte) quantity may
139 require a field width as wide as 11 characters. */
140 static const unsigned int bytes_to_octal_digits[] =
141 {0, 3, 6, 8, 11, 14, 16, 19, 22, 25, 27, 30, 32, 35, 38, 41, 43};
143 /* Input file descriptor. */
144 static int input_desc = 0;
146 /* List of available buffers. */
147 static struct buffer_record *free_list = NULL;
149 /* Start of buffer list. */
150 static struct buffer_record *head = NULL;
152 /* Partially read line. */
153 static char *hold_area = NULL;
155 /* Number of chars in `hold_area'. */
156 static unsigned hold_count = 0;
158 /* Number of the last line in the buffers. */
159 static unsigned last_line_number = 0;
161 /* Number of the line currently being examined. */
162 static unsigned current_line = 0;
164 /* If TRUE, we have read EOF. */
165 static boolean have_read_eof = FALSE;
167 /* Name of output files. */
168 static char *filename_space = NULL;
170 /* Prefix part of output file names. */
171 static char *prefix = NULL;
173 /* Suffix part of output file names. */
174 static char *suffix = NULL;
176 /* Number of digits to use in output file names. */
177 static int digits = 2;
179 /* Number of files created so far. */
180 static unsigned int files_created = 0;
182 /* Number of bytes written to current file. */
183 static unsigned int bytes_written;
185 /* Output file pointer. */
186 static FILE *output_stream = NULL;
188 /* Output file name. */
189 static char *output_filename = NULL;
191 /* Perhaps it would be cleaner to pass arg values instead of indexes. */
192 static char **global_argv;
194 /* If TRUE, do not print the count of bytes in each output file. */
195 static boolean suppress_count;
197 /* If TRUE, remove output files on error. */
198 static boolean remove_files;
200 /* If TRUE, remove all output files which have a zero length. */
201 static boolean elide_empty_files;
203 /* The compiled pattern arguments, which determine how to split the input file. */
205 static struct control *controls;
207 /* Number of elements in `controls'. */
208 static unsigned int control_used;
210 static struct option const longopts[] =
212 {"digits", required_argument, NULL, 'n'},
213 {"quiet", no_argument, NULL, 'q'},
214 {"silent", no_argument, NULL, 's'},
215 {"keep-files", no_argument, NULL, 'k'},
216 {"elide-empty-files", no_argument, NULL, 'z'},
217 {"prefix", required_argument, NULL, 'f'},
218 {"suffix-format", required_argument, NULL, 'b'},
219 {GETOPT_HELP_OPTION_DECL},
220 {GETOPT_VERSION_OPTION_DECL},
224 /* Optionally remove files created so far; then exit.
225 Called when an error is detected. */
/* main also installs this function as `xalloc_fail_func', so it is the
   failure path for xmalloc and xrealloc too.  */
/* Close the current output file.  */
231 close_output_file ();
/* Signal handler installed by main.  SIG is the signal being delivered.
   The default disposition of SIG is restored (through sigaction when
   SA_INTERRUPT is available, through signal otherwise) and SIG is then
   sent to this process again with kill.  */
245 interrupt_handler (int sig)
248 struct sigaction sigact;
/* Put SIG back to its default action.  */
250 sigact.sa_handler = SIG_DFL;
251 sigemptyset (&sigact.sa_mask);
253 sigaction (sig, &sigact, NULL);
254 #else /* !SA_INTERRUPT */
255 signal (sig, SIG_DFL);
256 #endif /* SA_INTERRUPT */
/* Re-raise SIG against ourselves now that the default action applies.  */
258 kill (getpid (), sig);
261 /* Keep track of NUM chars of a partial line in buffer START.
262 These chars will be retrieved later when another large buffer is read.
263 It is not necessary to create a new buffer for these chars; instead,
264 we keep a pointer to the existing buffer. This buffer *is* on the
265 free list, and when the next buffer is obtained from this list
266 (even if it is this one), these chars will be placed at the
267 start of the new buffer. */
/* START is the address of the first held char, NUM the count of held chars.
   NOTE(review): presumably these are stored in `hold_area' and `hold_count',
   which load_buffer reads back -- confirm against the function body.  */
270 save_to_hold_area (char *start, unsigned int num)
276 /* Read up to MAX_N_BYTES chars from the input stream into DEST.
277 Return the number of chars read. */
/* The input stream is the descriptor `input_desc', read with safe_read.
   This function is also where `have_read_eof' gets set and where a failed
   read is reported as "read error".
   NOTE(review): the conditions guarding those two actions are not shown
   here; presumably a zero return means EOF and a negative one an error.  */
280 read_input (char *dest, unsigned int max_n_bytes)
/* A request for zero bytes is special-cased before any read is attempted.  */
284 if (max_n_bytes == 0)
287 bytes_read = safe_read (input_desc, dest, max_n_bytes);
290 have_read_eof = TRUE;
294 error (0, errno, _("read error"));
301 /* Initialize existing line record P. */
304 clear_line_control (struct line *p)
/* Retrieval restarts at the first entry of P->starts.  */
308 p->retrieve_index = 0;
311 /* Initialize all line records in B. */
314 clear_all_line_control (struct buffer_record *b)
/* Follow the `next' links from B->line_start, resetting each record.
   A buffer with no line records (line_start == NULL) is left untouched.  */
318 for (l = b->line_start; l; l = l->next)
319 clear_line_control (l);
322 /* Return a new, initialized line record. */
325 new_line_control (void)
/* The record is obtained with xmalloc and then reset with
   clear_line_control.  */
329 p = (struct line *) xmalloc (sizeof (struct line));
332 clear_line_control (p);
337 /* Record LINE_START, which is the address of the start of a line
338 of length LINE_LEN in the large buffer, in the lines buffer of B. */
/* Line records are created on demand: the first one when B has none, and a
   further one, linked through `next', whenever the current record already
   holds CTRL_SIZE entries.
   NOTE(review): LINE_LEN is an int while record_line_starts passes
   unsigned int counts -- confirm lengths cannot exceed INT_MAX.  */
341 keep_new_line (struct buffer_record *b, char *line_start, int line_len)
345 /* If there is no existing area to keep line info, get some. */
346 if (b->line_start == NULL)
347 b->line_start = b->curr_line = new_line_control ();
349 /* If existing area for lines is full, get more. */
350 if (b->curr_line->used == CTRL_SIZE)
352 b->curr_line->next = new_line_control ();
353 b->curr_line = b->curr_line->next;
358 /* Record the start of the line, and update counters. */
359 l->starts[l->insert_index].str = line_start;
360 l->starts[l->insert_index].len = line_len;
365 /* Scan the buffer in B for newline characters
366 and record the line start locations and lengths in B.
367 Return the number of lines found in this buffer.
369 There may be an incomplete line at the end of the buffer;
370 a pointer is kept to this area, which will be used when
371 the next buffer is filled. */
374 record_line_starts (struct buffer_record *b)
376 char *line_start; /* Start of current line. */
377 char *line_end; /* End of each line found. */
378 unsigned int bytes_left; /* Length of incomplete last line. */
379 unsigned int lines; /* Number of lines found. */
380 unsigned int line_length; /* Length of each line found. */
/* An empty buffer is special-cased before scanning.  */
382 if (b->bytes_used == 0)
386 line_start = b->buffer;
387 bytes_left = b->bytes_used;
/* Look for the next newline within the bytes not yet consumed.  */
391 line_end = memchr (line_start, '\n', bytes_left);
392 if (line_end == NULL)
/* The recorded length includes the terminating newline.  */
394 line_length = line_end - line_start + 1;
395 keep_new_line (b, line_start, line_length);
396 bytes_left -= line_length;
397 line_start = line_end + 1;
401 /* Check for an incomplete last line. */
/* The leftover bytes are either recorded as a line of their own or handed
   to save_to_hold_area.
   NOTE(review): the test that picks between the two is not shown here;
   presumably it depends on `have_read_eof' -- confirm.  */
406 keep_new_line (b, line_start, bytes_left);
410 save_to_hold_area (line_start, bytes_left);
/* Number this buffer's lines right after the last line seen so far, then
   advance the global line count.  */
413 b->num_lines = lines;
414 b->first_available = b->start_line = last_line_number + 1;
415 last_line_number += lines;
420 /* Return a new buffer with room to store SIZE bytes, plus
421 an extra byte for safety. */
423 static struct buffer_record *
424 create_new_buffer (unsigned int size)
426 struct buffer_record *new_buffer;
428 new_buffer = (struct buffer_record *)
429 xmalloc (sizeof (struct buffer_record));
/* SIZE + 1 bytes are allocated for the data area ...  */
431 new_buffer->buffer = (char *) xmalloc (size + 1);
/* ... but the recorded capacity is SIZE, without the extra byte.  */
433 new_buffer->bytes_alloc = size;
/* No line records yet; keep_new_line creates them on demand.  */
434 new_buffer->line_start = new_buffer->curr_line = NULL;
439 /* Return a new buffer of at least MIN_SIZE bytes. If a buffer of at
440 least that size is currently free, use it, otherwise create a new one. */
442 static struct buffer_record *
443 get_new_buffer (unsigned int min_size)
445 struct buffer_record *p, *q;
446 struct buffer_record *new_buffer; /* Buffer to return. */
447 unsigned int alloc_size; /* Actual size that will be requested. */
/* Size for a fresh allocation: START_SIZE grown in INCR_SIZE steps until it
   is no smaller than MIN_SIZE.
   NOTE(review): alloc_size is an unsigned int, so for MIN_SIZE close to
   UINT_MAX the addition wraps around -- confirm callers cannot ask for
   that much.  */
449 alloc_size = START_SIZE;
450 while (min_size > alloc_size)
451 alloc_size += INCR_SIZE;
453 if (free_list == NULL)
454 new_buffer = create_new_buffer (alloc_size);
457 /* Use first-fit to find a buffer. */
458 p = new_buffer = NULL;
/* A free buffer qualifies when its capacity covers MIN_SIZE; the rounded
   alloc_size only matters for new allocations.  */
463 if (q->bytes_alloc >= min_size)
476 new_buffer = (q ? q : create_new_buffer (alloc_size));
/* A reused buffer still carries its old line records: rewind to the first
   one and reset them all.  */
478 new_buffer->curr_line = new_buffer->line_start;
479 clear_all_line_control (new_buffer);
/* The buffer starts out empty, numbered right after the last line read.  */
482 new_buffer->num_lines = 0;
483 new_buffer->bytes_used = 0;
484 new_buffer->start_line = new_buffer->first_available = last_line_number + 1;
485 new_buffer->next = NULL;
490 /* Add buffer BUF to the list of free buffers. */
493 free_buffer (struct buffer_record *buf)
/* BUF is linked in front of the current `free_list'.  */
495 buf->next = free_list;
499 /* Append buffer BUF to the linked list of buffers that contain
500 some data yet to be processed. */
503 save_buffer (struct buffer_record *buf)
505 struct buffer_record *p;
/* Retrieval from BUF starts at its first line record.  */
508 buf->curr_line = buf->line_start;
/* Walk from `head' to the last buffer currently in the list.  */
514 for (p = head; p->next; p = p->next)
520 /* Fill a buffer of input.
522 Set the initial size of the buffer to a default.
523 Fill the buffer (from the hold area and input stream)
524 and find the individual lines.
525 If no lines are found (the buffer is too small to hold the next line),
526 release the current buffer (whose contents would have been put in the
527 hold area) and repeat the process with another large buffer until at least
528 one entire line has been read.
530 Return TRUE if a new buffer was obtained, otherwise false
531 (in which case end-of-file must have been encountered). */
536 struct buffer_record *b;
537 unsigned int bytes_wanted = START_SIZE; /* Minimum buffer size. */
538 unsigned int bytes_avail; /* Size of new buffer created. */
539 unsigned int lines_found; /* Number of lines in this new buffer. */
540 char *p; /* Place to load into buffer. */
545 /* We must make the buffer at least as large as the amount of data
546 in the partial line left over from the last call. */
547 if (bytes_wanted < hold_count)
548 bytes_wanted = hold_count;
552 b = get_new_buffer (bytes_wanted);
553 bytes_avail = b->bytes_alloc; /* Size of buffer returned. */
556 /* First check the `holding' area for a partial line. */
/* NOTE(review): the comment on save_to_hold_area says the held chars may
   live in the very buffer that was just handed back.  memcpy requires
   non-overlapping regions, so memmove may be the safer call -- confirm.  */
560 memcpy (p, hold_area, hold_count);
562 b->bytes_used += hold_count;
563 bytes_avail -= hold_count;
/* Fill whatever room is left from the input.  */
567 b->bytes_used += (unsigned int) read_input (p, bytes_avail);
569 lines_found = record_line_starts (b);
/* Should another attempt be needed, ask for twice this buffer's capacity.  */
570 bytes_wanted = b->bytes_alloc * 2;
/* Try again until a complete line was found or end-of-file has been read.  */
574 while (!lines_found && !have_read_eof);
579 return lines_found != 0;
582 /* Return the line number of the first line that has not yet been retrieved. */
/* If no buffer is queued, one is loaded first; when that fails too, the
   program exits with the diagnostic "input disappeared".  */
585 get_first_line_in_buffer (void)
587 if (head == NULL && !load_buffer ())
588 error (EXIT_FAILURE, errno, _("input disappeared"));
590 return head->first_available;
593 /* Return a pointer to the logical first line in the buffer and make the
594 next line the logical first line.
595 Return NULL if there is no more input. */
/* The returned pointer refers to an entry of a line record's `starts'
   array; the line itself is not copied.  */
597 static struct cstring *
600 struct cstring *line; /* Return value. */
601 struct line *l; /* For convenience. */
603 if (head == NULL && !load_buffer ())
/* Never let current_line lag behind the first line still retrievable.  */
606 if (current_line < head->first_available)
607 current_line = head->first_available;
609 ++(head->first_available);
613 line = &l->starts[l->retrieve_index];
615 /* Advance index to next line. */
616 if (++l->retrieve_index == l->used)
618 /* Go on to the next line record. */
619 head->curr_line = l->next;
/* A missing or unused next record means this buffer is used up.  */
620 if (head->curr_line == NULL || head->curr_line->used == 0)
622 /* Go on to the next data block. */
623 struct buffer_record *b = head;
632 /* Search the buffers for line LINENUM, reading more input if necessary.
633 Return a pointer to the line, or NULL if it is not found in the file. */
/* Unlike remove_line, the visible code here only looks lines up; it does not
   touch first_available or retrieve_index.  */
635 static struct cstring *
636 find_line (unsigned int linenum)
638 struct buffer_record *b;
640 if (head == NULL && !load_buffer ())
/* LINENUM lies before the first line of the first queued buffer.
   NOTE(review): presumably NULL is returned here -- the statement is not
   shown, confirm.  */
643 if (linenum < head->start_line)
648 if (linenum < b->start_line + b->num_lines)
650 /* The line is in this buffer. */
652 unsigned int offset; /* How far into the buffer the line is. */
655 offset = linenum - b->start_line;
656 /* Find the control record. */
/* Each line record holds CTRL_SIZE entries, so whole records are stepped
   over until OFFSET indexes into one.  */
657 while (offset >= CTRL_SIZE)
662 return &l->starts[offset];
/* Last queued buffer reached: more input is needed to go on.  */
664 if (b->next == NULL && !load_buffer ())
666 b = b->next; /* Try the next data block. */
670 /* Return TRUE if no further line is available for input, i.e. if the line after current_line cannot be found; otherwise return FALSE. */
675 return (find_line (current_line + 1) == NULL) ? TRUE : FALSE;
678 /* Set the name of the input file to NAME and open it. */
/* A NAME of "-" is special-cased (usage documents it as standard input).
   Any other NAME is opened read-only and its descriptor is stored in
   `input_desc'; the fatal error below reports NAME together with errno.  */
681 set_input_file (const char *name)
683 if (STREQ (name, "-"))
687 input_desc = open (name, O_RDONLY);
689 error (EXIT_FAILURE, errno, "%s", name);
693 /* Write all lines from the beginning of the buffer up to, but
694 not including, line LAST_LINE, to the current output file.
695 If IGNORE is TRUE, do not output lines selected here.
696 ARGNUM is the index in ARGV of the current pattern. */
699 write_to_file (unsigned int last_line, boolean ignore, int argnum)
701 struct cstring *line;
702 unsigned int first_line; /* First available input line. */
703 unsigned int lines; /* Number of lines to output. */
706 first_line = get_first_line_in_buffer ();
/* LAST_LINE lies before the first line that is still available: report the
   pattern text taken from the saved argument vector.  */
708 if (first_line > last_line)
710 error (0, 0, _("%s: line number out of range"), global_argv[argnum]);
714 lines = last_line - first_line;
/* Take the lines off the front of the buffer one by one.  */
716 for (i = 0; i < lines; i++)
718 line = remove_line ();
721 error (0, 0, _("%s: line number out of range"), global_argv[argnum]);
725 save_line_to_file (line);
729 /* Output any lines left after all regexps have been processed. */
/* Lines are taken with remove_line until it returns NULL and each one is
   passed to save_line_to_file.  Opening and closing the output file is left
   to the caller.  */
732 dump_rest_of_file (void)
734 struct cstring *line;
736 while ((line = remove_line ()) != NULL)
737 save_line_to_file (line);
740 /* Handle an attempt to read beyond EOF under the control of record P,
741 on iteration REPETITION if nonzero. */
/* The diagnostic goes to stderr and names the program and P->lines_required.
   It ends either with " on repetition N" or with a bare newline.  */
744 handle_line_error (const struct control *p, int repetition)
746 fprintf (stderr, _("%s: `%d': line number out of range"),
747 program_name, p->lines_required);
749 fprintf (stderr, _(" on repetition %d\n"), repetition);
751 fprintf (stderr, "\n");
756 /* Determine the line number that marks the end of this file,
757 then get those lines and save them to the output file.
758 P is the control record.
759 REPETITION is the repetition number. */
762 process_line_count (const struct control *p, int repetition)
764 unsigned int linenum;
/* Repetition N (counted from 0) ends at line lines_required * (N + 1).  */
765 unsigned int last_line_to_save = p->lines_required * (repetition + 1);
766 struct cstring *line;
768 create_output_file ();
770 linenum = get_first_line_in_buffer ();
772 /* Initially, I wanted to assert linenum < last_line_to_save, but that
773 condition is false for the valid command: echo | csplit - 1 '{*}'.
774 So, relax it just a little. */
775 assert ((linenum == 1 && last_line_to_save == 1)
776 || linenum < last_line_to_save);
/* Copy lines linenum .. last_line_to_save - 1; the end line itself is not
   written.  */
778 while (linenum++ < last_line_to_save)
780 line = remove_line ();
782 handle_line_error (p, repetition);
783 save_line_to_file (line);
786 close_output_file ();
788 /* Ensure that the line number specified is not 1 greater than
789 the number of lines in the file. */
790 if (no_more_lines ())
791 handle_line_error (p, repetition);
/* Report on stderr that the pattern of control record P found no match.
   The pattern text is taken from the saved argument vector at P->argnum.
   The message ends either with " on repetition N" or with a bare newline.
   The function also contains calls that dump the rest of the input and
   close the output file.
   NOTE(review): the conditions on REPETITION and IGNORE that select these
   actions are not shown here -- confirm against the full body.  */
795 regexp_error (struct control *p, int repetition, boolean ignore)
797 fprintf (stderr, _("%s: `%s': match not found"),
798 program_name, global_argv[p->argnum]);
801 fprintf (stderr, _(" on repetition %d\n"), repetition);
803 fprintf (stderr, "\n");
807 dump_rest_of_file ();
808 close_output_file ();
813 /* Read the input until a line matches the regexp in P, outputting
814 it unless P->IGNORE is TRUE.
815 REPETITION is this repeat-count; 0 means the first time. */
818 process_regexp (struct control *p, int repetition)
820 struct cstring *line; /* From input file. */
821 unsigned int line_len; /* To make "$" in regexps work. */
822 unsigned int break_line; /* First line number of next file. */
823 boolean ignore = p->ignore; /* If TRUE, skip this section. */
827 create_output_file ();
829 /* If there is no offset for the regular expression, or
830 it is positive, then it is not necessary to buffer the lines. */
/* First variant: each line is looked up, tested, and then taken off the
   buffer with remove_line as the search moves on.  */
836 line = find_line (++current_line);
/* The input ended before a match was found.  With a `*' repeat count the
   rest of the input is dumped; the error call below is the other path.  */
839 if (p->repeat_forever)
843 dump_rest_of_file ();
844 close_output_file ();
849 regexp_error (p, repetition, ignore);
851 line_len = line->len;
/* NOTE(review): presumably a trailing newline is left out of the searched
   length so that "$" can match -- the adjusting statement is not shown.  */
852 if (line->str[line_len - 1] == '\n')
854 ret = re_search (&p->re_compiled, line->str, line_len,
855 0, line_len, (struct re_registers *) 0);
858 error (0, 0, _("error in regular expression search"));
863 line = remove_line ();
865 save_line_to_file (line);
873 /* Buffer the lines. */
/* Second variant: lines are only looked up with find_line, so they remain
   buffered for write_to_file below.  */
876 line = find_line (++current_line);
879 if (p->repeat_forever)
883 dump_rest_of_file ();
884 close_output_file ();
889 regexp_error (p, repetition, ignore);
891 line_len = line->len;
892 if (line->str[line_len - 1] == '\n')
894 ret = re_search (&p->re_compiled, line->str, line_len,
895 0, line_len, (struct re_registers *) 0);
898 error (0, 0, _("error in regular expression search"));
906 /* Account for any offset from this regexp. */
/* P->offset is a signed int added to an unsigned line number; a negative
   offset therefore relies on unsigned wrap-around arithmetic.  */
907 break_line = current_line + p->offset;
909 write_to_file (break_line, ignore, p->argnum);
912 close_output_file ();
/* The next search resumes from the line where this section ended.  */
915 current_line = break_line;
918 /* Split the input file according to the control records we have built. */
/* Control records are handled in order.  Each one is applied once plus
   `repeat' further times, or without limit when `repeat_forever' is set.
   Whatever input then remains is written to one final output file.  */
925 for (i = 0; i < control_used; i++)
/* A record with a regexpr is a pattern; otherwise it is a line count.  */
927 if (controls[i].regexpr)
929 for (j = 0; (controls[i].repeat_forever
930 || j <= controls[i].repeat); j++)
931 process_regexp (&controls[i], j);
935 for (j = 0; (controls[i].repeat_forever
936 || j <= controls[i].repeat); j++)
937 process_line_count (&controls[i], j);
941 create_output_file ();
942 dump_rest_of_file ();
943 close_output_file ();
946 /* Return the name of output file number NUM. */
/* The name is built in the shared buffer `filename_space', which is also the
   return value, so every call overwrites the previous result.  The name is
   `prefix' followed by NUM, formatted either with the user's `suffix' format
   or zero-padded to `digits' digits.
   NOTE(review): NUM is unsigned but "%0*d" expects an int; the two agree
   only up to INT_MAX.  */
949 make_filename (unsigned int num)
951 strcpy (filename_space, prefix);
/* `suffix' is used as the sprintf format itself; main calls max_out on it
   when sizing filename_space.  */
953 sprintf (filename_space+strlen(prefix), suffix, num);
955 sprintf (filename_space+strlen(prefix), "%0*d", digits, num);
956 return filename_space;
959 /* Create the next output file. */
/* The name is derived from `files_created' and kept in `output_filename';
   the file is opened for writing as `output_stream'.  A failed fopen is
   reported with the file name and errno.  */
962 create_output_file (void)
964 output_filename = make_filename (files_created);
965 output_stream = fopen (output_filename, "w");
966 if (output_stream == NULL)
968 error (0, errno, "%s", output_filename);
975 /* Delete all the files we have created. */
/* The names are regenerated with make_filename for numbers 0 up to
   files_created - 1; a failure is reported with the name and errno.  */
978 delete_all_files (void)
983 for (i = 0; i < files_created; i++)
985 name = make_filename (i);
987 error (0, errno, "%s", name);
991 /* Close the current output file and print the count
992 of characters in this file. */
995 close_output_file (void)
/* A stream error left over from earlier writes and a failing fclose are both
   reported as a write error.  */
999 if (ferror (output_stream) || fclose (output_stream) == EOF)
1001 error (0, errno, _("write error for `%s'"), output_filename);
1002 output_stream = NULL;
/* With elide_empty_files set, a file that received no bytes is unlinked.  */
1005 if (bytes_written == 0 && elide_empty_files)
1007 if (unlink (output_filename))
1008 error (0, errno, "%s", output_filename);
1013 /* FIXME: if we write to stdout here, we have to close stdout
1014 and check for errors. */
/* NOTE(review): bytes_written is an unsigned int but is printed with "%d";
   "%u" would match the type.  */
1015 if (!suppress_count)
1016 fprintf (stdout, "%d\n", bytes_written);
1018 output_stream = NULL;
1022 /* Save line LINE to the output file and
1023 increment the character count for the current file. */
1026 save_line_to_file (const struct cstring *line)
/* The fwrite result is not examined here; close_output_file tests the
   stream with ferror later.  */
1028 fwrite (line->str, sizeof (char), line->len, output_stream);
/* The count grows by LINE->len whether or not the write succeeded.  */
1029 bytes_written += line->len;
1032 /* Return a new, initialized control record. */
/* Records live in the array `controls', which grows ALLOC_SIZE entries at a
   time.  The returned pointer is into that array.
   NOTE(review): a later call may move the array via xrealloc, so a pointer
   returned earlier should not be kept across calls -- confirm no caller
   does.  */
1034 static struct control *
1035 new_control_record (void)
1037 static unsigned control_allocated = 0; /* Total space allocated. */
/* First call: allocate the initial array.  */
1040 if (control_allocated == 0)
1042 control_allocated = ALLOC_SIZE;
1043 controls = (struct control *)
1044 xmalloc (sizeof (struct control) * control_allocated);
/* Array full: extend it by another ALLOC_SIZE entries.  */
1046 else if (control_used == control_allocated)
1048 control_allocated += ALLOC_SIZE;
1049 controls = (struct control *)
1050 xrealloc ((char *) controls,
1051 sizeof (struct control) * control_allocated);
/* Hand out the next unused slot.  */
1053 p = &controls[control_used++];
1056 p->repeat_forever = 0;
1057 p->lines_required = 0;
1062 /* Check if there is a numeric offset after a regular expression.
1063 STR is the entire command line argument.
1064 P is the control record for this regular expression.
1065 NUM is the numeric part of STR. */
/* NUM must start with `+' or `-' followed by an unsigned decimal number;
   anything else is a fatal error that quotes STR.  */
1068 check_for_offset (struct control *p, const char *str, const char *num)
1072 if (*num != '-' && *num != '+')
1073 error (EXIT_FAILURE, 0, _("%s: `+' or `-' expected after delimeter"), str);
/* The digits after the sign are parsed as an unsigned long.  */
1075 if (xstrtoul (num + 1, NULL, 10, &val, "") != LONGINT_OK
1077 error (EXIT_FAILURE, 0, _("%s: integer expected after `%c'"), str, *num);
1078 p->offset = (unsigned int) val;
/* NOTE(review): presumably negated only when the sign was `-' -- the
   guarding test is not shown here.  */
1081 p->offset = -p->offset;
1084 /* Given that the first character of command line arg STR is '{',
1085 make sure that the rest of the string is a valid repeat count
1086 and store its value in P.
1087 ARGNUM is the ARGV index of STR. */
1090 parse_repeat_count (int argnum, struct control *p, char *str)
/* END points at the last character of STR.  */
1095 end = str + strlen (str) - 1;
1097 error (EXIT_FAILURE, 0, _("%s: `}' is required in repeat count"), str);
/* Exactly one character precedes END, and it is `*': repeat without limit.  */
1100 if (str+1 == end-1 && *(str+1) == '*')
1101 p->repeat_forever = 1;
/* Otherwise the text after `{' must be an unsigned decimal number.  */
1104 if (xstrtoul (str + 1, NULL, 10, &val, "") != LONGINT_OK
1107 error (EXIT_FAILURE, 0,
1108 _("%s}: integer required between `{' and `}'"),
1109 global_argv[argnum]);
1111 p->repeat = (unsigned int) val;
1117 /* Extract the regular expression from STR and check for a numeric offset.
1118 STR should start with the regexp delimiter character.
1119 Return a new control record for the regular expression.
1120 ARGNUM is the ARGV index of STR.
1121 Unless IGNORE is TRUE, mark these lines for output. */
1123 static struct control *
1124 extract_regexp (int argnum, boolean ignore, char *str)
1126 int len; /* Number of chars in this regexp. */
1128 char *closing_delim;
/* The closing delimiter is the LAST occurrence of the delimiter after the
   opening one, so the pattern itself may contain the delimiter.  */
1132 closing_delim = strrchr (str + 1, delim);
1133 if (closing_delim == NULL)
1134 error (EXIT_FAILURE, 0,
1135 _("%s: closing delimeter `%c' missing"), str, delim);
/* LEN counts the characters strictly between the two delimiters.  */
1137 len = closing_delim - str - 1;
1138 p = new_control_record ();
/* NOTE(review): strncpy copies exactly LEN chars and writes no terminating
   NUL, although LEN + 1 bytes are allocated.  The visible code passes an
   explicit length to re_compile_pattern, so this only matters if regexpr is
   ever used as a C string -- confirm.  */
1142 p->regexpr = (char *) xmalloc ((unsigned) (len + 1));
1143 strncpy (p->regexpr, str + 1, len);
/* Set up the pattern buffer for re_compile_pattern: an initial code buffer,
   a 256-entry fastmap and no translate table.  */
1144 p->re_compiled.allocated = len * 2;
1145 p->re_compiled.buffer = (unsigned char *) xmalloc (p->re_compiled.allocated);
1146 p->re_compiled.fastmap = xmalloc (256);
1147 p->re_compiled.translate = 0;
1148 err = re_compile_pattern (p->regexpr, len, &p->re_compiled);
1151 error (0, 0, _("%s: invalid regular expression: %s"), str, err);
/* Anything after the closing delimiter must be a line offset.  */
1155 if (closing_delim[1])
1156 check_for_offset (p, str, closing_delim + 1);
1161 /* Extract the break patterns from args START through ARGC - 1 of ARGV.
1162 After each pattern, check if the next argument is a repeat count. */
1165 parse_patterns (int argc, int start, char **argv)
1167 int i; /* Index into ARGV. */
1168 struct control *p; /* New control record created. */
/* Remembers the previous line-number pattern for the ordering checks.  */
1170 static unsigned long last_val = 0;
1172 for (i = start; i < argc; i++)
/* `/' and `%' introduce a regexp; the `%' form is passed as IGNORE.  */
1174 if (*argv[i] == '/' || *argv[i] == '%')
1176 p = extract_regexp (i, *argv[i] == '%', argv[i]);
/* Anything else has to be a decimal line number.  */
1180 p = new_control_record ();
1183 if (xstrtoul (argv[i], NULL, 10, &val, "") != LONGINT_OK
1185 error (EXIT_FAILURE, 0, _("%s: invalid pattern"), argv[i]);
1187 error (EXIT_FAILURE, 0,
1188 _("%s: line number must be greater than zero"),
1191 error (EXIT_FAILURE, 0,
1192 _("line number `%s' is smaller than preceding line number, %lu"),
/* An equal line number only draws a warning.  */
1195 if (val == last_val)
1197 _("warning: line number `%s' is the same as preceding line number"),
1201 p->lines_required = (int) val;
/* An argument starting with `{' right after a pattern is its repeat count.  */
1204 if (i + 1 < argc && *argv[i + 1] == '{')
1206 /* We have a repeat count. */
1208 parse_repeat_count (i, p, argv[i]);
/* Scan characters at *FORMAT_PTR one at a time, advancing *FORMAT_PTR, and
   dispatch on each one.  max_out adds the return value to its output count.
   NOTE(review): which characters are accepted, and where the scan stops, is
   decided by switch cases not shown here.  */
1214 get_format_flags (char **format_ptr)
1218 for (; **format_ptr; (*format_ptr)++)
1220 switch (**format_ptr)
1231 count += 2; /* Allow for 0x prefix preceding an `x' conversion. */
/* Scan an optional run of decimal digits (the field width) at *FORMAT_PTR,
   leaving *FORMAT_PTR just past it.  With no digits, the count is the number
   of octal digits needed for a long.  */
1242 get_format_width (char **format_ptr)
1248 start = *format_ptr;
1249 for (; ISDIGIT (**format_ptr); (*format_ptr)++)
/* Temporarily NUL-terminate the digit run; the saved character is put back
   below.  */
1252 ch_save = **format_ptr;
1253 **format_ptr = '\0';
1254 /* In the case where no minimum field width is explicitly specified,
1255 allow for enough octal digits to represent the value of LONG_MAX. */
1256 count = ((*format_ptr == start)
1257 ? bytes_to_octal_digits[sizeof (long)]
1259 **format_ptr = ch_save;
/* Scan an optional precision at *FORMAT_PTR.  A precision starts with `.',
   may carry a `-' or `+' sign, and continues with a run of decimal digits.
   The digits are read with atoi after temporarily NUL-terminating the run.
   When the digit run is taken to be empty the count is 11.
   NOTE(review): the handling of a format without `.' is not shown here.  */
1264 get_format_prec (char **format_ptr)
1271 if (**format_ptr != '.')
1275 if (**format_ptr == '-' || **format_ptr == '+')
1277 is_negative = (**format_ptr == '-');
1285 start = *format_ptr;
1286 for (; ISDIGIT (**format_ptr); (*format_ptr)++)
1289 /* ANSI 4.9.6.1 says that if the precision is negative, it's as good as
1292 start = *format_ptr;
1294 ch_save = **format_ptr;
1295 **format_ptr = '\0';
1296 count = (*format_ptr == start) ? 11 : atoi (start);
1297 **format_ptr = ch_save;
/* Consume the conversion character at *FORMAT_PTR; the pointer is advanced
   past it.  A missing or invalid conversion character is a fatal error.  An
   invalid one is quoted either as a character or as a three-digit octal
   escape.
   NOTE(review): the set of accepted characters is not shown here.  */
1303 get_format_conv_type (char **format_ptr)
1305 int ch = *((*format_ptr)++);
1318 error (EXIT_FAILURE, 0, _("missing conversion specifier in suffix"));
1323 error (EXIT_FAILURE, 0,
1324 _("invalid conversion specifier in suffix: %c"), ch);
1326 error (EXIT_FAILURE, 0,
1327 _("invalid conversion specifier in suffix: \\%.3o"), ch);
/* Examine the suffix FORMAT and accumulate a count of output characters for
   it; main uses the result when sizing `filename_space'.  FORMAT must hold
   exactly one % conversion: none, or more than one, is a fatal error.  */
1332 max_out (char *format)
1334 unsigned out_count = 0;
1335 unsigned percents = 0;
/* Room for what the flags may add ...  */
1346 out_count += get_format_flags (&format);
1348 int width = get_format_width (&format);
1349 int prec = get_format_prec (&format);
/* ... plus the larger of field width and precision.  */
1351 out_count += MAX (width, prec);
/* The conversion character is checked; it adds nothing to the count here.  */
1353 get_format_conv_type (&format);
1358 error (EXIT_FAILURE, 0,
1359 _("missing %% conversion specification in suffix"));
1360 else if (percents > 1)
1361 error (EXIT_FAILURE, 0,
1362 _("too many %% conversion specifications in suffix"));
/* Program entry point: set up defaults and signal handling, parse the
   options, open the input FILE, parse the PATTERN arguments, and check the
   input descriptor and stdout for errors before exiting successfully.  */
1368 main (int argc, char **argv)
1373 struct sigaction oldact, newact;
1376 program_name = argv[0];
1377 setlocale (LC_ALL, "");
1378 bindtextdomain (PACKAGE, LOCALEDIR);
1379 textdomain (PACKAGE);
/* Defaults: print byte counts, remove output files on error, prefix "xx".  */
1384 suppress_count = FALSE;
1385 remove_files = TRUE;
1386 prefix = DEFAULT_PREFIX;
1388 /* Change the way xmalloc and xrealloc fail. */
1389 xalloc_fail_func = cleanup;
/* Install interrupt_handler for SIGHUP, SIGINT, SIGQUIT and SIGTERM, but
   leave alone any of them that is currently being ignored.  */
1392 newact.sa_handler = interrupt_handler;
1393 sigemptyset (&newact.sa_mask);
1394 newact.sa_flags = 0;
1396 sigaction (SIGHUP, NULL, &oldact);
1397 if (oldact.sa_handler != SIG_IGN)
1398 sigaction (SIGHUP, &newact, NULL);
1400 sigaction (SIGINT, NULL, &oldact);
1401 if (oldact.sa_handler != SIG_IGN)
1402 sigaction (SIGINT, &newact, NULL);
1404 sigaction (SIGQUIT, NULL, &oldact);
1405 if (oldact.sa_handler != SIG_IGN)
1406 sigaction (SIGQUIT, &newact, NULL);
1408 sigaction (SIGTERM, NULL, &oldact);
1409 if (oldact.sa_handler != SIG_IGN)
1410 sigaction (SIGTERM, &newact, NULL);
1411 #else /* not SA_INTERRUPT */
1412 if (signal (SIGHUP, SIG_IGN) != SIG_IGN)
1413 signal (SIGHUP, interrupt_handler);
1414 if (signal (SIGINT, SIG_IGN) != SIG_IGN)
1415 signal (SIGINT, interrupt_handler);
1416 if (signal (SIGQUIT, SIG_IGN) != SIG_IGN)
1417 signal (SIGQUIT, interrupt_handler);
1418 if (signal (SIGTERM, SIG_IGN) != SIG_IGN)
1419 signal (SIGTERM, interrupt_handler);
1420 #endif /* not SA_INTERRUPT */
/* Options -f, -b and -n take an argument; -k, -s, -q and -z do not.  */
1422 while ((optc = getopt_long (argc, argv, "f:b:kn:sqz", longopts, NULL)) != -1)
1437 remove_files = FALSE;
1441 if (xstrtoul (optarg, NULL, 10, &val, "") != LONGINT_OK
1443 error (EXIT_FAILURE, 0, _("%s: invalid number"), optarg);
1449 suppress_count = TRUE;
1453 elide_empty_files = TRUE;
1456 case_GETOPT_HELP_CHAR;
1458 case_GETOPT_VERSION_CHAR (PROGRAM_NAME, AUTHORS);
/* At least two non-option arguments are needed: FILE and one PATTERN.  */
1464 if (argc - optind < 2)
1466 error (0, 0, _("too few arguments"));
/* Size the shared file name buffer from the prefix length plus either the
   bound computed from the suffix format or the digit count.  */
1471 filename_space = (char *) xmalloc (strlen (prefix) + max_out (suffix) + 2);
1473 filename_space = (char *) xmalloc (strlen (prefix) + digits + 2);
/* The first non-option argument names the input; the rest are patterns.  */
1475 set_input_file (argv[optind++]);
1477 parse_patterns (argc, optind, argv);
1481 if (close (input_desc) < 0)
1483 error (0, errno, _("read error"));
/* The byte counts went to stdout, so make sure they really got written.  */
1487 if (!suppress_count && (ferror (stdout) || fclose (stdout) == EOF))
1488 error (EXIT_FAILURE, errno, _("write error"));
1490 exit (EXIT_SUCCESS);
/* Usage output.  The visible code prints a short "Try --help" hint on
   stderr, a full help text, and the bug-report address on stdout.  It then
   exits: EXIT_SUCCESS when STATUS is 0, EXIT_FAILURE otherwise.
   NOTE(review): presumably STATUS also selects between the hint and the
   full help text -- the test is not shown here.  */
1497 fprintf (stderr, _("Try `%s --help' for more information.\n"),
1502 Usage: %s [OPTION]... FILE PATTERN...\n\
1506 Output pieces of FILE separated by PATTERN(s) to files `xx01', `xx02', ...,\n\
1507 and output byte counts of each piece to standard output.\n\
1509 -b, --suffix-format=FORMAT use sprintf FORMAT instead of %%d\n\
1510 -f, --prefix=PREFIX use PREFIX instead of `xx'\n\
1511 -k, --keep-files do not remove output files on errors\n\
1512 -n, --digits=DIGITS use specified number of digits instead of 2\n\
1513 -s, --quiet, --silent do not print counts of output file sizes\n\
1514 -z, --elide-empty-files remove empty output files\n\
1515 --help display this help and exit\n\
1516 --version output version information and exit\n\
1518 Read standard input if FILE is -. Each PATTERN may be:\n\
1520 INTEGER copy up to but not including specified line number\n\
1521 /REGEXP/[OFFSET] copy up to but not including a matching line\n\
1522 %%REGEXP%%[OFFSET] skip to, but not including a matching line\n\
1523 {INTEGER} repeat the previous pattern specified number of times\n\
1524 {*} repeat the previous pattern as many times as possible\n\
1526 A line OFFSET is a required `+' or `-' followed by a positive integer.\n\
1528 puts (_("\nReport bugs to <bug-textutils@gnu.org>."));
1530 exit (status == 0 ? EXIT_SUCCESS : EXIT_FAILURE);