1 /* csplit - split a file into sections determined by context lines
2 Copyright (C) 91, 1995-1999 Free Software Foundation, Inc.
4 This program is free software; you can redistribute it and/or modify
5 it under the terms of the GNU General Public License as published by
6 the Free Software Foundation; either version 2, or (at your option)
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU General Public License for more details.
14 You should have received a copy of the GNU General Public License
15 along with this program; if not, write to the Free Software Foundation,
16 Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */
18 /* Written by Stuart Kemp, cpsrk@groper.jcu.edu.au.
19 Modified by David MacKenzie, djm@gnu.ai.mit.edu. */
26 #include <sys/types.h>
35 #include "safe-read.h"
39 /* The official name of this program (e.g., no `g' prefix). */
40 #define PROGRAM_NAME "csplit"
42 #define AUTHORS "Stuart Kemp and David MacKenzie"
49 # define MAX(a,b) (((a) > (b)) ? (a) : (b))
57 /* Increment size of area for control records. */
60 /* The default prefix for output file names. */
61 #define DEFAULT_PREFIX "xx"
65 /* A compiled pattern arg. */
68 char *regexpr; /* Non-compiled regular expression. */
69 struct re_pattern_buffer re_compiled; /* Compiled regular expression. */
70 int offset; /* Offset from regexp to split at. */
71 uintmax_t lines_required; /* Number of lines required. */
72 uintmax_t repeat; /* Repeat count. */
73 int repeat_forever; /* Non-zero if `*' used as a repeat count. */
74 int argnum; /* ARGV index. */
75 boolean ignore; /* If true, produce no output (for regexp). */
78 /* Initial size of data area in buffers. */
79 #define START_SIZE 8191
81 /* Increment size for data area. */
82 #define INCR_SIZE 2048
84 /* Number of lines kept in each node in line list. */
88 /* Some small values to test the algorithms. */
89 # define START_SIZE 200
94 /* A string with a length count. */
101 /* Pointers to the beginnings of lines in the buffer area.
102 These structures are linked together if needed. */
105 unsigned used; /* Number of offsets used in this struct. */
106 unsigned insert_index; /* Next offset to use when inserting line. */
107 unsigned retrieve_index; /* Next index to use when retrieving line. */
108 struct cstring starts[CTRL_SIZE]; /* Lines in the data area. */
109 struct line *next; /* Next in linked list. */
112 /* The structure to hold the input lines.
113 Contains a pointer to the data area and a list containing
114 pointers to the individual lines. */
117 unsigned bytes_alloc; /* Size of the buffer area. */
118 unsigned bytes_used; /* Bytes used in the buffer area. */
119 unsigned start_line; /* First line number in this buffer. */
120 unsigned first_available; /* First line that can be retrieved. */
121 unsigned num_lines; /* Number of complete lines in this buffer. */
122 char *buffer; /* Data area. */
123 struct line *line_start; /* Head of list of pointers to lines. */
124 struct line *curr_line; /* The line start record currently in use. */
125 struct buffer_record *next;
128 static void close_output_file PARAMS ((void));
129 static void create_output_file PARAMS ((void));
130 static void delete_all_files PARAMS ((void));
131 static void save_line_to_file PARAMS ((const struct cstring *line));
132 void usage PARAMS ((int status));
134 /* The name this program was run with. */
137 /* Convert the number of 8-bit bytes of a binary representation to
138 the number of characters required to represent the same quantity
139 as an unsigned octal. For example, a 32-bit (4-byte) quantity may
140 require a field width as wide as 11 characters. */
141 static const unsigned int bytes_to_octal_digits[] =
142 {0, 3, 6, 8, 11, 14, 16, 19, 22, 25, 27, 30, 32, 35, 38, 41, 43};
144 /* Input file descriptor. */
145 static int input_desc = 0;
147 /* List of available buffers. */
148 static struct buffer_record *free_list = NULL;
150 /* Start of buffer list. */
151 static struct buffer_record *head = NULL;
153 /* Partially read line. */
154 static char *hold_area = NULL;
156 /* Number of chars in `hold_area'. */
157 static unsigned hold_count = 0;
159 /* Number of the last line in the buffers. */
160 static unsigned last_line_number = 0;
162 /* Number of the line currently being examined. */
163 static unsigned current_line = 0;
165 /* If TRUE, we have read EOF. */
166 static boolean have_read_eof = FALSE;
168 /* Name of output files. */
169 static char *filename_space = NULL;
171 /* Prefix part of output file names. */
172 static char *prefix = NULL;
174 /* Suffix part of output file names. */
175 static char *suffix = NULL;
177 /* Number of digits to use in output file names. */
178 static int digits = 2;
180 /* Number of files created so far. */
181 static unsigned int files_created = 0;
183 /* Number of bytes written to current file. */
184 static unsigned int bytes_written;
186 /* Output file pointer. */
187 static FILE *output_stream = NULL;
189 /* Output file name. */
190 static char *output_filename = NULL;
192 /* Perhaps it would be cleaner to pass arg values instead of indexes. */
193 static char **global_argv;
195 /* If TRUE, do not print the count of bytes in each output file. */
196 static boolean suppress_count;
198 /* If TRUE, remove output files on error. */
199 static boolean remove_files;
201 /* If TRUE, remove all output files which have a zero length. */
202 static boolean elide_empty_files;
204 /* The compiled pattern arguments, which determine how to split
206 static struct control *controls;
208 /* Number of elements in `controls'. */
209 static unsigned int control_used;
211 static struct option const longopts[] =
213 {"digits", required_argument, NULL, 'n'},
214 {"quiet", no_argument, NULL, 'q'},
215 {"silent", no_argument, NULL, 's'},
216 {"keep-files", no_argument, NULL, 'k'},
217 {"elide-empty-files", no_argument, NULL, 'z'},
218 {"prefix", required_argument, NULL, 'f'},
219 {"suffix-format", required_argument, NULL, 'b'},
220 {GETOPT_HELP_OPTION_DECL},
221 {GETOPT_VERSION_OPTION_DECL},
225 /* Optionally remove files created so far; then exit.
226 Called when an error detected. */
232 close_output_file ();
246 interrupt_handler (int sig)
249 struct sigaction sigact;
251 sigact.sa_handler = SIG_DFL;
252 sigemptyset (&sigact.sa_mask);
254 sigaction (sig, &sigact, NULL);
255 #else /* !SA_INTERRUPT */
256 signal (sig, SIG_DFL);
257 #endif /* SA_INTERRUPT */
259 kill (getpid (), sig);
262 /* Keep track of NUM chars of a partial line in buffer START.
263 These chars will be retrieved later when another large buffer is read.
264 It is not necessary to create a new buffer for these chars; instead,
265 we keep a pointer to the existing buffer. This buffer *is* on the
266 free list, and when the next buffer is obtained from this list
267 (even if it is this one), these chars will be placed at the
268 start of the new buffer. */
271 save_to_hold_area (char *start, unsigned int num)
277 /* Read up to MAX_N_BYTES chars from the input stream into DEST.
278 Return the number of chars read. */
281 read_input (char *dest, unsigned int max_n_bytes)
285 if (max_n_bytes == 0)
288 bytes_read = safe_read (input_desc, dest, max_n_bytes);
291 have_read_eof = TRUE;
295 error (0, errno, _("read error"));
302 /* Initialize existing line record P. */
305 clear_line_control (struct line *p)
309 p->retrieve_index = 0;
312 /* Initialize all line records in B. */
315 clear_all_line_control (struct buffer_record *b)
319 for (l = b->line_start; l; l = l->next)
320 clear_line_control (l);
323 /* Return a new, initialized line record. */
326 new_line_control (void)
330 p = (struct line *) xmalloc (sizeof (struct line));
333 clear_line_control (p);
338 /* Record LINE_START, which is the address of the start of a line
339 of length LINE_LEN in the large buffer, in the lines buffer of B. */
342 keep_new_line (struct buffer_record *b, char *line_start, int line_len)
346 /* If there is no existing area to keep line info, get some. */
347 if (b->line_start == NULL)
348 b->line_start = b->curr_line = new_line_control ();
350 /* If existing area for lines is full, get more. */
351 if (b->curr_line->used == CTRL_SIZE)
353 b->curr_line->next = new_line_control ();
354 b->curr_line = b->curr_line->next;
359 /* Record the start of the line, and update counters. */
360 l->starts[l->insert_index].str = line_start;
361 l->starts[l->insert_index].len = line_len;
366 /* Scan the buffer in B for newline characters
367 and record the line start locations and lengths in B.
368 Return the number of lines found in this buffer.
370 There may be an incomplete line at the end of the buffer;
371 a pointer is kept to this area, which will be used when
372 the next buffer is filled. */
375 record_line_starts (struct buffer_record *b)
377 char *line_start; /* Start of current line. */
378 char *line_end; /* End of each line found. */
379 unsigned int bytes_left; /* Length of incomplete last line. */
380 unsigned int lines; /* Number of lines found. */
381 unsigned int line_length; /* Length of each line found. */
383 if (b->bytes_used == 0)
387 line_start = b->buffer;
388 bytes_left = b->bytes_used;
392 line_end = memchr (line_start, '\n', bytes_left);
393 if (line_end == NULL)
395 line_length = line_end - line_start + 1;
396 keep_new_line (b, line_start, line_length);
397 bytes_left -= line_length;
398 line_start = line_end + 1;
402 /* Check for an incomplete last line. */
407 keep_new_line (b, line_start, bytes_left);
411 save_to_hold_area (line_start, bytes_left);
414 b->num_lines = lines;
415 b->first_available = b->start_line = last_line_number + 1;
416 last_line_number += lines;
421 /* Return a new buffer with room to store SIZE bytes, plus
422 an extra byte for safety. */
424 static struct buffer_record *
425 create_new_buffer (unsigned int size)
427 struct buffer_record *new_buffer;
429 new_buffer = (struct buffer_record *)
430 xmalloc (sizeof (struct buffer_record));
432 new_buffer->buffer = (char *) xmalloc (size + 1);
434 new_buffer->bytes_alloc = size;
435 new_buffer->line_start = new_buffer->curr_line = NULL;
440 /* Return a new buffer of at least MINSIZE bytes. If a buffer of at
441 least that size is currently free, use it, otherwise create a new one. */
443 static struct buffer_record *
444 get_new_buffer (unsigned int min_size)
446 struct buffer_record *p, *q;
447 struct buffer_record *new_buffer; /* Buffer to return. */
448 unsigned int alloc_size; /* Actual size that will be requested. */
450 alloc_size = START_SIZE;
451 while (min_size > alloc_size)
452 alloc_size += INCR_SIZE;
454 if (free_list == NULL)
455 new_buffer = create_new_buffer (alloc_size);
458 /* Use first-fit to find a buffer. */
459 p = new_buffer = NULL;
464 if (q->bytes_alloc >= min_size)
477 new_buffer = (q ? q : create_new_buffer (alloc_size));
479 new_buffer->curr_line = new_buffer->line_start;
480 clear_all_line_control (new_buffer);
483 new_buffer->num_lines = 0;
484 new_buffer->bytes_used = 0;
485 new_buffer->start_line = new_buffer->first_available = last_line_number + 1;
486 new_buffer->next = NULL;
491 /* Add buffer BUF to the list of free buffers. */
494 free_buffer (struct buffer_record *buf)
496 buf->next = free_list;
500 /* Append buffer BUF to the linked list of buffers that contain
501 some data yet to be processed. */
504 save_buffer (struct buffer_record *buf)
506 struct buffer_record *p;
509 buf->curr_line = buf->line_start;
515 for (p = head; p->next; p = p->next)
521 /* Fill a buffer of input.
523 Set the initial size of the buffer to a default.
524 Fill the buffer (from the hold area and input stream)
525 and find the individual lines.
526 If no lines are found (the buffer is too small to hold the next line),
527 release the current buffer (whose contents would have been put in the
528 hold area) and repeat the process with another large buffer until at least
529 one entire line has been read.
531 Return TRUE if a new buffer was obtained, otherwise false
532 (in which case end-of-file must have been encountered). */
537 struct buffer_record *b;
538 unsigned int bytes_wanted = START_SIZE; /* Minimum buffer size. */
539 unsigned int bytes_avail; /* Size of new buffer created. */
540 unsigned int lines_found; /* Number of lines in this new buffer. */
541 char *p; /* Place to load into buffer. */
546 /* We must make the buffer at least as large as the amount of data
547 in the partial line left over from the last call. */
548 if (bytes_wanted < hold_count)
549 bytes_wanted = hold_count;
553 b = get_new_buffer (bytes_wanted);
554 bytes_avail = b->bytes_alloc; /* Size of buffer returned. */
557 /* First check the `holding' area for a partial line. */
561 memcpy (p, hold_area, hold_count);
563 b->bytes_used += hold_count;
564 bytes_avail -= hold_count;
568 b->bytes_used += (unsigned int) read_input (p, bytes_avail);
570 lines_found = record_line_starts (b);
571 bytes_wanted = b->bytes_alloc * 2;
575 while (!lines_found && !have_read_eof);
580 return lines_found != 0;
583 /* Return the line number of the first line that has not yet been retrieved. */
586 get_first_line_in_buffer (void)
588 if (head == NULL && !load_buffer ())
589 error (EXIT_FAILURE, errno, _("input disappeared"));
591 return head->first_available;
594 /* Return a pointer to the logical first line in the buffer and make the
595 next line the logical first line.
596 Return NULL if there is no more input. */
598 static struct cstring *
601 struct cstring *line; /* Return value. */
602 struct line *l; /* For convenience. */
604 if (head == NULL && !load_buffer ())
607 if (current_line < head->first_available)
608 current_line = head->first_available;
610 ++(head->first_available);
614 line = &l->starts[l->retrieve_index];
616 /* Advance index to next line. */
617 if (++l->retrieve_index == l->used)
619 /* Go on to the next line record. */
620 head->curr_line = l->next;
621 if (head->curr_line == NULL || head->curr_line->used == 0)
623 /* Go on to the next data block. */
624 struct buffer_record *b = head;
633 /* Search the buffers for line LINENUM, reading more input if necessary.
634 Return a pointer to the line, or NULL if it is not found in the file. */
636 static struct cstring *
637 find_line (unsigned int linenum)
639 struct buffer_record *b;
641 if (head == NULL && !load_buffer ())
644 if (linenum < head->start_line)
649 if (linenum < b->start_line + b->num_lines)
651 /* The line is in this buffer. */
653 unsigned int offset; /* How far into the buffer the line is. */
656 offset = linenum - b->start_line;
657 /* Find the control record. */
658 while (offset >= CTRL_SIZE)
663 return &l->starts[offset];
665 if (b->next == NULL && !load_buffer ())
667 b = b->next; /* Try the next data block. */
671 /* Return TRUE if no more lines are available for input. */
676 return (find_line (current_line + 1) == NULL) ? TRUE : FALSE;
679 /* Set the name of the input file to NAME and open it. */
682 set_input_file (const char *name)
684 if (STREQ (name, "-"))
688 input_desc = open (name, O_RDONLY);
690 error (EXIT_FAILURE, errno, "%s", name);
694 /* Write all lines from the beginning of the buffer up to, but
695 not including, line LAST_LINE, to the current output file.
696 If IGNORE is TRUE, do not output lines selected here.
697 ARGNUM is the index in ARGV of the current pattern. */
700 write_to_file (unsigned int last_line, boolean ignore, int argnum)
702 struct cstring *line;
703 unsigned int first_line; /* First available input line. */
704 unsigned int lines; /* Number of lines to output. */
707 first_line = get_first_line_in_buffer ();
709 if (first_line > last_line)
711 error (0, 0, _("%s: line number out of range"), global_argv[argnum]);
715 lines = last_line - first_line;
717 for (i = 0; i < lines; i++)
719 line = remove_line ();
722 error (0, 0, _("%s: line number out of range"), global_argv[argnum]);
726 save_line_to_file (line);
730 /* Output any lines left after all regexps have been processed. */
733 dump_rest_of_file (void)
735 struct cstring *line;
737 while ((line = remove_line ()) != NULL)
738 save_line_to_file (line);
741 /* Handle an attempt to read beyond EOF under the control of record P,
742 on iteration REPETITION if nonzero. */
745 handle_line_error (const struct control *p, int repetition)
747 char buf[LONGEST_HUMAN_READABLE + 1];
749 fprintf (stderr, _("%s: `%s': line number out of range"),
750 program_name, human_readable (p->lines_required, buf, 1, 1)); /* human_readable formats the count as text in BUF, hence `%s'. */
752 fprintf (stderr, _(" on repetition %d\n"), repetition);
754 fprintf (stderr, "\n");
759 /* Determine the line number that marks the end of this file,
760 then get those lines and save them to the output file.
761 P is the control record.
762 REPETITION is the repetition number. */
765 process_line_count (const struct control *p, int repetition)
767 unsigned int linenum;
768 uintmax_t last_line_to_save = p->lines_required * (repetition + 1);
769 struct cstring *line;
771 create_output_file ();
773 linenum = get_first_line_in_buffer ();
775 /* Initially, I wanted to assert linenum < last_line_to_save, but that
776 condition is false for the valid command: echo | csplit - 1 '{*}'.
777 So, relax it just a little. */
778 assert ((linenum == 1 && last_line_to_save == 1)
779 || linenum < last_line_to_save);
781 while (linenum++ < last_line_to_save)
783 line = remove_line ();
785 handle_line_error (p, repetition);
786 save_line_to_file (line);
789 close_output_file ();
791 /* Ensure that the line number specified is not 1 greater than
792 the number of lines in the file. */
793 if (no_more_lines ())
794 handle_line_error (p, repetition);
798 regexp_error (struct control *p, int repetition, boolean ignore)
800 fprintf (stderr, _("%s: `%s': match not found"),
801 program_name, global_argv[p->argnum]);
804 fprintf (stderr, _(" on repetition %d\n"), repetition);
806 fprintf (stderr, "\n");
810 dump_rest_of_file ();
811 close_output_file ();
816 /* Read the input until a line matches the regexp in P, outputting
817 it unless P->IGNORE is TRUE.
818 REPETITION is this repeat-count; 0 means the first time. */
821 process_regexp (struct control *p, int repetition)
823 struct cstring *line; /* From input file. */
824 unsigned int line_len; /* To make "$" in regexps work. */
825 unsigned int break_line; /* First line number of next file. */
826 boolean ignore = p->ignore; /* If TRUE, skip this section. */
830 create_output_file ();
832 /* If there is no offset for the regular expression, or
833 it is positive, then it is not necessary to buffer the lines. */
839 line = find_line (++current_line);
842 if (p->repeat_forever)
846 dump_rest_of_file ();
847 close_output_file ();
852 regexp_error (p, repetition, ignore);
854 line_len = line->len;
855 if (line->str[line_len - 1] == '\n')
857 ret = re_search (&p->re_compiled, line->str, line_len,
858 0, line_len, (struct re_registers *) 0);
861 error (0, 0, _("error in regular expression search"));
866 line = remove_line ();
868 save_line_to_file (line);
876 /* Buffer the lines. */
879 line = find_line (++current_line);
882 if (p->repeat_forever)
886 dump_rest_of_file ();
887 close_output_file ();
892 regexp_error (p, repetition, ignore);
894 line_len = line->len;
895 if (line->str[line_len - 1] == '\n')
897 ret = re_search (&p->re_compiled, line->str, line_len,
898 0, line_len, (struct re_registers *) 0);
901 error (0, 0, _("error in regular expression search"));
909 /* Account for any offset from this regexp. */
910 break_line = current_line + p->offset;
912 write_to_file (break_line, ignore, p->argnum);
915 close_output_file ();
918 current_line = break_line;
921 /* Split the input file according to the control records we have built. */
928 for (i = 0; i < control_used; i++)
930 if (controls[i].regexpr)
932 for (j = 0; (controls[i].repeat_forever
933 || j <= controls[i].repeat); j++)
934 process_regexp (&controls[i], j);
938 for (j = 0; (controls[i].repeat_forever
939 || j <= controls[i].repeat); j++)
940 process_line_count (&controls[i], j);
944 create_output_file ();
945 dump_rest_of_file ();
946 close_output_file ();
949 /* Return the name of output file number NUM. */
952 make_filename (unsigned int num)
954 strcpy (filename_space, prefix);
956 sprintf (filename_space+strlen(prefix), suffix, num);
958 sprintf (filename_space+strlen(prefix), "%0*d", digits, num);
959 return filename_space;
962 /* Create the next output file. */
965 create_output_file (void)
967 output_filename = make_filename (files_created);
968 output_stream = fopen (output_filename, "w");
969 if (output_stream == NULL)
971 error (0, errno, "%s", output_filename);
978 /* Delete all the files we have created. */
981 delete_all_files (void)
986 for (i = 0; i < files_created; i++)
988 name = make_filename (i);
990 error (0, errno, "%s", name);
994 /* Close the current output file and print the count
995 of characters in this file. */
998 close_output_file (void)
1002 if (ferror (output_stream) || fclose (output_stream) == EOF)
1004 error (0, errno, _("write error for `%s'"), output_filename);
1005 output_stream = NULL;
1008 if (bytes_written == 0 && elide_empty_files)
1010 if (unlink (output_filename))
1011 error (0, errno, "%s", output_filename);
1016 /* FIXME: if we write to stdout here, we have to close stdout
1017 and check for errors. */
1018 if (!suppress_count)
1019 fprintf (stdout, "%u\n", bytes_written); /* bytes_written is unsigned int, so print it with %u. */
1021 output_stream = NULL;
1025 /* Save line LINE to the output file and
1026 increment the character count for the current file. */
1029 save_line_to_file (const struct cstring *line)
1031 fwrite (line->str, sizeof (char), line->len, output_stream);
1032 bytes_written += line->len;
1035 /* Return a new, initialized control record. */
1037 static struct control *
1038 new_control_record (void)
1040 static unsigned control_allocated = 0; /* Total space allocated. */
1043 if (control_allocated == 0)
1045 control_allocated = ALLOC_SIZE;
1046 controls = (struct control *)
1047 xmalloc (sizeof (struct control) * control_allocated);
1049 else if (control_used == control_allocated)
1051 control_allocated += ALLOC_SIZE;
1052 controls = (struct control *)
1053 xrealloc ((char *) controls,
1054 sizeof (struct control) * control_allocated);
1056 p = &controls[control_used++];
1059 p->repeat_forever = 0;
1060 p->lines_required = 0;
1065 /* Check if there is a numeric offset after a regular expression.
1066 STR is the entire command line argument.
1067 P is the control record for this regular expression.
1068 NUM is the numeric part of STR. */
1071 check_for_offset (struct control *p, const char *str, const char *num)
1075 if (*num != '-' && *num != '+')
1076 error (EXIT_FAILURE, 0, _("%s: `+' or `-' expected after delimeter"), str);
1078 if (xstrtoul (num + 1, NULL, 10, &val, "") != LONGINT_OK
1080 error (EXIT_FAILURE, 0, _("%s: integer expected after `%c'"), str, *num);
1081 p->offset = (unsigned int) val;
1084 p->offset = -p->offset;
1087 /* Given that the first character of command line arg STR is '{',
1088 make sure that the rest of the string is a valid repeat count
1089 and store its value in P.
1090 ARGNUM is the ARGV index of STR. */
1093 parse_repeat_count (int argnum, struct control *p, char *str)
1098 end = str + strlen (str) - 1;
1100 error (EXIT_FAILURE, 0, _("%s: `}' is required in repeat count"), str);
1103 if (str+1 == end-1 && *(str+1) == '*')
1104 p->repeat_forever = 1;
1107 if (xstrtoumax (str + 1, NULL, 10, &val, "") != LONGINT_OK)
1109 error (EXIT_FAILURE, 0,
1110 _("%s}: integer required between `{' and `}'"),
1111 global_argv[argnum]);
1119 /* Extract the regular expression from STR and check for a numeric offset.
1120 STR should start with the regexp delimiter character.
1121 Return a new control record for the regular expression.
1122 ARGNUM is the ARGV index of STR.
1123 Unless IGNORE is TRUE, mark these lines for output. */
1125 static struct control *
1126 extract_regexp (int argnum, boolean ignore, char *str)
1128 int len; /* Number of chars in this regexp. */
1130 char *closing_delim;
1134 closing_delim = strrchr (str + 1, delim);
1135 if (closing_delim == NULL)
1136 error (EXIT_FAILURE, 0,
1137 _("%s: closing delimeter `%c' missing"), str, delim);
1139 len = closing_delim - str - 1;
1140 p = new_control_record ();
1144 p->regexpr = (char *) xmalloc ((unsigned) (len + 1));
1145 strncpy (p->regexpr, str + 1, len);
1146 p->re_compiled.allocated = len * 2;
1147 p->re_compiled.buffer = (unsigned char *) xmalloc (p->re_compiled.allocated);
1148 p->re_compiled.fastmap = xmalloc (256);
1149 p->re_compiled.translate = 0;
1150 err = re_compile_pattern (p->regexpr, len, &p->re_compiled);
1153 error (0, 0, _("%s: invalid regular expression: %s"), str, err);
1157 if (closing_delim[1])
1158 check_for_offset (p, str, closing_delim + 1);
1163 /* Extract the break patterns from args START through ARGC - 1 of ARGV.
1164 After each pattern, check if the next argument is a repeat count. */
1167 parse_patterns (int argc, int start, char **argv)
1169 int i; /* Index into ARGV. */
1170 struct control *p; /* New control record created. */
1172 static uintmax_t last_val = 0;
1174 for (i = start; i < argc; i++)
1176 if (*argv[i] == '/' || *argv[i] == '%')
1178 p = extract_regexp (i, *argv[i] == '%', argv[i]);
1182 p = new_control_record ();
1185 if (xstrtoumax (argv[i], NULL, 10, &val, "") != LONGINT_OK)
1186 error (EXIT_FAILURE, 0, _("%s: invalid pattern"), argv[i]);
1188 error (EXIT_FAILURE, 0,
1189 _("%s: line number must be greater than zero"),
1193 char buf[LONGEST_HUMAN_READABLE + 1];
1194 error (EXIT_FAILURE, 0,
1195 _("line number `%s' is smaller than preceding line number, %s"),
1196 argv[i], human_readable (last_val, buf, 1, 1));
1199 if (val == last_val)
1201 _("warning: line number `%s' is the same as preceding line number"),
1206 p->lines_required = val;
1209 if (i + 1 < argc && *argv[i + 1] == '{')
1211 /* We have a repeat count. */
1213 parse_repeat_count (i, p, argv[i]);
1219 get_format_flags (char **format_ptr)
1223 for (; **format_ptr; (*format_ptr)++)
1225 switch (**format_ptr)
1236 count += 2; /* Allow for 0x prefix preceding an `x' conversion. */
1247 get_format_width (char **format_ptr)
1253 start = *format_ptr;
1254 for (; ISDIGIT (**format_ptr); (*format_ptr)++)
1257 ch_save = **format_ptr;
1258 **format_ptr = '\0';
1259 /* In the case where no minimum field width is explicitly specified,
1260 allow for enough octal digits to represent the value of LONG_MAX. */
1261 count = ((*format_ptr == start)
1262 ? bytes_to_octal_digits[sizeof (long)]
1264 **format_ptr = ch_save;
1269 get_format_prec (char **format_ptr)
1276 if (**format_ptr != '.')
1280 if (**format_ptr == '-' || **format_ptr == '+')
1282 is_negative = (**format_ptr == '-');
1290 start = *format_ptr;
1291 for (; ISDIGIT (**format_ptr); (*format_ptr)++)
1294 /* ANSI 4.9.6.1 says that if the precision is negative, it's as good as
1297 start = *format_ptr;
1299 ch_save = **format_ptr;
1300 **format_ptr = '\0';
1301 count = (*format_ptr == start) ? 11 : atoi (start);
1302 **format_ptr = ch_save;
1308 get_format_conv_type (char **format_ptr)
1310 int ch = *((*format_ptr)++);
1323 error (EXIT_FAILURE, 0, _("missing conversion specifier in suffix"));
1328 error (EXIT_FAILURE, 0,
1329 _("invalid conversion specifier in suffix: %c"), ch);
1331 error (EXIT_FAILURE, 0,
1332 _("invalid conversion specifier in suffix: \\%.3o"), ch);
1337 max_out (char *format)
1339 unsigned out_count = 0;
1340 unsigned percents = 0;
1351 out_count += get_format_flags (&format);
1353 int width = get_format_width (&format);
1354 int prec = get_format_prec (&format);
1356 out_count += MAX (width, prec);
1358 get_format_conv_type (&format);
1363 error (EXIT_FAILURE, 0,
1364 _("missing %% conversion specification in suffix"));
1365 else if (percents > 1)
1366 error (EXIT_FAILURE, 0,
1367 _("too many %% conversion specifications in suffix"));
1373 main (int argc, char **argv)
1378 struct sigaction oldact, newact;
1381 program_name = argv[0];
1382 setlocale (LC_ALL, "");
1383 bindtextdomain (PACKAGE, LOCALEDIR);
1384 textdomain (PACKAGE);
1389 suppress_count = FALSE;
1390 remove_files = TRUE;
1391 prefix = DEFAULT_PREFIX;
1393 /* Change the way xmalloc and xrealloc fail. */
1394 xalloc_fail_func = cleanup;
1397 newact.sa_handler = interrupt_handler;
1398 sigemptyset (&newact.sa_mask);
1399 newact.sa_flags = 0;
1401 sigaction (SIGHUP, NULL, &oldact);
1402 if (oldact.sa_handler != SIG_IGN)
1403 sigaction (SIGHUP, &newact, NULL);
1405 sigaction (SIGINT, NULL, &oldact);
1406 if (oldact.sa_handler != SIG_IGN)
1407 sigaction (SIGINT, &newact, NULL);
1409 sigaction (SIGQUIT, NULL, &oldact);
1410 if (oldact.sa_handler != SIG_IGN)
1411 sigaction (SIGQUIT, &newact, NULL);
1413 sigaction (SIGTERM, NULL, &oldact);
1414 if (oldact.sa_handler != SIG_IGN)
1415 sigaction (SIGTERM, &newact, NULL);
1416 #else /* not SA_INTERRUPT */
1417 if (signal (SIGHUP, SIG_IGN) != SIG_IGN)
1418 signal (SIGHUP, interrupt_handler);
1419 if (signal (SIGINT, SIG_IGN) != SIG_IGN)
1420 signal (SIGINT, interrupt_handler);
1421 if (signal (SIGQUIT, SIG_IGN) != SIG_IGN)
1422 signal (SIGQUIT, interrupt_handler);
1423 if (signal (SIGTERM, SIG_IGN) != SIG_IGN)
1424 signal (SIGTERM, interrupt_handler);
1425 #endif /* not SA_INTERRUPT */
1427 while ((optc = getopt_long (argc, argv, "f:b:kn:sqz", longopts, NULL)) != -1)
1442 remove_files = FALSE;
1446 if (xstrtoul (optarg, NULL, 10, &val, "") != LONGINT_OK
1448 error (EXIT_FAILURE, 0, _("%s: invalid number"), optarg);
1454 suppress_count = TRUE;
1458 elide_empty_files = TRUE;
1461 case_GETOPT_HELP_CHAR;
1463 case_GETOPT_VERSION_CHAR (PROGRAM_NAME, AUTHORS);
1469 if (argc - optind < 2)
1471 error (0, 0, _("too few arguments"));
1476 filename_space = (char *) xmalloc (strlen (prefix) + max_out (suffix) + 2);
1478 filename_space = (char *) xmalloc (strlen (prefix) + digits + 2);
1480 set_input_file (argv[optind++]);
1482 parse_patterns (argc, optind, argv);
1486 if (close (input_desc) < 0)
1488 error (0, errno, _("read error"));
1492 if (!suppress_count && (ferror (stdout) || fclose (stdout) == EOF))
1493 error (EXIT_FAILURE, errno, _("write error"));
1495 exit (EXIT_SUCCESS);
1502 fprintf (stderr, _("Try `%s --help' for more information.\n"),
1507 Usage: %s [OPTION]... FILE PATTERN...\n\
1511 Output pieces of FILE separated by PATTERN(s) to files `xx01', `xx02', ...,\n\
1512 and output byte counts of each piece to standard output.\n\
1514 -b, --suffix-format=FORMAT use sprintf FORMAT instead of %%d\n\
1515 -f, --prefix=PREFIX use PREFIX instead of `xx'\n\
1516 -k, --keep-files do not remove output files on errors\n\
1517 -n, --digits=DIGITS use specified number of digits instead of 2\n\
1518 -s, --quiet, --silent do not print counts of output file sizes\n\
1519 -z, --elide-empty-files remove empty output files\n\
1520 --help display this help and exit\n\
1521 --version output version information and exit\n\
1523 Read standard input if FILE is -. Each PATTERN may be:\n\
1525 INTEGER copy up to but not including specified line number\n\
1526 /REGEXP/[OFFSET] copy up to but not including a matching line\n\
1527 %%REGEXP%%[OFFSET] skip to, but not including a matching line\n\
1528 {INTEGER} repeat the previous pattern specified number of times\n\
1529 {*} repeat the previous pattern as many times as possible\n\
1531 A line OFFSET is a required `+' or `-' followed by a positive integer.\n\
1533 puts (_("\nReport bugs to <bug-textutils@gnu.org>."));
1535 exit (status == 0 ? EXIT_SUCCESS : EXIT_FAILURE);