1 /* csplit - split a file into sections determined by context lines
2 Copyright (C) 91, 1995-2001 Free Software Foundation, Inc.
4 This program is free software; you can redistribute it and/or modify
5 it under the terms of the GNU General Public License as published by
6 the Free Software Foundation; either version 2, or (at your option)
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU General Public License for more details.
14 You should have received a copy of the GNU General Public License
15 along with this program; if not, write to the Free Software Foundation,
16 Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */
18 /* Written by Stuart Kemp, cpsrk@groper.jcu.edu.au.
19 Modified by David MacKenzie, djm@gnu.ai.mit.edu. */
26 #include <sys/types.h>
36 #include "safe-read.h"
40 /* The official name of this program (e.g., no `g' prefix). */
41 #define PROGRAM_NAME "csplit"
43 #define AUTHORS N_ ("Stuart Kemp and David MacKenzie")
54 /* Increment size of area for control records. */
57 /* The default prefix for output file names. */
58 #define DEFAULT_PREFIX "xx"
62 /* A compiled pattern arg. */
65 char *regexpr; /* Non-compiled regular expression. */
66 struct re_pattern_buffer re_compiled; /* Compiled regular expression. */
67 int offset; /* Offset from regexp to split at. */
68 uintmax_t lines_required; /* Number of lines required. */
69 uintmax_t repeat; /* Repeat count. */
70 int repeat_forever; /* Non-zero if `*' used as a repeat count. */
71 int argnum; /* ARGV index. */
72 boolean ignore; /* If true, produce no output (for regexp). */
75 /* Initial size of data area in buffers. */
76 #define START_SIZE 8191
78 /* Increment size for data area. */
79 #define INCR_SIZE 2048
81 /* Number of lines kept in each node in line list. */
85 /* Some small values to test the algorithms. */
86 # define START_SIZE 200
91 /* A string with a length count. */
98 /* Pointers to the beginnings of lines in the buffer area.
99 These structures are linked together if needed. */
102 unsigned used; /* Number of offsets used in this struct. */
103 unsigned insert_index; /* Next offset to use when inserting line. */
104 unsigned retrieve_index; /* Next index to use when retrieving line. */
105 struct cstring starts[CTRL_SIZE]; /* Lines in the data area. */
106 struct line *next; /* Next in linked list. */
109 /* The structure to hold the input lines.
110 Contains a pointer to the data area and a list containing
111 pointers to the individual lines. */
114 unsigned bytes_alloc; /* Size of the buffer area. */
115 unsigned bytes_used; /* Bytes used in the buffer area. */
116 unsigned start_line; /* First line number in this buffer. */
117 unsigned first_available; /* First line that can be retrieved. */
118 unsigned num_lines; /* Number of complete lines in this buffer. */
119 char *buffer; /* Data area. */
120 struct line *line_start; /* Head of list of pointers to lines. */
121 struct line *curr_line; /* The line start record currently in use. */
122 struct buffer_record *next;
125 static void close_output_file PARAMS ((void));
126 static void create_output_file PARAMS ((void));
127 static void delete_all_files PARAMS ((void));
128 static void save_line_to_file PARAMS ((const struct cstring *line));
129 void usage PARAMS ((int status));
131 /* The name this program was run with. */
134 /* Convert the number of 8-bit bytes of a binary representation to
135 the number of characters required to represent the same quantity
136 as an unsigned octal. For example, a 32-bit (4-byte) quantity may
137 require a field width as wide as 11 characters. */
138 static const unsigned int bytes_to_octal_digits[] =
139 {0, 3, 6, 8, 11, 14, 16, 19, 22, 25, 27, 30, 32, 35, 38, 41, 43};
141 /* Input file descriptor. */
142 static int input_desc = 0;
144 /* List of available buffers. */
145 static struct buffer_record *free_list = NULL;
147 /* Start of buffer list. */
148 static struct buffer_record *head = NULL;
150 /* Partially read line. */
151 static char *hold_area = NULL;
153 /* Number of chars in `hold_area'. */
154 static unsigned hold_count = 0;
156 /* Number of the last line in the buffers. */
157 static unsigned last_line_number = 0;
159 /* Number of the line currently being examined. */
160 static unsigned current_line = 0;
162 /* If TRUE, we have read EOF. */
163 static boolean have_read_eof = FALSE;
165 /* Name of output files. */
166 static char *filename_space = NULL;
168 /* Prefix part of output file names. */
169 static char *prefix = NULL;
171 /* Suffix part of output file names. */
172 static char *suffix = NULL;
174 /* Number of digits to use in output file names. */
175 static int digits = 2;
177 /* Number of files created so far. */
178 static unsigned int files_created = 0;
180 /* Number of bytes written to current file. */
181 static unsigned int bytes_written;
183 /* Output file pointer. */
184 static FILE *output_stream = NULL;
186 /* Output file name. */
187 static char *output_filename = NULL;
189 /* Perhaps it would be cleaner to pass arg values instead of indexes. */
190 static char **global_argv;
192 /* If TRUE, do not print the count of bytes in each output file. */
193 static boolean suppress_count;
195 /* If TRUE, remove output files on error. */
196 static boolean remove_files;
198 /* If TRUE, remove all output files which have a zero length. */
199 static boolean elide_empty_files;
201 /* The compiled pattern arguments, which determine how to split
203 static struct control *controls;
205 /* Number of elements in `controls'. */
206 static unsigned int control_used;
208 static struct option const longopts[] =
210 {"digits", required_argument, NULL, 'n'},
211 {"quiet", no_argument, NULL, 'q'},
212 {"silent", no_argument, NULL, 's'},
213 {"keep-files", no_argument, NULL, 'k'},
214 {"elide-empty-files", no_argument, NULL, 'z'},
215 {"prefix", required_argument, NULL, 'f'},
216 {"suffix-format", required_argument, NULL, 'b'},
217 {GETOPT_HELP_OPTION_DECL},
218 {GETOPT_VERSION_OPTION_DECL},
222 /* Optionally remove files created so far; then exit.
223 Called when an error detected. */
229 close_output_file ();
243 interrupt_handler (int sig)
246 struct sigaction sigact;
248 sigact.sa_handler = SIG_DFL;
249 sigemptyset (&sigact.sa_mask);
251 sigaction (sig, &sigact, NULL);
253 signal (sig, SIG_DFL);
256 kill (getpid (), sig);
259 /* Keep track of NUM chars of a partial line in buffer START.
260 These chars will be retrieved later when another large buffer is read.
261 It is not necessary to create a new buffer for these chars; instead,
262 we keep a pointer to the existing buffer. This buffer *is* on the
263 free list, and when the next buffer is obtained from this list
264 (even if it is this one), these chars will be placed at the
265 start of the new buffer. */
268 save_to_hold_area (char *start, unsigned int num)
274 /* Read up to MAX_N_BYTES chars from the input stream into DEST.
275 Return the number of chars read. */
278 read_input (char *dest, unsigned int max_n_bytes)
282 if (max_n_bytes == 0)
285 bytes_read = safe_read (input_desc, dest, max_n_bytes);
288 have_read_eof = TRUE;
292 error (0, errno, _("read error"));
299 /* Initialize existing line record P. */
302 clear_line_control (struct line *p)
306 p->retrieve_index = 0;
309 /* Initialize all line records in B. */
312 clear_all_line_control (struct buffer_record *b)
316 for (l = b->line_start; l; l = l->next)
317 clear_line_control (l);
320 /* Return a new, initialized line record. */
323 new_line_control (void)
327 p = (struct line *) xmalloc (sizeof (struct line));
330 clear_line_control (p);
335 /* Record LINE_START, which is the address of the start of a line
336 of length LINE_LEN in the large buffer, in the lines buffer of B. */
339 keep_new_line (struct buffer_record *b, char *line_start, int line_len)
343 /* If there is no existing area to keep line info, get some. */
344 if (b->line_start == NULL)
345 b->line_start = b->curr_line = new_line_control ();
347 /* If existing area for lines is full, get more. */
348 if (b->curr_line->used == CTRL_SIZE)
350 b->curr_line->next = new_line_control ();
351 b->curr_line = b->curr_line->next;
356 /* Record the start of the line, and update counters. */
357 l->starts[l->insert_index].str = line_start;
358 l->starts[l->insert_index].len = line_len;
363 /* Scan the buffer in B for newline characters
364 and record the line start locations and lengths in B.
365 Return the number of lines found in this buffer.
367 There may be an incomplete line at the end of the buffer;
368 a pointer is kept to this area, which will be used when
369 the next buffer is filled. */
372 record_line_starts (struct buffer_record *b)
374 char *line_start; /* Start of current line. */
375 char *line_end; /* End of each line found. */
376 unsigned int bytes_left; /* Length of incomplete last line. */
377 unsigned int lines; /* Number of lines found. */
378 unsigned int line_length; /* Length of each line found. */
380 if (b->bytes_used == 0)
384 line_start = b->buffer;
385 bytes_left = b->bytes_used;
389 line_end = memchr (line_start, '\n', bytes_left);
390 if (line_end == NULL)
392 line_length = line_end - line_start + 1;
393 keep_new_line (b, line_start, line_length);
394 bytes_left -= line_length;
395 line_start = line_end + 1;
399 /* Check for an incomplete last line. */
404 keep_new_line (b, line_start, bytes_left);
408 save_to_hold_area (line_start, bytes_left);
411 b->num_lines = lines;
412 b->first_available = b->start_line = last_line_number + 1;
413 last_line_number += lines;
418 /* Return a new buffer with room to store SIZE bytes, plus
419 an extra byte for safety. */
421 static struct buffer_record *
422 create_new_buffer (unsigned int size)
424 struct buffer_record *new_buffer;
426 new_buffer = (struct buffer_record *)
427 xmalloc (sizeof (struct buffer_record));
429 new_buffer->buffer = (char *) xmalloc (size + 1);
431 new_buffer->bytes_alloc = size;
432 new_buffer->line_start = new_buffer->curr_line = NULL;
437 /* Return a new buffer of at least MINSIZE bytes. If a buffer of at
438 least that size is currently free, use it, otherwise create a new one. */
440 static struct buffer_record *
441 get_new_buffer (unsigned int min_size)
443 struct buffer_record *p, *q;
444 struct buffer_record *new_buffer; /* Buffer to return. */
445 unsigned int alloc_size; /* Actual size that will be requested. */
447 alloc_size = START_SIZE;
448 while (min_size > alloc_size)
449 alloc_size += INCR_SIZE;
451 if (free_list == NULL)
452 new_buffer = create_new_buffer (alloc_size);
455 /* Use first-fit to find a buffer. */
456 p = new_buffer = NULL;
461 if (q->bytes_alloc >= min_size)
474 new_buffer = (q ? q : create_new_buffer (alloc_size));
476 new_buffer->curr_line = new_buffer->line_start;
477 clear_all_line_control (new_buffer);
480 new_buffer->num_lines = 0;
481 new_buffer->bytes_used = 0;
482 new_buffer->start_line = new_buffer->first_available = last_line_number + 1;
483 new_buffer->next = NULL;
488 /* Add buffer BUF to the list of free buffers. */
491 free_buffer (struct buffer_record *buf)
493 buf->next = free_list;
497 /* Append buffer BUF to the linked list of buffers that contain
498 some data yet to be processed. */
501 save_buffer (struct buffer_record *buf)
503 struct buffer_record *p;
506 buf->curr_line = buf->line_start;
512 for (p = head; p->next; p = p->next)
518 /* Fill a buffer of input.
520 Set the initial size of the buffer to a default.
521 Fill the buffer (from the hold area and input stream)
522 and find the individual lines.
523 If no lines are found (the buffer is too small to hold the next line),
524 release the current buffer (whose contents would have been put in the
525 hold area) and repeat the process with another large buffer until at least
526 one entire line has been read.
528 Return TRUE if a new buffer was obtained, otherwise false
529 (in which case end-of-file must have been encountered). */
534 struct buffer_record *b;
535 unsigned int bytes_wanted = START_SIZE; /* Minimum buffer size. */
536 unsigned int bytes_avail; /* Size of new buffer created. */
537 unsigned int lines_found; /* Number of lines in this new buffer. */
538 char *p; /* Place to load into buffer. */
543 /* We must make the buffer at least as large as the amount of data
544 in the partial line left over from the last call. */
545 if (bytes_wanted < hold_count)
546 bytes_wanted = hold_count;
550 b = get_new_buffer (bytes_wanted);
551 bytes_avail = b->bytes_alloc; /* Size of buffer returned. */
554 /* First check the `holding' area for a partial line. */
558 memcpy (p, hold_area, hold_count);
560 b->bytes_used += hold_count;
561 bytes_avail -= hold_count;
565 b->bytes_used += (unsigned int) read_input (p, bytes_avail);
567 lines_found = record_line_starts (b);
568 bytes_wanted = b->bytes_alloc * 2;
572 while (!lines_found && !have_read_eof);
577 return lines_found != 0;
580 /* Return the line number of the first line that has not yet been retrieved. */
583 get_first_line_in_buffer (void)
585 if (head == NULL && !load_buffer ())
586 error (EXIT_FAILURE, errno, _("input disappeared"));
588 return head->first_available;
591 /* Return a pointer to the logical first line in the buffer and make the
592 next line the logical first line.
593 Return NULL if there is no more input. */
595 static struct cstring *
598 struct cstring *line; /* Return value. */
599 struct line *l; /* For convenience. */
601 if (head == NULL && !load_buffer ())
604 if (current_line < head->first_available)
605 current_line = head->first_available;
607 ++(head->first_available);
611 line = &l->starts[l->retrieve_index];
613 /* Advance index to next line. */
614 if (++l->retrieve_index == l->used)
616 /* Go on to the next line record. */
617 head->curr_line = l->next;
618 if (head->curr_line == NULL || head->curr_line->used == 0)
620 /* Go on to the next data block. */
621 struct buffer_record *b = head;
630 /* Search the buffers for line LINENUM, reading more input if necessary.
631 Return a pointer to the line, or NULL if it is not found in the file. */
633 static struct cstring *
634 find_line (unsigned int linenum)
636 struct buffer_record *b;
638 if (head == NULL && !load_buffer ())
641 if (linenum < head->start_line)
646 if (linenum < b->start_line + b->num_lines)
648 /* The line is in this buffer. */
650 unsigned int offset; /* How far into the buffer the line is. */
653 offset = linenum - b->start_line;
654 /* Find the control record. */
655 while (offset >= CTRL_SIZE)
660 return &l->starts[offset];
662 if (b->next == NULL && !load_buffer ())
664 b = b->next; /* Try the next data block. */
668 /* Return TRUE if no more lines are available for input (the line after current_line cannot be found). */
673 return (find_line (current_line + 1) == NULL) ? TRUE : FALSE;
676 /* Set the name of the input file to NAME and open it. */
679 set_input_file (const char *name)
681 if (STREQ (name, "-"))
685 input_desc = open (name, O_RDONLY);
687 error (EXIT_FAILURE, errno, "%s", name);
691 /* Write all lines from the beginning of the buffer up to, but
692 not including, line LAST_LINE, to the current output file.
693 If IGNORE is TRUE, do not output lines selected here.
694 ARGNUM is the index in ARGV of the current pattern. */
697 write_to_file (unsigned int last_line, boolean ignore, int argnum)
699 struct cstring *line;
700 unsigned int first_line; /* First available input line. */
701 unsigned int lines; /* Number of lines to output. */
704 first_line = get_first_line_in_buffer ();
706 if (first_line > last_line)
708 error (0, 0, _("%s: line number out of range"), global_argv[argnum]);
712 lines = last_line - first_line;
714 for (i = 0; i < lines; i++)
716 line = remove_line ();
719 error (0, 0, _("%s: line number out of range"), global_argv[argnum]);
723 save_line_to_file (line);
727 /* Output any lines left after all regexps have been processed. */
730 dump_rest_of_file (void)
732 struct cstring *line;
734 while ((line = remove_line ()) != NULL)
735 save_line_to_file (line);
738 /* Handle an attempt to read beyond EOF under the control of record P,
739 on iteration REPETITION if nonzero. */
742 handle_line_error (const struct control *p, int repetition)
744 char buf[LONGEST_HUMAN_READABLE + 1];
746 fprintf (stderr, _("%s: `%s': line number out of range"),
747 program_name, human_readable (p->lines_required, buf, 1, 1));
749 fprintf (stderr, _(" on repetition %d\n"), repetition);
751 fprintf (stderr, "\n");
756 /* Determine the line number that marks the end of this file,
757 then get those lines and save them to the output file.
758 P is the control record.
759 REPETITION is the repetition number. */
762 process_line_count (const struct control *p, int repetition)
764 unsigned int linenum;
765 uintmax_t last_line_to_save = p->lines_required * (repetition + 1);
766 struct cstring *line;
768 create_output_file ();
770 linenum = get_first_line_in_buffer ();
772 /* Initially, I wanted to assert linenum < last_line_to_save, but that
773 condition is false for the valid command: echo | csplit - 1 '{*}'.
774 So, relax it just a little. */
775 assert ((linenum == 1 && last_line_to_save == 1)
776 || linenum < last_line_to_save);
778 while (linenum++ < last_line_to_save)
780 line = remove_line ();
782 handle_line_error (p, repetition);
783 save_line_to_file (line);
786 close_output_file ();
788 /* Ensure that the line number specified is not 1 greater than
789 the number of lines in the file. */
790 if (no_more_lines ())
791 handle_line_error (p, repetition);
795 regexp_error (struct control *p, int repetition, boolean ignore)
797 fprintf (stderr, _("%s: `%s': match not found"),
798 program_name, global_argv[p->argnum]);
801 fprintf (stderr, _(" on repetition %d\n"), repetition);
803 fprintf (stderr, "\n");
807 dump_rest_of_file ();
808 close_output_file ();
813 /* Read the input until a line matches the regexp in P, outputting
814 it unless P->IGNORE is TRUE.
815 REPETITION is this repeat-count; 0 means the first time. */
818 process_regexp (struct control *p, int repetition)
820 struct cstring *line; /* From input file. */
821 unsigned int line_len; /* To make "$" in regexps work. */
822 unsigned int break_line; /* First line number of next file. */
823 boolean ignore = p->ignore; /* If TRUE, skip this section. */
827 create_output_file ();
829 /* If there is no offset for the regular expression, or
830 it is positive, then it is not necessary to buffer the lines. */
836 line = find_line (++current_line);
839 if (p->repeat_forever)
843 dump_rest_of_file ();
844 close_output_file ();
849 regexp_error (p, repetition, ignore);
851 line_len = line->len;
852 if (line->str[line_len - 1] == '\n')
854 ret = re_search (&p->re_compiled, line->str, line_len,
855 0, line_len, (struct re_registers *) 0);
858 error (0, 0, _("error in regular expression search"));
863 line = remove_line ();
865 save_line_to_file (line);
873 /* Buffer the lines. */
876 line = find_line (++current_line);
879 if (p->repeat_forever)
883 dump_rest_of_file ();
884 close_output_file ();
889 regexp_error (p, repetition, ignore);
891 line_len = line->len;
892 if (line->str[line_len - 1] == '\n')
894 ret = re_search (&p->re_compiled, line->str, line_len,
895 0, line_len, (struct re_registers *) 0);
898 error (0, 0, _("error in regular expression search"));
906 /* Account for any offset from this regexp. */
907 break_line = current_line + p->offset;
909 write_to_file (break_line, ignore, p->argnum);
912 close_output_file ();
915 current_line = break_line;
918 /* Split the input file according to the control records we have built. */
925 for (i = 0; i < control_used; i++)
927 if (controls[i].regexpr)
929 for (j = 0; (controls[i].repeat_forever
930 || j <= controls[i].repeat); j++)
931 process_regexp (&controls[i], j);
935 for (j = 0; (controls[i].repeat_forever
936 || j <= controls[i].repeat); j++)
937 process_line_count (&controls[i], j);
941 create_output_file ();
942 dump_rest_of_file ();
943 close_output_file ();
946 /* Return the name of output file number NUM. */
949 make_filename (unsigned int num)
951 strcpy (filename_space, prefix);
953 sprintf (filename_space+strlen(prefix), suffix, num);
955 sprintf (filename_space+strlen(prefix), "%0*d", digits, num);
956 return filename_space;
959 /* Create the next output file. */
962 create_output_file (void)
964 output_filename = make_filename (files_created);
965 output_stream = fopen (output_filename, "w");
966 if (output_stream == NULL)
968 error (0, errno, "%s", output_filename);
975 /* Delete all the files we have created. */
978 delete_all_files (void)
983 for (i = 0; i < files_created; i++)
985 name = make_filename (i);
987 error (0, errno, "%s", name);
991 /* Close the current output file and print the count
992 of characters in this file.  If nothing was written to the file and elide_empty_files is set, the file is unlinked. */
995 close_output_file (void)
999 if (ferror (output_stream) || fclose (output_stream) == EOF)
1001 error (0, errno, _("write error for `%s'"), output_filename);
1002 output_stream = NULL;
1005 if (bytes_written == 0 && elide_empty_files)
1007 if (unlink (output_filename))
1008 error (0, errno, "%s", output_filename);
1013 /* FIXME: if we write to stdout here, we have to close stdout
1014 and check for errors. */
1015 if (!suppress_count)
1016 fprintf (stdout, "%u\n", bytes_written); /* bytes_written is unsigned, so use %u. */
1018 output_stream = NULL;
1022 /* Save line LINE to the output file and
1023 increment the character count for the current file. */
1026 save_line_to_file (const struct cstring *line)
1028 fwrite (line->str, sizeof (char), line->len, output_stream);
1029 bytes_written += line->len;
1032 /* Return a new, initialized control record. */
1034 static struct control *
1035 new_control_record (void)
1037 static unsigned control_allocated = 0; /* Total space allocated. */
1040 if (control_allocated == 0)
1042 control_allocated = ALLOC_SIZE;
1043 controls = (struct control *)
1044 xmalloc (sizeof (struct control) * control_allocated);
1046 else if (control_used == control_allocated)
1048 control_allocated += ALLOC_SIZE;
1049 controls = (struct control *)
1050 xrealloc ((char *) controls,
1051 sizeof (struct control) * control_allocated);
1053 p = &controls[control_used++];
1056 p->repeat_forever = 0;
1057 p->lines_required = 0;
1062 /* Check if there is a numeric offset after a regular expression.
1063 STR is the entire command line argument.
1064 P is the control record for this regular expression.
1065 NUM is the numeric part of STR. */
1068 check_for_offset (struct control *p, const char *str, const char *num)
1072 if (*num != '-' && *num != '+')
1073 error (EXIT_FAILURE, 0, _("%s: `+' or `-' expected after delimeter"), str);
1075 if (xstrtoul (num + 1, NULL, 10, &val, "") != LONGINT_OK
1077 error (EXIT_FAILURE, 0, _("%s: integer expected after `%c'"), str, *num);
1078 p->offset = (unsigned int) val;
1081 p->offset = -p->offset;
1084 /* Given that the first character of command line arg STR is '{',
1085 make sure that the rest of the string is a valid repeat count
1086 and store its value in P.
1087 ARGNUM is the ARGV index of STR. */
1090 parse_repeat_count (int argnum, struct control *p, char *str)
1095 end = str + strlen (str) - 1;
1097 error (EXIT_FAILURE, 0, _("%s: `}' is required in repeat count"), str);
1100 if (str+1 == end-1 && *(str+1) == '*')
1101 p->repeat_forever = 1;
1104 if (xstrtoumax (str + 1, NULL, 10, &val, "") != LONGINT_OK)
1106 error (EXIT_FAILURE, 0,
1107 _("%s}: integer required between `{' and `}'"),
1108 global_argv[argnum]);
1116 /* Extract the regular expression from STR and check for a numeric offset.
1117 STR should start with the regexp delimiter character.
1118 Return a new control record for the regular expression.
1119 ARGNUM is the ARGV index of STR.
1120 Unless IGNORE is TRUE, mark these lines for output. */
1122 static struct control *
1123 extract_regexp (int argnum, boolean ignore, char *str)
1125 int len; /* Number of chars in this regexp. */
1127 char *closing_delim;
1131 closing_delim = strrchr (str + 1, delim);
1132 if (closing_delim == NULL)
1133 error (EXIT_FAILURE, 0,
1134 _("%s: closing delimeter `%c' missing"), str, delim);
1136 len = closing_delim - str - 1;
1137 p = new_control_record ();
1141 p->regexpr = (char *) xmalloc ((unsigned) (len + 1));
1142 strncpy (p->regexpr, str + 1, len);
1143 p->re_compiled.allocated = len * 2;
1144 p->re_compiled.buffer = (unsigned char *) xmalloc (p->re_compiled.allocated);
1145 p->re_compiled.fastmap = xmalloc (256);
1146 p->re_compiled.translate = 0;
1147 err = re_compile_pattern (p->regexpr, len, &p->re_compiled);
1150 error (0, 0, _("%s: invalid regular expression: %s"), str, err);
1154 if (closing_delim[1])
1155 check_for_offset (p, str, closing_delim + 1);
1160 /* Extract the break patterns from args START through ARGC - 1 of ARGV.
1161 After each pattern, check if the next argument is a repeat count. */
1164 parse_patterns (int argc, int start, char **argv)
1166 int i; /* Index into ARGV. */
1167 struct control *p; /* New control record created. */
1169 static uintmax_t last_val = 0;
1171 for (i = start; i < argc; i++)
1173 if (*argv[i] == '/' || *argv[i] == '%')
1175 p = extract_regexp (i, *argv[i] == '%', argv[i]);
1179 p = new_control_record ();
1182 if (xstrtoumax (argv[i], NULL, 10, &val, "") != LONGINT_OK)
1183 error (EXIT_FAILURE, 0, _("%s: invalid pattern"), argv[i]);
1185 error (EXIT_FAILURE, 0,
1186 _("%s: line number must be greater than zero"),
1190 char buf[LONGEST_HUMAN_READABLE + 1];
1191 error (EXIT_FAILURE, 0,
1192 _("line number `%s' is smaller than preceding line number, %s"),
1193 argv[i], human_readable (last_val, buf, 1, 1));
1196 if (val == last_val)
1198 _("warning: line number `%s' is the same as preceding line number"),
1203 p->lines_required = val;
1206 if (i + 1 < argc && *argv[i + 1] == '{')
1208 /* We have a repeat count. */
1210 parse_repeat_count (i, p, argv[i]);
1216 get_format_flags (char **format_ptr)
1220 for (; **format_ptr; (*format_ptr)++)
1222 switch (**format_ptr)
1233 count += 2; /* Allow for 0x prefix preceding an `x' conversion. */
1244 get_format_width (char **format_ptr)
1250 start = *format_ptr;
1251 for (; ISDIGIT (**format_ptr); (*format_ptr)++)
1254 ch_save = **format_ptr;
1255 **format_ptr = '\0';
1256 /* In the case where no minimum field width is explicitly specified,
1257 allow for enough octal digits to represent the value of LONG_MAX. */
1258 count = ((*format_ptr == start)
1259 ? bytes_to_octal_digits[sizeof (long)]
1261 **format_ptr = ch_save;
1266 get_format_prec (char **format_ptr)
1273 if (**format_ptr != '.')
1277 if (**format_ptr == '-' || **format_ptr == '+')
1279 is_negative = (**format_ptr == '-');
1287 start = *format_ptr;
1288 for (; ISDIGIT (**format_ptr); (*format_ptr)++)
1291 /* ANSI 4.9.6.1 says that if the precision is negative, it's as good as
1294 start = *format_ptr;
1296 ch_save = **format_ptr;
1297 **format_ptr = '\0';
1298 count = (*format_ptr == start) ? 11 : atoi (start);
1299 **format_ptr = ch_save;
1305 get_format_conv_type (char **format_ptr)
1307 int ch = *((*format_ptr)++);
1320 error (EXIT_FAILURE, 0, _("missing conversion specifier in suffix"));
1325 error (EXIT_FAILURE, 0,
1326 _("invalid conversion specifier in suffix: %c"), ch);
1328 error (EXIT_FAILURE, 0,
1329 _("invalid conversion specifier in suffix: \\%.3o"), ch);
1334 max_out (char *format)
1336 unsigned out_count = 0;
1337 unsigned percents = 0;
1348 out_count += get_format_flags (&format);
1350 int width = get_format_width (&format);
1351 int prec = get_format_prec (&format);
1353 out_count += MAX (width, prec);
1355 get_format_conv_type (&format);
1360 error (EXIT_FAILURE, 0,
1361 _("missing %% conversion specification in suffix"));
1362 else if (percents > 1)
1363 error (EXIT_FAILURE, 0,
1364 _("too many %% conversion specifications in suffix"));
1370 main (int argc, char **argv)
1375 struct sigaction oldact, newact;
1378 program_name = argv[0];
1379 setlocale (LC_ALL, "");
1380 bindtextdomain (PACKAGE, LOCALEDIR);
1381 textdomain (PACKAGE);
1383 atexit (close_stdout);
1388 suppress_count = FALSE;
1389 remove_files = TRUE;
1390 prefix = DEFAULT_PREFIX;
1392 /* Change the way xmalloc and xrealloc fail. */
1393 xalloc_fail_func = cleanup;
1396 newact.sa_handler = interrupt_handler;
1397 sigemptyset (&newact.sa_mask);
1398 newact.sa_flags = 0;
1400 sigaction (SIGHUP, NULL, &oldact);
1401 if (oldact.sa_handler != SIG_IGN)
1402 sigaction (SIGHUP, &newact, NULL);
1404 sigaction (SIGINT, NULL, &oldact);
1405 if (oldact.sa_handler != SIG_IGN)
1406 sigaction (SIGINT, &newact, NULL);
1408 sigaction (SIGQUIT, NULL, &oldact);
1409 if (oldact.sa_handler != SIG_IGN)
1410 sigaction (SIGQUIT, &newact, NULL);
1412 sigaction (SIGTERM, NULL, &oldact);
1413 if (oldact.sa_handler != SIG_IGN)
1414 sigaction (SIGTERM, &newact, NULL);
1416 if (signal (SIGHUP, SIG_IGN) != SIG_IGN)
1417 signal (SIGHUP, interrupt_handler);
1418 if (signal (SIGINT, SIG_IGN) != SIG_IGN)
1419 signal (SIGINT, interrupt_handler);
1420 if (signal (SIGQUIT, SIG_IGN) != SIG_IGN)
1421 signal (SIGQUIT, interrupt_handler);
1422 if (signal (SIGTERM, SIG_IGN) != SIG_IGN)
1423 signal (SIGTERM, interrupt_handler);
1426 while ((optc = getopt_long (argc, argv, "f:b:kn:sqz", longopts, NULL)) != -1)
1441 remove_files = FALSE;
1445 if (xstrtoul (optarg, NULL, 10, &val, "") != LONGINT_OK
1447 error (EXIT_FAILURE, 0, _("%s: invalid number"), optarg);
1453 suppress_count = TRUE;
1457 elide_empty_files = TRUE;
1460 case_GETOPT_HELP_CHAR;
1462 case_GETOPT_VERSION_CHAR (PROGRAM_NAME, AUTHORS);
1468 if (argc - optind < 2)
1470 error (0, 0, _("too few arguments"));
1475 filename_space = (char *) xmalloc (strlen (prefix) + max_out (suffix) + 2);
1477 filename_space = (char *) xmalloc (strlen (prefix) + digits + 2);
1479 set_input_file (argv[optind++]);
1481 parse_patterns (argc, optind, argv);
1485 if (close (input_desc) < 0)
1487 error (0, errno, _("read error"));
1491 exit (EXIT_SUCCESS);
1498 fprintf (stderr, _("Try `%s --help' for more information.\n"),
1503 Usage: %s [OPTION]... FILE PATTERN...\n\
1507 Output pieces of FILE separated by PATTERN(s) to files `xx00', `xx01', ...,\n\
1508 and output byte counts of each piece to standard output.\n\
1510 Mandatory arguments to long options are mandatory for short options too.\n\
1511 -b, --suffix-format=FORMAT use sprintf FORMAT instead of %d\n\
1512 -f, --prefix=PREFIX use PREFIX instead of `xx'\n\
1513 -k, --keep-files do not remove output files on errors\n\
1516 -n, --digits=DIGITS use specified number of digits instead of 2\n\
1517 -s, --quiet, --silent do not print counts of output file sizes\n\
1518 -z, --elide-empty-files remove empty output files\n\
1519 --help display this help and exit\n\
1520 --version output version information and exit\n\
1522 Read standard input if FILE is -. Each PATTERN may be:\n\
1526 INTEGER copy up to but not including specified line number\n\
1527 /REGEXP/[OFFSET] copy up to but not including a matching line\n\
1528 %REGEXP%[OFFSET] skip to, but not including a matching line\n\
1529 {INTEGER} repeat the previous pattern specified number of times\n\
1530 {*} repeat the previous pattern as many times as possible\n\
1532 A line OFFSET is a required `+' or `-' followed by a positive integer.\n\
1534 puts (_("\nReport bugs to <bug-textutils@gnu.org>."));
1536 exit (status == 0 ? EXIT_SUCCESS : EXIT_FAILURE);