1 /* csplit - split a file into sections determined by context lines
2 Copyright (C) 91, 1995-2003 Free Software Foundation, Inc.
4 This program is free software; you can redistribute it and/or modify
5 it under the terms of the GNU General Public License as published by
6 the Free Software Foundation; either version 2, or (at your option)
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU General Public License for more details.
14 You should have received a copy of the GNU General Public License
15 along with this program; if not, write to the Free Software Foundation,
16 Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */
18 /* Written by Stuart Kemp, cpsrk@groper.jcu.edu.au.
19 Modified by David MacKenzie, djm@gnu.ai.mit.edu. */
25 #include <sys/types.h>
35 #include "safe-read.h"
38 /* The official name of this program (e.g., no `g' prefix). */
39 #define PROGRAM_NAME "csplit"
41 #define AUTHORS N_ ("Stuart Kemp and David MacKenzie")
48 /* Increment size of area for control records. */
51 /* The default prefix for output file names. */
52 #define DEFAULT_PREFIX "xx"
56 /* A compiled pattern arg. */
59 char *regexpr; /* Non-compiled regular expression. */
60 struct re_pattern_buffer re_compiled; /* Compiled regular expression. */
61 int offset; /* Offset from regexp to split at. */
62 uintmax_t lines_required; /* Number of lines required. */
63 uintmax_t repeat; /* Repeat count. */
64 int repeat_forever; /* Non-zero if `*' used as a repeat count. */
65 int argnum; /* ARGV index. */
66 boolean ignore; /* If true, produce no output (for regexp). */
69 /* Initial size of data area in buffers. */
70 #define START_SIZE 8191
72 /* Increment size for data area. */
73 #define INCR_SIZE 2048
75 /* Number of lines kept in each node in line list. */
79 /* Some small values to test the algorithms. */
80 # define START_SIZE 200
85 /* A string with a length count. */
92 /* Pointers to the beginnings of lines in the buffer area.
93 These structures are linked together if needed. */
96 unsigned used; /* Number of offsets used in this struct. */
97 unsigned insert_index; /* Next offset to use when inserting line. */
98 unsigned retrieve_index; /* Next index to use when retrieving line. */
99 struct cstring starts[CTRL_SIZE]; /* Lines in the data area. */
100 struct line *next; /* Next in linked list. */
103 /* The structure to hold the input lines.
104 Contains a pointer to the data area and a list containing
105 pointers to the individual lines. */
108 unsigned bytes_alloc; /* Size of the buffer area. */
109 unsigned bytes_used; /* Bytes used in the buffer area. */
110 unsigned start_line; /* First line number in this buffer. */
111 unsigned first_available; /* First line that can be retrieved. */
112 unsigned num_lines; /* Number of complete lines in this buffer. */
113 char *buffer; /* Data area. */
114 struct line *line_start; /* Head of list of pointers to lines. */
115 struct line *curr_line; /* The line start record currently in use. */
116 struct buffer_record *next;
119 static void close_output_file (void);
120 static void create_output_file (void);
121 static void delete_all_files (void);
122 static void save_line_to_file (const struct cstring *line);
123 void usage (int status);
125 /* The name this program was run with. */
128 /* Convert the number of 8-bit bytes of a binary representation to
129 the number of characters required to represent the same quantity
130 as an unsigned octal. For example, a 32-bit (4-byte) quantity may
131 require a field width as wide as 11 characters. */
132 static const unsigned int bytes_to_octal_digits[] =
133 {0, 3, 6, 8, 11, 14, 16, 19, 22, 25, 27, 30, 32, 35, 38, 41, 43};
135 /* Input file descriptor. */
136 static int input_desc = 0;
138 /* List of available buffers. */
139 static struct buffer_record *free_list = NULL;
141 /* Start of buffer list. */
142 static struct buffer_record *head = NULL;
144 /* Partially read line. */
145 static char *hold_area = NULL;
147 /* Number of chars in `hold_area'. */
148 static unsigned hold_count = 0;
150 /* Number of the last line in the buffers. */
151 static unsigned last_line_number = 0;
153 /* Number of the line currently being examined. */
154 static unsigned current_line = 0;
156 /* If TRUE, we have read EOF. */
157 static boolean have_read_eof = FALSE;
159 /* Name of output files. */
160 static char *filename_space = NULL;
162 /* Prefix part of output file names. */
163 static char *prefix = NULL;
165 /* Suffix part of output file names. */
166 static char *suffix = NULL;
168 /* Number of digits to use in output file names. */
169 static int digits = 2;
171 /* Number of files created so far. */
172 static unsigned int files_created = 0;
174 /* Number of bytes written to current file. */
175 static unsigned int bytes_written;
177 /* Output file pointer. */
178 static FILE *output_stream = NULL;
180 /* Output file name. */
181 static char *output_filename = NULL;
183 /* Perhaps it would be cleaner to pass arg values instead of indexes. */
184 static char **global_argv;
186 /* If TRUE, do not print the count of bytes in each output file. */
187 static boolean suppress_count;
189 /* If TRUE, remove output files on error. */
190 static boolean remove_files;
192 /* If TRUE, remove all output files which have a zero length. */
193 static boolean elide_empty_files;
195 /* The compiled pattern arguments, which determine how to split
197 static struct control *controls;
199 /* Number of elements in `controls'. */
200 static unsigned int control_used;
202 static struct option const longopts[] =
204 {"digits", required_argument, NULL, 'n'},
205 {"quiet", no_argument, NULL, 'q'},
206 {"silent", no_argument, NULL, 's'},
207 {"keep-files", no_argument, NULL, 'k'},
208 {"elide-empty-files", no_argument, NULL, 'z'},
209 {"prefix", required_argument, NULL, 'f'},
210 {"suffix-format", required_argument, NULL, 'b'},
211 {GETOPT_HELP_OPTION_DECL},
212 {GETOPT_VERSION_OPTION_DECL},
216 /* Optionally remove files created so far; then exit.
217 Called when an error detected. */
223 close_output_file ();
237 interrupt_handler (int sig)
240 struct sigaction sigact;
242 sigact.sa_handler = SIG_DFL;
243 sigemptyset (&sigact.sa_mask);
245 sigaction (sig, &sigact, NULL);
247 signal (sig, SIG_DFL);
253 /* Keep track of NUM chars of a partial line in buffer START.
254 These chars will be retrieved later when another large buffer is read.
255 It is not necessary to create a new buffer for these chars; instead,
256 we keep a pointer to the existing buffer. This buffer *is* on the
257 free list, and when the next buffer is obtained from this list
258 (even if it is this one), these chars will be placed at the
259 start of the new buffer. */
262 save_to_hold_area (char *start, unsigned int num)
268 /* Read up to MAX_N_BYTES chars from the input stream into DEST.
269 Return the number of chars read. */
270 /* FIXME: MAX_N_BYTES should be of type size_t, but if you pull
271 that thread, you'll find there are many other `unsigned' types
272 in this file that should also be changed. */
275 read_input (char *dest, int max_n_bytes)
279 if (max_n_bytes == 0)
282 bytes_read = safe_read (input_desc, dest, max_n_bytes);
285 have_read_eof = TRUE;
287 if (bytes_read == SAFE_READ_ERROR)
289 error (0, errno, _("read error"));
296 /* Initialize existing line record P. */
299 clear_line_control (struct line *p)
303 p->retrieve_index = 0;
306 /* Initialize all line records in B. */
309 clear_all_line_control (struct buffer_record *b)
313 for (l = b->line_start; l; l = l->next)
314 clear_line_control (l);
317 /* Return a new, initialized line record. */
320 new_line_control (void)
324 p = xmalloc (sizeof (struct line));
327 clear_line_control (p);
332 /* Record LINE_START, which is the address of the start of a line
333 of length LINE_LEN in the large buffer, in the lines buffer of B. */
336 keep_new_line (struct buffer_record *b, char *line_start, int line_len)
340 /* If there is no existing area to keep line info, get some. */
341 if (b->line_start == NULL)
342 b->line_start = b->curr_line = new_line_control ();
344 /* If existing area for lines is full, get more. */
345 if (b->curr_line->used == CTRL_SIZE)
347 b->curr_line->next = new_line_control ();
348 b->curr_line = b->curr_line->next;
353 /* Record the start of the line, and update counters. */
354 l->starts[l->insert_index].str = line_start;
355 l->starts[l->insert_index].len = line_len;
360 /* Scan the buffer in B for newline characters
361 and record the line start locations and lengths in B.
362 Return the number of lines found in this buffer.
364 There may be an incomplete line at the end of the buffer;
365 a pointer is kept to this area, which will be used when
366 the next buffer is filled. */
369 record_line_starts (struct buffer_record *b)
371 char *line_start; /* Start of current line. */
372 char *line_end; /* End of each line found. */
373 unsigned int bytes_left; /* Length of incomplete last line. */
374 unsigned int lines; /* Number of lines found. */
375 unsigned int line_length; /* Length of each line found. */
377 if (b->bytes_used == 0)
381 line_start = b->buffer;
382 bytes_left = b->bytes_used;
386 line_end = memchr (line_start, '\n', bytes_left);
387 if (line_end == NULL)
389 line_length = line_end - line_start + 1;
390 keep_new_line (b, line_start, line_length);
391 bytes_left -= line_length;
392 line_start = line_end + 1;
396 /* Check for an incomplete last line. */
401 keep_new_line (b, line_start, bytes_left);
405 save_to_hold_area (line_start, bytes_left);
408 b->num_lines = lines;
409 b->first_available = b->start_line = last_line_number + 1;
410 last_line_number += lines;
415 /* Return a new buffer with room to store SIZE bytes, plus
416 an extra byte for safety. */
418 static struct buffer_record *
419 create_new_buffer (unsigned int size)
421 struct buffer_record *new_buffer;
423 new_buffer = (struct buffer_record *)
424 xmalloc (sizeof (struct buffer_record));
426 new_buffer->buffer = xmalloc (size + 1);
428 new_buffer->bytes_alloc = size;
429 new_buffer->line_start = new_buffer->curr_line = NULL;
434 /* Return a new buffer of at least MIN_SIZE bytes. If a buffer of at
435 least that size is currently free, use it, otherwise create a new one. */
437 static struct buffer_record *
438 get_new_buffer (unsigned int min_size)
440 struct buffer_record *p, *q;
441 struct buffer_record *new_buffer; /* Buffer to return. */
442 unsigned int alloc_size; /* Actual size that will be requested. */
444 alloc_size = START_SIZE;
445 while (min_size > alloc_size)
446 alloc_size += INCR_SIZE;
448 if (free_list == NULL)
449 new_buffer = create_new_buffer (alloc_size);
452 /* Use first-fit to find a buffer. */
453 p = new_buffer = NULL;
458 if (q->bytes_alloc >= min_size)
471 new_buffer = (q ? q : create_new_buffer (alloc_size));
473 new_buffer->curr_line = new_buffer->line_start;
474 clear_all_line_control (new_buffer);
477 new_buffer->num_lines = 0;
478 new_buffer->bytes_used = 0;
479 new_buffer->start_line = new_buffer->first_available = last_line_number + 1;
480 new_buffer->next = NULL;
485 /* Add buffer BUF to the list of free buffers. */
488 free_buffer (struct buffer_record *buf)
490 buf->next = free_list;
494 /* Append buffer BUF to the linked list of buffers that contain
495 some data yet to be processed. */
498 save_buffer (struct buffer_record *buf)
500 struct buffer_record *p;
503 buf->curr_line = buf->line_start;
509 for (p = head; p->next; p = p->next)
515 /* Fill a buffer of input.
517 Set the initial size of the buffer to a default.
518 Fill the buffer (from the hold area and input stream)
519 and find the individual lines.
520 If no lines are found (the buffer is too small to hold the next line),
521 release the current buffer (whose contents would have been put in the
522 hold area) and repeat the process with another large buffer until at least
523 one entire line has been read.
525 Return TRUE if a new buffer was obtained, otherwise false
526 (in which case end-of-file must have been encountered). */
531 struct buffer_record *b;
532 unsigned int bytes_wanted = START_SIZE; /* Minimum buffer size. */
533 unsigned int bytes_avail; /* Size of new buffer created. */
534 unsigned int lines_found; /* Number of lines in this new buffer. */
535 char *p; /* Place to load into buffer. */
540 /* We must make the buffer at least as large as the amount of data
541 in the partial line left over from the last call. */
542 if (bytes_wanted < hold_count)
543 bytes_wanted = hold_count;
547 b = get_new_buffer (bytes_wanted);
548 bytes_avail = b->bytes_alloc; /* Size of buffer returned. */
551 /* First check the `holding' area for a partial line. */
555 memcpy (p, hold_area, hold_count);
557 b->bytes_used += hold_count;
558 bytes_avail -= hold_count;
562 b->bytes_used += (unsigned int) read_input (p, bytes_avail);
564 lines_found = record_line_starts (b);
565 bytes_wanted = b->bytes_alloc * 2;
569 while (!lines_found && !have_read_eof);
574 return lines_found != 0;
577 /* Return the line number of the first line that has not yet been retrieved. */
580 get_first_line_in_buffer (void)
582 if (head == NULL && !load_buffer ())
583 error (EXIT_FAILURE, errno, _("input disappeared"));
585 return head->first_available;
588 /* Return a pointer to the logical first line in the buffer and make the
589 next line the logical first line.
590 Return NULL if there is no more input. */
592 static struct cstring *
595 struct cstring *line; /* Return value. */
596 struct line *l; /* For convenience. */
598 if (head == NULL && !load_buffer ())
601 if (current_line < head->first_available)
602 current_line = head->first_available;
604 ++(head->first_available);
608 line = &l->starts[l->retrieve_index];
610 /* Advance index to next line. */
611 if (++l->retrieve_index == l->used)
613 /* Go on to the next line record. */
614 head->curr_line = l->next;
615 if (head->curr_line == NULL || head->curr_line->used == 0)
617 /* Go on to the next data block. */
618 struct buffer_record *b = head;
627 /* Search the buffers for line LINENUM, reading more input if necessary.
628 Return a pointer to the line, or NULL if it is not found in the file. */
630 static struct cstring *
631 find_line (unsigned int linenum)
633 struct buffer_record *b;
635 if (head == NULL && !load_buffer ())
638 if (linenum < head->start_line)
643 if (linenum < b->start_line + b->num_lines)
645 /* The line is in this buffer. */
647 unsigned int offset; /* How far into the buffer the line is. */
650 offset = linenum - b->start_line;
651 /* Find the control record. */
652 while (offset >= CTRL_SIZE)
657 return &l->starts[offset];
659 if (b->next == NULL && !load_buffer ())
661 b = b->next; /* Try the next data block. */
665 /* Return TRUE if no more lines are available for input. */
670 return (find_line (current_line + 1) == NULL) ? TRUE : FALSE;
673 /* Set the name of the input file to NAME and open it. */
676 set_input_file (const char *name)
678 if (STREQ (name, "-"))
682 input_desc = open (name, O_RDONLY);
684 error (EXIT_FAILURE, errno, "%s", name);
688 /* Write all lines from the beginning of the buffer up to, but
689 not including, line LAST_LINE, to the current output file.
690 If IGNORE is TRUE, do not output lines selected here.
691 ARGNUM is the index in ARGV of the current pattern. */
694 write_to_file (unsigned int last_line, boolean ignore, int argnum)
696 struct cstring *line;
697 unsigned int first_line; /* First available input line. */
698 unsigned int lines; /* Number of lines to output. */
701 first_line = get_first_line_in_buffer ();
703 if (first_line > last_line)
705 error (0, 0, _("%s: line number out of range"), global_argv[argnum]);
709 lines = last_line - first_line;
711 for (i = 0; i < lines; i++)
713 line = remove_line ();
716 error (0, 0, _("%s: line number out of range"), global_argv[argnum]);
720 save_line_to_file (line);
724 /* Output any lines left after all regexps have been processed. */
727 dump_rest_of_file (void)
729 struct cstring *line;
731 while ((line = remove_line ()) != NULL)
732 save_line_to_file (line);
735 /* Handle an attempt to read beyond EOF under the control of record P,
736 on iteration REPETITION if nonzero. */
739 handle_line_error (const struct control *p, int repetition)
741 char buf[INT_BUFSIZE_BOUND (uintmax_t)];
743 fprintf (stderr, _("%s: `%s': line number out of range"),
744 program_name, umaxtostr (p->lines_required, buf));
746 fprintf (stderr, _(" on repetition %d\n"), repetition);
748 fprintf (stderr, "\n");
753 /* Determine the line number that marks the end of this file,
754 then get those lines and save them to the output file.
755 P is the control record.
756 REPETITION is the repetition number. */
759 process_line_count (const struct control *p, int repetition)
761 unsigned int linenum;
762 uintmax_t last_line_to_save = p->lines_required * (repetition + 1);
763 struct cstring *line;
765 create_output_file ();
767 linenum = get_first_line_in_buffer ();
769 while (linenum++ < last_line_to_save)
771 line = remove_line ();
773 handle_line_error (p, repetition);
774 save_line_to_file (line);
777 close_output_file ();
779 /* Ensure that the line number specified is not 1 greater than
780 the number of lines in the file. */
781 if (no_more_lines ())
782 handle_line_error (p, repetition);
786 regexp_error (struct control *p, int repetition, boolean ignore)
788 fprintf (stderr, _("%s: `%s': match not found"),
789 program_name, global_argv[p->argnum]);
792 fprintf (stderr, _(" on repetition %d\n"), repetition);
794 fprintf (stderr, "\n");
798 dump_rest_of_file ();
799 close_output_file ();
804 /* Read the input until a line matches the regexp in P, outputting
805 it unless P->IGNORE is TRUE.
806 REPETITION is this repeat-count; 0 means the first time. */
809 process_regexp (struct control *p, int repetition)
811 struct cstring *line; /* From input file. */
812 unsigned int line_len; /* To make "$" in regexps work. */
813 unsigned int break_line; /* First line number of next file. */
814 boolean ignore = p->ignore; /* If TRUE, skip this section. */
818 create_output_file ();
820 /* If there is no offset for the regular expression, or
821 it is positive, then it is not necessary to buffer the lines. */
827 line = find_line (++current_line);
830 if (p->repeat_forever)
834 dump_rest_of_file ();
835 close_output_file ();
840 regexp_error (p, repetition, ignore);
842 line_len = line->len;
843 if (line->str[line_len - 1] == '\n')
845 ret = re_search (&p->re_compiled, line->str, line_len,
846 0, line_len, (struct re_registers *) 0);
849 error (0, 0, _("error in regular expression search"));
854 line = remove_line ();
856 save_line_to_file (line);
864 /* Buffer the lines. */
867 line = find_line (++current_line);
870 if (p->repeat_forever)
874 dump_rest_of_file ();
875 close_output_file ();
880 regexp_error (p, repetition, ignore);
882 line_len = line->len;
883 if (line->str[line_len - 1] == '\n')
885 ret = re_search (&p->re_compiled, line->str, line_len,
886 0, line_len, (struct re_registers *) 0);
889 error (0, 0, _("error in regular expression search"));
897 /* Account for any offset from this regexp. */
898 break_line = current_line + p->offset;
900 write_to_file (break_line, ignore, p->argnum);
903 close_output_file ();
906 current_line = break_line;
909 /* Split the input file according to the control records we have built. */
916 for (i = 0; i < control_used; i++)
918 if (controls[i].regexpr)
920 for (j = 0; (controls[i].repeat_forever
921 || j <= controls[i].repeat); j++)
922 process_regexp (&controls[i], j);
926 for (j = 0; (controls[i].repeat_forever
927 || j <= controls[i].repeat); j++)
928 process_line_count (&controls[i], j);
932 create_output_file ();
933 dump_rest_of_file ();
934 close_output_file ();
937 /* Return the name of output file number NUM. */
940 make_filename (unsigned int num)
942 strcpy (filename_space, prefix);
944 sprintf (filename_space+strlen(prefix), suffix, num);
946 sprintf (filename_space+strlen(prefix), "%0*d", digits, num);
947 return filename_space;
950 /* Create the next output file. */
953 create_output_file (void)
955 output_filename = make_filename (files_created);
956 output_stream = fopen (output_filename, "w");
957 if (output_stream == NULL)
959 error (0, errno, "%s", output_filename);
966 /* Delete all the files we have created. */
969 delete_all_files (void)
974 for (i = 0; i < files_created; i++)
976 name = make_filename (i);
978 error (0, errno, "%s", name);
982 /* Close the current output file and print the count
983 of characters in this file. */
986 close_output_file (void)
990 if (ferror (output_stream) || fclose (output_stream) == EOF)
992 error (0, errno, _("write error for `%s'"), output_filename);
993 output_stream = NULL;
996 if (bytes_written == 0 && elide_empty_files)
998 if (unlink (output_filename))
999 error (0, errno, "%s", output_filename);
1004 /* FIXME: if we write to stdout here, we have to close stdout
1005 and check for errors. */
1006 if (!suppress_count)
1007 fprintf (stdout, "%d\n", bytes_written);
1009 output_stream = NULL;
1013 /* Save line LINE to the output file and
1014 increment the character count for the current file. */
1017 save_line_to_file (const struct cstring *line)
1019 fwrite (line->str, sizeof (char), line->len, output_stream);
1020 bytes_written += line->len;
1023 /* Return a new, initialized control record. */
1025 static struct control *
1026 new_control_record (void)
1028 static unsigned control_allocated = 0; /* Total space allocated. */
1031 if (control_allocated == 0)
1033 control_allocated = ALLOC_SIZE;
1034 controls = (struct control *)
1035 xmalloc (sizeof (struct control) * control_allocated);
1037 else if (control_used == control_allocated)
1039 control_allocated += ALLOC_SIZE;
1040 controls = (struct control *)
1042 sizeof (struct control) * control_allocated);
1044 p = &controls[control_used++];
1047 p->repeat_forever = 0;
1048 p->lines_required = 0;
1053 /* Check if there is a numeric offset after a regular expression.
1054 STR is the entire command line argument.
1055 P is the control record for this regular expression.
1056 NUM is the numeric part of STR. */
1059 check_for_offset (struct control *p, const char *str, const char *num)
1063 if (*num != '-' && *num != '+')
1064 error (EXIT_FAILURE, 0, _("%s: `+' or `-' expected after delimeter"), str);
1066 if (xstrtoul (num + 1, NULL, 10, &val, "") != LONGINT_OK
1068 error (EXIT_FAILURE, 0, _("%s: integer expected after `%c'"), str, *num);
1069 p->offset = (unsigned int) val;
1072 p->offset = -p->offset;
1075 /* Given that the first character of command line arg STR is '{',
1076 make sure that the rest of the string is a valid repeat count
1077 and store its value in P.
1078 ARGNUM is the ARGV index of STR. */
1081 parse_repeat_count (int argnum, struct control *p, char *str)
1086 end = str + strlen (str) - 1;
1088 error (EXIT_FAILURE, 0, _("%s: `}' is required in repeat count"), str);
1091 if (str+1 == end-1 && *(str+1) == '*')
1092 p->repeat_forever = 1;
1095 if (xstrtoumax (str + 1, NULL, 10, &val, "") != LONGINT_OK)
1097 error (EXIT_FAILURE, 0,
1098 _("%s}: integer required between `{' and `}'"),
1099 global_argv[argnum]);
1107 /* Extract the regular expression from STR and check for a numeric offset.
1108 STR should start with the regexp delimiter character.
1109 Return a new control record for the regular expression.
1110 ARGNUM is the ARGV index of STR.
1111 Unless IGNORE is TRUE, mark these lines for output. */
1113 static struct control *
1114 extract_regexp (int argnum, boolean ignore, char *str)
1116 int len; /* Number of chars in this regexp. */
1118 char *closing_delim;
1122 closing_delim = strrchr (str + 1, delim);
1123 if (closing_delim == NULL)
1124 error (EXIT_FAILURE, 0,
1125 _("%s: closing delimeter `%c' missing"), str, delim);
1127 len = closing_delim - str - 1;
1128 p = new_control_record ();
1132 p->regexpr = xmalloc ((unsigned) (len + 1));
1133 strncpy (p->regexpr, str + 1, len);
1134 p->re_compiled.allocated = len * 2;
1135 p->re_compiled.buffer = xmalloc (p->re_compiled.allocated);
1136 p->re_compiled.fastmap = xmalloc (256);
1137 p->re_compiled.translate = 0;
1138 err = re_compile_pattern (p->regexpr, len, &p->re_compiled);
1141 error (0, 0, _("%s: invalid regular expression: %s"), str, err);
1145 if (closing_delim[1])
1146 check_for_offset (p, str, closing_delim + 1);
1151 /* Extract the break patterns from args START through ARGC - 1 of ARGV.
1152 After each pattern, check if the next argument is a repeat count. */
1155 parse_patterns (int argc, int start, char **argv)
1157 int i; /* Index into ARGV. */
1158 struct control *p; /* New control record created. */
1160 static uintmax_t last_val = 0;
1162 for (i = start; i < argc; i++)
1164 if (*argv[i] == '/' || *argv[i] == '%')
1166 p = extract_regexp (i, *argv[i] == '%', argv[i]);
1170 p = new_control_record ();
1173 if (xstrtoumax (argv[i], NULL, 10, &val, "") != LONGINT_OK)
1174 error (EXIT_FAILURE, 0, _("%s: invalid pattern"), argv[i]);
1176 error (EXIT_FAILURE, 0,
1177 _("%s: line number must be greater than zero"),
1181 char buf[INT_BUFSIZE_BOUND (uintmax_t)];
1182 error (EXIT_FAILURE, 0,
1183 _("line number `%s' is smaller than preceding line number, %s"),
1184 argv[i], umaxtostr (last_val, buf));
1187 if (val == last_val)
1189 _("warning: line number `%s' is the same as preceding line number"),
1194 p->lines_required = val;
1197 if (i + 1 < argc && *argv[i + 1] == '{')
1199 /* We have a repeat count. */
1201 parse_repeat_count (i, p, argv[i]);
1207 get_format_flags (char **format_ptr)
1211 for (; **format_ptr; (*format_ptr)++)
1213 switch (**format_ptr)
1224 count += 2; /* Allow for 0x prefix preceeding an `x' conversion. */
1235 get_format_width (char **format_ptr)
1241 start = *format_ptr;
1242 for (; ISDIGIT (**format_ptr); (*format_ptr)++)
1245 ch_save = **format_ptr;
1246 **format_ptr = '\0';
1247 /* In the case where no minimum field width is explicitly specified,
1248 allow for enough octal digits to represent the value of LONG_MAX. */
1249 count = ((*format_ptr == start)
1250 ? bytes_to_octal_digits[sizeof (long)]
1251 /* FIXME: don't use atoi, it may silently overflow.
1252 Besides, we know the result is non-negative, so shouldn't
1254 : (unsigned) atoi (start));
1255 **format_ptr = ch_save;
1260 get_format_prec (char **format_ptr)
1267 if (**format_ptr != '.')
1271 if (**format_ptr == '-' || **format_ptr == '+')
1273 is_negative = (**format_ptr == '-');
1281 start = *format_ptr;
1282 for (; ISDIGIT (**format_ptr); (*format_ptr)++)
1285 /* ANSI 4.9.6.1 says that if the precision is negative, it's as good as
1288 start = *format_ptr;
1290 ch_save = **format_ptr;
1291 **format_ptr = '\0';
1292 count = (*format_ptr == start) ? 11 : atoi (start);
1293 **format_ptr = ch_save;
1299 get_format_conv_type (char **format_ptr)
1301 int ch = *((*format_ptr)++);
1314 error (EXIT_FAILURE, 0, _("missing conversion specifier in suffix"));
1319 error (EXIT_FAILURE, 0,
1320 _("invalid conversion specifier in suffix: %c"), ch);
1322 error (EXIT_FAILURE, 0,
1323 _("invalid conversion specifier in suffix: \\%.3o"), ch);
1328 max_out (char *format)
1330 unsigned out_count = 0;
1331 unsigned percents = 0;
1342 out_count += get_format_flags (&format);
1344 int width = get_format_width (&format);
1345 int prec = get_format_prec (&format);
1347 out_count += MAX (width, prec);
1349 get_format_conv_type (&format);
1354 error (EXIT_FAILURE, 0,
1355 _("missing %% conversion specification in suffix"));
1356 else if (percents > 1)
1357 error (EXIT_FAILURE, 0,
1358 _("too many %% conversion specifications in suffix"));
1364 main (int argc, char **argv)
1369 struct sigaction oldact, newact;
1372 initialize_main (&argc, &argv);
1373 program_name = argv[0];
1374 setlocale (LC_ALL, "");
1375 bindtextdomain (PACKAGE, LOCALEDIR);
1376 textdomain (PACKAGE);
1378 atexit (close_stdout);
1383 suppress_count = FALSE;
1384 remove_files = TRUE;
1385 prefix = DEFAULT_PREFIX;
1387 /* Change the way xmalloc and xrealloc fail. */
1388 xalloc_fail_func = cleanup;
1391 newact.sa_handler = interrupt_handler;
1392 sigemptyset (&newact.sa_mask);
1393 newact.sa_flags = 0;
1395 sigaction (SIGHUP, NULL, &oldact);
1396 if (oldact.sa_handler != SIG_IGN)
1397 sigaction (SIGHUP, &newact, NULL);
1399 sigaction (SIGINT, NULL, &oldact);
1400 if (oldact.sa_handler != SIG_IGN)
1401 sigaction (SIGINT, &newact, NULL);
1403 sigaction (SIGQUIT, NULL, &oldact);
1404 if (oldact.sa_handler != SIG_IGN)
1405 sigaction (SIGQUIT, &newact, NULL);
1407 sigaction (SIGTERM, NULL, &oldact);
1408 if (oldact.sa_handler != SIG_IGN)
1409 sigaction (SIGTERM, &newact, NULL);
1411 if (signal (SIGHUP, SIG_IGN) != SIG_IGN)
1412 signal (SIGHUP, interrupt_handler);
1413 if (signal (SIGINT, SIG_IGN) != SIG_IGN)
1414 signal (SIGINT, interrupt_handler);
1415 if (signal (SIGQUIT, SIG_IGN) != SIG_IGN)
1416 signal (SIGQUIT, interrupt_handler);
1417 if (signal (SIGTERM, SIG_IGN) != SIG_IGN)
1418 signal (SIGTERM, interrupt_handler);
1421 while ((optc = getopt_long (argc, argv, "f:b:kn:sqz", longopts, NULL)) != -1)
1436 remove_files = FALSE;
1440 if (xstrtoul (optarg, NULL, 10, &val, "") != LONGINT_OK
1442 error (EXIT_FAILURE, 0, _("%s: invalid number"), optarg);
1448 suppress_count = TRUE;
1452 elide_empty_files = TRUE;
1455 case_GETOPT_HELP_CHAR;
1457 case_GETOPT_VERSION_CHAR (PROGRAM_NAME, AUTHORS);
1460 usage (EXIT_FAILURE);
1463 if (argc - optind < 2)
1465 error (0, 0, _("too few arguments"));
1466 usage (EXIT_FAILURE);
1470 filename_space = xmalloc (strlen (prefix) + max_out (suffix) + 2);
1472 filename_space = xmalloc (strlen (prefix) + digits + 2);
1474 set_input_file (argv[optind++]);
1476 parse_patterns (argc, optind, argv);
1480 if (close (input_desc) < 0)
1482 error (0, errno, _("read error"));
1486 exit (EXIT_SUCCESS);
1493 fprintf (stderr, _("Try `%s --help' for more information.\n"),
1498 Usage: %s [OPTION]... FILE PATTERN...\n\
1502 Output pieces of FILE separated by PATTERN(s) to files `xx01', `xx02', ...,\n\
1503 and output byte counts of each piece to standard output.\n\
1507 Mandatory arguments to long options are mandatory for short options too.\n\
1510 -b, --suffix-format=FORMAT use sprintf FORMAT instead of %d\n\
1511 -f, --prefix=PREFIX use PREFIX instead of `xx'\n\
1512 -k, --keep-files do not remove output files on errors\n\
1515 -n, --digits=DIGITS use specified number of digits instead of 2\n\
1516 -s, --quiet, --silent do not print counts of output file sizes\n\
1517 -z, --elide-empty-files remove empty output files\n\
1519 fputs (HELP_OPTION_DESCRIPTION, stdout);
1520 fputs (VERSION_OPTION_DESCRIPTION, stdout);
1523 Read standard input if FILE is -. Each PATTERN may be:\n\
1527 INTEGER copy up to but not including specified line number\n\
1528 /REGEXP/[OFFSET] copy up to but not including a matching line\n\
1529 %REGEXP%[OFFSET] skip to, but not including a matching line\n\
1530 {INTEGER} repeat the previous pattern specified number of times\n\
1531 {*} repeat the previous pattern as many times as possible\n\
1533 A line OFFSET is a required `+' or `-' followed by a positive integer.\n\
1535 printf (_("\nReport bugs to <%s>.\n"), PACKAGE_BUGREPORT);
1537 exit (status == 0 ? EXIT_SUCCESS : EXIT_FAILURE);