1 /* csplit - split a file into sections determined by context lines
2 Copyright (C) 91, 1995-2002 Free Software Foundation, Inc.
4 This program is free software; you can redistribute it and/or modify
5 it under the terms of the GNU General Public License as published by
6 the Free Software Foundation; either version 2, or (at your option)
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU General Public License for more details.
14 You should have received a copy of the GNU General Public License
15 along with this program; if not, write to the Free Software Foundation,
16 Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */
18 /* Written by Stuart Kemp, cpsrk@groper.jcu.edu.au.
19 Modified by David MacKenzie, djm@gnu.ai.mit.edu. */
25 #include <sys/types.h>
35 #include "safe-read.h"
38 /* The official name of this program (e.g., no `g' prefix). */
39 #define PROGRAM_NAME "csplit"
41 #define AUTHORS N_ ("Stuart Kemp and David MacKenzie")
48 /* Increment size of area for control records. */
51 /* The default prefix for output file names. */
52 #define DEFAULT_PREFIX "xx"
56 /* A compiled pattern arg. */
59 char *regexpr; /* Non-compiled regular expression. */
60 struct re_pattern_buffer re_compiled; /* Compiled regular expression. */
61 int offset; /* Offset from regexp to split at. */
62 uintmax_t lines_required; /* Number of lines required. */
63 uintmax_t repeat; /* Repeat count. */
64 int repeat_forever; /* Non-zero if `*' used as a repeat count. */
65 int argnum; /* ARGV index. */
66 boolean ignore; /* If true, produce no output (for regexp). */
69 /* Initial size of data area in buffers. */
70 #define START_SIZE 8191
72 /* Increment size for data area. */
73 #define INCR_SIZE 2048
75 /* Number of lines kept in each node in line list. */
79 /* Some small values to test the algorithms. */
80 # define START_SIZE 200
85 /* A string with a length count. */
92 /* Pointers to the beginnings of lines in the buffer area.
93 These structures are linked together if needed. */
96 unsigned used; /* Number of offsets used in this struct. */
97 unsigned insert_index; /* Next offset to use when inserting line. */
98 unsigned retrieve_index; /* Next index to use when retrieving line. */
99 struct cstring starts[CTRL_SIZE]; /* Lines in the data area. */
100 struct line *next; /* Next in linked list. */
103 /* The structure to hold the input lines.
104 Contains a pointer to the data area and a list containing
105 pointers to the individual lines. */
108 unsigned bytes_alloc; /* Size of the buffer area. */
109 unsigned bytes_used; /* Bytes used in the buffer area. */
110 unsigned start_line; /* First line number in this buffer. */
111 unsigned first_available; /* First line that can be retrieved. */
112 unsigned num_lines; /* Number of complete lines in this buffer. */
113 char *buffer; /* Data area. */
114 struct line *line_start; /* Head of list of pointers to lines. */
115 struct line *curr_line; /* The line start record currently in use. */
116 struct buffer_record *next;
119 static void close_output_file PARAMS ((void));
120 static void create_output_file PARAMS ((void));
121 static void delete_all_files PARAMS ((void));
122 static void save_line_to_file PARAMS ((const struct cstring *line));
123 void usage PARAMS ((int status));
125 /* The name this program was run with. */
128 /* Convert the number of 8-bit bytes of a binary representation to
129 the number of characters required to represent the same quantity
130 as an unsigned octal. For example, a 32-bit (4-byte) quantity may
131 require a field width as wide as 11 characters. */
132 static const unsigned int bytes_to_octal_digits[] =
133 {0, 3, 6, 8, 11, 14, 16, 19, 22, 25, 27, 30, 32, 35, 38, 41, 43};
135 /* Input file descriptor. */
136 static int input_desc = 0;
138 /* List of available buffers. */
139 static struct buffer_record *free_list = NULL;
141 /* Start of buffer list. */
142 static struct buffer_record *head = NULL;
144 /* Partially read line. */
145 static char *hold_area = NULL;
147 /* Number of chars in `hold_area'. */
148 static unsigned hold_count = 0;
150 /* Number of the last line in the buffers. */
151 static unsigned last_line_number = 0;
153 /* Number of the line currently being examined. */
154 static unsigned current_line = 0;
156 /* If TRUE, we have read EOF. */
157 static boolean have_read_eof = FALSE;
159 /* Name of output files. */
160 static char *filename_space = NULL;
162 /* Prefix part of output file names. */
163 static char *prefix = NULL;
165 /* Suffix part of output file names. */
166 static char *suffix = NULL;
168 /* Number of digits to use in output file names. */
169 static int digits = 2;
171 /* Number of files created so far. */
172 static unsigned int files_created = 0;
174 /* Number of bytes written to current file. */
175 static unsigned int bytes_written;
177 /* Output file pointer. */
178 static FILE *output_stream = NULL;
180 /* Output file name. */
181 static char *output_filename = NULL;
183 /* Perhaps it would be cleaner to pass arg values instead of indexes. */
184 static char **global_argv;
186 /* If TRUE, do not print the count of bytes in each output file. */
187 static boolean suppress_count;
189 /* If TRUE, remove output files on error. */
190 static boolean remove_files;
192 /* If TRUE, remove all output files which have a zero length. */
193 static boolean elide_empty_files;
195 /* The compiled pattern arguments, which determine how to split
197 static struct control *controls;
199 /* Number of elements in `controls'. */
200 static unsigned int control_used;
202 static struct option const longopts[] =
204 {"digits", required_argument, NULL, 'n'},
205 {"quiet", no_argument, NULL, 'q'},
206 {"silent", no_argument, NULL, 's'},
207 {"keep-files", no_argument, NULL, 'k'},
208 {"elide-empty-files", no_argument, NULL, 'z'},
209 {"prefix", required_argument, NULL, 'f'},
210 {"suffix-format", required_argument, NULL, 'b'},
211 {GETOPT_HELP_OPTION_DECL},
212 {GETOPT_VERSION_OPTION_DECL},
216 /* Optionally remove files created so far; then exit.
217 Called when an error detected. */
223 close_output_file ();
237 interrupt_handler (int sig)
240 struct sigaction sigact;
242 sigact.sa_handler = SIG_DFL;
243 sigemptyset (&sigact.sa_mask);
245 sigaction (sig, &sigact, NULL);
247 signal (sig, SIG_DFL);
250 kill (getpid (), sig);
253 /* Keep track of NUM chars of a partial line in buffer START.
254 These chars will be retrieved later when another large buffer is read.
255 It is not necessary to create a new buffer for these chars; instead,
256 we keep a pointer to the existing buffer. This buffer *is* on the
257 free list, and when the next buffer is obtained from this list
258 (even if it is this one), these chars will be placed at the
259 start of the new buffer. */
262 save_to_hold_area (char *start, unsigned int num)
268 /* Read up to MAX_N_BYTES chars from the input stream into DEST.
269 Return the number of chars read. */
272 read_input (char *dest, unsigned int max_n_bytes)
276 if (max_n_bytes == 0)
279 bytes_read = safe_read (input_desc, dest, max_n_bytes);
282 have_read_eof = TRUE;
286 error (0, errno, _("read error"));
293 /* Initialize existing line record P. */
296 clear_line_control (struct line *p)
300 p->retrieve_index = 0;
303 /* Initialize all line records in B. */
306 clear_all_line_control (struct buffer_record *b)
310 for (l = b->line_start; l; l = l->next)
311 clear_line_control (l);
314 /* Return a new, initialized line record. */
317 new_line_control (void)
321 p = (struct line *) xmalloc (sizeof (struct line));
324 clear_line_control (p);
329 /* Record LINE_START, which is the address of the start of a line
330 of length LINE_LEN in the large buffer, in the lines buffer of B. */
333 keep_new_line (struct buffer_record *b, char *line_start, int line_len)
337 /* If there is no existing area to keep line info, get some. */
338 if (b->line_start == NULL)
339 b->line_start = b->curr_line = new_line_control ();
341 /* If existing area for lines is full, get more. */
342 if (b->curr_line->used == CTRL_SIZE)
344 b->curr_line->next = new_line_control ();
345 b->curr_line = b->curr_line->next;
350 /* Record the start of the line, and update counters. */
351 l->starts[l->insert_index].str = line_start;
352 l->starts[l->insert_index].len = line_len;
357 /* Scan the buffer in B for newline characters
358 and record the line start locations and lengths in B.
359 Return the number of lines found in this buffer.
361 There may be an incomplete line at the end of the buffer;
362 a pointer is kept to this area, which will be used when
363 the next buffer is filled. */
366 record_line_starts (struct buffer_record *b)
368 char *line_start; /* Start of current line. */
369 char *line_end; /* End of each line found. */
370 unsigned int bytes_left; /* Length of incomplete last line. */
371 unsigned int lines; /* Number of lines found. */
372 unsigned int line_length; /* Length of each line found. */
374 if (b->bytes_used == 0)
378 line_start = b->buffer;
379 bytes_left = b->bytes_used;
383 line_end = memchr (line_start, '\n', bytes_left);
384 if (line_end == NULL)
386 line_length = line_end - line_start + 1;
387 keep_new_line (b, line_start, line_length);
388 bytes_left -= line_length;
389 line_start = line_end + 1;
393 /* Check for an incomplete last line. */
398 keep_new_line (b, line_start, bytes_left);
402 save_to_hold_area (line_start, bytes_left);
405 b->num_lines = lines;
406 b->first_available = b->start_line = last_line_number + 1;
407 last_line_number += lines;
412 /* Return a new buffer with room to store SIZE bytes, plus
413 an extra byte for safety. */
415 static struct buffer_record *
416 create_new_buffer (unsigned int size)
418 struct buffer_record *new_buffer;
420 new_buffer = (struct buffer_record *)
421 xmalloc (sizeof (struct buffer_record));
423 new_buffer->buffer = (char *) xmalloc (size + 1);
425 new_buffer->bytes_alloc = size;
426 new_buffer->line_start = new_buffer->curr_line = NULL;
431 /* Return a new buffer of at least MIN_SIZE bytes. If a buffer of at
432 least that size is currently free, use it, otherwise create a new one. */
434 static struct buffer_record *
435 get_new_buffer (unsigned int min_size)
437 struct buffer_record *p, *q;
438 struct buffer_record *new_buffer; /* Buffer to return. */
439 unsigned int alloc_size; /* Actual size that will be requested. */
441 alloc_size = START_SIZE;
442 while (min_size > alloc_size)
443 alloc_size += INCR_SIZE;
445 if (free_list == NULL)
446 new_buffer = create_new_buffer (alloc_size);
449 /* Use first-fit to find a buffer. */
450 p = new_buffer = NULL;
455 if (q->bytes_alloc >= min_size)
468 new_buffer = (q ? q : create_new_buffer (alloc_size));
470 new_buffer->curr_line = new_buffer->line_start;
471 clear_all_line_control (new_buffer);
474 new_buffer->num_lines = 0;
475 new_buffer->bytes_used = 0;
476 new_buffer->start_line = new_buffer->first_available = last_line_number + 1;
477 new_buffer->next = NULL;
482 /* Add buffer BUF to the list of free buffers. */
485 free_buffer (struct buffer_record *buf)
487 buf->next = free_list;
491 /* Append buffer BUF to the linked list of buffers that contain
492 some data yet to be processed. */
495 save_buffer (struct buffer_record *buf)
497 struct buffer_record *p;
500 buf->curr_line = buf->line_start;
506 for (p = head; p->next; p = p->next)
512 /* Fill a buffer of input.
514 Set the initial size of the buffer to a default.
515 Fill the buffer (from the hold area and input stream)
516 and find the individual lines.
517 If no lines are found (the buffer is too small to hold the next line),
518 release the current buffer (whose contents would have been put in the
519 hold area) and repeat the process with another large buffer until at least
520 one entire line has been read.
522 Return TRUE if at least one line was found in the input just read,
523 otherwise FALSE (in which case end-of-file must have been encountered). */
528 struct buffer_record *b;
529 unsigned int bytes_wanted = START_SIZE; /* Minimum buffer size. */
530 unsigned int bytes_avail; /* Size of new buffer created. */
531 unsigned int lines_found; /* Number of lines in this new buffer. */
532 char *p; /* Place to load into buffer. */
537 /* We must make the buffer at least as large as the amount of data
538 in the partial line left over from the last call. */
539 if (bytes_wanted < hold_count)
540 bytes_wanted = hold_count;
544 b = get_new_buffer (bytes_wanted);
545 bytes_avail = b->bytes_alloc; /* Size of buffer returned. */
548 /* First check the `holding' area for a partial line. */
552 memcpy (p, hold_area, hold_count);
554 b->bytes_used += hold_count;
555 bytes_avail -= hold_count;
559 b->bytes_used += (unsigned int) read_input (p, bytes_avail);
561 lines_found = record_line_starts (b);
562 bytes_wanted = b->bytes_alloc * 2;
566 while (!lines_found && !have_read_eof);
571 return lines_found != 0;
574 /* Return the line number of the first line that has not yet been retrieved. */
577 get_first_line_in_buffer (void)
579 if (head == NULL && !load_buffer ())
580 error (EXIT_FAILURE, errno, _("input disappeared"));
582 return head->first_available;
585 /* Return a pointer to the logical first line in the buffer and make the
586 next line the logical first line.
587 Return NULL if there is no more input. */
589 static struct cstring *
592 struct cstring *line; /* Return value. */
593 struct line *l; /* For convenience. */
595 if (head == NULL && !load_buffer ())
598 if (current_line < head->first_available)
599 current_line = head->first_available;
601 ++(head->first_available);
605 line = &l->starts[l->retrieve_index];
607 /* Advance index to next line. */
608 if (++l->retrieve_index == l->used)
610 /* Go on to the next line record. */
611 head->curr_line = l->next;
612 if (head->curr_line == NULL || head->curr_line->used == 0)
614 /* Go on to the next data block. */
615 struct buffer_record *b = head;
624 /* Search the buffers for line LINENUM, reading more input if necessary.
625 Return a pointer to the line, or NULL if it is not found in the file. */
627 static struct cstring *
628 find_line (unsigned int linenum)
630 struct buffer_record *b;
632 if (head == NULL && !load_buffer ())
635 if (linenum < head->start_line)
640 if (linenum < b->start_line + b->num_lines)
642 /* The line is in this buffer. */
644 unsigned int offset; /* How far into the buffer the line is. */
647 offset = linenum - b->start_line;
648 /* Find the control record. */
649 while (offset >= CTRL_SIZE)
654 return &l->starts[offset];
656 if (b->next == NULL && !load_buffer ())
658 b = b->next; /* Try the next data block. */
662 /* Return TRUE if no more lines are available for input. */
667 return (find_line (current_line + 1) == NULL) ? TRUE : FALSE;
670 /* Set the name of the input file to NAME and open it. */
673 set_input_file (const char *name)
675 if (STREQ (name, "-"))
679 input_desc = open (name, O_RDONLY);
681 error (EXIT_FAILURE, errno, "%s", name);
685 /* Write all lines from the beginning of the buffer up to, but
686 not including, line LAST_LINE, to the current output file.
687 If IGNORE is TRUE, do not output lines selected here.
688 ARGNUM is the index in ARGV of the current pattern. */
691 write_to_file (unsigned int last_line, boolean ignore, int argnum)
693 struct cstring *line;
694 unsigned int first_line; /* First available input line. */
695 unsigned int lines; /* Number of lines to output. */
698 first_line = get_first_line_in_buffer ();
700 if (first_line > last_line)
702 error (0, 0, _("%s: line number out of range"), global_argv[argnum]);
706 lines = last_line - first_line;
708 for (i = 0; i < lines; i++)
710 line = remove_line ();
713 error (0, 0, _("%s: line number out of range"), global_argv[argnum]);
717 save_line_to_file (line);
721 /* Output any lines left after all regexps have been processed. */
724 dump_rest_of_file (void)
726 struct cstring *line;
728 while ((line = remove_line ()) != NULL)
729 save_line_to_file (line);
732 /* Handle an attempt to read beyond EOF under the control of record P,
733 on iteration REPETITION if nonzero. */
736 handle_line_error (const struct control *p, int repetition)
738 char buf[LONGEST_HUMAN_READABLE + 1];
740 fprintf (stderr, _("%s: `%s': line number out of range"),
741 program_name, human_readable (p->lines_required, buf, 1, 1));
743 fprintf (stderr, _(" on repetition %d\n"), repetition);
745 fprintf (stderr, "\n");
750 /* Determine the line number that marks the end of this file,
751 then get those lines and save them to the output file.
752 P is the control record.
753 REPETITION is the repetition number. */
756 process_line_count (const struct control *p, int repetition)
758 unsigned int linenum;
759 uintmax_t last_line_to_save = p->lines_required * (repetition + 1);
760 struct cstring *line;
762 create_output_file ();
764 linenum = get_first_line_in_buffer ();
766 while (linenum++ < last_line_to_save)
768 line = remove_line ();
770 handle_line_error (p, repetition);
771 save_line_to_file (line);
774 close_output_file ();
776 /* Ensure that the line number specified is not 1 greater than
777 the number of lines in the file. */
778 if (no_more_lines ())
779 handle_line_error (p, repetition);
783 regexp_error (struct control *p, int repetition, boolean ignore)
785 fprintf (stderr, _("%s: `%s': match not found"),
786 program_name, global_argv[p->argnum]);
789 fprintf (stderr, _(" on repetition %d\n"), repetition);
791 fprintf (stderr, "\n");
795 dump_rest_of_file ();
796 close_output_file ();
801 /* Read the input until a line matches the regexp in P, outputting
802 it unless P->IGNORE is TRUE.
803 REPETITION is this repeat-count; 0 means the first time. */
806 process_regexp (struct control *p, int repetition)
808 struct cstring *line; /* From input file. */
809 unsigned int line_len; /* To make "$" in regexps work. */
810 unsigned int break_line; /* First line number of next file. */
811 boolean ignore = p->ignore; /* If TRUE, skip this section. */
815 create_output_file ();
817 /* If there is no offset for the regular expression, or
818 it is positive, then it is not necessary to buffer the lines. */
824 line = find_line (++current_line);
827 if (p->repeat_forever)
831 dump_rest_of_file ();
832 close_output_file ();
837 regexp_error (p, repetition, ignore);
839 line_len = line->len;
840 if (line->str[line_len - 1] == '\n')
842 ret = re_search (&p->re_compiled, line->str, line_len,
843 0, line_len, (struct re_registers *) 0);
846 error (0, 0, _("error in regular expression search"));
851 line = remove_line ();
853 save_line_to_file (line);
861 /* Buffer the lines. */
864 line = find_line (++current_line);
867 if (p->repeat_forever)
871 dump_rest_of_file ();
872 close_output_file ();
877 regexp_error (p, repetition, ignore);
879 line_len = line->len;
880 if (line->str[line_len - 1] == '\n')
882 ret = re_search (&p->re_compiled, line->str, line_len,
883 0, line_len, (struct re_registers *) 0);
886 error (0, 0, _("error in regular expression search"));
894 /* Account for any offset from this regexp. */
895 break_line = current_line + p->offset;
897 write_to_file (break_line, ignore, p->argnum);
900 close_output_file ();
903 current_line = break_line;
906 /* Split the input file according to the control records we have built. */
913 for (i = 0; i < control_used; i++)
915 if (controls[i].regexpr)
917 for (j = 0; (controls[i].repeat_forever
918 || j <= controls[i].repeat); j++)
919 process_regexp (&controls[i], j);
923 for (j = 0; (controls[i].repeat_forever
924 || j <= controls[i].repeat); j++)
925 process_line_count (&controls[i], j);
929 create_output_file ();
930 dump_rest_of_file ();
931 close_output_file ();
934 /* Return the name of output file number NUM. */
937 make_filename (unsigned int num)
939 strcpy (filename_space, prefix);
941 sprintf (filename_space+strlen(prefix), suffix, num);
943 sprintf (filename_space+strlen(prefix), "%0*d", digits, num);
944 return filename_space;
947 /* Create the next output file. */
950 create_output_file (void)
952 output_filename = make_filename (files_created);
953 output_stream = fopen (output_filename, "w");
954 if (output_stream == NULL)
956 error (0, errno, "%s", output_filename);
963 /* Delete all the files we have created. */
966 delete_all_files (void)
971 for (i = 0; i < files_created; i++)
973 name = make_filename (i);
975 error (0, errno, "%s", name);
979 /* Close the current output file and print the count
980 of characters in this file. */
983 close_output_file (void)
987 if (ferror (output_stream) || fclose (output_stream) == EOF)
989 error (0, errno, _("write error for `%s'"), output_filename);
990 output_stream = NULL;
993 if (bytes_written == 0 && elide_empty_files)
995 if (unlink (output_filename))
996 error (0, errno, "%s", output_filename);
1001 /* FIXME: if we write to stdout here, we have to close stdout
1002 and check for errors. */
1003 if (!suppress_count)
1004 fprintf (stdout, "%d\n", bytes_written);
1006 output_stream = NULL;
1010 /* Save line LINE to the output file and
1011 increment the character count for the current file. */
1014 save_line_to_file (const struct cstring *line)
1016 fwrite (line->str, sizeof (char), line->len, output_stream);
1017 bytes_written += line->len;
1020 /* Return a new, initialized control record. */
1022 static struct control *
1023 new_control_record (void)
1025 static unsigned control_allocated = 0; /* Total space allocated. */
1028 if (control_allocated == 0)
1030 control_allocated = ALLOC_SIZE;
1031 controls = (struct control *)
1032 xmalloc (sizeof (struct control) * control_allocated);
1034 else if (control_used == control_allocated)
1036 control_allocated += ALLOC_SIZE;
1037 controls = (struct control *)
1038 xrealloc ((char *) controls,
1039 sizeof (struct control) * control_allocated);
1041 p = &controls[control_used++];
1044 p->repeat_forever = 0;
1045 p->lines_required = 0;
1050 /* Check if there is a numeric offset after a regular expression.
1051 STR is the entire command line argument.
1052 P is the control record for this regular expression.
1053 NUM is the numeric part of STR. */
1056 check_for_offset (struct control *p, const char *str, const char *num)
1060 if (*num != '-' && *num != '+')
1061 error (EXIT_FAILURE, 0, _("%s: `+' or `-' expected after delimeter"), str);
1063 if (xstrtoul (num + 1, NULL, 10, &val, "") != LONGINT_OK
1065 error (EXIT_FAILURE, 0, _("%s: integer expected after `%c'"), str, *num);
1066 p->offset = (unsigned int) val;
1069 p->offset = -p->offset;
1072 /* Given that the first character of command line arg STR is '{',
1073 make sure that the rest of the string is a valid repeat count
1074 and store its value in P.
1075 ARGNUM is the ARGV index of STR. */
1078 parse_repeat_count (int argnum, struct control *p, char *str)
1083 end = str + strlen (str) - 1;
1085 error (EXIT_FAILURE, 0, _("%s: `}' is required in repeat count"), str);
1088 if (str+1 == end-1 && *(str+1) == '*')
1089 p->repeat_forever = 1;
1092 if (xstrtoumax (str + 1, NULL, 10, &val, "") != LONGINT_OK)
1094 error (EXIT_FAILURE, 0,
1095 _("%s}: integer required between `{' and `}'"),
1096 global_argv[argnum]);
1104 /* Extract the regular expression from STR and check for a numeric offset.
1105 STR should start with the regexp delimiter character.
1106 Return a new control record for the regular expression.
1107 ARGNUM is the ARGV index of STR.
1108 Unless IGNORE is TRUE, mark these lines for output. */
1110 static struct control *
1111 extract_regexp (int argnum, boolean ignore, char *str)
1113 int len; /* Number of chars in this regexp. */
1115 char *closing_delim;
1119 closing_delim = strrchr (str + 1, delim);
1120 if (closing_delim == NULL)
1121 error (EXIT_FAILURE, 0,
1122 _("%s: closing delimeter `%c' missing"), str, delim);
1124 len = closing_delim - str - 1;
1125 p = new_control_record ();
1129 p->regexpr = (char *) xmalloc ((unsigned) (len + 1));
1130 strncpy (p->regexpr, str + 1, len);
1131 p->re_compiled.allocated = len * 2;
1132 p->re_compiled.buffer = (unsigned char *) xmalloc (p->re_compiled.allocated);
1133 p->re_compiled.fastmap = xmalloc (256);
1134 p->re_compiled.translate = 0;
1135 err = re_compile_pattern (p->regexpr, len, &p->re_compiled);
1138 error (0, 0, _("%s: invalid regular expression: %s"), str, err);
1142 if (closing_delim[1])
1143 check_for_offset (p, str, closing_delim + 1);
1148 /* Extract the break patterns from args START through ARGC - 1 of ARGV.
1149 After each pattern, check if the next argument is a repeat count. */
1152 parse_patterns (int argc, int start, char **argv)
1154 int i; /* Index into ARGV. */
1155 struct control *p; /* New control record created. */
1157 static uintmax_t last_val = 0;
1159 for (i = start; i < argc; i++)
1161 if (*argv[i] == '/' || *argv[i] == '%')
1163 p = extract_regexp (i, *argv[i] == '%', argv[i]);
1167 p = new_control_record ();
1170 if (xstrtoumax (argv[i], NULL, 10, &val, "") != LONGINT_OK)
1171 error (EXIT_FAILURE, 0, _("%s: invalid pattern"), argv[i]);
1173 error (EXIT_FAILURE, 0,
1174 _("%s: line number must be greater than zero"),
1178 char buf[LONGEST_HUMAN_READABLE + 1];
1179 error (EXIT_FAILURE, 0,
1180 _("line number `%s' is smaller than preceding line number, %s"),
1181 argv[i], human_readable (last_val, buf, 1, 1));
1184 if (val == last_val)
1186 _("warning: line number `%s' is the same as preceding line number"),
1191 p->lines_required = val;
1194 if (i + 1 < argc && *argv[i + 1] == '{')
1196 /* We have a repeat count. */
1198 parse_repeat_count (i, p, argv[i]);
1204 get_format_flags (char **format_ptr)
1208 for (; **format_ptr; (*format_ptr)++)
1210 switch (**format_ptr)
1221 count += 2; /* Allow for 0x prefix preceding an `x' conversion. */
1232 get_format_width (char **format_ptr)
1238 start = *format_ptr;
1239 for (; ISDIGIT (**format_ptr); (*format_ptr)++)
1242 ch_save = **format_ptr;
1243 **format_ptr = '\0';
1244 /* In the case where no minimum field width is explicitly specified,
1245 allow for enough octal digits to represent the value of LONG_MAX. */
1246 count = ((*format_ptr == start)
1247 ? bytes_to_octal_digits[sizeof (long)]
1248 /* FIXME: don't use atoi, it may silently overflow.
1249 Besides, we know the result is non-negative, so shouldn't
1251 : (unsigned) atoi (start));
1252 **format_ptr = ch_save;
1257 get_format_prec (char **format_ptr)
1264 if (**format_ptr != '.')
1268 if (**format_ptr == '-' || **format_ptr == '+')
1270 is_negative = (**format_ptr == '-');
1278 start = *format_ptr;
1279 for (; ISDIGIT (**format_ptr); (*format_ptr)++)
1282 /* ANSI 4.9.6.1 says that if the precision is negative, it's as good as
1285 start = *format_ptr;
1287 ch_save = **format_ptr;
1288 **format_ptr = '\0';
1289 count = (*format_ptr == start) ? 11 : atoi (start);
1290 **format_ptr = ch_save;
1296 get_format_conv_type (char **format_ptr)
1298 int ch = *((*format_ptr)++);
1311 error (EXIT_FAILURE, 0, _("missing conversion specifier in suffix"));
1316 error (EXIT_FAILURE, 0,
1317 _("invalid conversion specifier in suffix: %c"), ch);
1319 error (EXIT_FAILURE, 0,
1320 _("invalid conversion specifier in suffix: \\%.3o"), ch);
1325 max_out (char *format)
1327 unsigned out_count = 0;
1328 unsigned percents = 0;
1339 out_count += get_format_flags (&format);
1341 int width = get_format_width (&format);
1342 int prec = get_format_prec (&format);
1344 out_count += MAX (width, prec);
1346 get_format_conv_type (&format);
1351 error (EXIT_FAILURE, 0,
1352 _("missing %% conversion specification in suffix"));
1353 else if (percents > 1)
1354 error (EXIT_FAILURE, 0,
1355 _("too many %% conversion specifications in suffix"));
1361 main (int argc, char **argv)
1366 struct sigaction oldact, newact;
1369 program_name = argv[0];
1370 setlocale (LC_ALL, "");
1371 bindtextdomain (PACKAGE, LOCALEDIR);
1372 textdomain (PACKAGE);
1374 atexit (close_stdout);
1379 suppress_count = FALSE;
1380 remove_files = TRUE;
1381 prefix = DEFAULT_PREFIX;
1383 /* Change the way xmalloc and xrealloc fail. */
1384 xalloc_fail_func = cleanup;
1387 newact.sa_handler = interrupt_handler;
1388 sigemptyset (&newact.sa_mask);
1389 newact.sa_flags = 0;
1391 sigaction (SIGHUP, NULL, &oldact);
1392 if (oldact.sa_handler != SIG_IGN)
1393 sigaction (SIGHUP, &newact, NULL);
1395 sigaction (SIGINT, NULL, &oldact);
1396 if (oldact.sa_handler != SIG_IGN)
1397 sigaction (SIGINT, &newact, NULL);
1399 sigaction (SIGQUIT, NULL, &oldact);
1400 if (oldact.sa_handler != SIG_IGN)
1401 sigaction (SIGQUIT, &newact, NULL);
1403 sigaction (SIGTERM, NULL, &oldact);
1404 if (oldact.sa_handler != SIG_IGN)
1405 sigaction (SIGTERM, &newact, NULL);
1407 if (signal (SIGHUP, SIG_IGN) != SIG_IGN)
1408 signal (SIGHUP, interrupt_handler);
1409 if (signal (SIGINT, SIG_IGN) != SIG_IGN)
1410 signal (SIGINT, interrupt_handler);
1411 if (signal (SIGQUIT, SIG_IGN) != SIG_IGN)
1412 signal (SIGQUIT, interrupt_handler);
1413 if (signal (SIGTERM, SIG_IGN) != SIG_IGN)
1414 signal (SIGTERM, interrupt_handler);
1417 while ((optc = getopt_long (argc, argv, "f:b:kn:sqz", longopts, NULL)) != -1)
1432 remove_files = FALSE;
1436 if (xstrtoul (optarg, NULL, 10, &val, "") != LONGINT_OK
1438 error (EXIT_FAILURE, 0, _("%s: invalid number"), optarg);
1444 suppress_count = TRUE;
1448 elide_empty_files = TRUE;
1451 case_GETOPT_HELP_CHAR;
1453 case_GETOPT_VERSION_CHAR (PROGRAM_NAME, AUTHORS);
1456 usage (EXIT_FAILURE);
1459 if (argc - optind < 2)
1461 error (0, 0, _("too few arguments"));
1462 usage (EXIT_FAILURE);
1466 filename_space = (char *) xmalloc (strlen (prefix) + max_out (suffix) + 2);
1468 filename_space = (char *) xmalloc (strlen (prefix) + digits + 2);
1470 set_input_file (argv[optind++]);
1472 parse_patterns (argc, optind, argv);
1476 if (close (input_desc) < 0)
1478 error (0, errno, _("read error"));
1482 exit (EXIT_SUCCESS);
1489 fprintf (stderr, _("Try `%s --help' for more information.\n"),
1494 Usage: %s [OPTION]... FILE PATTERN...\n\
1498 Output pieces of FILE separated by PATTERN(s) to files `xx01', `xx02', ...,\n\
1499 and output byte counts of each piece to standard output.\n\
1503 Mandatory arguments to long options are mandatory for short options too.\n\
1506 -b, --suffix-format=FORMAT use sprintf FORMAT instead of %d\n\
1507 -f, --prefix=PREFIX use PREFIX instead of `xx'\n\
1508 -k, --keep-files do not remove output files on errors\n\
1511 -n, --digits=DIGITS use specified number of digits instead of 2\n\
1512 -s, --quiet, --silent do not print counts of output file sizes\n\
1513 -z, --elide-empty-files remove empty output files\n\
1515 fputs (HELP_OPTION_DESCRIPTION, stdout);
1516 fputs (VERSION_OPTION_DESCRIPTION, stdout);
1519 Read standard input if FILE is -. Each PATTERN may be:\n\
1523 INTEGER copy up to but not including specified line number\n\
1524 /REGEXP/[OFFSET] copy up to but not including a matching line\n\
1525 %REGEXP%[OFFSET] skip to, but not including a matching line\n\
1526 {INTEGER} repeat the previous pattern specified number of times\n\
1527 {*} repeat the previous pattern as many times as possible\n\
1529 A line OFFSET is a required `+' or `-' followed by a positive integer.\n\
1531 printf (_("\nReport bugs to <%s>.\n"), PACKAGE_BUGREPORT);
1533 exit (status == 0 ? EXIT_SUCCESS : EXIT_FAILURE);