Bump to version 1.22.1
[platform/upstream/busybox.git] / editors / sed.c
1 /* vi: set sw=4 ts=4: */
2 /*
3  * sed.c - very minimalist version of sed
4  *
5  * Copyright (C) 1999,2000,2001 by Lineo, inc. and Mark Whitley
6  * Copyright (C) 1999,2000,2001 by Mark Whitley <markw@codepoet.org>
7  * Copyright (C) 2002  Matt Kraai
8  * Copyright (C) 2003 by Glenn McGrath
9  * Copyright (C) 2003,2004 by Rob Landley <rob@landley.net>
10  *
11  * MAINTAINER: Rob Landley <rob@landley.net>
12  *
13  * Licensed under GPLv2, see file LICENSE in this source tree.
14  */
15
16 /* Code overview.
17  *
18  * Files are laid out to avoid unnecessary function declarations.  So for
19  * example, every function add_cmd calls occurs before add_cmd in this file.
20  *
21  * add_cmd() is called on each line of sed command text (from a file or from
22  * the command line).  It calls get_address() and parse_cmd_args().  The
23  * resulting sed_cmd_t structures are appended to a linked list
24  * (G.sed_cmd_head/G.sed_cmd_tail).
25  *
26  * process_files() does actual sedding, reading data lines from each input FILE*
27  * (which could be stdin) and applying the sed command list (sed_cmd_head) to
28  * each of the resulting lines.
29  *
30  * sed_main() is where external code calls into this, with a command line.
31  */
32
33 /* Supported features and commands in this version of sed:
34  *
35  * - comments ('#')
36  * - address matching: num|/matchstr/[,num|/matchstr/|$]command
37  * - commands: (p)rint, (d)elete, (s)ubstitute (with g & I flags)
38  * - edit commands: (a)ppend, (i)nsert, (c)hange
39  * - file commands: (r)ead
40  * - backreferences in substitution expressions (\0, \1, \2...\9)
41  * - grouped commands: {cmd1;cmd2}
42  * - transliteration (y/source-chars/dest-chars/)
43  * - pattern space hold space storing / swapping (g, h, x)
44  * - labels / branching (: label, b, t, T)
45  *
46  * (Note: Specifying an address (range) to match is *optional*; commands
47  * default to the whole pattern space if no specific address match was
48  * requested.)
49  *
50  * Todo:
51  * - Create a wrapper around regex to make libc's regex conform with sed
52  *
53  * Reference
54  * http://www.opengroup.org/onlinepubs/007904975/utilities/sed.html
55  * http://pubs.opengroup.org/onlinepubs/9699919799/utilities/sed.html
56  */
57
58 //config:config SED
59 //config:       bool "sed"
60 //config:       default y
61 //config:       help
62 //config:         sed is used to perform text transformations on a file
63 //config:         or input from a pipeline.
64
65 //kbuild:lib-$(CONFIG_SED) += sed.o
66
67 //applet:IF_SED(APPLET(sed, BB_DIR_BIN, BB_SUID_DROP))
68
69 //usage:#define sed_trivial_usage
70 //usage:       "[-inrE] [-f FILE]... [-e CMD]... [FILE]...\n"
71 //usage:       "or: sed [-inrE] CMD [FILE]..."
72 //usage:#define sed_full_usage "\n\n"
73 //usage:       "        -e CMD  Add CMD to sed commands to be executed"
74 //usage:     "\n        -f FILE Add FILE contents to sed commands to be executed"
75 //usage:     "\n        -i[SFX] Edit files in-place (otherwise sends to stdout)"
76 //usage:     "\n                Optionally back files up, appending SFX"
77 //usage:     "\n        -n      Suppress automatic printing of pattern space"
78 //usage:     "\n        -r,-E   Use extended regex syntax"
79 //usage:     "\n"
80 //usage:     "\nIf no -e or -f, the first non-option argument is the sed command string."
81 //usage:     "\nRemaining arguments are input files (stdin if none)."
82 //usage:
83 //usage:#define sed_example_usage
84 //usage:       "$ echo \"foo\" | sed -e 's/f[a-zA-Z]o/bar/g'\n"
85 //usage:       "bar\n"
86
87 #include "libbb.h"
88 #include "xregex.h"
89
90 #if 0 /* debug tracing switch: change to 1 to make dbg() print via bb_error_msg() */
91 # define dbg(...) bb_error_msg(__VA_ARGS__)
92 #else
93 # define dbg(...) ((void)0) /* tracing off: dbg() expands to a no-op expression */
94 #endif
95
96
97 enum {
98         OPT_in_place = 1 << 0, /* option bit 0; NOTE(review): presumably the -i flag from the usage text - confirm in sed_main */
99 };
100
101 /* Each sed command turns into one of these structures; add_cmd() allocates them zero-filled and chains them through 'next'. */
102 typedef struct sed_cmd_s {
103         /* Ordered by alignment requirements: currently 36 bytes on x86 */
104         struct sed_cmd_s *next; /* Next command (linked list, NULL terminated) */
105
106         /* address storage */
107         regex_t *beg_match;     /* sed -e '/match/cmd' */
108         regex_t *end_match;     /* sed -e '/match/,/end_match/cmd' */
109         regex_t *sub_match;     /* For 's/sub_match/string/'; NULL when the pattern is empty (reuse previous regex) */
110         int beg_line;           /* 'sed 1p'   0 == apply commands to all lines. -1 = last line ($) */
111         int beg_line_orig;      /* copy of the above, needed for -i */
112         int end_line;           /* 'sed 1,3p' 0 == one line only. -1 = last line ($) */
113
114         FILE *sw_file;          /* File (sw) command writes to, NULL for none (struct is zero-filled; cleanup tests for non-NULL). */
115         char *string;           /* Data string for (s a i c r w y : b t T) commands. */
116
117         unsigned which_match;   /* (s) Which match to replace (0 for all) */
118
119         /* Bitfields (gcc won't group them if we don't) */
120         unsigned invert:1;      /* the '!' after the address */
121         unsigned in_match:1;    /* Next line also included in match? */
122         unsigned sub_p:1;       /* (s) print option */
123
124         char sw_last_char;      /* Last line written by (sw) had no '\n' */
125
126         /* GENERAL FIELDS */
127         char cmd;               /* The command char: abcdDgGhHilnNpPqrstTwxy:={} */
128 } sed_cmd_t;
129
130 static const char semicolon_whitespace[] ALIGN1 = "; \n\r\t\v"; /* bytes add_cmd() skips between commands; any of them also ends a branch label */
131
132 struct globals {
133         /* options */
134         int be_quiet, regex_type; /* be_quiet: bumped by a "#n" script comment; regex_type: cflags handed to xregcomp() */
135
136         FILE *nonstdout;
137         char *outname, *hold_space; /* outname: file unlinked by cleanup_outname() when set; hold_space: freed at cleanup */
138         smallint exitcode;
139
140         /* list of input files */
141         int current_input_file, last_input_file;
142         char **input_file_list;
143         FILE *current_fp;
144
145         regmatch_t regmatch[10]; /* regexec() results: [0] is the whole match, [1..9] feed \1..\9 in replacements */
146         regex_t *previous_regex_ptr; /* regex most recently used by an s command; reused when an s pattern is empty */
147
148         /* linked list of sed commands */
149         sed_cmd_t *sed_cmd_head, **sed_cmd_tail; /* tail points at the pointer slot the next command is stored into */
150
151         /* linked list of append lines */
152         llist_t *append_head;
153
154         char *add_cmd_line; /* script text held by add_cmd() while a line ends in an unescaped backslash */
155
156         struct pipeline { /* growable output buffer filled through pipe_putc() */
157                 char *buf;  /* Space to hold string */
158                 int idx;    /* Space used */
159                 int len;    /* Space allocated */
160         } pipeline;
161 } FIX_ALIASING;
162 #define G (*(struct globals*)&bb_common_bufsiz1) /* the globals are overlaid on bb_common_bufsiz1 */
163 struct BUG_G_too_big { /* compile-time check: the array size goes negative if G outgrows COMMON_BUFSIZE */
164         char BUG_G_too_big[sizeof(G) <= COMMON_BUFSIZE ? 1 : -1];
165 };
166 #define INIT_G() do { \
167         G.sed_cmd_tail = &G.sed_cmd_head; \
168 } while (0) /* INIT_G: start with an empty command list, tail pointing at head */
169
170
#if ENABLE_FEATURE_CLEAN_UP
/*
 * Release everything sed still owns: the pending append lines, every
 * parsed command (its output file, its compiled regexes, its data
 * string and the node itself), the hold space, and the input stream
 * that is currently open.
 */
static void sed_free_and_close_stuff(void)
{
	sed_cmd_t *cur;
	sed_cmd_t *following;

	llist_free(G.append_head, free);

	for (cur = G.sed_cmd_head; cur != NULL; cur = following) {
		regex_t *compiled[3];
		unsigned k;

		/* Remember the successor before the node is freed */
		following = cur->next;

		if (cur->sw_file != NULL)
			xprint_and_close_file(cur->sw_file);

		compiled[0] = cur->beg_match;
		compiled[1] = cur->end_match;
		compiled[2] = cur->sub_match;
		for (k = 0; k < 3; k++) {
			if (compiled[k] == NULL)
				continue;
			regfree(compiled[k]);
			free(compiled[k]);
		}

		free(cur->string);
		free(cur);
	}

	free(G.hold_space);

	if (G.current_fp != NULL)
		fclose(G.current_fp);
}
#else
/* Cleanup support is compiled out: only a prototype is provided here. */
void sed_free_and_close_stuff(void);
#endif
209
210 /* If something bad happens during -i operation, delete temp file */
211
212 static void cleanup_outname(void)
213 {
214         if (G.outname) unlink(G.outname);
215 }
216
/*
 * Copy the first 'len' bytes of 'string' into 'dest', rewriting backslash
 * escapes on the way, and NUL-terminate 'dest'.
 *
 *  - 'to' != NUL: each "\<from>" pair is replaced by the single char 'to';
 *    every other "\x" pair is copied through unchanged (both bytes).
 *  - 'to' == NUL: each "\x" pair is replaced by plain 'x' ('from' is unused).
 *
 * The output is never longer than the input, so 'dest' may be the same
 * buffer as 'string'; otherwise it must have room for len + 1 bytes.
 *
 * A backslash sitting in the very last position (string[len-1]) has no
 * partner inside the range.  It is never paired with string[len]: it is
 * kept as a literal backslash when 'to' != NUL and dropped when 'to' == NUL.
 * This keeps both reads and writes within 'len' (+1 for the terminator).
 */
static void parse_escapes(char *dest, const char *string, int len, char from, char to)
{
	int i = 0;

	while (i < len) {
		if (string[i] == '\\') {
			if (i + 1 >= len) {
				/* Lone trailing backslash: do not look at string[len] */
				if (to)
					*dest++ = '\\';
				break;
			}
			if (!to || string[i+1] == from) {
				*dest++ = to ? to : string[i+1];
				i += 2;
				continue;
			}
			/* Some other escape: keep the backslash, then its partner below */
			*dest++ = string[i++];
		}
		*dest++ = string[i++];
	}
	*dest = '\0';
}
237
/*
 * Return a freshly allocated copy of the first 'len' bytes of 'string'
 * in which the two-character escapes \n, \t and \r have been turned into
 * the control characters they name.  Other escapes are left untouched.
 * The caller owns (and frees) the result.
 */
static char *copy_parsing_escapes(const char *string, int len)
{
	char *result = xmalloc(len + 1);

	/* sed recognizes \n; the first pass also does the actual copying */
	parse_escapes(result, string, len, 'n', '\n');
	/* GNU sed also recognizes \t and \r: rewrite the copy in place */
	parse_escapes(result, result, strlen(result), 't', '\t');
	parse_escapes(result, result, strlen(result), 'r', '\r');

	return result;
}
252
253
/*
 * index_of_next_unescaped_regexp_delim - walks left to right through str
 * and returns the index of the first regular expression delimiter
 * (typically a forward slash ('/')) that is neither preceded by a
 * backslash ('\') nor located inside a [bracket expression].
 *
 * Passing the delimiter negated disables the square bracket tracking
 * (used for the replacement part of s/// and y///).
 *
 * Inside a bracket expression a backslash is not an escape, and a ']'
 * placed directly after '[' or after "[^" is a literal member of the set
 * rather than its end.
 *
 * The delimiter is a byte value in 0..255, so each string byte is compared
 * as unsigned char; a plain (possibly signed) char would never match a
 * delimiter byte >= 0x80.
 *
 * Does not return if no delimiter is found: it dies with "unmatched".
 */
static int index_of_next_unescaped_regexp_delim(int delimiter, const char *str)
{
	int bracket = -1;   /* -1: outside brackets, -2: tracking disabled, >= 0: index of '[' */
	int escaped = 0;
	int idx = 0;
	char ch;

	if (delimiter < 0) {
		bracket--;
		delimiter = -delimiter;
	}

	for (; (ch = str[idx]) != '\0'; idx++) {
		if (bracket >= 0) {
			if (ch == ']'
			 && !(bracket == idx - 1 || (bracket == idx - 2 && str[idx - 1] == '^'))
			) {
				bracket = -1;
			}
		} else if (escaped)
			escaped = 0;
		else if (ch == '\\')
			escaped = 1;
		else if (bracket == -1 && ch == '[')
			bracket = idx;
		else if ((unsigned char)ch == delimiter)
			return idx;
	}

	/* if we make it to here, we've hit the end of the string */
	bb_error_msg_and_die("unmatched '%c'", delimiter);
}
292
/*
 * Split the "<d>match<d>replace<d>" argument of an 's' or 'y' command,
 * where <d> is whatever character comes first in cmdstr.
 *
 * *match and *replace receive newly allocated, escape-processed copies
 * of the two parts.  Returns the index (within cmdstr) of the third
 * delimiter.  Dies if cmdstr is empty or a delimiter is missing.
 */
static int parse_regex_delim(const char *cmdstr, char **match, char **replace)
{
	const char *pattern;
	const char *repl;
	int pattern_len;
	int repl_len;
	unsigned char sep;

	/* The 's' or 'y' must be followed by something; that something
	 * (typically a slash) is the delimiter. */
	sep = cmdstr[0];
	if (sep == '\0')
		bb_error_msg_and_die("bad format in substitution expression");

	/* First part: the match string */
	pattern = cmdstr + 1;
	pattern_len = index_of_next_unescaped_regexp_delim(sep, pattern);
	*match = copy_parsing_escapes(pattern, pattern_len);

	/* Second part: the replacement (no bracket tracking there) */
	repl = pattern + pattern_len + 1;
	repl_len = index_of_next_unescaped_regexp_delim(- (int)sep, repl);
	*replace = copy_parsing_escapes(repl, repl_len);

	return (repl - cmdstr) + repl_len;
}
319
320 /*
321  * returns the index in the string just past where the address ends.
322  */
323 static int get_address(const char *my_str, int *linenum, regex_t ** regex)
324 {
325         const char *pos = my_str;
326
327         if (isdigit(*my_str)) {
328                 *linenum = strtol(my_str, (char**)&pos, 10);
329                 /* endstr shouldnt ever equal NULL */
330         } else if (*my_str == '$') {
331                 *linenum = -1;
332                 pos++;
333         } else if (*my_str == '/' || *my_str == '\\') {
334                 int next;
335                 char delimiter;
336                 char *temp;
337
338                 delimiter = '/';
339                 if (*my_str == '\\')
340                         delimiter = *++pos;
341                 next = index_of_next_unescaped_regexp_delim(delimiter, ++pos);
342                 temp = copy_parsing_escapes(pos, next);
343                 *regex = xzalloc(sizeof(regex_t));
344                 xregcomp(*regex, temp, G.regex_type);
345                 free(temp);
346                 /* Move position to next character after last delimiter */
347                 pos += (next+1);
348         }
349         return pos - my_str;
350 }
351
/*
 * Grab a filename.  Whitespace at start is skipped, then the name runs
 * to the end of the line (newline or NUL).
 *
 * *retval receives a newly allocated copy of the name (caller frees).
 * If the name is ended by a newline, i.e. add_cmd() glued a continuation
 * line on, a backslash is appended to the name in place of that newline.
 * The copy contains nothing from the following line.
 *
 * Returns the index within filecmdstr of the terminating newline/NUL.
 * Dies with "empty filename" if there is no name.
 */
static int parse_file_cmd(/*sed_cmd_t *sed_cmd,*/ const char *filecmdstr, char **retval)
{
	int start = 0, idx;

	/* Skip whitespace, then grab filename to end of line */
	while (isspace(filecmdstr[start]))
		start++;
	idx = start;
	while (filecmdstr[idx] && filecmdstr[idx] != '\n')
		idx++;

	if (idx == start)
		bb_error_msg_and_die("empty filename");

	if (filecmdstr[idx] == '\n') {
		/* If lines glued together, put backslash back: copy the name
		 * plus the newline, then overwrite the newline.  The position
		 * is relative to the copy, not to filecmdstr. */
		*retval = xstrndup(filecmdstr + start, idx - start + 1);
		(*retval)[idx - start] = '\\';
	} else {
		*retval = xstrndup(filecmdstr + start, idx - start);
	}

	return idx;
}
375
376 static int parse_subst_cmd(sed_cmd_t *sed_cmd, const char *substr)
377 {
378         int cflags = G.regex_type;
379         char *match;
380         int idx;
381
382         /*
383          * A substitution command should look something like this:
384          *    s/match/replace/ #giIpw
385          *    ||     |        |||
386          *    mandatory       optional
387          */
388         idx = parse_regex_delim(substr, &match, &sed_cmd->string);
389
390         /* determine the number of back references in the match string */
391         /* Note: we compute this here rather than in the do_subst_command()
392          * function to save processor time, at the expense of a little more memory
393          * (4 bits) per sed_cmd */
394
395         /* process the flags */
396
397         sed_cmd->which_match = 1;
398         while (substr[++idx]) {
399                 /* Parse match number */
400                 if (isdigit(substr[idx])) {
401                         if (match[0] != '^') {
402                                 /* Match 0 treated as all, multiple matches we take the last one. */
403                                 const char *pos = substr + idx;
404 /* FIXME: error check? */
405                                 sed_cmd->which_match = (unsigned)strtol(substr+idx, (char**) &pos, 10);
406                                 idx = pos - substr;
407                         }
408                         continue;
409                 }
410                 /* Skip spaces */
411                 if (isspace(substr[idx]))
412                         continue;
413
414                 switch (substr[idx]) {
415                 /* Replace all occurrences */
416                 case 'g':
417                         if (match[0] != '^')
418                                 sed_cmd->which_match = 0;
419                         break;
420                 /* Print pattern space */
421                 case 'p':
422                         sed_cmd->sub_p = 1;
423                         break;
424                 /* Write to file */
425                 case 'w':
426                 {
427                         char *temp;
428                         idx += parse_file_cmd(/*sed_cmd,*/ substr+idx, &temp);
429                         break;
430                 }
431                 /* Ignore case (gnu exension) */
432                 case 'i':
433                 case 'I':
434                         cflags |= REG_ICASE;
435                         break;
436                 /* Comment */
437                 case '#':
438                         // while (substr[++idx]) continue;
439                         idx += strlen(substr + idx); // same
440                         /* Fall through */
441                 /* End of command */
442                 case ';':
443                 case '}':
444                         goto out;
445                 default:
446                         bb_error_msg_and_die("bad option in substitution expression");
447                 }
448         }
449  out:
450         /* compile the match string into a regex */
451         if (*match != '\0') {
452                 /* If match is empty, we use last regex used at runtime */
453                 sed_cmd->sub_match = xzalloc(sizeof(regex_t));
454                 dbg("xregcomp('%s',%x)", match, cflags);
455                 xregcomp(sed_cmd->sub_match, match, cflags);
456                 dbg("regcomp ok");
457         }
458         free(match);
459
460         return idx;
461 }
462
463 /*
464  *  Process the commands arguments
465  */
466 static const char *parse_cmd_args(sed_cmd_t *sed_cmd, const char *cmdstr)
467 {
468         static const char cmd_letters[] = "saicrw:btTydDgGhHlnNpPqx={}";
469         enum {
470                 IDX_s = 0,
471                 IDX_a,
472                 IDX_i,
473                 IDX_c,
474                 IDX_r,
475                 IDX_w,
476                 IDX_colon,
477                 IDX_b,
478                 IDX_t,
479                 IDX_T,
480                 IDX_y,
481                 IDX_d,
482                 IDX_D,
483                 IDX_g,
484                 IDX_G,
485                 IDX_h,
486                 IDX_H,
487                 IDX_l,
488                 IDX_n,
489                 IDX_N,
490                 IDX_p,
491                 IDX_P,
492                 IDX_q,
493                 IDX_x,
494                 IDX_equal,
495                 IDX_lbrace,
496                 IDX_rbrace,
497                 IDX_nul
498         };
499         struct chk { char chk[sizeof(cmd_letters)-1 == IDX_nul ? 1 : -1]; };
500
501         unsigned idx = strchrnul(cmd_letters, sed_cmd->cmd) - cmd_letters;
502
503         /* handle (s)ubstitution command */
504         if (idx == IDX_s) {
505                 cmdstr += parse_subst_cmd(sed_cmd, cmdstr);
506         }
507         /* handle edit cmds: (a)ppend, (i)nsert, and (c)hange */
508         else if (idx <= IDX_c) { /* a,i,c */
509                 if (idx < IDX_c) { /* a,i */
510                         if (sed_cmd->end_line || sed_cmd->end_match)
511                                 bb_error_msg_and_die("command '%c' uses only one address", sed_cmd->cmd);
512                 }
513                 for (;;) {
514                         if (*cmdstr == '\n' || *cmdstr == '\\') {
515                                 cmdstr++;
516                                 break;
517                         }
518                         if (!isspace(*cmdstr))
519                                 break;
520                         cmdstr++;
521                 }
522                 sed_cmd->string = xstrdup(cmdstr);
523                 /* "\anychar" -> "anychar" */
524                 parse_escapes(sed_cmd->string, sed_cmd->string, strlen(cmdstr), '\0', '\0');
525                 cmdstr += strlen(cmdstr);
526         }
527         /* handle file cmds: (r)ead */
528         else if (idx <= IDX_w) { /* r,w */
529                 if (idx < IDX_w) { /* r */
530                         if (sed_cmd->end_line || sed_cmd->end_match)
531                                 bb_error_msg_and_die("command '%c' uses only one address", sed_cmd->cmd);
532                 }
533                 cmdstr += parse_file_cmd(/*sed_cmd,*/ cmdstr, &sed_cmd->string);
534                 if (sed_cmd->cmd == 'w') {
535                         sed_cmd->sw_file = xfopen_for_write(sed_cmd->string);
536                         sed_cmd->sw_last_char = '\n';
537                 }
538         }
539         /* handle branch commands */
540         else if (idx <= IDX_T) { /* :,b,t,T */
541                 int length;
542
543                 cmdstr = skip_whitespace(cmdstr);
544                 length = strcspn(cmdstr, semicolon_whitespace);
545                 if (length) {
546                         sed_cmd->string = xstrndup(cmdstr, length);
547                         cmdstr += length;
548                 }
549         }
550         /* translation command */
551         else if (idx == IDX_y) {
552                 char *match, *replace;
553                 int i = cmdstr[0];
554
555                 cmdstr += parse_regex_delim(cmdstr, &match, &replace)+1;
556                 /* \n already parsed, but \delimiter needs unescaping. */
557                 parse_escapes(match, match, strlen(match), i, i);
558                 parse_escapes(replace, replace, strlen(replace), i, i);
559
560                 sed_cmd->string = xzalloc((strlen(match) + 1) * 2);
561                 for (i = 0; match[i] && replace[i]; i++) {
562                         sed_cmd->string[i*2] = match[i];
563                         sed_cmd->string[i*2+1] = replace[i];
564                 }
565                 free(match);
566                 free(replace);
567         }
568         /* if it wasnt a single-letter command that takes no arguments
569          * then it must be an invalid command.
570          */
571         else if (idx >= IDX_nul) { /* not d,D,g,G,h,H,l,n,N,p,P,q,x,=,{,} */
572                 bb_error_msg_and_die("unsupported command %c", sed_cmd->cmd);
573         }
574
575         /* give back whatever's left over */
576         return cmdstr;
577 }
578
579
580 /* Parse address+command sets, skipping comment lines. */
581
582 static void add_cmd(const char *cmdstr)
583 {
584         sed_cmd_t *sed_cmd;
585         unsigned len, n;
586
587         /* Append this line to any unfinished line from last time. */
588         if (G.add_cmd_line) {
589                 char *tp = xasprintf("%s\n%s", G.add_cmd_line, cmdstr);
590                 free(G.add_cmd_line);
591                 cmdstr = G.add_cmd_line = tp;
592         }
593
594         /* If this line ends with unescaped backslash, request next line. */
595         n = len = strlen(cmdstr);
596         while (n && cmdstr[n-1] == '\\')
597                 n--;
598         if ((len - n) & 1) { /* if odd number of trailing backslashes */
599                 if (!G.add_cmd_line)
600                         G.add_cmd_line = xstrdup(cmdstr);
601                 G.add_cmd_line[len-1] = '\0';
602                 return;
603         }
604
605         /* Loop parsing all commands in this line. */
606         while (*cmdstr) {
607                 /* Skip leading whitespace and semicolons */
608                 cmdstr += strspn(cmdstr, semicolon_whitespace);
609
610                 /* If no more commands, exit. */
611                 if (!*cmdstr) break;
612
613                 /* if this is a comment, jump past it and keep going */
614                 if (*cmdstr == '#') {
615                         /* "#n" is the same as using -n on the command line */
616                         if (cmdstr[1] == 'n')
617                                 G.be_quiet++;
618                         cmdstr = strpbrk(cmdstr, "\n\r");
619                         if (!cmdstr) break;
620                         continue;
621                 }
622
623                 /* parse the command
624                  * format is: [addr][,addr][!]cmd
625                  *            |----||-----||-|
626                  *            part1 part2  part3
627                  */
628
629                 sed_cmd = xzalloc(sizeof(sed_cmd_t));
630
631                 /* first part (if present) is an address: either a '$', a number or a /regex/ */
632                 cmdstr += get_address(cmdstr, &sed_cmd->beg_line, &sed_cmd->beg_match);
633                 sed_cmd->beg_line_orig = sed_cmd->beg_line;
634
635                 /* second part (if present) will begin with a comma */
636                 if (*cmdstr == ',') {
637                         int idx;
638
639                         cmdstr++;
640                         idx = get_address(cmdstr, &sed_cmd->end_line, &sed_cmd->end_match);
641                         if (!idx)
642                                 bb_error_msg_and_die("no address after comma");
643                         cmdstr += idx;
644                 }
645
646                 /* skip whitespace before the command */
647                 cmdstr = skip_whitespace(cmdstr);
648
649                 /* Check for inversion flag */
650                 if (*cmdstr == '!') {
651                         sed_cmd->invert = 1;
652                         cmdstr++;
653
654                         /* skip whitespace before the command */
655                         cmdstr = skip_whitespace(cmdstr);
656                 }
657
658                 /* last part (mandatory) will be a command */
659                 if (!*cmdstr)
660                         bb_error_msg_and_die("missing command");
661                 sed_cmd->cmd = *cmdstr++;
662                 cmdstr = parse_cmd_args(sed_cmd, cmdstr);
663
664                 /* cmdstr now points past args.
665                  * GNU sed requires a separator, if there are more commands,
666                  * else it complains "char N: extra characters after command".
667                  * Example: "sed 'p;d'". We also allow "sed 'pd'".
668                  */
669
670                 /* Add the command to the command array */
671                 *G.sed_cmd_tail = sed_cmd;
672                 G.sed_cmd_tail = &sed_cmd->next;
673         }
674
675         /* If we glued multiple lines together, free the memory. */
676         free(G.add_cmd_line);
677         G.add_cmd_line = NULL;
678 }
679
680 /* Append to a string, reallocating memory as necessary. */
681
682 #define PIPE_GROW 64
683
684 static void pipe_putc(char c)
685 {
686         if (G.pipeline.idx == G.pipeline.len) {
687                 G.pipeline.buf = xrealloc(G.pipeline.buf,
688                                 G.pipeline.len + PIPE_GROW);
689                 G.pipeline.len += PIPE_GROW;
690         }
691         G.pipeline.buf[G.pipeline.idx++] = c;
692 }
693
694 static void do_subst_w_backrefs(char *line, char *replace)
695 {
696         int i, j;
697
698         /* go through the replacement string */
699         for (i = 0; replace[i]; i++) {
700                 /* if we find a backreference (\1, \2, etc.) print the backref'ed text */
701                 if (replace[i] == '\\') {
702                         unsigned backref = replace[++i] - '0';
703                         if (backref <= 9) {
704                                 /* print out the text held in G.regmatch[backref] */
705                                 if (G.regmatch[backref].rm_so != -1) {
706                                         j = G.regmatch[backref].rm_so;
707                                         while (j < G.regmatch[backref].rm_eo)
708                                                 pipe_putc(line[j++]);
709                                 }
710                                 continue;
711                         }
712                         /* I _think_ it is impossible to get '\' to be
713                          * the last char in replace string. Thus we dont check
714                          * for replace[i] == NUL. (counterexample anyone?) */
715                         /* if we find a backslash escaped character, print the character */
716                         pipe_putc(replace[i]);
717                         continue;
718                 }
719                 /* if we find an unescaped '&' print out the whole matched text. */
720                 if (replace[i] == '&') {
721                         j = G.regmatch[0].rm_so;
722                         while (j < G.regmatch[0].rm_eo)
723                                 pipe_putc(line[j++]);
724                         continue;
725                 }
726                 /* Otherwise just output the character. */
727                 pipe_putc(replace[i]);
728         }
729 }
730
731 static int do_subst_command(sed_cmd_t *sed_cmd, char **line_p)
732 {
733         char *line = *line_p;
734         unsigned match_count = 0;
735         bool altered = 0;
736         bool prev_match_empty = 1;
737         bool tried_at_eol = 0;
738         regex_t *current_regex;
739
740         current_regex = sed_cmd->sub_match;
741         /* Handle empty regex. */
742         if (!current_regex) {
743                 current_regex = G.previous_regex_ptr;
744                 if (!current_regex)
745                         bb_error_msg_and_die("no previous regexp");
746         }
747         G.previous_regex_ptr = current_regex;
748
749         /* Find the first match */
750         dbg("matching '%s'", line);
751         if (REG_NOMATCH == regexec(current_regex, line, 10, G.regmatch, 0)) {
752                 dbg("no match");
753                 return 0;
754         }
755         dbg("match");
756
757         /* Initialize temporary output buffer. */
758         G.pipeline.buf = xmalloc(PIPE_GROW);
759         G.pipeline.len = PIPE_GROW;
760         G.pipeline.idx = 0;
761
762         /* Now loop through, substituting for matches */
763         do {
764                 int start = G.regmatch[0].rm_so;
765                 int end = G.regmatch[0].rm_eo;
766                 int i;
767
768                 match_count++;
769
770                 /* If we aren't interested in this match, output old line to
771                  * end of match and continue */
772                 if (sed_cmd->which_match
773                  && (sed_cmd->which_match != match_count)
774                 ) {
775                         for (i = 0; i < end; i++)
776                                 pipe_putc(*line++);
777                         /* Null match? Print one more char */
778                         if (start == end && *line)
779                                 pipe_putc(*line++);
780                         goto next;
781                 }
782
783                 /* Print everything before the match */
784                 for (i = 0; i < start; i++)
785                         pipe_putc(line[i]);
786
787                 /* Then print the substitution string,
788                  * unless we just matched empty string after non-empty one.
789                  * Example: string "cccd", pattern "c*", repl "R":
790                  * result is "RdR", not "RRdR": first match "ccc",
791                  * second is "" before "d", third is "" after "d".
792                  * Second match is NOT replaced!
793                  */
794                 if (prev_match_empty || start != 0 || start != end) {
795                         //dbg("%d %d %d", prev_match_empty, start, end);
796                         dbg("inserting replacement at %d in '%s'", start, line);
797                         do_subst_w_backrefs(line, sed_cmd->string);
798                         /* Flag that something has changed */
799                         altered = 1;
800                 } else {
801                         dbg("NOT inserting replacement at %d in '%s'", start, line);
802                 }
803
804                 /* If matched string is empty (f.e. "c*" pattern),
805                  * copy verbatim one char after it before attempting more matches
806                  */
807                 prev_match_empty = (start == end);
808                 if (prev_match_empty) {
809                         if (!line[end]) {
810                                 tried_at_eol = 1;
811                         } else {
812                                 pipe_putc(line[end]);
813                                 end++;
814                         }
815                 }
816
817                 /* Advance past the match */
818                 dbg("line += %d", end);
819                 line += end;
820
821                 /* if we're not doing this globally, get out now */
822                 if (sed_cmd->which_match != 0)
823                         break;
824  next:
825                 /* Exit if we are at EOL and already tried matching at it */
826                 if (*line == '\0') {
827                         if (tried_at_eol)
828                                 break;
829                         tried_at_eol = 1;
830                 }
831
832 //maybe (end ? REG_NOTBOL : 0) instead of unconditional REG_NOTBOL?
833         } while (regexec(current_regex, line, 10, G.regmatch, REG_NOTBOL) != REG_NOMATCH);
834
835         /* Copy rest of string into output pipeline */
836         while (1) {
837                 char c = *line++;
838                 pipe_putc(c);
839                 if (c == '\0')
840                         break;
841         }
842
843         free(*line_p);
844         *line_p = G.pipeline.buf;
845         return altered;
846 }
847
848 /* Set command pointer to point to this label.  (Does not handle null label.) */
849 static sed_cmd_t *branch_to(char *label)
850 {
851         sed_cmd_t *sed_cmd;
852
853         for (sed_cmd = G.sed_cmd_head; sed_cmd; sed_cmd = sed_cmd->next) {
854                 if (sed_cmd->cmd == ':' && sed_cmd->string && !strcmp(sed_cmd->string, label)) {
855                         return sed_cmd;
856                 }
857         }
858         bb_error_msg_and_die("can't find label for jump to '%s'", label);
859 }
860
/* Queue string s at the end of the G.append_head list.
 * flush_append() later prints every queued string and free()s it,
 * so s must be heap-allocated and ownership passes to the list.
 */
861 static void append(char *s)
862 {
863         llist_add_to_end(&G.append_head, s);
864 }
865
866 /* Output line of text. */
867 /* Note:
868  * The tricks with NO_EOL_CHAR and last_puts_char are there to emulate gnu sed.
869  * Without them, we had this:
870  * echo -n thingy >z1
871  * echo -n again >z2
872  * >znull
873  * sed "s/i/z/" z1 z2 znull | hexdump -vC
874  * output:
875  * gnu sed 4.1.5:
876  * 00000000  74 68 7a 6e 67 79 0a 61  67 61 7a 6e              |thzngy.agazn|
877  * bbox:
878  * 00000000  74 68 7a 6e 67 79 61 67  61 7a 6e                 |thzngyagazn|
879  */
880 enum {
881         NO_EOL_CHAR = 1,
882         LAST_IS_NUL = 2,
883 };
884 static void puts_maybe_newline(char *s, FILE *file, char *last_puts_char, char last_gets_char)
885 {
886         char lpc = *last_puts_char;
887
888         /* Need to insert a '\n' between two files because first file's
889          * last line wasn't terminated? */
890         if (lpc != '\n' && lpc != '\0') {
891                 fputc('\n', file);
892                 lpc = '\n';
893         }
894         fputs(s, file);
895
896         /* 'x' - just something which is not '\n', '\0' or NO_EOL_CHAR */
897         if (s[0])
898                 lpc = 'x';
899
900         /* had trailing '\0' and it was last char of file? */
901         if (last_gets_char == LAST_IS_NUL) {
902                 fputc('\0', file);
903                 lpc = 'x'; /* */
904         } else
905         /* had trailing '\n' or '\0'? */
906         if (last_gets_char != NO_EOL_CHAR) {
907                 fputc(last_gets_char, file);
908                 lpc = last_gets_char;
909         }
910
911         if (ferror(file)) {
912                 xfunc_error_retval = 4;  /* It's what gnu sed exits with... */
913                 bb_error_msg_and_die(bb_msg_write_error);
914         }
915         *last_puts_char = lpc;
916 }
917
918 static void flush_append(char *last_puts_char, char last_gets_char)
919 {
920         char *data;
921
922         /* Output appended lines. */
923         while ((data = (char *)llist_pop(&G.append_head))) {
924                 puts_maybe_newline(data, G.nonstdout, last_puts_char, last_gets_char);
925                 free(data);
926         }
927 }
928
929 /* Get next line of input from G.input_file_list, flushing append buffer and
930  * noting if we ran out of files without a newline on the last line we read.
931  */
932 static char *get_next_line(char *gets_char, char *last_puts_char, char last_gets_char)
933 {
934         char *temp = NULL;
935         int len;
936         char gc;
937
938         flush_append(last_puts_char, last_gets_char);
939
940         /* will be returned if last line in the file
941          * doesn't end with either '\n' or '\0' */
942         gc = NO_EOL_CHAR;
943         for (; G.current_input_file <= G.last_input_file; G.current_input_file++) {
944                 FILE *fp = G.current_fp;
945                 if (!fp) {
946                         const char *path = G.input_file_list[G.current_input_file];
947                         fp = stdin;
948                         if (path != bb_msg_standard_input) {
949                                 fp = fopen_or_warn(path, "r");
950                                 if (!fp) {
951                                         G.exitcode = EXIT_FAILURE;
952                                         continue;
953                                 }
954                         }
955                         G.current_fp = fp;
956                 }
957                 /* Read line up to a newline or NUL byte, inclusive,
958                  * return malloc'ed char[]. length of the chunk read
959                  * is stored in len. NULL if EOF/error */
960                 temp = bb_get_chunk_from_file(fp, &len);
961                 if (temp) {
962                         /* len > 0 here, it's ok to do temp[len-1] */
963                         char c = temp[len-1];
964                         if (c == '\n' || c == '\0') {
965                                 temp[len-1] = '\0';
966                                 gc = c;
967                                 if (c == '\0') {
968                                         int ch = fgetc(fp);
969                                         if (ch != EOF)
970                                                 ungetc(ch, fp);
971                                         else
972                                                 gc = LAST_IS_NUL;
973                                 }
974                         }
975                         /* else we put NO_EOL_CHAR into *gets_char */
976                         break;
977
978                 /* NB: I had the idea of peeking next file(s) and returning
979                  * NO_EOL_CHAR only if it is the *last* non-empty
980                  * input file. But there is a case where this won't work:
981                  * file1: "a woo\nb woo"
982                  * file2: "c no\nd no"
983                  * sed -ne 's/woo/bang/p' input1 input2 => "a bang\nb bang"
984                  * (note: *no* newline after "b bang"!) */
985                 }
986                 /* Close this file and advance to next one */
987                 fclose_if_not_stdin(fp);
988                 G.current_fp = NULL;
989         }
990         *gets_char = gc;
991         return temp;
992 }
993
/* Print s to G.nonstdout, with n as the input line's terminator
 * (see puts_maybe_newline).  NB: expands to a reference to a variable
 * named last_puts_char, which must exist in the calling function.
 */
994 #define sed_puts(s, n) (puts_maybe_newline(s, G.nonstdout, &last_puts_char, n))
995
996 static int beg_match(sed_cmd_t *sed_cmd, const char *pattern_space)
997 {
998         int retval = sed_cmd->beg_match && !regexec(sed_cmd->beg_match, pattern_space, 0, NULL, 0);
999         if (retval)
1000                 G.previous_regex_ptr = sed_cmd->beg_match;
1001         return retval;
1002 }
1003
1004 /* Process all the lines in all the files */
1005
1006 static void process_files(void)
1007 {
1008         char *pattern_space, *next_line;
1009         int linenum = 0;
1010         char last_puts_char = '\n';
1011         char last_gets_char, next_gets_char;
1012         sed_cmd_t *sed_cmd;
1013         int substituted;
1014
1015         /* Prime the pump */
1016         next_line = get_next_line(&next_gets_char, &last_puts_char, '\n' /*last_gets_char*/);
1017
1018         /* Go through every line in each file */
1019  again:
1020         substituted = 0;
1021
1022         /* Advance to next line.  Stop if out of lines. */
1023         pattern_space = next_line;
1024         if (!pattern_space)
1025                 return;
1026         last_gets_char = next_gets_char;
1027
1028         /* Read one line in advance so we can act on the last line,
1029          * the '$' address */
1030         next_line = get_next_line(&next_gets_char, &last_puts_char, last_gets_char);
1031         linenum++;
1032
1033         /* For every line, go through all the commands */
1034  restart:
1035         for (sed_cmd = G.sed_cmd_head; sed_cmd; sed_cmd = sed_cmd->next) {
1036                 int old_matched, matched;
1037
1038                 old_matched = sed_cmd->in_match;
1039
1040                 /* Determine if this command matches this line: */
1041
1042                 dbg("match1:%d", sed_cmd->in_match);
1043                 dbg("match2:%d", (!sed_cmd->beg_line && !sed_cmd->end_line
1044                                 && !sed_cmd->beg_match && !sed_cmd->end_match));
1045                 dbg("match3:%d", (sed_cmd->beg_line > 0
1046                         && (sed_cmd->end_line || sed_cmd->end_match
1047                             ? (sed_cmd->beg_line <= linenum)
1048                             : (sed_cmd->beg_line == linenum)
1049                             )
1050                         ));
1051                 dbg("match4:%d", (beg_match(sed_cmd, pattern_space)));
1052                 dbg("match5:%d", (sed_cmd->beg_line == -1 && next_line == NULL));
1053
1054                 /* Are we continuing a previous multi-line match? */
1055                 sed_cmd->in_match = sed_cmd->in_match
1056                         /* Or is no range necessary? */
1057                         || (!sed_cmd->beg_line && !sed_cmd->end_line
1058                                 && !sed_cmd->beg_match && !sed_cmd->end_match)
1059                         /* Or did we match the start of a numerical range? */
1060                         || (sed_cmd->beg_line > 0
1061                             && (sed_cmd->end_line || sed_cmd->end_match
1062                                   /* note: even if end is numeric and is < linenum too,
1063                                    * GNU sed matches! We match too, therefore we don't
1064                                    * check here that linenum <= end.
1065                                    * Example:
1066                                    * printf '1\n2\n3\n4\n' | sed -n '1{N;N;d};1p;2,3p;3p;4p'
1067                                    * first three input lines are deleted;
1068                                    * 4th line is matched and printed
1069                                    * by "2,3" (!) and by "4" ranges
1070                                    */
1071                                 ? (sed_cmd->beg_line <= linenum)    /* N,end */
1072                                 : (sed_cmd->beg_line == linenum)    /* N */
1073                                 )
1074                             )
1075                         /* Or does this line match our begin address regex? */
1076                         || (beg_match(sed_cmd, pattern_space))
1077                         /* Or did we match last line of input? */
1078                         || (sed_cmd->beg_line == -1 && next_line == NULL);
1079
1080                 /* Snapshot the value */
1081                 matched = sed_cmd->in_match;
1082
1083                 dbg("cmd:'%c' matched:%d beg_line:%d end_line:%d linenum:%d",
1084                         sed_cmd->cmd, matched, sed_cmd->beg_line, sed_cmd->end_line, linenum);
1085
1086                 /* Is this line the end of the current match? */
1087
1088                 if (matched) {
1089                         /* once matched, "n,xxx" range is dead, disabling it */
1090                         if (sed_cmd->beg_line > 0) {
1091                                 sed_cmd->beg_line = -2;
1092                         }
1093                         sed_cmd->in_match = !(
1094                                 /* has the ending line come, or is this a single address command? */
1095                                 (sed_cmd->end_line
1096                                         ? sed_cmd->end_line == -1
1097                                                 ? !next_line
1098                                                 : (sed_cmd->end_line <= linenum)
1099                                         : !sed_cmd->end_match
1100                                 )
1101                                 /* or does this line matches our last address regex */
1102                                 || (sed_cmd->end_match && old_matched
1103                                      && (regexec(sed_cmd->end_match,
1104                                                 pattern_space, 0, NULL, 0) == 0)
1105                                 )
1106                         );
1107                 }
1108
1109                 /* Skip blocks of commands we didn't match */
1110                 if (sed_cmd->cmd == '{') {
1111                         if (sed_cmd->invert ? matched : !matched) {
1112                                 unsigned nest_cnt = 0;
1113                                 while (1) {
1114                                         if (sed_cmd->cmd == '{')
1115                                                 nest_cnt++;
1116                                         if (sed_cmd->cmd == '}') {
1117                                                 nest_cnt--;
1118                                                 if (nest_cnt == 0)
1119                                                         break;
1120                                         }
1121                                         sed_cmd = sed_cmd->next;
1122                                         if (!sed_cmd)
1123                                                 bb_error_msg_and_die("unterminated {");
1124                                 }
1125                         }
1126                         continue;
1127                 }
1128
1129                 /* Okay, so did this line match? */
1130                 if (sed_cmd->invert ? matched : !matched)
1131                         continue; /* no */
1132
1133                 /* Update last used regex in case a blank substitute BRE is found */
1134                 if (sed_cmd->beg_match) {
1135                         G.previous_regex_ptr = sed_cmd->beg_match;
1136                 }
1137
1138                 /* actual sedding */
1139                 dbg("pattern_space:'%s' next_line:'%s' cmd:%c",
1140                                 pattern_space, next_line, sed_cmd->cmd);
1141                 switch (sed_cmd->cmd) {
1142
1143                 /* Print line number */
1144                 case '=':
1145                         fprintf(G.nonstdout, "%d\n", linenum);
1146                         break;
1147
1148                 /* Write the current pattern space up to the first newline */
1149                 case 'P':
1150                 {
1151                         char *tmp = strchr(pattern_space, '\n');
1152                         if (tmp) {
1153                                 *tmp = '\0';
1154                                 /* TODO: explain why '\n' below */
1155                                 sed_puts(pattern_space, '\n');
1156                                 *tmp = '\n';
1157                                 break;
1158                         }
1159                         /* Fall Through */
1160                 }
1161
1162                 /* Write the current pattern space to output */
1163                 case 'p':
1164                         /* NB: we print this _before_ the last line
1165                          * (of current file) is printed. Even if
1166                          * that line is nonterminated, we print
1167                          * '\n' here (gnu sed does the same) */
1168                         sed_puts(pattern_space, '\n');
1169                         break;
1170                 /* Delete up through first newline */
1171                 case 'D':
1172                 {
1173                         char *tmp = strchr(pattern_space, '\n');
1174                         if (tmp) {
1175                                 overlapping_strcpy(pattern_space, tmp + 1);
1176                                 goto restart;
1177                         }
1178                 }
1179                 /* discard this line. */
1180                 case 'd':
1181                         goto discard_line;
1182
1183                 /* Substitute with regex */
1184                 case 's':
1185                         if (!do_subst_command(sed_cmd, &pattern_space))
1186                                 break;
1187                         dbg("do_subst_command succeeded:'%s'", pattern_space);
1188                         substituted |= 1;
1189
1190                         /* handle p option */
1191                         if (sed_cmd->sub_p)
1192                                 sed_puts(pattern_space, last_gets_char);
1193                         /* handle w option */
1194                         if (sed_cmd->sw_file)
1195                                 puts_maybe_newline(
1196                                         pattern_space, sed_cmd->sw_file,
1197                                         &sed_cmd->sw_last_char, last_gets_char);
1198                         break;
1199
1200                 /* Append line to linked list to be printed later */
1201                 case 'a':
1202                         append(xstrdup(sed_cmd->string));
1203                         break;
1204
1205                 /* Insert text before this line */
1206                 case 'i':
1207                         sed_puts(sed_cmd->string, '\n');
1208                         break;
1209
1210                 /* Cut and paste text (replace) */
1211                 case 'c':
1212                         /* Only triggers on last line of a matching range. */
1213                         if (!sed_cmd->in_match)
1214                                 sed_puts(sed_cmd->string, '\n');
1215                         goto discard_line;
1216
1217                 /* Read file, append contents to output */
1218                 case 'r':
1219                 {
1220                         FILE *rfile;
1221                         rfile = fopen_for_read(sed_cmd->string);
1222                         if (rfile) {
1223                                 char *line;
1224                                 while ((line = xmalloc_fgetline(rfile))
1225                                                 != NULL)
1226                                         append(line);
1227                                 fclose(rfile);
1228                         }
1229
1230                         break;
1231                 }
1232
1233                 /* Write pattern space to file. */
1234                 case 'w':
1235                         puts_maybe_newline(
1236                                 pattern_space, sed_cmd->sw_file,
1237                                 &sed_cmd->sw_last_char, last_gets_char);
1238                         break;
1239
1240                 /* Read next line from input */
1241                 case 'n':
1242                         if (!G.be_quiet)
1243                                 sed_puts(pattern_space, last_gets_char);
1244                         if (next_line) {
1245                                 free(pattern_space);
1246                                 pattern_space = next_line;
1247                                 last_gets_char = next_gets_char;
1248                                 next_line = get_next_line(&next_gets_char, &last_puts_char, last_gets_char);
1249                                 substituted = 0;
1250                                 linenum++;
1251                                 break;
1252                         }
1253                         /* fall through */
1254
1255                 /* Quit.  End of script, end of input. */
1256                 case 'q':
1257                         /* Exit the outer while loop */
1258                         free(next_line);
1259                         next_line = NULL;
1260                         goto discard_commands;
1261
1262                 /* Append the next line to the current line */
1263                 case 'N':
1264                 {
1265                         int len;
1266                         /* If no next line, jump to end of script and exit. */
1267                         /* http://www.gnu.org/software/sed/manual/sed.html:
1268                          * "Most versions of sed exit without printing anything
1269                          * when the N command is issued on the last line of
1270                          * a file. GNU sed prints pattern space before exiting
1271                          * unless of course the -n command switch has been
1272                          * specified. This choice is by design."
1273                          */
1274                         if (next_line == NULL) {
1275                                 //goto discard_line;
1276                                 goto discard_commands; /* GNU behavior */
1277                         }
1278                         /* Append next_line, read new next_line. */
1279                         len = strlen(pattern_space);
1280                         pattern_space = xrealloc(pattern_space, len + strlen(next_line) + 2);
1281                         pattern_space[len] = '\n';
1282                         strcpy(pattern_space + len+1, next_line);
1283                         last_gets_char = next_gets_char;
1284                         next_line = get_next_line(&next_gets_char, &last_puts_char, last_gets_char);
1285                         linenum++;
1286                         break;
1287                 }
1288
1289                 /* Test/branch if substitution occurred */
1290                 case 't':
1291                         if (!substituted) break;
1292                         substituted = 0;
1293                         /* Fall through */
1294                 /* Test/branch if substitution didn't occur */
1295                 case 'T':
1296                         if (substituted) break;
1297                         /* Fall through */
1298                 /* Branch to label */
1299                 case 'b':
1300                         if (!sed_cmd->string) goto discard_commands;
1301                         else sed_cmd = branch_to(sed_cmd->string);
1302                         break;
1303                 /* Transliterate characters */
1304                 case 'y':
1305                 {
1306                         int i, j;
1307                         for (i = 0; pattern_space[i]; i++) {
1308                                 for (j = 0; sed_cmd->string[j]; j += 2) {
1309                                         if (pattern_space[i] == sed_cmd->string[j]) {
1310                                                 pattern_space[i] = sed_cmd->string[j + 1];
1311                                                 break;
1312                                         }
1313                                 }
1314                         }
1315
1316                         break;
1317                 }
1318                 case 'g':       /* Replace pattern space with hold space */
1319                         free(pattern_space);
1320                         pattern_space = xstrdup(G.hold_space ? G.hold_space : "");
1321                         break;
1322                 case 'G':       /* Append newline and hold space to pattern space */
1323                 {
1324                         int pattern_space_size = 2;
1325                         int hold_space_size = 0;
1326
1327                         if (pattern_space)
1328                                 pattern_space_size += strlen(pattern_space);
1329                         if (G.hold_space)
1330                                 hold_space_size = strlen(G.hold_space);
1331                         pattern_space = xrealloc(pattern_space,
1332                                         pattern_space_size + hold_space_size);
1333                         if (pattern_space_size == 2)
1334                                 pattern_space[0] = 0;
1335                         strcat(pattern_space, "\n");
1336                         if (G.hold_space)
1337                                 strcat(pattern_space, G.hold_space);
1338                         last_gets_char = '\n';
1339
1340                         break;
1341                 }
1342                 case 'h':       /* Replace hold space with pattern space */
1343                         free(G.hold_space);
1344                         G.hold_space = xstrdup(pattern_space);
1345                         break;
1346                 case 'H':       /* Append newline and pattern space to hold space */
1347                 {
1348                         int hold_space_size = 2;
1349                         int pattern_space_size = 0;
1350
1351                         if (G.hold_space)
1352                                 hold_space_size += strlen(G.hold_space);
1353                         if (pattern_space)
1354                                 pattern_space_size = strlen(pattern_space);
1355                         G.hold_space = xrealloc(G.hold_space,
1356                                         hold_space_size + pattern_space_size);
1357
1358                         if (hold_space_size == 2)
1359                                 *G.hold_space = 0;
1360                         strcat(G.hold_space, "\n");
1361                         if (pattern_space)
1362                                 strcat(G.hold_space, pattern_space);
1363
1364                         break;
1365                 }
1366                 case 'x': /* Exchange hold and pattern space */
1367                 {
1368                         char *tmp = pattern_space;
1369                         pattern_space = G.hold_space ? G.hold_space : xzalloc(1);
1370                         last_gets_char = '\n';
1371                         G.hold_space = tmp;
1372                         break;
1373                 }
1374                 } /* switch */
1375         } /* for each cmd */
1376
1377         /*
1378          * Exit point from sedding...
1379          */
1380  discard_commands:
1381         /* we will print the line unless we were told to be quiet ('-n')
1382            or if the line was suppressed (ala 'd'elete) */
1383         if (!G.be_quiet)
1384                 sed_puts(pattern_space, last_gets_char);
1385
1386         /* Delete and such jump here. */
1387  discard_line:
1388         flush_append(&last_puts_char, last_gets_char);
1389         free(pattern_space);
1390
1391         goto again;
1392 }
1393
/* It is possible to have a command line argument with embedded
 * newlines.  This counts as multiple command lines.
 * However, newline can be escaped: 's/e/z\<newline>z/'
 * add_cmd() handles this.
 *
 * cmdstr is not modified: a private copy is split at each '\n'
 * and every piece is passed to add_cmd() in order.
 */

static void add_cmd_block(char *cmdstr)
{
	char *sv, *eol;

	cmdstr = sv = xstrdup(cmdstr);
	for (;;) {
		eol = strchr(cmdstr, '\n');
		if (eol)
			*eol = '\0';
		add_cmd(cmdstr);
		/* Last piece: stop here. Do not compute "eol + 1"
		 * from a NULL eol - that is undefined behavior. */
		if (!eol)
			break;
		cmdstr = eol + 1;
	}
	free(sv);
}
1414
1415 int sed_main(int argc, char **argv) MAIN_EXTERNALLY_VISIBLE;
1416 int sed_main(int argc UNUSED_PARAM, char **argv)
1417 {
1418         unsigned opt;
1419         llist_t *opt_e, *opt_f;
1420         char *opt_i;
1421
1422 #if ENABLE_LONG_OPTS
1423         static const char sed_longopts[] ALIGN1 =
1424                 /* name             has_arg             short */
1425                 "in-place\0"        Optional_argument   "i"
1426                 "regexp-extended\0" No_argument         "r"
1427                 "quiet\0"           No_argument         "n"
1428                 "silent\0"          No_argument         "n"
1429                 "expression\0"      Required_argument   "e"
1430                 "file\0"            Required_argument   "f";
1431 #endif
1432
1433         INIT_G();
1434
1435         /* destroy command strings on exit */
1436         if (ENABLE_FEATURE_CLEAN_UP) atexit(sed_free_and_close_stuff);
1437
1438         /* Lie to autoconf when it starts asking stupid questions. */
1439         if (argv[1] && strcmp(argv[1], "--version") == 0) {
1440                 puts("This is not GNU sed version 4.0");
1441                 return 0;
1442         }
1443
1444         /* do normal option parsing */
1445         opt_e = opt_f = NULL;
1446         opt_i = NULL;
1447         opt_complementary = "e::f::" /* can occur multiple times */
1448                             "nn"; /* count -n */
1449
1450         IF_LONG_OPTS(applet_long_options = sed_longopts);
1451
1452         /* -i must be first, to match OPT_in_place definition */
1453         /* -E is a synonym of -r:
1454          * GNU sed 4.2.1 mentions it in neither --help
1455          * nor manpage, but does recognize it.
1456          */
1457         opt = getopt32(argv, "i::rEne:f:", &opt_i, &opt_e, &opt_f,
1458                             &G.be_quiet); /* counter for -n */
1459         //argc -= optind;
1460         argv += optind;
1461         if (opt & OPT_in_place) { // -i
1462                 atexit(cleanup_outname);
1463         }
1464         if (opt & (2|4))
1465                 G.regex_type |= REG_EXTENDED; // -r or -E
1466         //if (opt & 8)
1467         //      G.be_quiet++; // -n (implemented with a counter instead)
1468         while (opt_e) { // -e
1469                 add_cmd_block(llist_pop(&opt_e));
1470         }
1471         while (opt_f) { // -f
1472                 char *line;
1473                 FILE *cmdfile;
1474                 cmdfile = xfopen_for_read(llist_pop(&opt_f));
1475                 while ((line = xmalloc_fgetline(cmdfile)) != NULL) {
1476                         add_cmd(line);
1477                         free(line);
1478                 }
1479                 fclose(cmdfile);
1480         }
1481         /* if we didn't get a pattern from -e or -f, use argv[0] */
1482         if (!(opt & 0x30)) {
1483                 if (!*argv)
1484                         bb_show_usage();
1485                 add_cmd_block(*argv++);
1486         }
1487         /* Flush any unfinished commands. */
1488         add_cmd("");
1489
1490         /* By default, we write to stdout */
1491         G.nonstdout = stdout;
1492
1493         /* argv[0..(argc-1)] should be names of file to process. If no
1494          * files were specified or '-' was specified, take input from stdin.
1495          * Otherwise, we process all the files specified. */
1496         G.input_file_list = argv;
1497         if (!argv[0]) {
1498                 if (opt & OPT_in_place)
1499                         bb_error_msg_and_die(bb_msg_requires_arg, "-i");
1500                 argv[0] = (char*)bb_msg_standard_input;
1501                 /* G.last_input_file = 0; - already is */
1502         } else {
1503                 goto start;
1504
1505                 for (; *argv; argv++) {
1506                         struct stat statbuf;
1507                         int nonstdoutfd;
1508                         sed_cmd_t *sed_cmd;
1509
1510                         G.last_input_file++;
1511  start:
1512                         if (!(opt & OPT_in_place)) {
1513                                 if (LONE_DASH(*argv)) {
1514                                         *argv = (char*)bb_msg_standard_input;
1515                                         process_files();
1516                                 }
1517                                 continue;
1518                         }
1519
1520                         /* -i: process each FILE separately: */
1521
1522                         G.outname = xasprintf("%sXXXXXX", *argv);
1523                         nonstdoutfd = xmkstemp(G.outname);
1524                         G.nonstdout = xfdopen_for_write(nonstdoutfd);
1525
1526                         /* Set permissions/owner of output file */
1527                         stat(*argv, &statbuf);
1528                         /* chmod'ing AFTER chown would preserve suid/sgid bits,
1529                          * but GNU sed 4.2.1 does not preserve them either */
1530                         fchmod(nonstdoutfd, statbuf.st_mode);
1531                         fchown(nonstdoutfd, statbuf.st_uid, statbuf.st_gid);
1532
1533                         process_files();
1534                         fclose(G.nonstdout);
1535                         G.nonstdout = stdout;
1536
1537                         if (opt_i) {
1538                                 char *backupname = xasprintf("%s%s", *argv, opt_i);
1539                                 xrename(*argv, backupname);
1540                                 free(backupname);
1541                         }
1542                         /* else unlink(*argv); - rename below does this */
1543                         xrename(G.outname, *argv); //TODO: rollback backup on error?
1544                         free(G.outname);
1545                         G.outname = NULL;
1546
1547                         /* Re-enable disabled range matches */
1548                         for (sed_cmd = G.sed_cmd_head; sed_cmd; sed_cmd = sed_cmd->next) {
1549                                 sed_cmd->beg_line = sed_cmd->beg_line_orig;
1550                         }
1551                 }
1552                 /* Here, to handle "sed 'cmds' nonexistent_file" case we did:
1553                  * if (G.current_input_file[G.current_input_file] == NULL)
1554                  *      return G.exitcode;
1555                  * but it's not needed since process_files() works correctly
1556                  * in this case too. */
1557         }
1558
1559         process_files();
1560
1561         return G.exitcode;
1562 }