small optimizations of toupper/tolower
[platform/upstream/busybox.git] / editors / awk.c
1 /* vi: set sw=4 ts=4: */
2 /*
3  * awk implementation for busybox
4  *
5  * Copyright (C) 2002 by Dmitry Zakharov <dmit@crp.bank.gov.ua>
6  *
7  * Licensed under the GPL v2 or later, see the file LICENSE in this tarball.
8  */
9
10 #include "libbb.h"
11 #include "xregex.h"
12 #include <math.h>
13
14 /* This is a NOEXEC applet. Be very careful! */
15
16
/* Buffer size handed to fmt_num() when getvar_s() formats a number into g_buf */
17 #define MAXVARFMT       240
/* Minimum number of var slots nvalloc() puts into a freshly allocated nvblock */
18 #define MINNVBLOCK      64
19
20 /* variable flags */
/* Bits of var::type. The low bits describe the kind of value ... */
21 #define VF_NUMBER       0x0001  /* 1 = primary type is number */
22 #define VF_ARRAY        0x0002  /* 1 = it's an array */
23
/* ... the higher bits describe caching, ownership and bookkeeping state */
24 #define VF_CACHED       0x0100  /* 1 = num/str value has cached str/num eq */
25 #define VF_USER         0x0200  /* 1 = user input (may be numeric string) */
26 #define VF_SPECIAL      0x0400  /* 1 = requires extra handling when changed */
27 #define VF_WALK         0x0800  /* 1 = variable has alloc'd x.walker list */
28 #define VF_FSTR         0x1000  /* 1 = var::string points to fstring buffer */
29 #define VF_CHILD        0x2000  /* 1 = function arg; x.parent points to source */
30 #define VF_DIRTY        0x4000  /* 1 = variable was set explicitly */
31
32 /* these flags are static, don't change them when value is changed */
/* (clrvar() preserves exactly this mask; copyvar() never copies these bits) */
33 #define VF_DONTTOUCH    (VF_ARRAY | VF_SPECIAL | VF_WALK | VF_CHILD | VF_DIRTY)
34
35 /* Variable */
/* One awk value: a VF_* flag word plus numeric and string representations.
 * The flag word says which representation is primary and whether the other
 * one has been cached (see getvar_s() / getvar_i()).
 */
36 typedef struct var_s {
37         unsigned type;            /* flags */
38         double number;            /* numeric value (primary if VF_NUMBER, else cached by getvar_i) */
39         char *string;             /* string value or NULL; clrvar() frees it unless VF_FSTR is set */
           /* The flag comments above tie x.array to VF_ARRAY, x.parent to
            * VF_CHILD and x.walker to VF_WALK; all members share storage. */
40         union {
41                 int aidx;               /* func arg idx (for compilation stage) */
42                 struct xhash_s *array;  /* array ptr */
43                 struct var_s *parent;   /* for func args, ptr to actual parameter */
44                 char **walker;          /* list of array elements (for..in) */
45         } x;
46 } var;
47
48 /* Node chain (pattern-action chain, BEGIN, END, function bodies) */
/* Holds the head and tail of a list of nodes. */
49 typedef struct chain_s {
50         struct node_s *first;     /* head of the chain */
51         struct node_s *last;      /* tail of the chain */
52         const char *programname;  /* NOTE(review): presumably the source name used in diagnostics - confirm */
53 } chain;
54
55 /* Function */
/* A function entry stored in the function hash (see newfunc()). */
56 typedef struct func_s {
57         unsigned nargs;           /* NOTE(review): presumably the declared parameter count - confirm */
58         struct chain_s body;      /* node chain forming the function body */
59 } func;
60
61 /* I/O stream */
/* Per-stream state stored in the redirect-stream hash (see newfile()).
 * NOTE(review): the exact meaning of adv/size/pos is defined by the record
 * reader, which is not part of this section - confirm there.
 */
62 typedef struct rstream_s {
63         FILE *F;                  /* underlying stdio stream */
64         char *buffer;             /* read buffer */
65         int adv;
66         int size;
67         int pos;
68         smallint is_pipe;         /* presumably nonzero when F is a pipe - confirm */
69 } rstream;
70
/* One entry of an xhash bucket chain. The payload union is the first member,
 * and hash_search()/hash_find() hand out a pointer to it. The key is stored
 * inline after the struct: hash_find() allocates sizeof(hash_item) plus
 * strlen(name) + 1 bytes.
 */
71 typedef struct hash_item_s {
72         union {
73                 struct var_s v;         /* variable/array hash */
74                 struct rstream_s rs;    /* redirect streams hash */
75                 struct func_s f;        /* functions hash */
76         } data;
77         struct hash_item_s *next;       /* next in chain */
78         char name[1];                   /* really it's longer */
79 } hash_item;
80
/* Chained hash table keyed by NUL-terminated names. It starts with
 * FIRST_PRIME buckets and is enlarged by hash_rebuild() using PRIMES[].
 */
81 typedef struct xhash_s {
82         unsigned nel;           /* num of elements */
83         unsigned csize;         /* current hash size */
84         unsigned nprime;        /* next hash size in PRIMES[] */
85         unsigned glen;          /* summary length of item names */
                                   /* (each name counted with its trailing NUL, see hash_find) */
86         struct hash_item_s **items;     /* array of csize bucket heads */
87 } xhash;
88
89 /* Tree node */
/* One node of the parsed program. 'info' carries operation bits (see the
 * OC_*, OF_* and priority macros); l, r and a are operand slots.
 * NOTE(review): which union member is active depends on the operation class;
 * that logic lives in the parser/evaluator, outside this section.
 */
90 typedef struct node_s {
91         uint32_t info;
92         unsigned lineno;          /* presumably the source line, for diagnostics - confirm */
93         union {
94                 struct node_s *n;
95                 var *v;
96                 int i;
97                 char *s;
98                 regex_t *re;
99         } l;
100         union {
101                 struct node_s *n;
102                 regex_t *ire;
103                 func *f;
104                 int argno;
105         } r;
106         union {
107                 struct node_s *n;
108         } a;
109 } node;
110
111 /* Block of temporary variables */
/* Blocks form a doubly linked list that nvalloc()/nvfree() use as a stack. */
112 typedef struct nvblock_s {
113         int size;                 /* capacity of nv[] in elements */
114         var *pos;                 /* first unused slot in nv[] */
115         struct nvblock_s *prev;   /* previously allocated block, or NULL */
116         struct nvblock_s *next;   /* following block, or NULL */
117         var nv[];                 /* the slots themselves (flexible array member) */
118 } nvblock;
119
/* A node bundled with storage for two compiled regexes.
 * NOTE(review): presumably node.l.re / node.r.ire point into re[] (normal and
 * case-insensitive variants) - confirm against the splitter setup code.
 */
120 typedef struct tsplitter_s {
121         node n;
122         regex_t re[2];
123 } tsplitter;
124
125 /* simple token classes */
126 /* Order and hex values are very important!!!  See next_token() */
/* Every simple class is a distinct single bit (bit 0 .. bit 29), so sets of
 * classes can be built with '|' as the combined definitions below do.
 * The order matches the groups of tokenlist[], whose NTC marker is
 * documented as "switch to next token class (tc<<1)".
 */
127 #define TC_SEQSTART      1                              /* ( */
128 #define TC_SEQTERM      (1 << 1)                /* ) */
129 #define TC_REGEXP       (1 << 2)                /* /.../ */
130 #define TC_OUTRDR       (1 << 3)                /* | > >> */
131 #define TC_UOPPOST      (1 << 4)                /* unary postfix operator */
132 #define TC_UOPPRE1      (1 << 5)                /* unary prefix operator */
133 #define TC_BINOPX       (1 << 6)                /* two-opnd operator */
134 #define TC_IN           (1 << 7)
135 #define TC_COMMA        (1 << 8)
136 #define TC_PIPE         (1 << 9)                /* input redirection pipe */
137 #define TC_UOPPRE2      (1 << 10)               /* unary prefix operator */
138 #define TC_ARRTERM      (1 << 11)               /* ] */
139 #define TC_GRPSTART     (1 << 12)               /* { */
140 #define TC_GRPTERM      (1 << 13)               /* } */
141 #define TC_SEMICOL      (1 << 14)
142 #define TC_NEWLINE      (1 << 15)
143 #define TC_STATX        (1 << 16)               /* ctl statement (for, next...) */
144 #define TC_WHILE        (1 << 17)
145 #define TC_ELSE         (1 << 18)
146 #define TC_BUILTIN      (1 << 19)
147 #define TC_GETLINE      (1 << 20)
148 #define TC_FUNCDECL     (1 << 21)               /* `function' `func' */
149 #define TC_BEGIN        (1 << 22)
150 #define TC_END          (1 << 23)
151 #define TC_EOF          (1 << 24)
152 #define TC_VARIABLE     (1 << 25)
153 #define TC_ARRAY        (1 << 26)
154 #define TC_FUNCTION     (1 << 27)
155 #define TC_STRING       (1 << 28)
156 #define TC_NUMBER       (1 << 29)
157
/* any unary prefix operator (both prefix groups of tokenlist[]) */
158 #define TC_UOPPRE  (TC_UOPPRE1 | TC_UOPPRE2)
159
160 /* combined token classes */
161 #define TC_BINOP   (TC_BINOPX | TC_COMMA | TC_PIPE | TC_IN)
162 #define TC_UNARYOP (TC_UOPPRE | TC_UOPPOST)
163 #define TC_OPERAND (TC_VARIABLE | TC_ARRAY | TC_FUNCTION \
164                    | TC_BUILTIN | TC_GETLINE | TC_SEQSTART | TC_STRING | TC_NUMBER)
165
166 #define TC_STATEMNT (TC_STATX | TC_WHILE)
/* tokens that terminate a statement: ';' or newline */
167 #define TC_OPTERM  (TC_SEMICOL | TC_NEWLINE)
168
169 /* word tokens, cannot mean something else if not expected */
170 #define TC_WORD    (TC_IN | TC_STATEMNT | TC_ELSE | TC_BUILTIN \
171                    | TC_GETLINE | TC_FUNCDECL | TC_BEGIN | TC_END)
172
173 /* discard newlines after these */
174 #define TC_NOTERM  (TC_COMMA | TC_GRPSTART | TC_GRPTERM \
175                    | TC_BINOP | TC_OPTERM)
176
177 /* what can expression begin with */
178 #define TC_OPSEQ   (TC_OPERAND | TC_UOPPRE | TC_REGEXP)
179 /* what can group begin with */
180 #define TC_GRPSEQ  (TC_OPSEQ | TC_OPTERM | TC_STATEMNT | TC_GRPSTART)
181
182 /* if previous token class is CONCAT1 and next is CONCAT2, concatenation */
183 /* operator is inserted between them */
184 #define TC_CONCAT1 (TC_VARIABLE | TC_ARRTERM | TC_SEQTERM \
185                    | TC_STRING | TC_NUMBER | TC_UOPPOST)
186 #define TC_CONCAT2 (TC_OPERAND | TC_UOPPRE)
187
/* Operator flag bits stored in node::info / tokeninfo[] above the operation
 * class byte. NOTE(review): going by the names and by the builtin comment
 * below, RESn appears to mean "resolve operand n", STRn/NUMn "as string / as
 * number"; the consuming code is outside this section - confirm there.
 */
188 #define OF_RES1    0x010000
189 #define OF_RES2    0x020000
190 #define OF_STR1    0x040000
191 #define OF_STR2    0x080000
192 #define OF_NUM1    0x100000
193 #define OF_CHECKED 0x200000
194
195 /* combined operator flags */
/* Two-letter shorthands used in tokeninfo[]: the first letter is built from
 * the *1 flags, the second from the *2 flags; 'x' contributes no flag. */
196 #define xx      0
197 #define xV      OF_RES2
198 #define xS      (OF_RES2 | OF_STR2)
199 #define Vx      OF_RES1
200 #define VV      (OF_RES1 | OF_RES2)
201 #define Nx      (OF_RES1 | OF_NUM1)
202 #define NV      (OF_RES1 | OF_NUM1 | OF_RES2)
203 #define Sx      (OF_RES1 | OF_STR1)
204 #define SV      (OF_RES1 | OF_STR1 | OF_RES2)
205 #define SS      (OF_RES1 | OF_STR1 | OF_RES2 | OF_STR2)
206
/* OPCLSMASK selects the operation class (the OC_xxx / ST_xxx values occupy
 * bits 8..15); OPNMASK selects the low 7 bits */
207 #define OPCLSMASK 0xFF00
208 #define OPNMASK   0x007F
209
210 /* operator priority is a highest byte (even: r->l, odd: l->r grouping)
211  * For builtins it has different meaning: n n s3 s2 s1 v3 v2 v1,
212  * n - min. number of args, vN - resolve Nth arg to var, sN - resolve to string
213  */
/* NOTE(review): the argument of P() is not parenthesized, and P(x) with
 * x >= 0x80 (e.g. P(0x83) in tokeninfo[]) left-shifts a signed int past
 * INT_MAX when int is 32 bits wide; a uint32_t cast would avoid that. */
214 #define P(x)      (x << 24)
215 #define PRIMASK   0x7F000000
216 #define PRIMASK2  0x7E000000
217
218 /* Operation classes */
219
/* Numerically SHIFT_TIL_THIS equals OC_WALKINIT and RECUR_FROM_THIS equals
 * OC_BINARY, so they split the OC_ range into three bands.
 * NOTE(review): how the bands are used is decided by code outside this
 * section - confirm there.
 */
220 #define SHIFT_TIL_THIS  0x0600
221 #define RECUR_FROM_THIS 0x1000
222
/* All values have only bits 8..15 set, i.e. they fit OPCLSMASK (0xFF00). */
223 enum {
224         OC_DELETE = 0x0100,     OC_EXEC = 0x0200,       OC_NEWSOURCE = 0x0300,
225         OC_PRINT = 0x0400,      OC_PRINTF = 0x0500,     OC_WALKINIT = 0x0600,
226
227         OC_BR = 0x0700,         OC_BREAK = 0x0800,      OC_CONTINUE = 0x0900,
228         OC_EXIT = 0x0a00,       OC_NEXT = 0x0b00,       OC_NEXTFILE = 0x0c00,
229         OC_TEST = 0x0d00,       OC_WALKNEXT = 0x0e00,
230
231         OC_BINARY = 0x1000,     OC_BUILTIN = 0x1100,    OC_COLON = 0x1200,
232         OC_COMMA = 0x1300,      OC_COMPARE = 0x1400,    OC_CONCAT = 0x1500,
233         OC_FBLTIN = 0x1600,     OC_FIELD = 0x1700,      OC_FNARG = 0x1800,
234         OC_FUNC = 0x1900,       OC_GETLINE = 0x1a00,    OC_IN = 0x1b00,
235         OC_LAND = 0x1c00,       OC_LOR = 0x1d00,        OC_MATCH = 0x1e00,
236         OC_MOVE = 0x1f00,       OC_PGETLINE = 0x2000,   OC_REGEXP = 0x2100,
237         OC_REPLACE = 0x2200,    OC_RETURN = 0x2300,     OC_SPRINTF = 0x2400,
238         OC_TERNARY = 0x2500,    OC_UNARY = 0x2600,      OC_VAR = 0x2700,
239         OC_DONE = 0x2800,
240
        /* statement keywords (if/do/for/while) as they appear in tokeninfo[] */
241         ST_IF = 0x3000,         ST_DO = 0x3100,         ST_FOR = 0x3200,
242         ST_WHILE = 0x3300
243 };
244
245 /* simple builtins */
/* Sub-codes OR-ed with OC_FBLTIN in tokeninfo[]; they pair with the names
 * there as: int, rand, cos, exp, log, sin, sqrt, srand, systime, length,
 * system, fflush, close. */
246 enum {
247         F_in,   F_rn,   F_co,   F_ex,   F_lg,   F_si,   F_sq,   F_sr,
248         F_ti,   F_le,   F_sy,   F_ff,   F_cl
249 };
250
251 /* builtins */
/* Sub-codes OR-ed with OC_BUILTIN (OC_B) in tokeninfo[]; they pair with the
 * names there as: atan2, index, match, split, substr, strftime, mktime,
 * tolower, toupper, gensub, gsub, sub, and, compl, lshift, or, rshift, xor. */
252 enum {
253         B_a2,   B_ix,   B_ma,   B_sp,   B_ss,   B_ti,   B_mt,   B_lo,   B_up,
254         B_ge,   B_gs,   B_su,
255         B_an,   B_co,   B_ls,   B_or,   B_rs,   B_xo,
256 };
257
258 /* tokens and their corresponding info values */
/* Layout of tokenlist[]: a sequence of entries, each one length byte (written
 * as an octal escape) followed by that many characters of token text. NTC
 * ends the group of tokens belonging to one token class; the groups appear
 * in the same order as the TC_xxx bits. The final "\0" (a zero length byte)
 * ends the list. tokeninfo[] below holds one value per entry, in this order.
 */
259
260 #define NTC     "\377"  /* switch to next token class (tc<<1) */
261 #define NTCC    '\377'
262
263 #define OC_B    OC_BUILTIN
264
265 static const char tokenlist[] ALIGN1 =
266         "\1("       NTC
267         "\1)"       NTC
268         "\1/"       NTC                                 /* REGEXP */
269         "\2>>"      "\1>"       "\1|"       NTC         /* OUTRDR */
270         "\2++"      "\2--"      NTC                     /* UOPPOST */
271         "\2++"      "\2--"      "\1$"       NTC         /* UOPPRE1 */
272         "\2=="      "\1="       "\2+="      "\2-="      /* BINOPX */
273         "\2*="      "\2/="      "\2%="      "\2^="
274         "\1+"       "\1-"       "\3**="     "\2**"
275         "\1/"       "\1%"       "\1^"       "\1*"
276         "\2!="      "\2>="      "\2<="      "\1>"
277         "\1<"       "\2!~"      "\1~"       "\2&&"
278         "\2||"      "\1?"       "\1:"       NTC
279         "\2in"      NTC
280         "\1,"       NTC
281         "\1|"       NTC
282         "\1+"       "\1-"       "\1!"       NTC         /* UOPPRE2 */
283         "\1]"       NTC
284         "\1{"       NTC
285         "\1}"       NTC
286         "\1;"       NTC
287         "\1\n"      NTC
288         "\2if"      "\2do"      "\3for"     "\5break"   /* STATX */
289         "\10continue"           "\6delete"  "\5print"
290         "\6printf"  "\4next"    "\10nextfile"
291         "\6return"  "\4exit"    NTC
292         "\5while"   NTC
293         "\4else"    NTC
294
295         "\3and"     "\5compl"   "\6lshift"  "\2or"
296         "\6rshift"  "\3xor"
297         "\5close"   "\6system"  "\6fflush"  "\5atan2"   /* BUILTIN */
298         "\3cos"     "\3exp"     "\3int"     "\3log"
299         "\4rand"    "\3sin"     "\4sqrt"    "\5srand"
300         "\6gensub"  "\4gsub"    "\5index"   "\6length"
301         "\5match"   "\5split"   "\7sprintf" "\3sub"
302         "\6substr"  "\7systime" "\10strftime" "\6mktime"
303         "\7tolower" "\7toupper" NTC
304         "\7getline" NTC
305         "\4func"    "\10function"   NTC
306         "\5BEGIN"   NTC
307         "\3END"     "\0"
308         ;
309
/* Info word for each tokenlist[] entry, in the same order. A word combines an
 * operation class (OC_xxx / ST_xxx), operand flags (xV, NV, Sx, ...), the
 * P() byte (priority for operators, argument description for builtins) and a
 * low sub-code (a character or an F_xx / B_xx constant). Entries that are 0
 * belong to tokens that carry no info, e.g. brackets and terminators.
 */
310 static const uint32_t tokeninfo[] = {
311         0,
312         0,
313         OC_REGEXP,
314         xS|'a',     xS|'w',     xS|'|',
315         OC_UNARY|xV|P(9)|'p',       OC_UNARY|xV|P(9)|'m',
316         OC_UNARY|xV|P(9)|'P',       OC_UNARY|xV|P(9)|'M',
317             OC_FIELD|xV|P(5),
318         OC_COMPARE|VV|P(39)|5,      OC_MOVE|VV|P(74),
319             OC_REPLACE|NV|P(74)|'+',    OC_REPLACE|NV|P(74)|'-',
320         OC_REPLACE|NV|P(74)|'*',    OC_REPLACE|NV|P(74)|'/',
321             OC_REPLACE|NV|P(74)|'%',    OC_REPLACE|NV|P(74)|'&',
322         OC_BINARY|NV|P(29)|'+',     OC_BINARY|NV|P(29)|'-',
323             OC_REPLACE|NV|P(74)|'&',    OC_BINARY|NV|P(15)|'&',
324         OC_BINARY|NV|P(25)|'/',     OC_BINARY|NV|P(25)|'%',
325             OC_BINARY|NV|P(15)|'&',     OC_BINARY|NV|P(25)|'*',
326         OC_COMPARE|VV|P(39)|4,      OC_COMPARE|VV|P(39)|3,
327             OC_COMPARE|VV|P(39)|0,      OC_COMPARE|VV|P(39)|1,
328         OC_COMPARE|VV|P(39)|2,      OC_MATCH|Sx|P(45)|'!',
329             OC_MATCH|Sx|P(45)|'~',      OC_LAND|Vx|P(55),
330         OC_LOR|Vx|P(59),            OC_TERNARY|Vx|P(64)|'?',
331             OC_COLON|xx|P(67)|':',
332         OC_IN|SV|P(49),
333         OC_COMMA|SS|P(80),
334         OC_PGETLINE|SV|P(37),
335         OC_UNARY|xV|P(19)|'+',      OC_UNARY|xV|P(19)|'-',
336             OC_UNARY|xV|P(19)|'!',
337         0,
338         0,
339         0,
340         0,
341         0,
342         ST_IF,          ST_DO,          ST_FOR,         OC_BREAK,
343         OC_CONTINUE,                    OC_DELETE|Vx,   OC_PRINT,
344         OC_PRINTF,      OC_NEXT,        OC_NEXTFILE,
345         OC_RETURN|Vx,   OC_EXIT|Nx,
346         ST_WHILE,
347         0,
348
349         OC_B|B_an|P(0x83), OC_B|B_co|P(0x41), OC_B|B_ls|P(0x83), OC_B|B_or|P(0x83),
350         OC_B|B_rs|P(0x83), OC_B|B_xo|P(0x83),
351         OC_FBLTIN|Sx|F_cl, OC_FBLTIN|Sx|F_sy, OC_FBLTIN|Sx|F_ff, OC_B|B_a2|P(0x83),
352         OC_FBLTIN|Nx|F_co, OC_FBLTIN|Nx|F_ex, OC_FBLTIN|Nx|F_in, OC_FBLTIN|Nx|F_lg,
353         OC_FBLTIN|F_rn,    OC_FBLTIN|Nx|F_si, OC_FBLTIN|Nx|F_sq, OC_FBLTIN|Nx|F_sr,
354         OC_B|B_ge|P(0xd6), OC_B|B_gs|P(0xb6), OC_B|B_ix|P(0x9b), OC_FBLTIN|Sx|F_le,
355         OC_B|B_ma|P(0x89), OC_B|B_sp|P(0x8b), OC_SPRINTF,        OC_B|B_su|P(0xb6),
356         OC_B|B_ss|P(0x8f), OC_FBLTIN|F_ti,    OC_B|B_ti|P(0x0b), OC_B|B_mt|P(0x0b),
357         OC_B|B_lo|P(0x49), OC_B|B_up|P(0x49),
358         OC_GETLINE|SV|P(0),
359         0,      0,
360         0,
361         0
362 };
363
364 /* internal variable names and their initial values       */
365 /* asterisk marks SPECIAL vars; $ is just no-named Field0 */
/* The enumerators index intvar[] and are listed in the same order as the
 * names in vNames[] (F0 corresponds to "$"). */
366 enum {
367         CONVFMT,    OFMT,       FS,         OFS,
368         ORS,        RS,         RT,         FILENAME,
369         SUBSEP,     F0,         ARGIND,     ARGC,
370         ARGV,       ERRNO,      FNR,        NR,
371         NF,         IGNORECASE, ENVIRON,    NUM_INTERNAL_VARS
372 };
373
/* NUL-separated names, ended by an empty string.
 * NOTE(review): as written here every '*' follows a NUL, so it becomes the
 * first byte of the NEXT entry (e.g. "FS\0*" "OFS\0" yields "FS", "*OFS").
 * Confirm against the code that consumes this table that the marker is
 * attached to the intended variables. */
374 static const char vNames[] ALIGN1 =
375         "CONVFMT\0" "OFMT\0"    "FS\0*"     "OFS\0"
376         "ORS\0"     "RS\0*"     "RT\0"      "FILENAME\0"
377         "SUBSEP\0"  "$\0*"      "ARGIND\0"  "ARGC\0"
378         "ARGV\0"    "ERRNO\0"   "FNR\0"     "NR\0"
379         "NF\0*"     "IGNORECASE\0*" "ENVIRON\0" "\0";
380
/* NUL-separated initial string values for the first ten variables
 * (CONVFMT .. F0), followed by a '\377' byte; presumably that byte tells the
 * initialization code that no further string defaults follow - confirm. */
381 static const char vValues[] ALIGN1 =
382         "%.6g\0"    "%.6g\0"    " \0"       " \0"
383         "\n\0"      "\n\0"      "\0"        "\0"
384         "\034\0"    "\0"        "\377";
385
386 /* hash size may grow to these values */
/* hash_init() starts every table with FIRST_PRIME buckets; hash_rebuild()
 * then steps through PRIMES[] (indexed by xhash::nprime) and stops growing
 * once the last entry has been used. */
387 #define FIRST_PRIME 61
388 static const uint16_t PRIMES[] ALIGN2 = { 251, 1021, 4093, 16381, 65521 };
389
390
391 /* Globals. Split in two parts so that first one is addressed
392  * with (mostly short) negative offsets.
393  * NB: it's unsafe to put members of type "double"
394  * into globals2 (gcc may fail to align them).
395  */
/* This first part is reached through G1, i.e. ptr_to_globals[-1]; INIT_G()
 * places it immediately in front of struct globals2 in one allocation. */
396 struct globals {
397         double t_double;
398         chain beginseq, mainseq, endseq;
399         chain *seq;
400         node *break_ptr, *continue_ptr;
401         rstream *iF;
402         xhash *vhash, *ahash, *fdhash, *fnhash; /* vhash: variables, fdhash: streams, fnhash: functions (see newvar/newfile/newfunc) */
403         const char *g_progname;   /* printed by syntax_error() */
404         int g_lineno;             /* printed by syntax_error() */
405         int nfields;
406         int maxfields; /* used in fsrealloc() only */
407         var *Fields;
408         nvblock *g_cb;            /* current block of the nvalloc()/nvfree() stack */
409         char *g_pos;              /* parse position; next_token() starts reading here */
410         char *g_buf;              /* scratch buffer; getvar_s() formats numbers into it */
411         smallint icase;
412         smallint exiting;
413         smallint nextrec;
414         smallint nextfile;
415         smallint is_f0_split;
416 };
/* Second part of the globals, reached through G (the memory ptr_to_globals
 * points at). Members named function__name are, per the comment below,
 * former function-local statics. */
417 struct globals2 {
418         uint32_t t_info; /* often used */
419         uint32_t t_tclass;
420         char *t_string;
421         int t_lineno;
422         int t_rollback;
423
424         var *intvar[NUM_INTERNAL_VARS]; /* often used */
425
426         /* former statics from various functions */
427         char *split_f0__fstrings;
428
429         uint32_t next_token__save_tclass;
430         uint32_t next_token__save_info;
431         uint32_t next_token__ltclass;      /* set to TC_OPTERM by INIT_G() */
432         smallint next_token__concat_inserted;
433
434         smallint next_input_file__files_happen;
435         rstream next_input_file__rsm;
436
437         var *evaluate__fnargs;
438         unsigned evaluate__seed;           /* set to 1 by INIT_G() */
439         regex_t evaluate__sreg;
440
441         var ptest__v;
442
443         tsplitter exec_builtin__tspl;
444
445         /* biggest and least used members go last */
446         tsplitter fsplitter, rsplitter;
447 };
/* Accessors for the two global structs. INIT_G() allocates both in a single
 * zeroed chunk and points ptr_to_globals just past struct globals, so G1 is
 * the object directly before the pointer and G the object at the pointer. */
448 #define G1 (ptr_to_globals[-1])
449 #define G (*(struct globals2 *)ptr_to_globals)
450 /* For debug. nm --size-sort awk.o | grep -vi ' [tr] ' */
451 /*char G1size[sizeof(G1)]; - 0x74 */
452 /*char Gsize[sizeof(G)]; - 0x1c4 */
453 /* Trying to keep most of members accessible with short offsets: */
454 /*char Gofs_seed[offsetof(struct globals2, evaluate__seed)]; - 0x90 */
/* Shorthands so the rest of the file can name the members directly.
 * Note that these are object-like macros: the bare identifiers below cannot
 * be reused as local variable or field names in this file. */
455 #define t_double     (G1.t_double    )
456 #define beginseq     (G1.beginseq    )
457 #define mainseq      (G1.mainseq     )
458 #define endseq       (G1.endseq      )
459 #define seq          (G1.seq         )
460 #define break_ptr    (G1.break_ptr   )
461 #define continue_ptr (G1.continue_ptr)
462 #define iF           (G1.iF          )
463 #define vhash        (G1.vhash       )
464 #define ahash        (G1.ahash       )
465 #define fdhash       (G1.fdhash      )
466 #define fnhash       (G1.fnhash      )
467 #define g_progname   (G1.g_progname  )
468 #define g_lineno     (G1.g_lineno    )
469 #define nfields      (G1.nfields     )
470 #define maxfields    (G1.maxfields   )
471 #define Fields       (G1.Fields      )
472 #define g_cb         (G1.g_cb        )
473 #define g_pos        (G1.g_pos       )
474 #define g_buf        (G1.g_buf       )
475 #define icase        (G1.icase       )
476 #define exiting      (G1.exiting     )
477 #define nextrec      (G1.nextrec     )
478 #define nextfile     (G1.nextfile    )
479 #define is_f0_split  (G1.is_f0_split )
480 #define t_info       (G.t_info      )
481 #define t_tclass     (G.t_tclass    )
482 #define t_string     (G.t_string    )
483 #define t_lineno     (G.t_lineno    )
484 #define t_rollback   (G.t_rollback  )
485 #define intvar       (G.intvar      )
486 #define fsplitter    (G.fsplitter   )
487 #define rsplitter    (G.rsplitter   )
/* Allocate and zero both global structs, then set the two members whose
 * initial value is not zero. */
488 #define INIT_G() do { \
489         SET_PTR_TO_GLOBALS((char*)xzalloc(sizeof(G1)+sizeof(G)) + sizeof(G1)); \
490         G.next_token__ltclass = TC_OPTERM; \
491         G.evaluate__seed = 1; \
492 } while (0)
493
494
495 /* function prototypes */
/* Forward declarations for functions that are used before their definitions
 * (e.g. handle_special() by the setvar_* helpers, fmt_num() by getvar_s()). */
496 static void handle_special(var *);
497 static node *parse_expr(uint32_t);
498 static void chain_group(void);
499 static var *evaluate(node *, var *);
500 static rstream *next_input_file(void);
501 static int fmt_num(char *, int, const char *, double, int);
/* declared with an int return type although it is marked NORETURN */
502 static int awk_exit(int) NORETURN;
503
504 /* ---- error handling ---- */
505
/* Message texts passed to syntax_error(), which prefixes them with the
 * program name and line number. */
506 static const char EMSG_INTERNAL_ERROR[] ALIGN1 = "Internal error";
507 static const char EMSG_UNEXP_EOS[] ALIGN1 = "Unexpected end of string";
508 static const char EMSG_UNEXP_TOKEN[] ALIGN1 = "Unexpected token";
509 static const char EMSG_DIV_BY_ZERO[] ALIGN1 = "Division by zero";
510 static const char EMSG_INV_FMT[] ALIGN1 = "Invalid format specifier";
511 static const char EMSG_TOO_FEW_ARGS[] ALIGN1 = "Too few arguments for builtin";
512 static const char EMSG_NOT_ARRAY[] ALIGN1 = "Not an array";
513 static const char EMSG_POSSIBLE_ERROR[] ALIGN1 = "Possible syntax error";
514 static const char EMSG_UNDEF_FUNC[] ALIGN1 = "Call to undefined function";
/* only defined when the math builtins are configured out */
515 #if !ENABLE_FEATURE_AWK_LIBM
516 static const char EMSG_NO_MATH[] ALIGN1 = "Math support is not compiled in";
517 #endif
518
519 static void zero_out_var(var * vp)
520 {
521         memset(vp, 0, sizeof(*vp));
522 }
523
524 static void syntax_error(const char *message) NORETURN;
525 static void syntax_error(const char *message)
526 {
527         bb_error_msg_and_die("%s:%i: %s", g_progname, g_lineno, message);
528 }
529
530 /* ---- hash stuff ---- */
531
/* Compute the (unreduced) hash of a NUL-terminated name.
 * Each character is folded in as idx = idx * 63 + c using unsigned
 * wrap-around arithmetic; the empty string hashes to 0. Callers reduce the
 * result modulo the table size themselves.
 */
static unsigned hashidx(const char *name)
{
        unsigned idx = 0;

        for (; *name != '\0'; name++)
                idx = idx * 63 + *name;

        return idx;
}
539
540 /* create new hash */
541 static xhash *hash_init(void)
542 {
543         xhash *newhash;
544
545         newhash = xzalloc(sizeof(xhash));
546         newhash->csize = FIRST_PRIME;
547         newhash->items = xzalloc(newhash->csize * sizeof(hash_item *));
548
549         return newhash;
550 }
551
552 /* find item in hash, return ptr to data, NULL if not found */
553 static void *hash_search(xhash *hash, const char *name)
554 {
555         hash_item *hi;
556
557         hi = hash->items [ hashidx(name) % hash->csize ];
558         while (hi) {
559                 if (strcmp(hi->name, name) == 0)
560                         return &(hi->data);
561                 hi = hi->next;
562         }
563         return NULL;
564 }
565
566 /* grow hash if it becomes too big */
567 static void hash_rebuild(xhash *hash)
568 {
569         unsigned newsize, i, idx;
570         hash_item **newitems, *hi, *thi;
571
572         if (hash->nprime == ARRAY_SIZE(PRIMES))
573                 return;
574
575         newsize = PRIMES[hash->nprime++];
576         newitems = xzalloc(newsize * sizeof(hash_item *));
577
578         for (i = 0; i < hash->csize; i++) {
579                 hi = hash->items[i];
580                 while (hi) {
581                         thi = hi;
582                         hi = thi->next;
583                         idx = hashidx(thi->name) % newsize;
584                         thi->next = newitems[idx];
585                         newitems[idx] = thi;
586                 }
587         }
588
589         free(hash->items);
590         hash->csize = newsize;
591         hash->items = newitems;
592 }
593
594 /* find item in hash, add it if necessary. Return ptr to data */
595 static void *hash_find(xhash *hash, const char *name)
596 {
597         hash_item *hi;
598         unsigned idx;
599         int l;
600
601         hi = hash_search(hash, name);
602         if (!hi) {
603                 if (++hash->nel / hash->csize > 10)
604                         hash_rebuild(hash);
605
606                 l = strlen(name) + 1;
607                 hi = xzalloc(sizeof(*hi) + l);
608                 strcpy(hi->name, name);
609
610                 idx = hashidx(name) % hash->csize;
611                 hi->next = hash->items[idx];
612                 hash->items[idx] = hi;
613                 hash->glen += l;
614         }
615         return &(hi->data);
616 }
617
/* Typed wrappers around hash_find(): look the name up and create the entry
 * if it is missing. newvar/newfile/newfunc use the global tables vhash,
 * fdhash and fnhash respectively. */
618 #define findvar(hash, name) ((var*)    hash_find((hash), (name)))
619 #define newvar(name)        ((var*)    hash_find(vhash, (name)))
620 #define newfile(name)       ((rstream*)hash_find(fdhash, (name)))
621 #define newfunc(name)       ((func*)   hash_find(fnhash, (name)))
622
623 static void hash_remove(xhash *hash, const char *name)
624 {
625         hash_item *hi, **phi;
626
627         phi = &(hash->items[hashidx(name) % hash->csize]);
628         while (*phi) {
629                 hi = *phi;
630                 if (strcmp(hi->name, name) == 0) {
631                         hash->glen -= (strlen(name) + 1);
632                         hash->nel--;
633                         *phi = hi->next;
634                         free(hi);
635                         break;
636                 }
637                 phi = &(hi->next);
638         }
639 }
640
641 /* ------ some useful functions ------ */
642
643 static void skip_spaces(char **s)
644 {
645         char *p = *s;
646
647         while (1) {
648                 if (*p == '\\' && p[1] == '\n') {
649                         p++;
650                         t_lineno++;
651                 } else if (*p != ' ' && *p != '\t') {
652                         break;
653                 }
654                 p++;
655         }
656         *s = p;
657 }
658
/* Return the NUL-terminated word *s points at and move *s to the byte just
 * after that word's terminating NUL (i.e. to the next word in a
 * NUL-separated list).
 */
static char *nextword(char **s)
{
        char *word = *s;

        *s = word + strlen(word) + 1;
        return word;
}
667
/* Fetch the next character from *s and advance *s past it.
 * After a backslash the rest is handed to bb_process_escape_sequence().
 * If that call returns a backslash without consuming any input, the
 * character following the backslash is returned and consumed instead.
 */
static char nextchar(char **s)
{
        char ch = **s;
        char *after;

        (*s)++;
        if (ch != '\\')
                return ch;

        after = *s;
        ch = bb_process_escape_sequence((const char**)s);
        if (ch == '\\' && *s == after) {
                ch = **s;
                (*s)++;
        }
        return ch;
}
678
/* Return nonzero if c is a letter, a digit or an underscore.
 * The value is converted to unsigned char before it reaches isalnum(): a
 * plain char holding a byte >= 0x80 is negative on signed-char targets, and
 * passing such a value to a <ctype.h> function is undefined.
 */
static ALWAYS_INLINE int isalnum_(int c)
{
        return (c == '_' || isalnum((unsigned char)c));
}
683
/* Convert the number at *pp to double and advance *pp past what was parsed.
 *
 * With ENABLE_DESKTOP, integers written as 0x... (hex) or 0ddd (octal) are
 * accepted as well. A leading zero does not always mean octal, though:
 * "08", "009.5" or "00.5" are decimal numbers. If the integer conversion
 * stops in front of a digit or a '.', the input is therefore re-parsed from
 * the start with strtod() instead of returning the truncated value.
 */
static double my_strtod(char **pp)
{
        char *cp = *pp;

#if ENABLE_DESKTOP
        if (cp[0] == '0') {
                char c = (cp[1] | 0x20);

                if (c == 'x' || isdigit((unsigned char)cp[1])) {
                        unsigned long long ull = strtoull(cp, pp, 0);

                        if (c == 'x')
                                return ull;
                        c = **pp;
                        if (!isdigit((unsigned char)c) && c != '.')
                                return ull;
                        /* else: it is a decimal/floating number, e.g.
                         * 009.123 (*pp points to '9')
                         * 000.123 (*pp points to '.')
                         * fall through to strtod, which resets *pp.
                         */
                }
        }
#endif
        return strtod(cp, pp);
}
695
696 /* -------- working with variables (set/get/copy/etc) -------- */
697
698 static xhash *iamarray(var *v)
699 {
700         var *a = v;
701
702         while (a->type & VF_CHILD)
703                 a = a->x.parent;
704
705         if (!(a->type & VF_ARRAY)) {
706                 a->type |= VF_ARRAY;
707                 a->x.array = hash_init();
708         }
709         return a->x.array;
710 }
711
712 static void clear_array(xhash *array)
713 {
714         unsigned i;
715         hash_item *hi, *thi;
716
717         for (i = 0; i < array->csize; i++) {
718                 hi = array->items[i];
719                 while (hi) {
720                         thi = hi;
721                         hi = hi->next;
722                         free(thi->data.v.string);
723                         free(thi);
724                 }
725                 array->items[i] = NULL;
726         }
727         array->glen = array->nel = 0;
728 }
729
730 /* clear a variable */
731 static var *clrvar(var *v)
732 {
733         if (!(v->type & VF_FSTR))
734                 free(v->string);
735
736         v->type &= VF_DONTTOUCH;
737         v->type |= VF_DIRTY;
738         v->string = NULL;
739         return v;
740 }
741
742 /* assign string value to variable */
743 static var *setvar_p(var *v, char *value)
744 {
745         clrvar(v);
746         v->string = value;
747         handle_special(v);
748         return v;
749 }
750
751 /* same as setvar_p but make a copy of string */
752 static var *setvar_s(var *v, const char *value)
753 {
754         return setvar_p(v, (value && *value) ? xstrdup(value) : NULL);
755 }
756
757 /* same as setvar_s but set USER flag */
758 static var *setvar_u(var *v, const char *value)
759 {
760         setvar_s(v, value);
761         v->type |= VF_USER;
762         return v;
763 }
764
765 /* set array element to user string */
766 static void setari_u(var *a, int idx, const char *s)
767 {
768         var *v;
769
770         v = findvar(iamarray(a), itoa(idx));
771         setvar_u(v, s);
772 }
773
774 /* assign numeric value to variable */
775 static var *setvar_i(var *v, double value)
776 {
777         clrvar(v);
778         v->type |= VF_NUMBER;
779         v->number = value;
780         handle_special(v);
781         return v;
782 }
783
784 static const char *getvar_s(var *v)
785 {
786         /* if v is numeric and has no cached string, convert it to string */
787         if ((v->type & (VF_NUMBER | VF_CACHED)) == VF_NUMBER) {
788                 fmt_num(g_buf, MAXVARFMT, getvar_s(intvar[CONVFMT]), v->number, TRUE);
789                 v->string = xstrdup(g_buf);
790                 v->type |= VF_CACHED;
791         }
792         return (v->string == NULL) ? "" : v->string;
793 }
794
795 static double getvar_i(var *v)
796 {
797         char *s;
798
799         if ((v->type & (VF_NUMBER | VF_CACHED)) == 0) {
800                 v->number = 0;
801                 s = v->string;
802                 if (s && *s) {
803                         v->number = my_strtod(&s);
804                         if (v->type & VF_USER) {
805                                 skip_spaces(&s);
806                                 if (*s != '\0')
807                                         v->type &= ~VF_USER;
808                         }
809                 } else {
810                         v->type &= ~VF_USER;
811                 }
812                 v->type |= VF_CACHED;
813         }
814         return v->number;
815 }
816
817 /* Used for operands of bitwise ops */
818 static unsigned long getvar_i_int(var *v)
819 {
820         double d = getvar_i(v);
821
822         /* Casting doubles to longs is undefined for values outside
823          * of target type range. Try to widen it as much as possible */
824         if (d >= 0)
825                 return (unsigned long)d;
826         /* Why? Think about d == -4294967295.0 (assuming 32bit longs) */
827         return - (long) (unsigned long) (-d);
828 }
829
830 static var *copyvar(var *dest, const var *src)
831 {
832         if (dest != src) {
833                 clrvar(dest);
834                 dest->type |= (src->type & ~(VF_DONTTOUCH | VF_FSTR));
835                 dest->number = src->number;
836                 if (src->string)
837                         dest->string = xstrdup(src->string);
838         }
839         handle_special(dest);
840         return dest;
841 }
842
843 static var *incvar(var *v)
844 {
845         return setvar_i(v, getvar_i(v) + 1.);
846 }
847
848 /* return true if v is number or numeric string */
849 static int is_numeric(var *v)
850 {
851         getvar_i(v);
852         return ((v->type ^ VF_DIRTY) & (VF_NUMBER | VF_USER | VF_DIRTY));
853 }
854
855 /* return 1 when value of v corresponds to true, 0 otherwise */
856 static int istrue(var *v)
857 {
858         if (is_numeric(v))
859                 return (v->number == 0) ? 0 : 1;
860         return (v->string && *(v->string)) ? 1 : 0;
861 }
862
863 /* temporary variables allocator. Last allocated should be first freed */
864 static var *nvalloc(int n)
865 {
866         nvblock *pb = NULL;
867         var *v, *r;
868         int size;
869
870         while (g_cb) {
871                 pb = g_cb;
872                 if ((g_cb->pos - g_cb->nv) + n <= g_cb->size) break;
873                 g_cb = g_cb->next;
874         }
875
876         if (!g_cb) {
877                 size = (n <= MINNVBLOCK) ? MINNVBLOCK : n;
878                 g_cb = xzalloc(sizeof(nvblock) + size * sizeof(var));
879                 g_cb->size = size;
880                 g_cb->pos = g_cb->nv;
881                 g_cb->prev = pb;
882                 /*g_cb->next = NULL; - xzalloc did it */
883                 if (pb) pb->next = g_cb;
884         }
885
886         v = r = g_cb->pos;
887         g_cb->pos += n;
888
889         while (v < g_cb->pos) {
890                 v->type = 0;
891                 v->string = NULL;
892                 v++;
893         }
894
895         return r;
896 }
897
898 static void nvfree(var *v)
899 {
900         var *p;
901
902         if (v < g_cb->nv || v >= g_cb->pos)
903                 syntax_error(EMSG_INTERNAL_ERROR);
904
905         for (p = v; p < g_cb->pos; p++) {
906                 if ((p->type & (VF_ARRAY | VF_CHILD)) == VF_ARRAY) {
907                         clear_array(iamarray(p));
908                         free(p->x.array->items);
909                         free(p->x.array);
910                 }
911                 if (p->type & VF_WALK)
912                         free(p->x.walker);
913
914                 clrvar(p);
915         }
916
917         g_cb->pos = v;
918         while (g_cb->prev && g_cb->pos == g_cb->nv) {
919                 g_cb = g_cb->prev;
920         }
921 }
922
923 /* ------- awk program text parsing ------- */
924
/* Parse the next token at the global position g_pos and publish the result
 * in the global token state (t_tclass, t_info, t_string, t_double).
 *
 * expected: bitmask of token classes the caller is prepared to accept.
 * Returns the class of the token delivered. If that class is not in
 * 'expected', syntax_error() is invoked.
 *
 * Three sources are tried in order: a token pushed back via t_rollback,
 * a token saved while a concatenation operator was being inserted, and
 * finally the program text itself.
 *
 * Note that string, regexp and name tokens are terminated in place, i.e.
 * the program text buffer is modified while scanning.
 */
static uint32_t next_token(uint32_t expected)
{
#define concat_inserted (G.next_token__concat_inserted)
#define save_tclass     (G.next_token__save_tclass)
#define save_info       (G.next_token__save_info)
/* Initialized to TC_OPTERM: */
#define ltclass         (G.next_token__ltclass)

	char *p, *pp, *s;
	const char *tl;
	uint32_t tc;
	const uint32_t *ti;
	int l;

	if (t_rollback) {
		/* re-deliver the previous token: t_tclass/t_info are left untouched */
		t_rollback = FALSE;

	} else if (concat_inserted) {
		/* the previous call returned a synthetic concatenation operator;
		 * now hand out the real token that was saved at that time */
		concat_inserted = FALSE;
		t_tclass = save_tclass;
		t_info = save_info;

	} else {
		p = g_pos;
 readnext:
		skip_spaces(&p);
		g_lineno = t_lineno;
		/* a comment runs up to (not including) the newline or end of text */
		if (*p == '#')
			while (*p != '\n' && *p != '\0')
				p++;

		if (*p == '\n')
			t_lineno++;

		if (*p == '\0') {
			tc = TC_EOF;

		} else if (*p == '\"') {
			/* it's a string */
			/* characters produced by nextchar() are written back over the
			 * source text, starting right after the opening quote */
			t_string = s = ++p;
			while (*p != '\"') {
				if (*p == '\0' || *p == '\n')
					syntax_error(EMSG_UNEXP_EOS);
				*(s++) = nextchar(&p);
			}
			p++;
			*s = '\0';
			tc = TC_STRING;

		} else if ((expected & TC_REGEXP) && *p == '/') {
			/* it's regexp */
			/* '/' starts a regexp only when the caller allows TC_REGEXP */
			t_string = s = ++p;
			while (*p != '/') {
				if (*p == '\0' || *p == '\n')
					syntax_error(EMSG_UNEXP_EOS);
				*s = *p++;
				if (*s++ == '\\') {
					/* replace the backslash with the processed escape */
					pp = p;
					*(s-1) = bb_process_escape_sequence((const char **)&p);
					/* the character after the backslash was itself a
					 * backslash: store a second backslash */
					if (*pp == '\\')
						*s++ = '\\';
					/* nothing was consumed by the escape processor:
					 * copy the following character verbatim */
					if (p == pp)
						*s++ = *p++;
				}
			}
			p++;
			*s = '\0';
			tc = TC_REGEXP;

		} else if (*p == '.' || isdigit(*p)) {
			/* it's a number */
			t_double = my_strtod(&p);
			/* a '.' directly after the parsed number is rejected */
			if (*p == '.')
				syntax_error(EMSG_UNEXP_TOKEN);
			tc = TC_NUMBER;

		} else {
			/* search for something known */
			/* tokenlist is a sequence of length-prefixed strings; an NTCC
			 * length byte advances tc to the next class bit. tokeninfo is
			 * walked in step with the strings. */
			tl = tokenlist;
			tc = 0x00000001;
			ti = tokeninfo;
			while (*tl) {
				l = *(tl++);
				if (l == NTCC) {
					tc <<= 1;
					continue;
				}
				/* if token class is expected, token
				 * matches and it's not a longer word,
				 * then this is what we are looking for
				 */
				if ((tc & (expected | TC_WORD | TC_NEWLINE))
				 && *tl == *p && strncmp(p, tl, l) == 0
				 && !((tc & TC_WORD) && isalnum_(p[l]))
				) {
					t_info = *ti;
					p += l;
					break;
				}
				ti++;
				tl += l;
			}

			if (!*tl) {
				/* it's a name (var/array/function),
				 * otherwise it's something wrong
				 */
				if (!isalnum_(*p))
					syntax_error(EMSG_UNEXP_TOKEN);

				/* shift the name one byte to the left (overwriting the
				 * byte that preceded it) so that a terminating NUL fits
				 * without destroying the character following the name */
				t_string = --p;
				while (isalnum_(*(++p))) {
					*(p-1) = *p;
				}
				*(p-1) = '\0';
				tc = TC_VARIABLE;
				/* also consume whitespace between functionname and bracket */
				if (!(expected & TC_VARIABLE))
					skip_spaces(&p);
				if (*p == '(') {
					/* the '(' itself is not consumed here */
					tc = TC_FUNCTION;
				} else {
					if (*p == '[') {
						p++;
						tc = TC_ARRAY;
					}
				}
			}
		}
		g_pos = p;

		/* skipping newlines in some cases */
		if ((ltclass & TC_NOTERM) && (tc & TC_NEWLINE))
			goto readnext;

		/* insert concatenation operator when needed */
		/* the real token is stashed and returned by the next call */
		if ((ltclass & TC_CONCAT1) && (tc & TC_CONCAT2) && (expected & TC_BINOP)) {
			concat_inserted = TRUE;
			save_tclass = tc;
			save_info = t_info;
			tc = TC_BINOP;
			t_info = OC_CONCAT | SS | P(35);
		}

		t_tclass = tc;
	}
	/* remember the class of the token being delivered for the next call */
	ltclass = t_tclass;

	/* Are we ready for this? */
	if (!(ltclass & expected))
		syntax_error((ltclass & (TC_NEWLINE | TC_EOF)) ?
				EMSG_UNEXP_EOS : EMSG_UNEXP_TOKEN);

	return ltclass;
#undef concat_inserted
#undef save_tclass
#undef save_info
#undef ltclass
}
1087
1088 static void rollback_token(void)
1089 {
1090         t_rollback = TRUE;
1091 }
1092
1093 static node *new_node(uint32_t info)
1094 {
1095         node *n;
1096
1097         n = xzalloc(sizeof(node));
1098         n->info = info;
1099         n->lineno = g_lineno;
1100         return n;
1101 }
1102
1103 static node *mk_re_node(const char *s, node *n, regex_t *re)
1104 {
1105         n->info = OC_REGEXP;
1106         n->l.re = re;
1107         n->r.ire = re + 1;
1108         xregcomp(re, s, REG_EXTENDED);
1109         xregcomp(re + 1, s, REG_EXTENDED | REG_ICASE);
1110
1111         return n;
1112 }
1113
1114 static node *condition(void)
1115 {
1116         next_token(TC_SEQSTART);
1117         return parse_expr(TC_SEQTERM);
1118 }
1119
/* Parse an expression terminated by any token class in iexp and return a
 * pointer to the built subtree (NULL if the terminator came first).
 * The terminator is eaten by parse_expr.
 *
 * The tree is built incrementally below a stack-allocated sentinel node
 * 'sn'; the finished expression hangs off sn.r.n. While building, each
 * node's a.n link points at the node it is attached under, which is what
 * the precedence loop walks upwards.
 */
static node *parse_expr(uint32_t iexp)
{
	node sn;
	node *cn = &sn;         /* most recently attached node */
	node *vn, *glptr;       /* glptr: pending getline node, if any */
	uint32_t tc, xtc;       /* xtc: token classes acceptable next */
	var *v;

	sn.info = PRIMASK;
	sn.r.n = glptr = NULL;
	xtc = TC_OPERAND | TC_UOPPRE | TC_REGEXP | iexp;

	while (!((tc = next_token(xtc)) & iexp)) {
		if (glptr && (t_info == (OC_COMPARE | VV | P(39) | 2))) {
			/* input redirection (<) attached to glptr node */
			cn = glptr->l.n = new_node(OC_CONCAT | SS | P(37));
			cn->a.n = glptr;
			xtc = TC_OPERAND | TC_UOPPRE;
			glptr = NULL;

		} else if (tc & (TC_BINOP | TC_UOPPOST)) {
			/* for binary and postfix-unary operators, jump back over
			 * previous operators with higher priority */
			vn = cn;
			while ( ((t_info & PRIMASK) > (vn->a.n->info & PRIMASK2))
			 || ((t_info == vn->info) && ((t_info & OPCLSMASK) == OC_COLON)) )
				vn = vn->a.n;
			if ((t_info & OPCLSMASK) == OC_TERNARY)
				t_info += P(6);
			/* splice the new operator node in between vn and the node
			 * vn was attached under */
			cn = vn->a.n->r.n = new_node(t_info);
			cn->a.n = vn->a.n;
			if (tc & TC_BINOP) {
				/* binary: what was parsed so far becomes the left operand */
				cn->l.n = vn;
				xtc = TC_OPERAND | TC_UOPPRE | TC_REGEXP;
				if ((t_info & OPCLSMASK) == OC_PGETLINE) {
					/* it's a pipe */
					next_token(TC_GETLINE);
					/* give maximum priority to this pipe */
					cn->info &= ~PRIMASK;
					xtc = TC_OPERAND | TC_UOPPRE | TC_BINOP | iexp;
				}
			} else {
				/* postfix unary: the operand goes on the right */
				cn->r.n = vn;
				xtc = TC_OPERAND | TC_UOPPRE | TC_BINOP | iexp;
			}
			vn->a.n = cn;

		} else {
			/* for operands and prefix-unary operators, attach them
			 * to last node */
			vn = cn;
			cn = vn->r.n = new_node(t_info);
			cn->a.n = vn;
			xtc = TC_OPERAND | TC_UOPPRE | TC_REGEXP;
			if (tc & (TC_OPERAND | TC_REGEXP)) {
				xtc = TC_UOPPRE | TC_UOPPOST | TC_BINOP | TC_OPERAND | iexp;
				/* one should be very careful with switch on tclass -
				 * only simple tclasses should be used! */
				switch (tc) {
				case TC_VARIABLE:
				case TC_ARRAY:
					cn->info = OC_VAR;
					/* a name found in ahash is a function argument and is
					 * referenced by index; anything else is looked up or
					 * created through newvar() */
					v = hash_search(ahash, t_string);
					if (v != NULL) {
						cn->info = OC_FNARG;
						cn->l.i = v->x.aidx;
					} else {
						cn->l.v = newvar(t_string);
					}
					if (tc & TC_ARRAY) {
						/* the subscript expression runs up to TC_ARRTERM */
						cn->info |= xS;
						cn->r.n = parse_expr(TC_ARRTERM);
					}
					break;

				case TC_NUMBER:
				case TC_STRING:
					/* literals get their own freshly allocated var */
					cn->info = OC_VAR;
					v = cn->l.v = xzalloc(sizeof(var));
					if (tc & TC_NUMBER)
						setvar_i(v, t_double);
					else
						setvar_s(v, t_string);
					break;

				case TC_REGEXP:
					/* storage for both the normal and the icase regex */
					mk_re_node(t_string, cn, xzalloc(sizeof(regex_t)*2));
					break;

				case TC_FUNCTION:
					cn->info = OC_FUNC;
					cn->r.f = newfunc(t_string);
					cn->l.n = condition();
					break;

				case TC_SEQSTART:
					/* parenthesised sub-expression replaces the node
					 * that was just allocated for this token */
					cn = vn->r.n = parse_expr(TC_SEQTERM);
					cn->a.n = vn;
					break;

				case TC_GETLINE:
					/* remember it so that a following '<' can be
					 * recognised as input redirection */
					glptr = cn;
					xtc = TC_OPERAND | TC_UOPPRE | TC_BINOP | iexp;
					break;

				case TC_BUILTIN:
					cn->l.n = condition();
					break;
				}
			}
		}
	}
	return sn.r.n;
}
1236
1237 /* add node to chain. Return ptr to alloc'd node */
1238 static node *chain_node(uint32_t info)
1239 {
1240         node *n;
1241
1242         if (!seq->first)
1243                 seq->first = seq->last = new_node(0);
1244
1245         if (seq->programname != g_progname) {
1246                 seq->programname = g_progname;
1247                 n = chain_node(OC_NEWSOURCE);
1248                 n->l.s = xstrdup(g_progname);
1249         }
1250
1251         n = seq->last;
1252         n->info = info;
1253         seq->last = n->a.n = new_node(OC_DONE);
1254
1255         return n;
1256 }
1257
1258 static void chain_expr(uint32_t info)
1259 {
1260         node *n;
1261
1262         n = chain_node(info);
1263         n->l.n = parse_expr(TC_OPTERM | TC_GRPTERM);
1264         if (t_tclass & TC_GRPTERM)
1265                 rollback_token();
1266 }
1267
1268 static node *chain_loop(node *nn)
1269 {
1270         node *n, *n2, *save_brk, *save_cont;
1271
1272         save_brk = break_ptr;
1273         save_cont = continue_ptr;
1274
1275         n = chain_node(OC_BR | Vx);
1276         continue_ptr = new_node(OC_EXEC);
1277         break_ptr = new_node(OC_EXEC);
1278         chain_group();
1279         n2 = chain_node(OC_EXEC | Vx);
1280         n2->l.n = nn;
1281         n2->a.n = n;
1282         continue_ptr->a.n = n2;
1283         break_ptr->a.n = n->r.n = seq->last;
1284
1285         continue_ptr = save_cont;
1286         break_ptr = save_brk;
1287
1288         return n;
1289 }
1290
/* Parse one group and attach it to the current chain (seq).
 * A group is either a braced block (handled recursively), a simple
 * expression statement, or one of the statement keywords dispatched on
 * below. Leading newlines are skipped.
 */
static void chain_group(void)
{
	uint32_t c;
	node *n, *n2, *n3;

	do {
		c = next_token(TC_GRPSEQ);
	} while (c & TC_NEWLINE);

	if (c & TC_GRPSTART) {
		/* braced block: chain nested groups until the closing token */
		while (next_token(TC_GRPSEQ | TC_GRPTERM) != TC_GRPTERM) {
			if (t_tclass & TC_NEWLINE) continue;
			rollback_token();
			chain_group();
		}
	} else if (c & (TC_OPSEQ | TC_OPTERM)) {
		/* plain expression statement (possibly empty) */
		rollback_token();
		chain_expr(OC_EXEC | Vx);
	} else {						/* TC_STATEMNT */
		switch (t_info & OPCLSMASK) {
		case ST_IF:
			/* n: branch on the condition; n2: node placed after the
			 * "then" part, redirected past the "else" part if present */
			n = chain_node(OC_BR | Vx);
			n->l.n = condition();
			chain_group();
			n2 = chain_node(OC_EXEC);
			n->r.n = seq->last;
			if (next_token(TC_GRPSEQ | TC_GRPTERM | TC_ELSE) == TC_ELSE) {
				chain_group();
				n2->a.n = seq->last;
			} else {
				rollback_token();
			}
			break;

		case ST_WHILE:
			/* condition is parsed first, then attached to the loop head */
			n2 = condition();
			n = chain_loop(NULL);
			n->l.n = n2;
			break;

		case ST_DO:
			/* n2 is redirected to the node following the loop head,
			 * so the first pass does not go through the head node */
			n2 = chain_node(OC_EXEC);
			n = chain_loop(NULL);
			n2->a.n = n->a.n;
			next_token(TC_WHILE);
			n->l.n = condition();
			break;

		case ST_FOR:
			next_token(TC_SEQSTART);
			n2 = parse_expr(TC_SEMICOL | TC_SEQTERM);
			if (t_tclass & TC_SEQTERM) {	/* for-in */
				/* the sole expression must be an "in" expression */
				if ((n2->info & OPCLSMASK) != OC_IN)
					syntax_error(EMSG_UNEXP_TOKEN);
				n = chain_node(OC_WALKINIT | VV);
				n->l.n = n2->l.n;
				n->r.n = n2->r.n;
				n = chain_loop(NULL);
				n->info = OC_WALKNEXT | Vx;
				n->l.n = n2->l.n;
			} else {			/* for (;;) */
				/* init expression runs once before the loop */
				n = chain_node(OC_EXEC | Vx);
				n->l.n = n2;
				/* n2: condition, n3: step expression */
				n2 = parse_expr(TC_SEMICOL);
				n3 = parse_expr(TC_SEQTERM);
				n = chain_loop(n3);
				n->l.n = n2;
				/* no condition given: head becomes a plain OC_EXEC node */
				if (!n2)
					n->info = OC_EXEC;
			}
			break;

		case OC_PRINT:
		case OC_PRINTF:
			n = chain_node(t_info);
			n->l.n = parse_expr(TC_OPTERM | TC_OUTRDR | TC_GRPTERM);
			if (t_tclass & TC_OUTRDR) {
				/* merge the redirection token's info bits into the node
				 * and parse the redirection target */
				n->info |= t_info;
				n->r.n = parse_expr(TC_OPTERM | TC_GRPTERM);
			}
			if (t_tclass & TC_GRPTERM)
				rollback_token();
			break;

		case OC_BREAK:
			n = chain_node(OC_EXEC);
			n->a.n = break_ptr;
			break;

		case OC_CONTINUE:
			n = chain_node(OC_EXEC);
			n->a.n = continue_ptr;
			break;

		/* delete, next, nextfile, return, exit */
		default:
			chain_expr(t_info);
		}
	}
}
1392
1393 static void parse_program(char *p)
1394 {
1395         uint32_t tclass;
1396         node *cn;
1397         func *f;
1398         var *v;
1399
1400         g_pos = p;
1401         t_lineno = 1;
1402         while ((tclass = next_token(TC_EOF | TC_OPSEQ | TC_GRPSTART |
1403                         TC_OPTERM | TC_BEGIN | TC_END | TC_FUNCDECL)) != TC_EOF) {
1404
1405                 if (tclass & TC_OPTERM)
1406                         continue;
1407
1408                 seq = &mainseq;
1409                 if (tclass & TC_BEGIN) {
1410                         seq = &beginseq;
1411                         chain_group();
1412
1413                 } else if (tclass & TC_END) {
1414                         seq = &endseq;
1415                         chain_group();
1416
1417                 } else if (tclass & TC_FUNCDECL) {
1418                         next_token(TC_FUNCTION);
1419                         g_pos++;
1420                         f = newfunc(t_string);
1421                         f->body.first = NULL;
1422                         f->nargs = 0;
1423                         while (next_token(TC_VARIABLE | TC_SEQTERM) & TC_VARIABLE) {
1424                                 v = findvar(ahash, t_string);
1425                                 v->x.aidx = (f->nargs)++;
1426
1427                                 if (next_token(TC_COMMA | TC_SEQTERM) & TC_SEQTERM)
1428                                         break;
1429                         }
1430                         seq = &(f->body);
1431                         chain_group();
1432                         clear_array(ahash);
1433
1434                 } else if (tclass & TC_OPSEQ) {
1435                         rollback_token();
1436                         cn = chain_node(OC_TEST);
1437                         cn->l.n = parse_expr(TC_OPTERM | TC_EOF | TC_GRPSTART);
1438                         if (t_tclass & TC_GRPSTART) {
1439                                 rollback_token();
1440                                 chain_group();
1441                         } else {
1442                                 chain_node(OC_PRINT);
1443                         }
1444                         cn->r.n = mainseq.last;
1445
1446                 } else /* if (tclass & TC_GRPSTART) */ {
1447                         rollback_token();
1448                         chain_group();
1449                 }
1450         }
1451 }
1452
1453
1454 /* -------- program execution part -------- */
1455
1456 static node *mk_splitter(const char *s, tsplitter *spl)
1457 {
1458         regex_t *re, *ire;
1459         node *n;
1460
1461         re = &spl->re[0];
1462         ire = &spl->re[1];
1463         n = &spl->n;
1464         if ((n->info & OPCLSMASK) == OC_REGEXP) {
1465                 regfree(re);
1466                 regfree(ire); // TODO: nuke ire, use re+1?
1467         }
1468         if (strlen(s) > 1) {
1469                 mk_re_node(s, n, re);
1470         } else {
1471                 n->info = (uint32_t) *s;
1472         }
1473
1474         return n;
1475 }
1476
1477 /* use node as a regular expression. Supplied with node ptr and regex_t
1478  * storage space. Return ptr to regex (if result points to preg, it should
1479  * be later regfree'd manually
1480  */
1481 static regex_t *as_regex(node *op, regex_t *preg)
1482 {
1483         int cflags;
1484         var *v;
1485         const char *s;
1486
1487         if ((op->info & OPCLSMASK) == OC_REGEXP) {
1488                 return icase ? op->r.ire : op->l.re;
1489         }
1490         v = nvalloc(1);
1491         s = getvar_s(evaluate(op, v));
1492
1493         cflags = icase ? REG_EXTENDED | REG_ICASE : REG_EXTENDED;
1494         /* Testcase where REG_EXTENDED fails (unpaired '{'):
1495          * echo Hi | awk 'gsub("@(samp|code|file)\{","");'
1496          * gawk 3.1.5 eats this. We revert to ~REG_EXTENDED
1497          * (maybe gsub is not supposed to use REG_EXTENDED?).
1498          */
1499         if (regcomp(preg, s, cflags)) {
1500                 cflags &= ~REG_EXTENDED;
1501                 xregcomp(preg, s, cflags);
1502         }
1503         nvfree(v);
1504         return preg;
1505 }
1506
/* Gradually growing buffer: make sure *b can hold more than n bytes.
 * When *b is NULL or n has reached *size, the buffer is reallocated to
 * n + n/2 + 80 bytes and *size updated; otherwise nothing happens. */
static void qrealloc(char **b, int n, int *size)
{
	if (*b != NULL && n < *size)
		return;

	*size = n + (n>>1) + 80;
	*b = xrealloc(*b, *size);
}
1515
1516 /* resize field storage space */
1517 static void fsrealloc(int size)
1518 {
1519         int i;
1520
1521         if (size >= maxfields) {
1522                 i = maxfields;
1523                 maxfields = size + 16;
1524                 Fields = xrealloc(Fields, maxfields * sizeof(var));
1525                 for (; i < maxfields; i++) {
1526                         Fields[i].type = VF_SPECIAL;
1527                         Fields[i].string = NULL;
1528                 }
1529         }
1530
1531         if (size < nfields) {
1532                 for (i = size; i < nfields; i++) {
1533                         clrvar(Fields + i);
1534                 }
1535         }
1536         nfields = size;
1537 }
1538
/* Split string s into fields according to splitter node spl.
 *
 * *slist receives a freshly allocated buffer (the caller owns it) in which
 * the fields are laid out as NUL-terminated strings. Returns the number
 * of fields.
 *
 * Four modes, selected by spl->info:
 *  - OC_REGEXP node: fields are separated by regex matches
 *  - NUL character:  every character of s is its own field
 *  - any character other than ' ': that single character separates fields
 *    (both upper and lower case variants when icase is set)
 *  - ' ':            fields are separated by runs of whitespace
 */
static int awk_split(const char *s, node *spl, char **slist)
{
	int l, n = 0;
	char c[4];
	char *s1;
	regmatch_t pmatch[2]; // TODO: why [2]? [1] is enough...

	/* in worst case, each char would be a separate field */
	*slist = s1 = xzalloc(strlen(s) * 2 + 3);
	strcpy(s1, s);

	/* c[0..1]: separator character (twice), c[2..3]: either an empty
	 * string or "\n" when RS is empty; c+2 is used as a stop set below */
	c[0] = c[1] = (char)spl->info;
	c[2] = c[3] = '\0';
	if (*getvar_s(intvar[RS]) == '\0')
		c[2] = '\n';

	if ((spl->info & OPCLSMASK) == OC_REGEXP) {  /* regex split */
		if (!*s)
			return n; /* "": zero fields */
		n++; /* at least one field will be there */
		do {
			l = strcspn(s, c+2); /* len till next NUL or \n */
			if (regexec(icase ? spl->r.ire : spl->l.re, s, 1, pmatch, 0) == 0
			 && pmatch[0].rm_so <= l
			) {
				/* separator found no later than position l */
				l = pmatch[0].rm_so;
				/* match ends at offset 0 (empty match at the start):
				 * consume one character so the loop advances */
				if (pmatch[0].rm_eo == 0) {
					l++;
					pmatch[0].rm_eo++;
				}
				n++; /* we saw yet another delimiter */
			} else {
				/* no usable match: the field runs up to position l;
				 * also step over the stop character if there is one */
				pmatch[0].rm_eo = l;
				if (s[l])
					pmatch[0].rm_eo++;
			}
			memcpy(s1, s, l);
			/* make sure we remove *all* of the separator chars */
			do {
				s1[l] = '\0';
			} while (++l < pmatch[0].rm_eo);
			nextword(&s1);
			s += pmatch[0].rm_eo;
		} while (*s);
		return n;
	}
	if (c[0] == '\0') {  /* null split */
		/* each character followed by its own terminator */
		while (*s) {
			*s1++ = *s++;
			*s1++ = '\0';
			n++;
		}
		return n;
	}
	if (c[0] != ' ') {  /* single-character split */
		if (icase) {
			c[0] = toupper(c[0]);
			c[1] = tolower(c[1]);
		}
		/* works on the copy made by strcpy() above: every separator
		 * occurrence is overwritten with NUL */
		if (*s1) n++;
		while ((s1 = strpbrk(s1, c))) {
			*s1++ = '\0';
			n++;
		}
		return n;
	}
	/* space split */
	while (*s) {
		s = skip_whitespace(s);
		if (!*s) break;
		n++;
		while (*s && !isspace(*s))
			*s1++ = *s++;
		*s1++ = '\0';
	}
	return n;
}
1616
1617 static void split_f0(void)
1618 {
1619 /* static char *fstrings; */
1620 #define fstrings (G.split_f0__fstrings)
1621
1622         int i, n;
1623         char *s;
1624
1625         if (is_f0_split)
1626                 return;
1627
1628         is_f0_split = TRUE;
1629         free(fstrings);
1630         fsrealloc(0);
1631         n = awk_split(getvar_s(intvar[F0]), &fsplitter.n, &fstrings);
1632         fsrealloc(n);
1633         s = fstrings;
1634         for (i = 0; i < n; i++) {
1635                 Fields[i].string = nextword(&s);
1636                 Fields[i].type |= (VF_FSTR | VF_USER | VF_DIRTY);
1637         }
1638
1639         /* set NF manually to avoid side effects */
1640         clrvar(intvar[NF]);
1641         intvar[NF]->type = VF_NUMBER | VF_SPECIAL;
1642         intvar[NF]->number = nfields;
1643 #undef fstrings
1644 }
1645
1646 /* perform additional actions when some internal variables changed */
1647 static void handle_special(var *v)
1648 {
1649         int n;
1650         char *b;
1651         const char *sep, *s;
1652         int sl, l, len, i, bsize;
1653
1654         if (!(v->type & VF_SPECIAL))
1655                 return;
1656
1657         if (v == intvar[NF]) {
1658                 n = (int)getvar_i(v);
1659                 fsrealloc(n);
1660
1661                 /* recalculate $0 */
1662                 sep = getvar_s(intvar[OFS]);
1663                 sl = strlen(sep);
1664                 b = NULL;
1665                 len = 0;
1666                 for (i = 0; i < n; i++) {
1667                         s = getvar_s(&Fields[i]);
1668                         l = strlen(s);
1669                         if (b) {
1670                                 memcpy(b+len, sep, sl);
1671                                 len += sl;
1672                         }
1673                         qrealloc(&b, len+l+sl, &bsize);
1674                         memcpy(b+len, s, l);
1675                         len += l;
1676                 }
1677                 if (b)
1678                         b[len] = '\0';
1679                 setvar_p(intvar[F0], b);
1680                 is_f0_split = TRUE;
1681
1682         } else if (v == intvar[F0]) {
1683                 is_f0_split = FALSE;
1684
1685         } else if (v == intvar[FS]) {
1686                 mk_splitter(getvar_s(v), &fsplitter);
1687
1688         } else if (v == intvar[RS]) {
1689                 mk_splitter(getvar_s(v), &rsplitter);
1690
1691         } else if (v == intvar[IGNORECASE]) {
1692                 icase = istrue(v);
1693
1694         } else {                                /* $n */
1695                 n = getvar_i(intvar[NF]);
1696                 setvar_i(intvar[NF], n > v-Fields ? n : v-Fields+1);
1697                 /* right here v is invalid. Just to note... */
1698         }
1699 }
1700
1701 /* step through func/builtin/etc arguments */
1702 static node *nextarg(node **pn)
1703 {
1704         node *n;
1705
1706         n = *pn;
1707         if (n && (n->info & OPCLSMASK) == OC_COMMA) {
1708                 *pn = n->r.n;
1709                 n = n->l.n;
1710         } else {
1711                 *pn = NULL;
1712         }
1713         return n;
1714 }
1715
1716 static void hashwalk_init(var *v, xhash *array)
1717 {
1718         char **w;
1719         hash_item *hi;
1720         unsigned i;
1721
1722         if (v->type & VF_WALK)
1723                 free(v->x.walker);
1724
1725         v->type |= VF_WALK;
1726         w = v->x.walker = xzalloc(2 + 2*sizeof(char *) + array->glen);
1727         w[0] = w[1] = (char *)(w + 2);
1728         for (i = 0; i < array->csize; i++) {
1729                 hi = array->items[i];
1730                 while (hi) {
1731                         strcpy(*w, hi->name);
1732                         nextword(w);
1733                         hi = hi->next;
1734                 }
1735         }
1736 }
1737
1738 static int hashwalk_next(var *v)
1739 {
1740         char **w;
1741
1742         w = v->x.walker;
1743         if (w[1] == w[0])
1744                 return FALSE;
1745
1746         setvar_s(v, nextword(w+1));
1747         return TRUE;
1748 }
1749
1750 /* evaluate node, return 1 when result is true, 0 otherwise */
1751 static int ptest(node *pattern)
1752 {
1753         /* ptest__v is "static": to save stack space? */
1754         return istrue(evaluate(pattern, &G.ptest__v));
1755 }
1756
1757 /* read next record from stream rsm into a variable v */
1758 static int awk_getline(rstream *rsm, var *v)
1759 {
1760         char *b;
1761         regmatch_t pmatch[2];
1762         int a, p, pp=0, size;
1763         int fd, so, eo, r, rp;
1764         char c, *m, *s;
1765
1766         /* we're using our own buffer since we need access to accumulating
1767          * characters
1768          */
1769         fd = fileno(rsm->F);
1770         m = rsm->buffer;
1771         a = rsm->adv;
1772         p = rsm->pos;
1773         size = rsm->size;
1774         c = (char) rsplitter.n.info;
1775         rp = 0;
1776
1777         if (!m) qrealloc(&m, 256, &size);
1778         do {
1779                 b = m + a;
1780                 so = eo = p;
1781                 r = 1;
1782                 if (p > 0) {
1783                         if ((rsplitter.n.info & OPCLSMASK) == OC_REGEXP) {
1784                                 if (regexec(icase ? rsplitter.n.r.ire : rsplitter.n.l.re,
1785                                                         b, 1, pmatch, 0) == 0) {
1786                                         so = pmatch[0].rm_so;
1787                                         eo = pmatch[0].rm_eo;
1788                                         if (b[eo] != '\0')
1789                                                 break;
1790                                 }
1791                         } else if (c != '\0') {
1792                                 s = strchr(b+pp, c);
1793                                 if (!s) s = memchr(b+pp, '\0', p - pp);
1794                                 if (s) {
1795                                         so = eo = s-b;
1796                                         eo++;
1797                                         break;
1798                                 }
1799                         } else {
1800                                 while (b[rp] == '\n')
1801                                         rp++;
1802                                 s = strstr(b+rp, "\n\n");
1803                                 if (s) {
1804                                         so = eo = s-b;
1805                                         while (b[eo] == '\n') eo++;
1806                                         if (b[eo] != '\0')
1807                                                 break;
1808                                 }
1809                         }
1810                 }
1811
1812                 if (a > 0) {
1813                         memmove(m, (const void *)(m+a), p+1);
1814                         b = m;
1815                         a = 0;
1816                 }
1817
1818                 qrealloc(&m, a+p+128, &size);
1819                 b = m + a;
1820                 pp = p;
1821                 p += safe_read(fd, b+p, size-p-1);
1822                 if (p < pp) {
1823                         p = 0;
1824                         r = 0;
1825                         setvar_i(intvar[ERRNO], errno);
1826                 }
1827                 b[p] = '\0';
1828
1829         } while (p > pp);
1830
1831         if (p == 0) {
1832                 r--;
1833         } else {
1834                 c = b[so]; b[so] = '\0';
1835                 setvar_s(v, b+rp);
1836                 v->type |= VF_USER;
1837                 b[so] = c;
1838                 c = b[eo]; b[eo] = '\0';
1839                 setvar_s(intvar[RT], b+so);
1840                 b[eo] = c;
1841         }
1842
1843         rsm->buffer = m;
1844         rsm->adv = a + eo;
1845         rsm->pos = p - eo;
1846         rsm->size = size;
1847
1848         return r;
1849 }
1850
1851 static int fmt_num(char *b, int size, const char *format, double n, int int_as_int)
1852 {
1853         int r = 0;
1854         char c;
1855         const char *s = format;
1856
1857         if (int_as_int && n == (int)n) {
1858                 r = snprintf(b, size, "%d", (int)n);
1859         } else {
1860                 do { c = *s; } while (c && *++s);
1861                 if (strchr("diouxX", c)) {
1862                         r = snprintf(b, size, format, (int)n);
1863                 } else if (strchr("eEfgG", c)) {
1864                         r = snprintf(b, size, format, n);
1865                 } else {
1866                         syntax_error(EMSG_INV_FMT);
1867                 }
1868         }
1869         return r;
1870 }
1871
1872 /* formatted output into an allocated buffer, return ptr to buffer */
1873 static char *awk_printf(node *n)
1874 {
1875         char *b = NULL;
1876         char *fmt, *s, *f;
1877         const char *s1;
1878         int i, j, incr, bsize;
1879         char c, c1;
1880         var *v, *arg;
1881
1882         v = nvalloc(1);
1883         fmt = f = xstrdup(getvar_s(evaluate(nextarg(&n), v)));
1884
1885         i = 0;
1886         while (*f) {
1887                 s = f;
1888                 while (*f && (*f != '%' || *(++f) == '%'))
1889                         f++;
1890                 while (*f && !isalpha(*f)) {
1891                         if (*f == '*')
1892                                 syntax_error("%*x formats are not supported");
1893                         f++;
1894                 }
1895
1896                 incr = (f - s) + MAXVARFMT;
1897                 qrealloc(&b, incr + i, &bsize);
1898                 c = *f;
1899                 if (c != '\0') f++;
1900                 c1 = *f;
1901                 *f = '\0';
1902                 arg = evaluate(nextarg(&n), v);
1903
1904                 j = i;
1905                 if (c == 'c' || !c) {
1906                         i += sprintf(b+i, s, is_numeric(arg) ?
1907                                         (char)getvar_i(arg) : *getvar_s(arg));
1908                 } else if (c == 's') {
1909                         s1 = getvar_s(arg);
1910                         qrealloc(&b, incr+i+strlen(s1), &bsize);
1911                         i += sprintf(b+i, s, s1);
1912                 } else {
1913                         i += fmt_num(b+i, incr, s, getvar_i(arg), FALSE);
1914                 }
1915                 *f = c1;
1916
1917                 /* if there was an error while sprintf, return value is negative */
1918                 if (i < j) i = j;
1919         }
1920
1921         b = xrealloc(b, i + 1);
1922         free(fmt);
1923         nvfree(v);
1924         b[i] = '\0';
1925         return b;
1926 }
1927
1928 /* common substitution routine
1929  * replace (nm) substring of (src) that match (n) with (repl), store
1930  * result into (dest), return number of substitutions. If nm=0, replace
1931  * all matches. If src or dst is NULL, use $0. If ex=TRUE, enable
1932  * subexpression matching (\1-\9)
1933  */
1934 static int awk_sub(node *rn, const char *repl, int nm, var *src, var *dest, int ex)
1935 {
1936         char *ds = NULL;
1937         const char *s;
1938         const char *sp;
1939         int c, i, j, di, rl, so, eo, nbs, n, dssize;
1940         regmatch_t pmatch[10];
1941         regex_t sreg, *re;
1942
1943         re = as_regex(rn, &sreg);
1944         if (!src) src = intvar[F0];
1945         if (!dest) dest = intvar[F0];
1946
1947         i = di = 0;
1948         sp = getvar_s(src);
1949         rl = strlen(repl);
1950         while (regexec(re, sp, 10, pmatch, sp==getvar_s(src) ? 0 : REG_NOTBOL) == 0) {
1951                 so = pmatch[0].rm_so;
1952                 eo = pmatch[0].rm_eo;
1953
1954                 qrealloc(&ds, di + eo + rl, &dssize);
1955                 memcpy(ds + di, sp, eo);
1956                 di += eo;
1957                 if (++i >= nm) {
1958                         /* replace */
1959                         di -= (eo - so);
1960                         nbs = 0;
1961                         for (s = repl; *s; s++) {
1962                                 ds[di++] = c = *s;
1963                                 if (c == '\\') {
1964                                         nbs++;
1965                                         continue;
1966                                 }
1967                                 if (c == '&' || (ex && c >= '0' && c <= '9')) {
1968                                         di -= ((nbs + 3) >> 1);
1969                                         j = 0;
1970                                         if (c != '&') {
1971                                                 j = c - '0';
1972                                                 nbs++;
1973                                         }
1974                                         if (nbs % 2) {
1975                                                 ds[di++] = c;
1976                                         } else {
1977                                                 n = pmatch[j].rm_eo - pmatch[j].rm_so;
1978                                                 qrealloc(&ds, di + rl + n, &dssize);
1979                                                 memcpy(ds + di, sp + pmatch[j].rm_so, n);
1980                                                 di += n;
1981                                         }
1982                                 }
1983                                 nbs = 0;
1984                         }
1985                 }
1986
1987                 sp += eo;
1988                 if (i == nm) break;
1989                 if (eo == so) {
1990                         ds[di] = *sp++;
1991                         if (!ds[di++]) break;
1992                 }
1993         }
1994
1995         qrealloc(&ds, di + strlen(sp), &dssize);
1996         strcpy(ds + di, sp);
1997         setvar_p(dest, ds);
1998         if (re == &sreg) regfree(re);
1999         return i;
2000 }
2001
2002 static NOINLINE int do_mktime(const char *ds)
2003 {
2004         struct tm then;
2005         int count;
2006
2007         /*memset(&then, 0, sizeof(then)); - not needed */
2008         then.tm_isdst = -1; /* default is unknown */
2009
2010         /* manpage of mktime says these fields are ints,
2011          * so we can sscanf stuff directly into them */
2012         count = sscanf(ds, "%u %u %u %u %u %u %d",
2013                 &then.tm_year, &then.tm_mon, &then.tm_mday,
2014                 &then.tm_hour, &then.tm_min, &then.tm_sec,
2015                 &then.tm_isdst);
2016
2017         if (count < 6
2018          || (unsigned)then.tm_mon < 1
2019          || (unsigned)then.tm_year < 1900
2020         ) {
2021                 return -1;
2022         }
2023
2024         then.tm_mon -= 1;
2025         then.tm_year -= 1900;
2026
2027         return mktime(&then);
2028 }
2029
2030 static NOINLINE var *exec_builtin(node *op, var *res)
2031 {
2032 #define tspl (G.exec_builtin__tspl)
2033
2034         var *tv;
2035         node *an[4];
2036         var *av[4];
2037         const char *as[4];
2038         regmatch_t pmatch[2];
2039         regex_t sreg, *re;
2040         node *spl;
2041         uint32_t isr, info;
2042         int nargs;
2043         time_t tt;
2044         char *s, *s1;
2045         int i, l, ll, n;
2046
2047         tv = nvalloc(4);
2048         isr = info = op->info;
2049         op = op->l.n;
2050
2051         av[2] = av[3] = NULL;
2052         for (i = 0; i < 4 && op; i++) {
2053                 an[i] = nextarg(&op);
2054                 if (isr & 0x09000000) av[i] = evaluate(an[i], &tv[i]);
2055                 if (isr & 0x08000000) as[i] = getvar_s(av[i]);
2056                 isr >>= 1;
2057         }
2058
2059         nargs = i;
2060         if ((uint32_t)nargs < (info >> 30))
2061                 syntax_error(EMSG_TOO_FEW_ARGS);
2062
2063         info &= OPNMASK;
2064         switch (info) {
2065
2066         case B_a2:
2067 #if ENABLE_FEATURE_AWK_LIBM
2068                 setvar_i(res, atan2(getvar_i(av[0]), getvar_i(av[1])));
2069 #else
2070                 syntax_error(EMSG_NO_MATH);
2071 #endif
2072                 break;
2073
2074         case B_sp:
2075                 if (nargs > 2) {
2076                         spl = (an[2]->info & OPCLSMASK) == OC_REGEXP ?
2077                                 an[2] : mk_splitter(getvar_s(evaluate(an[2], &tv[2])), &tspl);
2078                 } else {
2079                         spl = &fsplitter.n;
2080                 }
2081
2082                 n = awk_split(as[0], spl, &s);
2083                 s1 = s;
2084                 clear_array(iamarray(av[1]));
2085                 for (i = 1; i <= n; i++)
2086                         setari_u(av[1], i, nextword(&s1));
2087                 free(s);
2088                 setvar_i(res, n);
2089                 break;
2090
2091         case B_ss:
2092                 l = strlen(as[0]);
2093                 i = getvar_i(av[1]) - 1;
2094                 if (i > l) i = l;
2095                 if (i < 0) i = 0;
2096                 n = (nargs > 2) ? getvar_i(av[2]) : l-i;
2097                 if (n < 0) n = 0;
2098                 s = xstrndup(as[0]+i, n);
2099                 setvar_p(res, s);
2100                 break;
2101
2102         /* Bitwise ops must assume that operands are unsigned. GNU Awk 3.1.5:
2103          * awk '{ print or(-1,1) }' gives "4.29497e+09", not "-2.xxxe+09" */
2104         case B_an:
2105                 setvar_i(res, getvar_i_int(av[0]) & getvar_i_int(av[1]));
2106                 break;
2107
2108         case B_co:
2109                 setvar_i(res, ~getvar_i_int(av[0]));
2110                 break;
2111
2112         case B_ls:
2113                 setvar_i(res, getvar_i_int(av[0]) << getvar_i_int(av[1]));
2114                 break;
2115
2116         case B_or:
2117                 setvar_i(res, getvar_i_int(av[0]) | getvar_i_int(av[1]));
2118                 break;
2119
2120         case B_rs:
2121                 setvar_i(res, getvar_i_int(av[0]) >> getvar_i_int(av[1]));
2122                 break;
2123
2124         case B_xo:
2125                 setvar_i(res, getvar_i_int(av[0]) ^ getvar_i_int(av[1]));
2126                 break;
2127
2128         case B_lo:
2129         case B_up:
2130                 s1 = s = xstrdup(as[0]);
2131                 while (*s1) {
2132                         //*s1 = (info == B_up) ? toupper(*s1) : tolower(*s1);
2133                         if ((unsigned char)((*s1 | 0x20) - 'a') <= ('z' - 'a'))
2134                                 *s1 = (info == B_up) ? (*s1 & 0xdf) : (*s1 | 0x20);
2135                         s1++;
2136                 }
2137                 setvar_p(res, s);
2138                 break;
2139
2140         case B_ix:
2141                 n = 0;
2142                 ll = strlen(as[1]);
2143                 l = strlen(as[0]) - ll;
2144                 if (ll > 0 && l >= 0) {
2145                         if (!icase) {
2146                                 s = strstr(as[0], as[1]);
2147                                 if (s) n = (s - as[0]) + 1;
2148                         } else {
2149                                 /* this piece of code is terribly slow and
2150                                  * really should be rewritten
2151                                  */
2152                                 for (i=0; i<=l; i++) {
2153                                         if (strncasecmp(as[0]+i, as[1], ll) == 0) {
2154                                                 n = i+1;
2155                                                 break;
2156                                         }
2157                                 }
2158                         }
2159                 }
2160                 setvar_i(res, n);
2161                 break;
2162
2163         case B_ti:
2164                 if (nargs > 1)
2165                         tt = getvar_i(av[1]);
2166                 else
2167                         time(&tt);
2168                 //s = (nargs > 0) ? as[0] : "%a %b %d %H:%M:%S %Z %Y";
2169                 i = strftime(g_buf, MAXVARFMT,
2170                         ((nargs > 0) ? as[0] : "%a %b %d %H:%M:%S %Z %Y"),
2171                         localtime(&tt));
2172                 g_buf[i] = '\0';
2173                 setvar_s(res, g_buf);
2174                 break;
2175
2176         case B_mt:
2177                 setvar_i(res, do_mktime(as[0]));
2178                 break;
2179
2180         case B_ma:
2181                 re = as_regex(an[1], &sreg);
2182                 n = regexec(re, as[0], 1, pmatch, 0);
2183                 if (n == 0) {
2184                         pmatch[0].rm_so++;
2185                         pmatch[0].rm_eo++;
2186                 } else {
2187                         pmatch[0].rm_so = 0;
2188                         pmatch[0].rm_eo = -1;
2189                 }
2190                 setvar_i(newvar("RSTART"), pmatch[0].rm_so);
2191                 setvar_i(newvar("RLENGTH"), pmatch[0].rm_eo - pmatch[0].rm_so);
2192                 setvar_i(res, pmatch[0].rm_so);
2193                 if (re == &sreg) regfree(re);
2194                 break;
2195
2196         case B_ge:
2197                 awk_sub(an[0], as[1], getvar_i(av[2]), av[3], res, TRUE);
2198                 break;
2199
2200         case B_gs:
2201                 setvar_i(res, awk_sub(an[0], as[1], 0, av[2], av[2], FALSE));
2202                 break;
2203
2204         case B_su:
2205                 setvar_i(res, awk_sub(an[0], as[1], 1, av[2], av[2], FALSE));
2206                 break;
2207         }
2208
2209         nvfree(tv);
2210         return res;
2211 #undef tspl
2212 }
2213
2214 /*
2215  * Evaluate node - the heart of the program. Supplied with subtree
2216  * and place where to store result. returns ptr to result.
2217  */
2218 #define XC(n) ((n) >> 8)
2219
2220 static var *evaluate(node *op, var *res)
2221 {
2222 /* This procedure is recursive so we should count every byte */
2223 #define fnargs (G.evaluate__fnargs)
2224 /* seed is initialized to 1 */
2225 #define seed   (G.evaluate__seed)
2226 #define sreg   (G.evaluate__sreg)
2227
2228         node *op1;
2229         var *v1;
2230         union {
2231                 var *v;
2232                 const char *s;
2233                 double d;
2234                 int i;
2235         } L, R;
2236         uint32_t opinfo;
2237         int opn;
2238         union {
2239                 char *s;
2240                 rstream *rsm;
2241                 FILE *F;
2242                 var *v;
2243                 regex_t *re;
2244                 uint32_t info;
2245         } X;
2246
2247         if (!op)
2248                 return setvar_s(res, NULL);
2249
2250         v1 = nvalloc(2);
2251
2252         while (op) {
2253                 opinfo = op->info;
2254                 opn = (opinfo & OPNMASK);
2255                 g_lineno = op->lineno;
2256
2257                 /* execute inevitable things */
2258                 op1 = op->l.n;
2259                 if (opinfo & OF_RES1) X.v = L.v = evaluate(op1, v1);
2260                 if (opinfo & OF_RES2) R.v = evaluate(op->r.n, v1+1);
2261                 if (opinfo & OF_STR1) L.s = getvar_s(L.v);
2262                 if (opinfo & OF_STR2) R.s = getvar_s(R.v);
2263                 if (opinfo & OF_NUM1) L.d = getvar_i(L.v);
2264
2265                 switch (XC(opinfo & OPCLSMASK)) {
2266
2267                 /* -- iterative node type -- */
2268
2269                 /* test pattern */
2270                 case XC( OC_TEST ):
2271                         if ((op1->info & OPCLSMASK) == OC_COMMA) {
2272                                 /* it's range pattern */
2273                                 if ((opinfo & OF_CHECKED) || ptest(op1->l.n)) {
2274                                         op->info |= OF_CHECKED;
2275                                         if (ptest(op1->r.n))
2276                                                 op->info &= ~OF_CHECKED;
2277
2278                                         op = op->a.n;
2279                                 } else {
2280                                         op = op->r.n;
2281                                 }
2282                         } else {
2283                                 op = (ptest(op1)) ? op->a.n : op->r.n;
2284                         }
2285                         break;
2286
2287                 /* just evaluate an expression, also used as unconditional jump */
2288                 case XC( OC_EXEC ):
2289                         break;
2290
2291                 /* branch, used in if-else and various loops */
2292                 case XC( OC_BR ):
2293                         op = istrue(L.v) ? op->a.n : op->r.n;
2294                         break;
2295
2296                 /* initialize for-in loop */
2297                 case XC( OC_WALKINIT ):
2298                         hashwalk_init(L.v, iamarray(R.v));
2299                         break;
2300
2301                 /* get next array item */
2302                 case XC( OC_WALKNEXT ):
2303                         op = hashwalk_next(L.v) ? op->a.n : op->r.n;
2304                         break;
2305
2306                 case XC( OC_PRINT ):
2307                 case XC( OC_PRINTF ):
2308                         X.F = stdout;
2309                         if (op->r.n) {
2310                                 X.rsm = newfile(R.s);
2311                                 if (!X.rsm->F) {
2312                                         if (opn == '|') {
2313                                                 X.rsm->F = popen(R.s, "w");
2314                                                 if (X.rsm->F == NULL)
2315                                                         bb_perror_msg_and_die("popen");
2316                                                 X.rsm->is_pipe = 1;
2317                                         } else {
2318                                                 X.rsm->F = xfopen(R.s, opn=='w' ? "w" : "a");
2319                                         }
2320                                 }
2321                                 X.F = X.rsm->F;
2322                         }
2323
2324                         if ((opinfo & OPCLSMASK) == OC_PRINT) {
2325                                 if (!op1) {
2326                                         fputs(getvar_s(intvar[F0]), X.F);
2327                                 } else {
2328                                         while (op1) {
2329                                                 L.v = evaluate(nextarg(&op1), v1);
2330                                                 if (L.v->type & VF_NUMBER) {
2331                                                         fmt_num(g_buf, MAXVARFMT, getvar_s(intvar[OFMT]),
2332                                                                         getvar_i(L.v), TRUE);
2333                                                         fputs(g_buf, X.F);
2334                                                 } else {
2335                                                         fputs(getvar_s(L.v), X.F);
2336                                                 }
2337
2338                                                 if (op1) fputs(getvar_s(intvar[OFS]), X.F);
2339                                         }
2340                                 }
2341                                 fputs(getvar_s(intvar[ORS]), X.F);
2342
2343                         } else {        /* OC_PRINTF */
2344                                 L.s = awk_printf(op1);
2345                                 fputs(L.s, X.F);
2346                                 free((char*)L.s);
2347                         }
2348                         fflush(X.F);
2349                         break;
2350
2351                 case XC( OC_DELETE ):
2352                         X.info = op1->info & OPCLSMASK;
2353                         if (X.info == OC_VAR) {
2354                                 R.v = op1->l.v;
2355                         } else if (X.info == OC_FNARG) {
2356                                 R.v = &fnargs[op1->l.i];
2357                         } else {
2358                                 syntax_error(EMSG_NOT_ARRAY);
2359                         }
2360
2361                         if (op1->r.n) {
2362                                 clrvar(L.v);
2363                                 L.s = getvar_s(evaluate(op1->r.n, v1));
2364                                 hash_remove(iamarray(R.v), L.s);
2365                         } else {
2366                                 clear_array(iamarray(R.v));
2367                         }
2368                         break;
2369
2370                 case XC( OC_NEWSOURCE ):
2371                         g_progname = op->l.s;
2372                         break;
2373
2374                 case XC( OC_RETURN ):
2375                         copyvar(res, L.v);
2376                         break;
2377
2378                 case XC( OC_NEXTFILE ):
2379                         nextfile = TRUE;
2380                 case XC( OC_NEXT ):
2381                         nextrec = TRUE;
2382                 case XC( OC_DONE ):
2383                         clrvar(res);
2384                         break;
2385
2386                 case XC( OC_EXIT ):
2387                         awk_exit(L.d);
2388
2389                 /* -- recursive node type -- */
2390
2391                 case XC( OC_VAR ):
2392                         L.v = op->l.v;
2393                         if (L.v == intvar[NF])
2394                                 split_f0();
2395                         goto v_cont;
2396
2397                 case XC( OC_FNARG ):
2398                         L.v = &fnargs[op->l.i];
2399  v_cont:
2400                         res = op->r.n ? findvar(iamarray(L.v), R.s) : L.v;
2401                         break;
2402
2403                 case XC( OC_IN ):
2404                         setvar_i(res, hash_search(iamarray(R.v), L.s) ? 1 : 0);
2405                         break;
2406
2407                 case XC( OC_REGEXP ):
2408                         op1 = op;
2409                         L.s = getvar_s(intvar[F0]);
2410                         goto re_cont;
2411
2412                 case XC( OC_MATCH ):
2413                         op1 = op->r.n;
2414  re_cont:
2415                         X.re = as_regex(op1, &sreg);
2416                         R.i = regexec(X.re, L.s, 0, NULL, 0);
2417                         if (X.re == &sreg) regfree(X.re);
2418                         setvar_i(res, (R.i == 0 ? 1 : 0) ^ (opn == '!' ? 1 : 0));
2419                         break;
2420
2421                 case XC( OC_MOVE ):
2422                         /* if source is a temporary string, just relink it to dest */
2423                         if (R.v == v1+1 && R.v->string) {
2424                                 res = setvar_p(L.v, R.v->string);
2425                                 R.v->string = NULL;
2426                         } else {
2427                                 res = copyvar(L.v, R.v);
2428                         }
2429                         break;
2430
2431                 case XC( OC_TERNARY ):
2432                         if ((op->r.n->info & OPCLSMASK) != OC_COLON)
2433                                 syntax_error(EMSG_POSSIBLE_ERROR);
2434                         res = evaluate(istrue(L.v) ? op->r.n->l.n : op->r.n->r.n, res);
2435                         break;
2436
2437                 case XC( OC_FUNC ):
2438                         if (!op->r.f->body.first)
2439                                 syntax_error(EMSG_UNDEF_FUNC);
2440
2441                         X.v = R.v = nvalloc(op->r.f->nargs+1);
2442                         while (op1) {
2443                                 L.v = evaluate(nextarg(&op1), v1);
2444                                 copyvar(R.v, L.v);
2445                                 R.v->type |= VF_CHILD;
2446                                 R.v->x.parent = L.v;
2447                                 if (++R.v - X.v >= op->r.f->nargs)
2448                                         break;
2449                         }
2450
2451                         R.v = fnargs;
2452                         fnargs = X.v;
2453
2454                         L.s = g_progname;
2455                         res = evaluate(op->r.f->body.first, res);
2456                         g_progname = L.s;
2457
2458                         nvfree(fnargs);
2459                         fnargs = R.v;
2460                         break;
2461
2462                 case XC( OC_GETLINE ):
2463                 case XC( OC_PGETLINE ):
2464                         if (op1) {
2465                                 X.rsm = newfile(L.s);
2466                                 if (!X.rsm->F) {
2467                                         if ((opinfo & OPCLSMASK) == OC_PGETLINE) {
2468                                                 X.rsm->F = popen(L.s, "r");
2469                                                 X.rsm->is_pipe = TRUE;
2470                                         } else {
2471                                                 X.rsm->F = fopen_for_read(L.s);         /* not xfopen! */
2472                                         }
2473                                 }
2474                         } else {
2475                                 if (!iF) iF = next_input_file();
2476                                 X.rsm = iF;
2477                         }
2478
2479                         if (!X.rsm->F) {
2480                                 setvar_i(intvar[ERRNO], errno);
2481                                 setvar_i(res, -1);
2482                                 break;
2483                         }
2484
2485                         if (!op->r.n)
2486                                 R.v = intvar[F0];
2487
2488                         L.i = awk_getline(X.rsm, R.v);
2489                         if (L.i > 0) {
2490                                 if (!op1) {
2491                                         incvar(intvar[FNR]);
2492                                         incvar(intvar[NR]);
2493                                 }
2494                         }
2495                         setvar_i(res, L.i);
2496                         break;
2497
2498                 /* simple builtins */
2499                 case XC( OC_FBLTIN ):
2500                         switch (opn) {
2501
2502                         case F_in:
2503                                 R.d = (int)L.d;
2504                                 break;
2505
2506                         case F_rn:
2507                                 R.d = (double)rand() / (double)RAND_MAX;
2508                                 break;
2509 #if ENABLE_FEATURE_AWK_LIBM
2510                         case F_co:
2511                                 R.d = cos(L.d);
2512                                 break;
2513
2514                         case F_ex:
2515                                 R.d = exp(L.d);
2516                                 break;
2517
2518                         case F_lg:
2519                                 R.d = log(L.d);
2520                                 break;
2521
2522                         case F_si:
2523                                 R.d = sin(L.d);
2524                                 break;
2525
2526                         case F_sq:
2527                                 R.d = sqrt(L.d);
2528                                 break;
2529 #else
2530                         case F_co:
2531                         case F_ex:
2532                         case F_lg:
2533                         case F_si:
2534                         case F_sq:
2535                                 syntax_error(EMSG_NO_MATH);
2536                                 break;
2537 #endif
2538                         case F_sr:
2539                                 R.d = (double)seed;
2540                                 seed = op1 ? (unsigned)L.d : (unsigned)time(NULL);
2541                                 srand(seed);
2542                                 break;
2543
2544                         case F_ti:
2545                                 R.d = time(NULL);
2546                                 break;
2547
2548                         case F_le:
2549                                 if (!op1)
2550                                         L.s = getvar_s(intvar[F0]);
2551                                 R.d = strlen(L.s);
2552                                 break;
2553
2554                         case F_sy:
2555                                 fflush(NULL);
2556                                 R.d = (ENABLE_FEATURE_ALLOW_EXEC && L.s && *L.s)
2557                                                 ? (system(L.s) >> 8) : 0;
2558                                 break;
2559
2560                         case F_ff:
2561                                 if (!op1)
2562                                         fflush(stdout);
2563                                 else {
2564                                         if (L.s && *L.s) {
2565                                                 X.rsm = newfile(L.s);
2566                                                 fflush(X.rsm->F);
2567                                         } else {
2568                                                 fflush(NULL);
2569                                         }
2570                                 }
2571                                 break;
2572
2573                         case F_cl:
2574                                 X.rsm = (rstream *)hash_search(fdhash, L.s);
2575                                 if (X.rsm) {
2576                                         R.i = X.rsm->is_pipe ? pclose(X.rsm->F) : fclose(X.rsm->F);
2577                                         free(X.rsm->buffer);
2578                                         hash_remove(fdhash, L.s);
2579                                 }
2580                                 if (R.i != 0)
2581                                         setvar_i(intvar[ERRNO], errno);
2582                                 R.d = (double)R.i;
2583                                 break;
2584                         }
2585                         setvar_i(res, R.d);
2586                         break;
2587
2588                 case XC( OC_BUILTIN ):
2589                         res = exec_builtin(op, res);
2590                         break;
2591
2592                 case XC( OC_SPRINTF ):
2593                         setvar_p(res, awk_printf(op1));
2594                         break;
2595
2596                 case XC( OC_UNARY ):
2597                         X.v = R.v;
2598                         L.d = R.d = getvar_i(R.v);
2599                         switch (opn) {
2600                         case 'P':
2601                                 L.d = ++R.d;
2602                                 goto r_op_change;
2603                         case 'p':
2604                                 R.d++;
2605                                 goto r_op_change;
2606                         case 'M':
2607                                 L.d = --R.d;
2608                                 goto r_op_change;
2609                         case 'm':
2610                                 R.d--;
2611                                 goto r_op_change;
2612                         case '!':
2613                                 L.d = istrue(X.v) ? 0 : 1;
2614                                 break;
2615                         case '-':
2616                                 L.d = -R.d;
2617                                 break;
2618  r_op_change:
2619                                 setvar_i(X.v, R.d);
2620                         }
2621                         setvar_i(res, L.d);
2622                         break;
2623
2624                 case XC( OC_FIELD ):
2625                         R.i = (int)getvar_i(R.v);
2626                         if (R.i == 0) {
2627                                 res = intvar[F0];
2628                         } else {
2629                                 split_f0();
2630                                 if (R.i > nfields)
2631                                         fsrealloc(R.i);
2632                                 res = &Fields[R.i - 1];
2633                         }
2634                         break;
2635
2636                 /* concatenation (" ") and index joining (",") */
2637                 case XC( OC_CONCAT ):
2638                 case XC( OC_COMMA ):
2639                         opn = strlen(L.s) + strlen(R.s) + 2;
2640                         X.s = xmalloc(opn);
2641                         strcpy(X.s, L.s);
2642                         if ((opinfo & OPCLSMASK) == OC_COMMA) {
2643                                 L.s = getvar_s(intvar[SUBSEP]);
2644                                 X.s = xrealloc(X.s, opn + strlen(L.s));
2645                                 strcat(X.s, L.s);
2646                         }
2647                         strcat(X.s, R.s);
2648                         setvar_p(res, X.s);
2649                         break;
2650
2651                 case XC( OC_LAND ):
2652                         setvar_i(res, istrue(L.v) ? ptest(op->r.n) : 0);
2653                         break;
2654
2655                 case XC( OC_LOR ):
2656                         setvar_i(res, istrue(L.v) ? 1 : ptest(op->r.n));
2657                         break;
2658
2659                 case XC( OC_BINARY ):
2660                 case XC( OC_REPLACE ):
2661                         R.d = getvar_i(R.v);
2662                         switch (opn) {
2663                         case '+':
2664                                 L.d += R.d;
2665                                 break;
2666                         case '-':
2667                                 L.d -= R.d;
2668                                 break;
2669                         case '*':
2670                                 L.d *= R.d;
2671                                 break;
2672                         case '/':
2673                                 if (R.d == 0) syntax_error(EMSG_DIV_BY_ZERO);
2674                                 L.d /= R.d;
2675                                 break;
2676                         case '&':
2677 #if ENABLE_FEATURE_AWK_LIBM
2678                                 L.d = pow(L.d, R.d);
2679 #else
2680                                 syntax_error(EMSG_NO_MATH);
2681 #endif
2682                                 break;
2683                         case '%':
2684                                 if (R.d == 0) syntax_error(EMSG_DIV_BY_ZERO);
2685                                 L.d -= (int)(L.d / R.d) * R.d;
2686                                 break;
2687                         }
2688                         res = setvar_i(((opinfo & OPCLSMASK) == OC_BINARY) ? res : X.v, L.d);
2689                         break;
2690
2691                 case XC( OC_COMPARE ):
2692                         if (is_numeric(L.v) && is_numeric(R.v)) {
2693                                 L.d = getvar_i(L.v) - getvar_i(R.v);
2694                         } else {
2695                                 L.s = getvar_s(L.v);
2696                                 R.s = getvar_s(R.v);
2697                                 L.d = icase ? strcasecmp(L.s, R.s) : strcmp(L.s, R.s);
2698                         }
2699                         switch (opn & 0xfe) {
2700                         case 0:
2701                                 R.i = (L.d > 0);
2702                                 break;
2703                         case 2:
2704                                 R.i = (L.d >= 0);
2705                                 break;
2706                         case 4:
2707                                 R.i = (L.d == 0);
2708                                 break;
2709                         }
2710                         setvar_i(res, (opn & 0x1 ? R.i : !R.i) ? 1 : 0);
2711                         break;
2712
2713                 default:
2714                         syntax_error(EMSG_POSSIBLE_ERROR);
2715                 }
2716                 if ((opinfo & OPCLSMASK) <= SHIFT_TIL_THIS)
2717                         op = op->a.n;
2718                 if ((opinfo & OPCLSMASK) >= RECUR_FROM_THIS)
2719                         break;
2720                 if (nextrec)
2721                         break;
2722         }
2723         nvfree(v1);
2724         return res;
2725 #undef fnargs
2726 #undef seed
2727 #undef sreg
2728 }
2729
2730
2731 /* -------- main & co. -------- */
2732
2733 static int awk_exit(int r)
2734 {
2735         var tv;
2736         unsigned i;
2737         hash_item *hi;
2738
2739         zero_out_var(&tv);
2740
2741         if (!exiting) {
2742                 exiting = TRUE;
2743                 nextrec = FALSE;
2744                 evaluate(endseq.first, &tv);
2745         }
2746
2747         /* waiting for children */
2748         for (i = 0; i < fdhash->csize; i++) {
2749                 hi = fdhash->items[i];
2750                 while (hi) {
2751                         if (hi->data.rs.F && hi->data.rs.is_pipe)
2752                                 pclose(hi->data.rs.F);
2753                         hi = hi->next;
2754                 }
2755         }
2756
2757         exit(r);
2758 }
2759
2760 /* if expr looks like "var=value", perform assignment and return 1,
2761  * otherwise return 0 */
2762 static int is_assignment(const char *expr)
2763 {
2764         char *exprc, *s, *s0, *s1;
2765
2766         exprc = xstrdup(expr);
2767         if (!isalnum_(*exprc) || (s = strchr(exprc, '=')) == NULL) {
2768                 free(exprc);
2769                 return FALSE;
2770         }
2771
2772         *(s++) = '\0';
2773         s0 = s1 = s;
2774         while (*s)
2775                 *(s1++) = nextchar(&s);
2776
2777         *s1 = '\0';
2778         setvar_u(newvar(exprc), s0);
2779         free(exprc);
2780         return TRUE;
2781 }
2782
2783 /* switch to next input file */
2784 static rstream *next_input_file(void)
2785 {
2786 #define rsm          (G.next_input_file__rsm)
2787 #define files_happen (G.next_input_file__files_happen)
2788
2789         FILE *F = NULL;
2790         const char *fname, *ind;
2791
2792         if (rsm.F) fclose(rsm.F);
2793         rsm.F = NULL;
2794         rsm.pos = rsm.adv = 0;
2795
2796         do {
2797                 if (getvar_i(intvar[ARGIND])+1 >= getvar_i(intvar[ARGC])) {
2798                         if (files_happen)
2799                                 return NULL;
2800                         fname = "-";
2801                         F = stdin;
2802                 } else {
2803                         ind = getvar_s(incvar(intvar[ARGIND]));
2804                         fname = getvar_s(findvar(iamarray(intvar[ARGV]), ind));
2805                         if (fname && *fname && !is_assignment(fname))
2806                                 F = xfopen_stdin(fname);
2807                 }
2808         } while (!F);
2809
2810         files_happen = TRUE;
2811         setvar_s(intvar[FILENAME], fname);
2812         rsm.F = F;
2813         return &rsm;
2814 #undef rsm
2815 #undef files_happen
2816 }
2817
2818 int awk_main(int argc, char **argv) MAIN_EXTERNALLY_VISIBLE;
2819 int awk_main(int argc, char **argv)
2820 {
2821         unsigned opt;
2822         char *opt_F, *opt_W;
2823         llist_t *list_v = NULL;
2824         llist_t *list_f = NULL;
2825         int i, j;
2826         var *v;
2827         var tv;
2828         char **envp;
2829         char *vnames = (char *)vNames; /* cheat */
2830         char *vvalues = (char *)vValues;
2831
2832         INIT_G();
2833
2834         /* Undo busybox.c, or else strtod may eat ','! This breaks parsing:
2835          * $1,$2 == '$1,' '$2', NOT '$1' ',' '$2' */
2836         if (ENABLE_LOCALE_SUPPORT)
2837                 setlocale(LC_NUMERIC, "C");
2838
2839         zero_out_var(&tv);
2840
2841         /* allocate global buffer */
2842         g_buf = xmalloc(MAXVARFMT + 1);
2843
2844         vhash = hash_init();
2845         ahash = hash_init();
2846         fdhash = hash_init();
2847         fnhash = hash_init();
2848
2849         /* initialize variables */
2850         for (i = 0; *vnames; i++) {
2851                 intvar[i] = v = newvar(nextword(&vnames));
2852                 if (*vvalues != '\377')
2853                         setvar_s(v, nextword(&vvalues));
2854                 else
2855                         setvar_i(v, 0);
2856
2857                 if (*vnames == '*') {
2858                         v->type |= VF_SPECIAL;
2859                         vnames++;
2860                 }
2861         }
2862
2863         handle_special(intvar[FS]);
2864         handle_special(intvar[RS]);
2865
2866         newfile("/dev/stdin")->F = stdin;
2867         newfile("/dev/stdout")->F = stdout;
2868         newfile("/dev/stderr")->F = stderr;
2869
2870         /* Huh, people report that sometimes environ is NULL. Oh well. */
2871         if (environ) for (envp = environ; *envp; envp++) {
2872                 /* environ is writable, thus we don't strdup it needlessly */
2873                 char *s = *envp;
2874                 char *s1 = strchr(s, '=');
2875                 if (s1) {
2876                         *s1 = '\0';
2877                         /* Both findvar and setvar_u take const char*
2878                          * as 2nd arg -> environment is not trashed */
2879                         setvar_u(findvar(iamarray(intvar[ENVIRON]), s), s1 + 1);
2880                         *s1 = '=';
2881                 }
2882         }
2883         opt_complementary = "v::f::"; /* -v and -f can occur multiple times */
2884         opt = getopt32(argv, "F:v:f:W:", &opt_F, &list_v, &list_f, &opt_W);
2885         argv += optind;
2886         argc -= optind;
2887         if (opt & 0x1)
2888                 setvar_s(intvar[FS], opt_F); // -F
2889         while (list_v) { /* -v */
2890                 if (!is_assignment(llist_pop(&list_v)))
2891                         bb_show_usage();
2892         }
2893         if (list_f) { /* -f */
2894                 do {
2895                         char *s = NULL;
2896                         FILE *from_file;
2897
2898                         g_progname = llist_pop(&list_f);
2899                         from_file = xfopen_stdin(g_progname);
2900                         /* one byte is reserved for some trick in next_token */
2901                         for (i = j = 1; j > 0; i += j) {
2902                                 s = xrealloc(s, i + 4096);
2903                                 j = fread(s + i, 1, 4094, from_file);
2904                         }
2905                         s[i] = '\0';
2906                         fclose(from_file);
2907                         parse_program(s + 1);
2908                         free(s);
2909                 } while (list_f);
2910                 argc++;
2911         } else { // no -f: take program from 1st parameter
2912                 if (!argc)
2913                         bb_show_usage();
2914                 g_progname = "cmd. line";
2915                 parse_program(*argv++);
2916         }
2917         if (opt & 0x8) // -W
2918                 bb_error_msg("warning: unrecognized option '-W %s' ignored", opt_W);
2919
2920         /* fill in ARGV array */
2921         setvar_i(intvar[ARGC], argc);
2922         setari_u(intvar[ARGV], 0, "awk");
2923         i = 0;
2924         while (*argv)
2925                 setari_u(intvar[ARGV], ++i, *argv++);
2926
2927         evaluate(beginseq.first, &tv);
2928         if (!mainseq.first && !endseq.first)
2929                 awk_exit(EXIT_SUCCESS);
2930
2931         /* input file could already be opened in BEGIN block */
2932         if (!iF) iF = next_input_file();
2933
2934         /* passing through input files */
2935         while (iF) {
2936                 nextfile = FALSE;
2937                 setvar_i(intvar[FNR], 0);
2938
2939                 while ((i = awk_getline(iF, intvar[F0])) > 0) {
2940                         nextrec = FALSE;
2941                         incvar(intvar[NR]);
2942                         incvar(intvar[FNR]);
2943                         evaluate(mainseq.first, &tv);
2944
2945                         if (nextfile)
2946                                 break;
2947                 }
2948
2949                 if (i < 0)
2950                         syntax_error(strerror(errno));
2951
2952                 iF = next_input_file();
2953         }
2954
2955         awk_exit(EXIT_SUCCESS);
2956         /*return 0;*/
2957 }