awk: fix -F 'regex' bug (miscounted fields if last field is empty)
[platform/upstream/busybox.git] / editors / awk.c
1 /* vi: set sw=4 ts=4: */
2 /*
3  * awk implementation for busybox
4  *
5  * Copyright (C) 2002 by Dmitry Zakharov <dmit@crp.bank.gov.ua>
6  *
7  * Licensed under the GPL v2 or later, see the file LICENSE in this tarball.
8  */
9
10 #include "libbb.h"
11 #include "xregex.h"
12 #include <math.h>
13 extern char **environ;
14
15 /* This is a NOEXEC applet. Be very careful! */
16
17
18 #define MAXVARFMT       240
19 #define MINNVBLOCK      64
20
21 /* variable flags */
22 #define VF_NUMBER       0x0001  /* 1 = primary type is number */
23 #define VF_ARRAY        0x0002  /* 1 = it's an array */
24
25 #define VF_CACHED       0x0100  /* 1 = num/str value has cached str/num eq */
26 #define VF_USER         0x0200  /* 1 = user input (may be numeric string) */
27 #define VF_SPECIAL      0x0400  /* 1 = requires extra handling when changed */
28 #define VF_WALK         0x0800  /* 1 = variable has alloc'd x.walker list */
29 #define VF_FSTR         0x1000  /* 1 = var::string points to fstring buffer */
30 #define VF_CHILD        0x2000  /* 1 = function arg; x.parent points to source */
31 #define VF_DIRTY        0x4000  /* 1 = variable was set explicitly */
32
33 /* these flags are static, don't change them when value is changed */
34 #define VF_DONTTOUCH    (VF_ARRAY | VF_SPECIAL | VF_WALK | VF_CHILD | VF_DIRTY)
35
36 /* Variable: a number and/or string value plus VF_* flag bits */
37 typedef struct var_s {
38         unsigned type;            /* VF_* flags */
39         double number;            /* numeric value (see getvar_i) */
40         char *string;             /* string value, or NULL (getvar_s reports NULL as "") */
41         union {
42                 int aidx;               /* func arg idx (for compilation stage) */
43                 struct xhash_s *array;  /* array ptr (created by iamarray, which sets VF_ARRAY) */
44                 struct var_s *parent;   /* for func args (VF_CHILD), ptr to actual parameter */
45                 char **walker;          /* list of array elements (for..in); alloc'd when VF_WALK is set */
46         } x;
47 } var;
48
49 /* Node chain (pattern-action chain, BEGIN, END, function bodies) */
50 typedef struct chain_s {
51         struct node_s *first;           /* first node of the chain */
52         struct node_s *last;            /* last node of the chain */
53         const char *programname;        /* NOTE(review): presumably the name of the program source - confirm */
54 } chain;
55
56 /* Function (stored in fnhash, see newfunc) */
57 typedef struct func_s {
58         unsigned nargs;                 /* NOTE(review): presumably the number of declared arguments - confirm */
59         struct chain_s body;            /* node chain holding the function body */
60 } func;
61
62 /* I/O stream (stored in fdhash, see newfile) */
63 typedef struct rstream_s {
64         FILE *F;                /* underlying stdio stream */
65         char *buffer;           /* NOTE(review): buffer/adv/size/pos look like read-buffer bookkeeping; */
66         int adv;                /* their exact meaning is not visible in this part of the file - confirm */
67         int size;
68         int pos;
69         smallint is_pipe;       /* NOTE(review): presumably nonzero when the stream is a pipe - confirm */
70 } rstream;
71
72 typedef struct hash_item_s {            /* one entry of an xhash bucket chain */
73         union {
74                 struct var_s v;         /* variable/array hash */
75                 struct rstream_s rs;    /* redirect streams hash */
76                 struct func_s f;        /* functions hash */
77         } data;                         /* keep first: hash_find() stores hash_search()'s &data result in a hash_item pointer */
78         struct hash_item_s *next;       /* next in chain */
79         char name[1];                   /* really it's longer: hash_find() allocates strlen(name)+1 extra bytes */
80 } hash_item;
81
82 typedef struct xhash_s {        /* chained hash table keyed by name */
83         unsigned nel;           /* num of elements */
84         unsigned csize;         /* current hash size (number of entries in items[]) */
85         unsigned nprime;        /* index of next hash size in PRIMES[] */
86         unsigned glen;          /* summary length of item names, each counted with its NUL */
87         struct hash_item_s **items;     /* bucket array; each entry heads a chain linked via ->next */
88 } xhash;
89
90 /* Tree node */
91 typedef struct node_s {
92         uint32_t info;          /* NOTE(review): presumably an opcode/flags word like the tokeninfo[] entries - confirm */
93         unsigned lineno;        /* NOTE(review): presumably the source line of this node - confirm */
94         union {                 /* 'l' operand, interpreted according to the node kind */
95                 struct node_s *n;
96                 var *v;
97                 int i;
98                 char *s;
99                 regex_t *re;
100         } l;
101         union {                 /* 'r' operand, interpreted according to the node kind */
102                 struct node_s *n;
103                 regex_t *ire;
104                 func *f;
105                 int argno;
106         } r;
107         union {                 /* 'a' link; its use is not visible in this part of the file */
108                 struct node_s *n;
109         } a;
110 } node;
111
112 /* Block of temporary variables (managed by nvalloc/nvfree) */
113 typedef struct nvblock_s {
114         int size;                       /* capacity of nv[], in vars */
115         var *pos;                       /* first unused slot in nv[] */
116         struct nvblock_s *prev;         /* previous block in the list (NULL for the first) */
117         struct nvblock_s *next;         /* next block in the list (NULL for the last) */
118         var nv[0];                      /* the vars themselves; nvalloc() allocates them right after the header */
119 } nvblock;
120
121 typedef struct tsplitter_s {    /* splitter state; instances: fsplitter, rsplitter, exec_builtin__tspl */
122         node n;
123         regex_t re[2];          /* NOTE(review): why two regexes is not visible here - confirm */
124 } tsplitter;
125
126 /* simple token classes */
127 /* Order and hex values are very important!!!  See next_token() */
128 #define TC_SEQSTART      1                              /* ( */
129 #define TC_SEQTERM      (1 << 1)                /* ) */
130 #define TC_REGEXP       (1 << 2)                /* /.../ */
131 #define TC_OUTRDR       (1 << 3)                /* | > >> */
132 #define TC_UOPPOST      (1 << 4)                /* unary postfix operator */
133 #define TC_UOPPRE1      (1 << 5)                /* unary prefix operator */
134 #define TC_BINOPX       (1 << 6)                /* two-opnd operator */
135 #define TC_IN           (1 << 7)
136 #define TC_COMMA        (1 << 8)
137 #define TC_PIPE         (1 << 9)                /* input redirection pipe */
138 #define TC_UOPPRE2      (1 << 10)               /* unary prefix operator */
139 #define TC_ARRTERM      (1 << 11)               /* ] */
140 #define TC_GRPSTART     (1 << 12)               /* { */
141 #define TC_GRPTERM      (1 << 13)               /* } */
142 #define TC_SEMICOL      (1 << 14)
143 #define TC_NEWLINE      (1 << 15)
144 #define TC_STATX        (1 << 16)               /* ctl statement (for, next...) */
145 #define TC_WHILE        (1 << 17)
146 #define TC_ELSE         (1 << 18)
147 #define TC_BUILTIN      (1 << 19)
148 #define TC_GETLINE      (1 << 20)
149 #define TC_FUNCDECL     (1 << 21)               /* `function' `func' */
150 #define TC_BEGIN        (1 << 22)
151 #define TC_END          (1 << 23)
152 #define TC_EOF          (1 << 24)
153 #define TC_VARIABLE     (1 << 25)
154 #define TC_ARRAY        (1 << 26)
155 #define TC_FUNCTION     (1 << 27)
156 #define TC_STRING       (1 << 28)
157 #define TC_NUMBER       (1 << 29)
158
159 #define TC_UOPPRE  (TC_UOPPRE1 | TC_UOPPRE2)
160
161 /* combined token classes */
162 #define TC_BINOP   (TC_BINOPX | TC_COMMA | TC_PIPE | TC_IN)
163 #define TC_UNARYOP (TC_UOPPRE | TC_UOPPOST)
164 #define TC_OPERAND (TC_VARIABLE | TC_ARRAY | TC_FUNCTION \
165                    | TC_BUILTIN | TC_GETLINE | TC_SEQSTART | TC_STRING | TC_NUMBER)
166
167 #define TC_STATEMNT (TC_STATX | TC_WHILE)
168 #define TC_OPTERM  (TC_SEMICOL | TC_NEWLINE)
169
170 /* word tokens, cannot mean something else if not expected */
171 #define TC_WORD    (TC_IN | TC_STATEMNT | TC_ELSE | TC_BUILTIN \
172                    | TC_GETLINE | TC_FUNCDECL | TC_BEGIN | TC_END)
173
174 /* discard newlines after these */
175 #define TC_NOTERM  (TC_COMMA | TC_GRPSTART | TC_GRPTERM \
176                    | TC_BINOP | TC_OPTERM)
177
178 /* what can expression begin with */
179 #define TC_OPSEQ   (TC_OPERAND | TC_UOPPRE | TC_REGEXP)
180 /* what can group begin with */
181 #define TC_GRPSEQ  (TC_OPSEQ | TC_OPTERM | TC_STATEMNT | TC_GRPSTART)
182
183 /* if previous token class is CONCAT1 and next is CONCAT2, concatenation */
184 /* operator is inserted between them */
185 #define TC_CONCAT1 (TC_VARIABLE | TC_ARRTERM | TC_SEQTERM \
186                    | TC_STRING | TC_NUMBER | TC_UOPPOST)
187 #define TC_CONCAT2 (TC_OPERAND | TC_UOPPRE)
188
189 #define OF_RES1    0x010000
190 #define OF_RES2    0x020000
191 #define OF_STR1    0x040000
192 #define OF_STR2    0x080000
193 #define OF_NUM1    0x100000
194 #define OF_CHECKED 0x200000
195
196 /* combined operator flags */
197 #define xx      0
198 #define xV      OF_RES2
199 #define xS      (OF_RES2 | OF_STR2)
200 #define Vx      OF_RES1
201 #define VV      (OF_RES1 | OF_RES2)
202 #define Nx      (OF_RES1 | OF_NUM1)
203 #define NV      (OF_RES1 | OF_NUM1 | OF_RES2)
204 #define Sx      (OF_RES1 | OF_STR1)
205 #define SV      (OF_RES1 | OF_STR1 | OF_RES2)
206 #define SS      (OF_RES1 | OF_STR1 | OF_RES2 | OF_STR2)
207
208 #define OPCLSMASK 0xFF00
209 #define OPNMASK   0x007F
210
211 /* operator priority is stored in the highest byte (even: r->l, odd: l->r grouping).
212  * For builtins that byte has a different meaning: n n s3 s2 s1 v3 v2 v1,
213  * n - min. number of args, vN - resolve Nth arg to var, sN - resolve to string
214  */
215 #define P(x)      (x << 24)
216 #define PRIMASK   0x7F000000
217 #define PRIMASK2  0x7E000000
218
219 /* Operation classes */
220
221 #define SHIFT_TIL_THIS  0x0600
222 #define RECUR_FROM_THIS 0x1000
223
224 enum {
225         OC_DELETE = 0x0100,     OC_EXEC = 0x0200,       OC_NEWSOURCE = 0x0300,
226         OC_PRINT = 0x0400,      OC_PRINTF = 0x0500,     OC_WALKINIT = 0x0600,
227
228         OC_BR = 0x0700,         OC_BREAK = 0x0800,      OC_CONTINUE = 0x0900,
229         OC_EXIT = 0x0a00,       OC_NEXT = 0x0b00,       OC_NEXTFILE = 0x0c00,
230         OC_TEST = 0x0d00,       OC_WALKNEXT = 0x0e00,
231
232         OC_BINARY = 0x1000,     OC_BUILTIN = 0x1100,    OC_COLON = 0x1200,
233         OC_COMMA = 0x1300,      OC_COMPARE = 0x1400,    OC_CONCAT = 0x1500,
234         OC_FBLTIN = 0x1600,     OC_FIELD = 0x1700,      OC_FNARG = 0x1800,
235         OC_FUNC = 0x1900,       OC_GETLINE = 0x1a00,    OC_IN = 0x1b00,
236         OC_LAND = 0x1c00,       OC_LOR = 0x1d00,        OC_MATCH = 0x1e00,
237         OC_MOVE = 0x1f00,       OC_PGETLINE = 0x2000,   OC_REGEXP = 0x2100,
238         OC_REPLACE = 0x2200,    OC_RETURN = 0x2300,     OC_SPRINTF = 0x2400,
239         OC_TERNARY = 0x2500,    OC_UNARY = 0x2600,      OC_VAR = 0x2700,
240         OC_DONE = 0x2800,
241
242         ST_IF = 0x3000,         ST_DO = 0x3100,         ST_FOR = 0x3200,
243         ST_WHILE = 0x3300
244 };
245
246 /* simple builtins */
247 enum {
248         F_in,   F_rn,   F_co,   F_ex,   F_lg,   F_si,   F_sq,   F_sr,
249         F_ti,   F_le,   F_sy,   F_ff,   F_cl
250 };
251
252 /* builtins */
253 enum {
254         B_a2,   B_ix,   B_ma,   B_sp,   B_ss,   B_ti,   B_lo,   B_up,
255         B_ge,   B_gs,   B_su,
256         B_an,   B_co,   B_ls,   B_or,   B_rs,   B_xo,
257 };
258
259 /* tokens and their corresponding info values */
260
261 #define NTC     "\377"  /* switch to next token class (tc<<1) */
262 #define NTCC    '\377'
263
264 #define OC_B    OC_BUILTIN
265
266 static const char tokenlist[] =
267         "\1("       NTC
268         "\1)"       NTC
269         "\1/"       NTC                                 /* REGEXP */
270         "\2>>"      "\1>"       "\1|"       NTC         /* OUTRDR */
271         "\2++"      "\2--"      NTC                     /* UOPPOST */
272         "\2++"      "\2--"      "\1$"       NTC         /* UOPPRE1 */
273         "\2=="      "\1="       "\2+="      "\2-="      /* BINOPX */
274         "\2*="      "\2/="      "\2%="      "\2^="
275         "\1+"       "\1-"       "\3**="     "\2**"
276         "\1/"       "\1%"       "\1^"       "\1*"
277         "\2!="      "\2>="      "\2<="      "\1>"
278         "\1<"       "\2!~"      "\1~"       "\2&&"
279         "\2||"      "\1?"       "\1:"       NTC
280         "\2in"      NTC
281         "\1,"       NTC
282         "\1|"       NTC
283         "\1+"       "\1-"       "\1!"       NTC         /* UOPPRE2 */
284         "\1]"       NTC
285         "\1{"       NTC
286         "\1}"       NTC
287         "\1;"       NTC
288         "\1\n"      NTC
289         "\2if"      "\2do"      "\3for"     "\5break"   /* STATX */
290         "\10continue"           "\6delete"  "\5print"
291         "\6printf"  "\4next"    "\10nextfile"
292         "\6return"  "\4exit"    NTC
293         "\5while"   NTC
294         "\4else"    NTC
295
296         "\3and"     "\5compl"   "\6lshift"  "\2or"
297         "\6rshift"  "\3xor"
298         "\5close"   "\6system"  "\6fflush"  "\5atan2"   /* BUILTIN */
299         "\3cos"     "\3exp"     "\3int"     "\3log"
300         "\4rand"    "\3sin"     "\4sqrt"    "\5srand"
301         "\6gensub"  "\4gsub"    "\5index"   "\6length"
302         "\5match"   "\5split"   "\7sprintf" "\3sub"
303         "\6substr"  "\7systime" "\10strftime"
304         "\7tolower" "\7toupper" NTC
305         "\7getline" NTC
306         "\4func"    "\10function"   NTC
307         "\5BEGIN"   NTC
308         "\3END"     "\0"
309         ;
310
311 static const uint32_t tokeninfo[] = {
312         0,
313         0,
314         OC_REGEXP,
315         xS|'a',     xS|'w',     xS|'|',
316         OC_UNARY|xV|P(9)|'p',       OC_UNARY|xV|P(9)|'m',
317         OC_UNARY|xV|P(9)|'P',       OC_UNARY|xV|P(9)|'M',
318             OC_FIELD|xV|P(5),
319         OC_COMPARE|VV|P(39)|5,      OC_MOVE|VV|P(74),
320             OC_REPLACE|NV|P(74)|'+',    OC_REPLACE|NV|P(74)|'-',
321         OC_REPLACE|NV|P(74)|'*',    OC_REPLACE|NV|P(74)|'/',
322             OC_REPLACE|NV|P(74)|'%',    OC_REPLACE|NV|P(74)|'&',
323         OC_BINARY|NV|P(29)|'+',     OC_BINARY|NV|P(29)|'-',
324             OC_REPLACE|NV|P(74)|'&',    OC_BINARY|NV|P(15)|'&',
325         OC_BINARY|NV|P(25)|'/',     OC_BINARY|NV|P(25)|'%',
326             OC_BINARY|NV|P(15)|'&',     OC_BINARY|NV|P(25)|'*',
327         OC_COMPARE|VV|P(39)|4,      OC_COMPARE|VV|P(39)|3,
328             OC_COMPARE|VV|P(39)|0,      OC_COMPARE|VV|P(39)|1,
329         OC_COMPARE|VV|P(39)|2,      OC_MATCH|Sx|P(45)|'!',
330             OC_MATCH|Sx|P(45)|'~',      OC_LAND|Vx|P(55),
331         OC_LOR|Vx|P(59),            OC_TERNARY|Vx|P(64)|'?',
332             OC_COLON|xx|P(67)|':',
333         OC_IN|SV|P(49),
334         OC_COMMA|SS|P(80),
335         OC_PGETLINE|SV|P(37),
336         OC_UNARY|xV|P(19)|'+',      OC_UNARY|xV|P(19)|'-',
337             OC_UNARY|xV|P(19)|'!',
338         0,
339         0,
340         0,
341         0,
342         0,
343         ST_IF,          ST_DO,          ST_FOR,         OC_BREAK,
344         OC_CONTINUE,                    OC_DELETE|Vx,   OC_PRINT,
345         OC_PRINTF,      OC_NEXT,        OC_NEXTFILE,
346         OC_RETURN|Vx,   OC_EXIT|Nx,
347         ST_WHILE,
348         0,
349
350         OC_B|B_an|P(0x83), OC_B|B_co|P(0x41), OC_B|B_ls|P(0x83), OC_B|B_or|P(0x83),
351         OC_B|B_rs|P(0x83), OC_B|B_xo|P(0x83),
352         OC_FBLTIN|Sx|F_cl, OC_FBLTIN|Sx|F_sy, OC_FBLTIN|Sx|F_ff, OC_B|B_a2|P(0x83),
353         OC_FBLTIN|Nx|F_co, OC_FBLTIN|Nx|F_ex, OC_FBLTIN|Nx|F_in, OC_FBLTIN|Nx|F_lg,
354         OC_FBLTIN|F_rn,    OC_FBLTIN|Nx|F_si, OC_FBLTIN|Nx|F_sq, OC_FBLTIN|Nx|F_sr,
355         OC_B|B_ge|P(0xd6), OC_B|B_gs|P(0xb6), OC_B|B_ix|P(0x9b), OC_FBLTIN|Sx|F_le,
356         OC_B|B_ma|P(0x89), OC_B|B_sp|P(0x8b), OC_SPRINTF,        OC_B|B_su|P(0xb6),
357         OC_B|B_ss|P(0x8f), OC_FBLTIN|F_ti,    OC_B|B_ti|P(0x0b),
358         OC_B|B_lo|P(0x49), OC_B|B_up|P(0x49),
359         OC_GETLINE|SV|P(0),
360         0,      0,
361         0,
362         0
363 };
364
365 /* internal variable names and their initial values       */
366 /* asterisk marks SPECIAL vars; $ is just no-named Field0 */
367 enum {
368         CONVFMT,    OFMT,       FS,         OFS,
369         ORS,        RS,         RT,         FILENAME,
370         SUBSEP,     ARGIND,     ARGC,       ARGV,
371         ERRNO,      FNR,
372         NR,         NF,         IGNORECASE,
373         ENVIRON,    F0,         NUM_INTERNAL_VARS
374 };
375
376 static const char vNames[] =
377         "CONVFMT\0" "OFMT\0"    "FS\0*"     "OFS\0"
378         "ORS\0"     "RS\0*"     "RT\0"      "FILENAME\0"
379         "SUBSEP\0"  "ARGIND\0"  "ARGC\0"    "ARGV\0"
380         "ERRNO\0"   "FNR\0"
381         "NR\0"      "NF\0*"     "IGNORECASE\0*"
382         "ENVIRON\0" "$\0*"      "\0";
383
384 static const char vValues[] =
385         "%.6g\0"    "%.6g\0"    " \0"       " \0"
386         "\n\0"      "\n\0"      "\0"        "\0"
387         "\034\0"
388         "\377";
389
/* Initial number of hash buckets (see hash_init). No trailing semicolon:
 * the macro must expand to a plain constant usable inside expressions. */
#define FIRST_PRIME 61
/* hash size may grow to these values (see hash_rebuild) */
static const unsigned PRIMES[] = { 251, 1021, 4093, 16381, 65521 };
393
394
395
396 /* Globals. Split in two parts so that first one is addressed
397  * with (mostly short) negative offsets */
398 struct globals {
399         chain beginseq, mainseq, endseq, *seq;
400         node *break_ptr, *continue_ptr;
401         rstream *iF;
402         xhash *vhash, *ahash, *fdhash, *fnhash; /* vhash: variables, fdhash: streams, fnhash: functions (newvar/newfile/newfunc) */
403         const char *g_progname; /* program name printed by syntax_error() */
404         int g_lineno;           /* line number printed by syntax_error() */
405         int nfields;
406         int maxfields; /* used in fsrealloc() only */
407         var *Fields;
408         nvblock *g_cb;          /* current block of the nvalloc()/nvfree() allocator */
409         char *g_pos;            /* position from which next_token() starts reading */
410         char *g_buf;            /* scratch buffer; getvar_s() formats numbers into it */
411         smallint icase;
412         smallint exiting;
413         smallint nextrec;
414         smallint nextfile;
415         smallint is_f0_split;
416 };
417 struct globals2 {       /* second part of the globals, reached through the G macro */
418         uint32_t t_info; /* often used */
419         uint32_t t_tclass;
420         char *t_string;
421         int t_lineno;   /* incremented by skip_spaces() and next_token() as newlines are passed */
422         int t_rollback; /* when set, next_token() just clears it instead of reading a new token */
423
424         var *intvar[NUM_INTERNAL_VARS]; /* often used */
425
426         /* former statics from various functions */
427         char *split_f0__fstrings;
428
429         uint32_t next_token__save_tclass;
430         uint32_t next_token__save_info;
431         uint32_t next_token__ltclass;   /* set to TC_OPTERM by INIT_G() */
432         smallint next_token__concat_inserted;
433
434         smallint next_input_file__files_happen;
435         rstream next_input_file__rsm;
436
437         var *evaluate__fnargs;
438         unsigned evaluate__seed;        /* set to 1 by INIT_G() */
439         regex_t evaluate__sreg;
440
441         var ptest__v;
442
443         tsplitter exec_builtin__tspl;
444
445         /* biggest and least used members go last */
446         double t_double;
447         tsplitter fsplitter, rsplitter;
448 };
449 #define G1 (ptr_to_globals[-1])
450 #define G (*(struct globals2 *const)ptr_to_globals)
451 /* For debug. nm --size-sort awk.o | grep -vi ' [tr] ' */
452 /* char G1size[sizeof(G1)]; - 0x6c */
453 /* char Gsize[sizeof(G)]; - 0x1cc */
454 /* Trying to keep most of members accessible with short offsets: */
455 /* char Gofs_seed[offsetof(struct globals2, evaluate__seed)]; - 0x90 */
456 #define beginseq     (G1.beginseq    )
457 #define mainseq      (G1.mainseq     )
458 #define endseq       (G1.endseq      )
459 #define seq          (G1.seq         )
460 #define break_ptr    (G1.break_ptr   )
461 #define continue_ptr (G1.continue_ptr)
462 #define iF           (G1.iF          )
463 #define vhash        (G1.vhash       )
464 #define ahash        (G1.ahash       )
465 #define fdhash       (G1.fdhash      )
466 #define fnhash       (G1.fnhash      )
467 #define g_progname   (G1.g_progname  )
468 #define g_lineno     (G1.g_lineno    )
469 #define nfields      (G1.nfields     )
470 #define maxfields    (G1.maxfields   )
471 #define Fields       (G1.Fields      )
472 #define g_cb         (G1.g_cb        )
473 #define g_pos        (G1.g_pos       )
474 #define g_buf        (G1.g_buf       )
475 #define icase        (G1.icase       )
476 #define exiting      (G1.exiting     )
477 #define nextrec      (G1.nextrec     )
478 #define nextfile     (G1.nextfile    )
479 #define is_f0_split  (G1.is_f0_split )
480 #define t_info       (G.t_info      )
481 #define t_tclass     (G.t_tclass    )
482 #define t_string     (G.t_string    )
483 #define t_double     (G.t_double    )
484 #define t_lineno     (G.t_lineno    )
485 #define t_rollback   (G.t_rollback  )
486 #define intvar       (G.intvar      )
487 #define fsplitter    (G.fsplitter   )
488 #define rsplitter    (G.rsplitter   )
489 #define INIT_G() do { \
490         PTR_TO_GLOBALS = xzalloc(sizeof(G1) + sizeof(G)) + sizeof(G1); \
491         G.next_token__ltclass = TC_OPTERM; \
492         G.evaluate__seed = 1; \
493 } while (0)
494
495
496 /* function prototypes */
497 static void handle_special(var *);
498 static node *parse_expr(uint32_t);
499 static void chain_group(void);
500 static var *evaluate(node *, var *);
501 static rstream *next_input_file(void);
502 static int fmt_num(char *, int, const char *, double, int);
503 static int awk_exit(int) ATTRIBUTE_NORETURN;
504
505 /* ---- error handling ---- */
506
507 static const char EMSG_INTERNAL_ERROR[] = "Internal error";
508 static const char EMSG_UNEXP_EOS[] = "Unexpected end of string";
509 static const char EMSG_UNEXP_TOKEN[] = "Unexpected token";
510 static const char EMSG_DIV_BY_ZERO[] = "Division by zero";
511 static const char EMSG_INV_FMT[] = "Invalid format specifier";
512 static const char EMSG_TOO_FEW_ARGS[] = "Too few arguments for builtin";
513 static const char EMSG_NOT_ARRAY[] = "Not an array";
514 static const char EMSG_POSSIBLE_ERROR[] = "Possible syntax error";
515 static const char EMSG_UNDEF_FUNC[] = "Call to undefined function";
516 #if !ENABLE_FEATURE_AWK_MATH
517 static const char EMSG_NO_MATH[] = "Math support is not compiled in";
518 #endif
519
520 static void zero_out_var(var * vp)
521 {
522         memset(vp, 0, sizeof(*vp));
523 }
524
525 static void syntax_error(const char * const message) ATTRIBUTE_NORETURN;
526 static void syntax_error(const char * const message)
527 {
528         bb_error_msg_and_die("%s:%i: %s", g_progname, g_lineno, message);
529 }
530
531 /* ---- hash stuff ---- */
532
/* Hash a NUL-terminated name. Each character is folded in as
 * h = h * 63 + ch, using unsigned (wrapping) arithmetic.
 * The caller reduces the result modulo the table size. */
static unsigned hashidx(const char *name)
{
	unsigned h;

	for (h = 0; *name != '\0'; name++)
		h = (h << 6) - h + *name;
	return h;
}
540
541 /* create new hash */
542 static xhash *hash_init(void)
543 {
544         xhash *newhash;
545
546         newhash = xzalloc(sizeof(xhash));
547         newhash->csize = FIRST_PRIME;
548         newhash->items = xzalloc(newhash->csize * sizeof(hash_item *));
549
550         return newhash;
551 }
552
553 /* find item in hash, return ptr to data, NULL if not found */
554 static void *hash_search(xhash *hash, const char *name)
555 {
556         hash_item *hi;
557
558         hi = hash->items [ hashidx(name) % hash->csize ];
559         while (hi) {
560                 if (strcmp(hi->name, name) == 0)
561                         return &(hi->data);
562                 hi = hi->next;
563         }
564         return NULL;
565 }
566
567 /* grow hash if it becomes too big */
568 static void hash_rebuild(xhash *hash)
569 {
570         unsigned newsize, i, idx;
571         hash_item **newitems, *hi, *thi;
572
573         if (hash->nprime == ARRAY_SIZE(PRIMES))
574                 return;
575
576         newsize = PRIMES[hash->nprime++];
577         newitems = xzalloc(newsize * sizeof(hash_item *));
578
579         for (i = 0; i < hash->csize; i++) {
580                 hi = hash->items[i];
581                 while (hi) {
582                         thi = hi;
583                         hi = thi->next;
584                         idx = hashidx(thi->name) % newsize;
585                         thi->next = newitems[idx];
586                         newitems[idx] = thi;
587                 }
588         }
589
590         free(hash->items);
591         hash->csize = newsize;
592         hash->items = newitems;
593 }
594
595 /* find item in hash, add it if necessary. Return ptr to data */
596 static void *hash_find(xhash *hash, const char *name)
597 {
598         hash_item *hi;
599         unsigned idx;
600         int l;
601
602         hi = hash_search(hash, name);
603         if (!hi) {
604                 if (++hash->nel / hash->csize > 10)
605                         hash_rebuild(hash);
606
607                 l = strlen(name) + 1;
608                 hi = xzalloc(sizeof(hash_item) + l);
609                 memcpy(hi->name, name, l);
610
611                 idx = hashidx(name) % hash->csize;
612                 hi->next = hash->items[idx];
613                 hash->items[idx] = hi;
614                 hash->glen += l;
615         }
616         return &(hi->data);
617 }
618
619 #define findvar(hash, name) ((var*)    hash_find((hash), (name)))
620 #define newvar(name)        ((var*)    hash_find(vhash, (name)))
621 #define newfile(name)       ((rstream*)hash_find(fdhash, (name)))
622 #define newfunc(name)       ((func*)   hash_find(fnhash, (name)))
623
624 static void hash_remove(xhash *hash, const char *name)
625 {
626         hash_item *hi, **phi;
627
628         phi = &(hash->items[hashidx(name) % hash->csize]);
629         while (*phi) {
630                 hi = *phi;
631                 if (strcmp(hi->name, name) == 0) {
632                         hash->glen -= (strlen(name) + 1);
633                         hash->nel--;
634                         *phi = hi->next;
635                         free(hi);
636                         break;
637                 }
638                 phi = &(hi->next);
639         }
640 }
641
642 /* ------ some useful functions ------ */
643
644 static void skip_spaces(char **s)
645 {
646         char *p = *s;
647
648         while (1) {
649                 if (*p == '\\' && p[1] == '\n') {
650                         p++;
651                         t_lineno++;
652                 } else if (*p != ' ' && *p != '\t') {
653                         break;
654                 }
655                 p++;
656         }
657         *s = p;
658 }
659
/* Return the NUL-terminated word that starts at *s and move *s to the
 * byte just after that word's terminating NUL. */
static char *nextword(char **s)
{
	char *word = *s;
	char *end = word;

	while (*end != '\0')
		end++;
	*s = end + 1;

	return word;
}
668
/* Fetch the next character from *s and advance *s past it.
 * A backslash is handed to bb_process_escape_sequence(); if that call
 * returns a backslash without moving *s, the character following the
 * backslash is returned as-is and consumed. */
static char nextchar(char **s)
{
	char *after;
	char ch = **s;

	(*s)++;
	if (ch != '\\')
		return ch;

	after = *s;
	ch = bb_process_escape_sequence((const char**)s);
	if (ch == '\\' && *s == after) {
		ch = **s;
		(*s)++;
	}
	return ch;
}
679
680 static int ALWAYS_INLINE isalnum_(int c)
681 {
682         return (isalnum(c) || c == '_');
683 }
684
/* Open path with the given mode through xfopen(); the special name "-"
 * yields stdin instead of opening a file. */
static FILE *afopen(const char *path, const char *mode)
{
	if (path[0] == '-' && path[1] == '\0')
		return stdin;
	return xfopen(path, mode);
}
689
690 /* -------- working with variables (set/get/copy/etc) -------- */
691
692 static xhash *iamarray(var *v)
693 {
694         var *a = v;
695
696         while (a->type & VF_CHILD)
697                 a = a->x.parent;
698
699         if (!(a->type & VF_ARRAY)) {
700                 a->type |= VF_ARRAY;
701                 a->x.array = hash_init();
702         }
703         return a->x.array;
704 }
705
706 static void clear_array(xhash *array)
707 {
708         unsigned i;
709         hash_item *hi, *thi;
710
711         for (i = 0; i < array->csize; i++) {
712                 hi = array->items[i];
713                 while (hi) {
714                         thi = hi;
715                         hi = hi->next;
716                         free(thi->data.v.string);
717                         free(thi);
718                 }
719                 array->items[i] = NULL;
720         }
721         array->glen = array->nel = 0;
722 }
723
724 /* clear a variable */
725 static var *clrvar(var *v)
726 {
727         if (!(v->type & VF_FSTR))
728                 free(v->string);
729
730         v->type &= VF_DONTTOUCH;
731         v->type |= VF_DIRTY;
732         v->string = NULL;
733         return v;
734 }
735
736 /* assign string value to variable */
737 static var *setvar_p(var *v, char *value)
738 {
739         clrvar(v);
740         v->string = value;
741         handle_special(v);
742         return v;
743 }
744
745 /* same as setvar_p but make a copy of string */
746 static var *setvar_s(var *v, const char *value)
747 {
748         return setvar_p(v, (value && *value) ? xstrdup(value) : NULL);
749 }
750
751 /* same as setvar_s but set USER flag */
752 static var *setvar_u(var *v, const char *value)
753 {
754         setvar_s(v, value);
755         v->type |= VF_USER;
756         return v;
757 }
758
759 /* set array element to user string */
760 static void setari_u(var *a, int idx, const char *s)
761 {
762         char sidx[sizeof(int)*3 + 1];
763         var *v;
764
765         sprintf(sidx, "%d", idx);
766         v = findvar(iamarray(a), sidx);
767         setvar_u(v, s);
768 }
769
770 /* assign numeric value to variable */
771 static var *setvar_i(var *v, double value)
772 {
773         clrvar(v);
774         v->type |= VF_NUMBER;
775         v->number = value;
776         handle_special(v);
777         return v;
778 }
779
780 static const char *getvar_s(var *v)
781 {
782         /* if v is numeric and has no cached string, convert it to string */
783         if ((v->type & (VF_NUMBER | VF_CACHED)) == VF_NUMBER) {
784                 fmt_num(g_buf, MAXVARFMT, getvar_s(intvar[CONVFMT]), v->number, TRUE);
785                 v->string = xstrdup(g_buf);
786                 v->type |= VF_CACHED;
787         }
788         return (v->string == NULL) ? "" : v->string;
789 }
790
791 static double getvar_i(var *v)
792 {
793         char *s;
794
795         if ((v->type & (VF_NUMBER | VF_CACHED)) == 0) {
796                 v->number = 0;
797                 s = v->string;
798                 if (s && *s) {
799                         v->number = strtod(s, &s);
800                         if (v->type & VF_USER) {
801                                 skip_spaces(&s);
802                                 if (*s != '\0')
803                                         v->type &= ~VF_USER;
804                         }
805                 } else {
806                         v->type &= ~VF_USER;
807                 }
808                 v->type |= VF_CACHED;
809         }
810         return v->number;
811 }
812
813 static var *copyvar(var *dest, const var *src)
814 {
815         if (dest != src) {
816                 clrvar(dest);
817                 dest->type |= (src->type & ~(VF_DONTTOUCH | VF_FSTR));
818                 dest->number = src->number;
819                 if (src->string)
820                         dest->string = xstrdup(src->string);
821         }
822         handle_special(dest);
823         return dest;
824 }
825
826 static var *incvar(var *v)
827 {
828         return setvar_i(v, getvar_i(v)+1.);
829 }
830
831 /* return true if v is number or numeric string */
832 static int is_numeric(var *v)
833 {
834         getvar_i(v);
835         return ((v->type ^ VF_DIRTY) & (VF_NUMBER | VF_USER | VF_DIRTY));
836 }
837
838 /* return 1 when value of v corresponds to true, 0 otherwise */
839 static int istrue(var *v)
840 {
841         if (is_numeric(v))
842                 return (v->number == 0) ? 0 : 1;
843         return (v->string && *(v->string)) ? 1 : 0;
844 }
845
846 /* temporary variables allocator. Last allocated should be first freed */
847 static var *nvalloc(int n)
848 {
849         nvblock *pb = NULL;
850         var *v, *r;
851         int size;
852
853         while (g_cb) {
854                 pb = g_cb;
855                 if ((g_cb->pos - g_cb->nv) + n <= g_cb->size) break;
856                 g_cb = g_cb->next;
857         }
858
859         if (!g_cb) {
860                 size = (n <= MINNVBLOCK) ? MINNVBLOCK : n;
861                 g_cb = xmalloc(sizeof(nvblock) + size * sizeof(var));
862                 g_cb->size = size;
863                 g_cb->pos = g_cb->nv;
864                 g_cb->prev = pb;
865                 g_cb->next = NULL;
866                 if (pb) pb->next = g_cb;
867         }
868
869         v = r = g_cb->pos;
870         g_cb->pos += n;
871
872         while (v < g_cb->pos) {
873                 v->type = 0;
874                 v->string = NULL;
875                 v++;
876         }
877
878         return r;
879 }
880
881 static void nvfree(var *v)
882 {
883         var *p;
884
885         if (v < g_cb->nv || v >= g_cb->pos)
886                 syntax_error(EMSG_INTERNAL_ERROR);
887
888         for (p = v; p < g_cb->pos; p++) {
889                 if ((p->type & (VF_ARRAY | VF_CHILD)) == VF_ARRAY) {
890                         clear_array(iamarray(p));
891                         free(p->x.array->items);
892                         free(p->x.array);
893                 }
894                 if (p->type & VF_WALK)
895                         free(p->x.walker);
896
897                 clrvar(p);
898         }
899
900         g_cb->pos = v;
901         while (g_cb->prev && g_cb->pos == g_cb->nv) {
902                 g_cb = g_cb->prev;
903         }
904 }
905
906 /* ------- awk program text parsing ------- */
907
908 /* Parse the next token of the program text at g_pos and record it in the
909  * t_* globals: t_tclass (its class) plus, depending on the kind of token,
910  * t_info, t_string or t_double.
     *
     * 'expected' is a mask of the token classes acceptable here. It steers
     * the scan (a '/' opens a regexp only when TC_REGEXP is expected; entries
     * of tokenlist are only considered for classes in expected, TC_WORD and
     * TC_NEWLINE) and it is checked at the end: if the class of the token
     * is not in the mask, syntax_error() is called.
     *
     * Nothing is read from the program text when t_rollback is set (the
     * previous token is delivered once more) or when the previous call
     * delivered an inserted concatenation operator (the token saved at
     * that time is delivered).
     *
     * Returns the class of the delivered token.
     */
911 static uint32_t next_token(uint32_t expected)
912 {
913 #define concat_inserted (G.next_token__concat_inserted)
914 #define save_tclass     (G.next_token__save_tclass)
915 #define save_info       (G.next_token__save_info)
916 /* Initialized to TC_OPTERM: */
    /* (ltclass is the class of the token delivered by the previous call) */
917 #define ltclass         (G.next_token__ltclass)
918
919         char *p, *pp, *s;
920         const char *tl;
921         uint32_t tc;
922         const uint32_t *ti;
923         int l;
924
925         if (t_rollback) {
                    /* t_* still describe the rolled-back token: deliver it again */
926                 t_rollback = FALSE;
927
928         } else if (concat_inserted) {
                    /* last time a concatenation operator was delivered in place
                     * of the token actually read; now deliver that token */
929                 concat_inserted = FALSE;
930                 t_tclass = save_tclass;
931                 t_info = save_info;
932
933         } else {
934                 p = g_pos;
935  readnext:
936                 skip_spaces(&p);
937                 g_lineno = t_lineno;
                    /* a '#' comment extends up to, but not including, the end of line */
938                 if (*p == '#')
939                         while (*p != '\n' && *p != '\0')
940                                 p++;
941
                    /* g_lineno (set above) keeps the line this token starts on */
942                 if (*p == '\n')
943                         t_lineno++;
944
945                 if (*p == '\0') {
946                         tc = TC_EOF;
947
948                 } else if (*p == '\"') {
949                         /* it's a string */
                            /* the chars returned by nextchar() are stored back into the
                             * program text itself, starting right after the opening quote
                             * (presumably nextchar() decodes escape sequences - confirm) */
950                         t_string = s = ++p;
951                         while (*p != '\"') {
952                                 if (*p == '\0' || *p == '\n')
953                                         syntax_error(EMSG_UNEXP_EOS);
954                                 *(s++) = nextchar(&p);
955                         }
956                         p++;
957                         *s = '\0';
958                         tc = TC_STRING;
959
960                 } else if ((expected & TC_REGEXP) && *p == '/') {
961                         /* it's regexp */
                            /* the regexp text is rewritten in place as well */
962                         t_string = s = ++p;
963                         while (*p != '/') {
964                                 if (*p == '\0' || *p == '\n')
965                                         syntax_error(EMSG_UNEXP_EOS);
966                                 *s = *p++;
967                                 if (*s++ == '\\') {
                                            /* replace the backslash just stored by the result of
                                             * bb_process_escape_sequence(). If the escaped char was
                                             * a backslash itself, store a second one, so that "\\"
                                             * stays "\\". If nothing was consumed (p == pp), copy the
                                             * following char as is (presumably this is the case of a
                                             * sequence unknown to bb_process_escape_sequence, which
                                             * then returns the backslash - confirm) */
968                                         pp = p;
969                                         *(s-1) = bb_process_escape_sequence((const char **)&p);
970                                         if (*pp == '\\')
971                                                 *s++ = '\\';
972                                         if (p == pp)
973                                                 *s++ = *p++;
974                                 }
975                         }
976                         p++;
977                         *s = '\0';
978                         tc = TC_REGEXP;
979
980                 } else if (*p == '.' || isdigit(*p)) {
981                         /* it's a number */
982                         t_double = strtod(p, &p);
                            /* a '.' immediately following the number is an error */
983                         if (*p == '.')
984                                 syntax_error(EMSG_UNEXP_TOKEN);
985                         tc = TC_NUMBER;
986
987                 } else {
988                         /* search for something known */
                            /* tokenlist is a sequence of entries, each a length byte
                             * followed by that many chars of token text; a length byte
                             * equal to NTCC instead switches to the next token class
                             * (tc is shifted left). ti walks tokeninfo in step with
                             * the token entries. */
989                         tl = tokenlist;
990                         tc = 0x00000001;
991                         ti = tokeninfo;
992                         while (*tl) {
993                                 l = *(tl++);
994                                 if (l == NTCC) {
995                                         tc <<= 1;
996                                         continue;
997                                 }
998                                 /* if token class is expected, token
999                                  * matches and it's not a longer word,
1000                                  * then this is what we are looking for
1001                                  */
1002                                 if ((tc & (expected | TC_WORD | TC_NEWLINE))
1003                                  && *tl == *p && strncmp(p, tl, l) == 0
1004                                  && !((tc & TC_WORD) && isalnum_(p[l]))
1005                                 ) {
1006                                         t_info = *ti;
1007                                         p += l;
1008                                         break;
1009                                 }
1010                                 ti++;
1011                                 tl += l;
1012                         }
1013
1014                         if (!*tl) {
1015                                 /* it's a name (var/array/function),
1016                                  * otherwise it's something wrong
1017                                  */
1018                                 if (!isalnum_(*p))
1019                                         syntax_error(EMSG_UNEXP_TOKEN);
1020
                                     /* move the name one char to the left (over the char
                                      * preceding it): this makes room for the terminating
                                      * NUL without touching the char which follows the name */
1021                                 t_string = --p;
1022                                 while (isalnum_(*(++p))) {
1023                                         *(p-1) = *p;
1024                                 }
1025                                 *(p-1) = '\0';
1026                                 tc = TC_VARIABLE;
1027                                 /* also consume whitespace between functionname and bracket */
1028                                 if (!(expected & TC_VARIABLE))
1029                                         skip_spaces(&p);
                                     /* '(' is left unread for the caller, '[' is consumed */
1030                                 if (*p == '(') {
1031                                         tc = TC_FUNCTION;
1032                                 } else {
1033                                         if (*p == '[') {
1034                                                 p++;
1035                                                 tc = TC_ARRAY;
1036                                         }
1037                                 }
1038                         }
1039                 }
1040                 g_pos = p;
1041
1042                 /* skipping newlines in some cases */
                     /* (namely when the previously delivered token is of a TC_NOTERM class) */
1043                 if ((ltclass & TC_NOTERM) && (tc & TC_NEWLINE))
1044                         goto readnext;
1045
1046                 /* insert concatenation operator when needed */
                     /* the token just read is put aside in save_tclass/save_info
                      * and gets delivered by the next call */
1047                 if ((ltclass & TC_CONCAT1) && (tc & TC_CONCAT2) && (expected & TC_BINOP)) {
1048                         concat_inserted = TRUE;
1049                         save_tclass = tc;
1050                         save_info = t_info;
1051                         tc = TC_BINOP;
1052                         t_info = OC_CONCAT | SS | P(35);
1053                 }
1054
1055                 t_tclass = tc;
1056         }
1057         ltclass = t_tclass;
1058
1059         /* Are we ready for this? */
1060         if (!(ltclass & expected))
1061                 syntax_error((ltclass & (TC_NEWLINE | TC_EOF)) ?
1062                                 EMSG_UNEXP_EOS : EMSG_UNEXP_TOKEN);
1063
1064         return ltclass;
1065 #undef concat_inserted
1066 #undef save_tclass
1067 #undef save_info
1068 #undef ltclass
1069 }
1070
1071 static void rollback_token(void)
1072 {
1073         t_rollback = TRUE;
1074 }
1075
1076 static node *new_node(uint32_t info)
1077 {
1078         node *n;
1079
1080         n = xzalloc(sizeof(node));
1081         n->info = info;
1082         n->lineno = g_lineno;
1083         return n;
1084 }
1085
1086 static node *mk_re_node(const char *s, node *n, regex_t *re)
1087 {
1088         n->info = OC_REGEXP;
1089         n->l.re = re;
1090         n->r.ire = re + 1;
1091         xregcomp(re, s, REG_EXTENDED);
1092         xregcomp(re + 1, s, REG_EXTENDED | REG_ICASE);
1093
1094         return n;
1095 }
1096
1097 static node *condition(void)
1098 {
1099         next_token(TC_SEQSTART);
1100         return parse_expr(TC_SEQTERM);
1101 }
1102
1103 /* parse expression terminated by given argument, return ptr
1104  * to built subtree. Terminator is eaten by parse_expr */
1105 static node *parse_expr(uint32_t iexp)
1106 {
1107         node sn;
1108         node *cn = &sn;
1109         node *vn, *glptr;
1110         uint32_t tc, xtc;
1111         var *v;
1112
1113         sn.info = PRIMASK;
1114         sn.r.n = glptr = NULL;
1115         xtc = TC_OPERAND | TC_UOPPRE | TC_REGEXP | iexp;
1116
1117         while (!((tc = next_token(xtc)) & iexp)) {
1118                 if (glptr && (t_info == (OC_COMPARE | VV | P(39) | 2))) {
1119                         /* input redirection (<) attached to glptr node */
1120                         cn = glptr->l.n = new_node(OC_CONCAT | SS | P(37));
1121                         cn->a.n = glptr;
1122                         xtc = TC_OPERAND | TC_UOPPRE;
1123                         glptr = NULL;
1124
1125                 } else if (tc & (TC_BINOP | TC_UOPPOST)) {
1126                         /* for binary and postfix-unary operators, jump back over
1127                          * previous operators with higher priority */
1128                         vn = cn;
1129                         while ( ((t_info & PRIMASK) > (vn->a.n->info & PRIMASK2))
1130                          || ((t_info == vn->info) && ((t_info & OPCLSMASK) == OC_COLON)) )
1131                                 vn = vn->a.n;
1132                         if ((t_info & OPCLSMASK) == OC_TERNARY)
1133                                 t_info += P(6);
1134                         cn = vn->a.n->r.n = new_node(t_info);
1135                         cn->a.n = vn->a.n;
1136                         if (tc & TC_BINOP) {
1137                                 cn->l.n = vn;
1138                                 xtc = TC_OPERAND | TC_UOPPRE | TC_REGEXP;
1139                                 if ((t_info & OPCLSMASK) == OC_PGETLINE) {
1140                                         /* it's a pipe */
1141                                         next_token(TC_GETLINE);
1142                                         /* give maximum priority to this pipe */
1143                                         cn->info &= ~PRIMASK;
1144                                         xtc = TC_OPERAND | TC_UOPPRE | TC_BINOP | iexp;
1145                                 }
1146                         } else {
1147                                 cn->r.n = vn;
1148                                 xtc = TC_OPERAND | TC_UOPPRE | TC_BINOP | iexp;
1149                         }
1150                         vn->a.n = cn;
1151
1152                 } else {
1153                         /* for operands and prefix-unary operators, attach them
1154                          * to last node */
1155                         vn = cn;
1156                         cn = vn->r.n = new_node(t_info);
1157                         cn->a.n = vn;
1158                         xtc = TC_OPERAND | TC_UOPPRE | TC_REGEXP;
1159                         if (tc & (TC_OPERAND | TC_REGEXP)) {
1160                                 xtc = TC_UOPPRE | TC_UOPPOST | TC_BINOP | TC_OPERAND | iexp;
1161                                 /* one should be very careful with switch on tclass -
1162                                  * only simple tclasses should be used! */
1163                                 switch (tc) {
1164                                 case TC_VARIABLE:
1165                                 case TC_ARRAY:
1166                                         cn->info = OC_VAR;
1167                                         v = hash_search(ahash, t_string);
1168                                         if (v != NULL) {
1169                                                 cn->info = OC_FNARG;
1170                                                 cn->l.i = v->x.aidx;
1171                                         } else {
1172                                                 cn->l.v = newvar(t_string);
1173                                         }
1174                                         if (tc & TC_ARRAY) {
1175                                                 cn->info |= xS;
1176                                                 cn->r.n = parse_expr(TC_ARRTERM);
1177                                         }
1178                                         break;
1179
1180                                 case TC_NUMBER:
1181                                 case TC_STRING:
1182                                         cn->info = OC_VAR;
1183                                         v = cn->l.v = xzalloc(sizeof(var));
1184                                         if (tc & TC_NUMBER)
1185                                                 setvar_i(v, t_double);
1186                                         else
1187                                                 setvar_s(v, t_string);
1188                                         break;
1189
1190                                 case TC_REGEXP:
1191                                         mk_re_node(t_string, cn, xzalloc(sizeof(regex_t)*2));
1192                                         break;
1193
1194                                 case TC_FUNCTION:
1195                                         cn->info = OC_FUNC;
1196                                         cn->r.f = newfunc(t_string);
1197                                         cn->l.n = condition();
1198                                         break;
1199
1200                                 case TC_SEQSTART:
1201                                         cn = vn->r.n = parse_expr(TC_SEQTERM);
1202                                         cn->a.n = vn;
1203                                         break;
1204
1205                                 case TC_GETLINE:
1206                                         glptr = cn;
1207                                         xtc = TC_OPERAND | TC_UOPPRE | TC_BINOP | iexp;
1208                                         break;
1209
1210                                 case TC_BUILTIN:
1211                                         cn->l.n = condition();
1212                                         break;
1213                                 }
1214                         }
1215                 }
1216         }
1217         return sn.r.n;
1218 }
1219
1220 /* add node to chain. Return ptr to alloc'd node */
1221 static node *chain_node(uint32_t info)
1222 {
1223         node *n;
1224
1225         if (!seq->first)
1226                 seq->first = seq->last = new_node(0);
1227
1228         if (seq->programname != g_progname) {
1229                 seq->programname = g_progname;
1230                 n = chain_node(OC_NEWSOURCE);
1231                 n->l.s = xstrdup(g_progname);
1232         }
1233
1234         n = seq->last;
1235         n->info = info;
1236         seq->last = n->a.n = new_node(OC_DONE);
1237
1238         return n;
1239 }
1240
1241 static void chain_expr(uint32_t info)
1242 {
1243         node *n;
1244
1245         n = chain_node(info);
1246         n->l.n = parse_expr(TC_OPTERM | TC_GRPTERM);
1247         if (t_tclass & TC_GRPTERM)
1248                 rollback_token();
1249 }
1250
1251 static node *chain_loop(node *nn)
1252 {
1253         node *n, *n2, *save_brk, *save_cont;
1254
1255         save_brk = break_ptr;
1256         save_cont = continue_ptr;
1257
1258         n = chain_node(OC_BR | Vx);
1259         continue_ptr = new_node(OC_EXEC);
1260         break_ptr = new_node(OC_EXEC);
1261         chain_group();
1262         n2 = chain_node(OC_EXEC | Vx);
1263         n2->l.n = nn;
1264         n2->a.n = n;
1265         continue_ptr->a.n = n2;
1266         break_ptr->a.n = n->r.n = seq->last;
1267
1268         continue_ptr = save_cont;
1269         break_ptr = save_brk;
1270
1271         return n;
1272 }
1273
1274 /* parse group and attach it to chain */
1275 static void chain_group(void)
1276 {
1277         uint32_t c;
1278         node *n, *n2, *n3;
1279
1280         do {
1281                 c = next_token(TC_GRPSEQ);
1282         } while (c & TC_NEWLINE);
1283
1284         if (c & TC_GRPSTART) {
1285                 while (next_token(TC_GRPSEQ | TC_GRPTERM) != TC_GRPTERM) {
1286                         if (t_tclass & TC_NEWLINE) continue;
1287                         rollback_token();
1288                         chain_group();
1289                 }
1290         } else if (c & (TC_OPSEQ | TC_OPTERM)) {
1291                 rollback_token();
1292                 chain_expr(OC_EXEC | Vx);
1293         } else {                                                /* TC_STATEMNT */
1294                 switch (t_info & OPCLSMASK) {
1295                 case ST_IF:
1296                         n = chain_node(OC_BR | Vx);
1297                         n->l.n = condition();
1298                         chain_group();
1299                         n2 = chain_node(OC_EXEC);
1300                         n->r.n = seq->last;
1301                         if (next_token(TC_GRPSEQ | TC_GRPTERM | TC_ELSE) == TC_ELSE) {
1302                                 chain_group();
1303                                 n2->a.n = seq->last;
1304                         } else {
1305                                 rollback_token();
1306                         }
1307                         break;
1308
1309                 case ST_WHILE:
1310                         n2 = condition();
1311                         n = chain_loop(NULL);
1312                         n->l.n = n2;
1313                         break;
1314
1315                 case ST_DO:
1316                         n2 = chain_node(OC_EXEC);
1317                         n = chain_loop(NULL);
1318                         n2->a.n = n->a.n;
1319                         next_token(TC_WHILE);
1320                         n->l.n = condition();
1321                         break;
1322
1323                 case ST_FOR:
1324                         next_token(TC_SEQSTART);
1325                         n2 = parse_expr(TC_SEMICOL | TC_SEQTERM);
1326                         if (t_tclass & TC_SEQTERM) {    /* for-in */
1327                                 if ((n2->info & OPCLSMASK) != OC_IN)
1328                                         syntax_error(EMSG_UNEXP_TOKEN);
1329                                 n = chain_node(OC_WALKINIT | VV);
1330                                 n->l.n = n2->l.n;
1331                                 n->r.n = n2->r.n;
1332                                 n = chain_loop(NULL);
1333                                 n->info = OC_WALKNEXT | Vx;
1334                                 n->l.n = n2->l.n;
1335                         } else {                        /* for (;;) */
1336                                 n = chain_node(OC_EXEC | Vx);
1337                                 n->l.n = n2;
1338                                 n2 = parse_expr(TC_SEMICOL);
1339                                 n3 = parse_expr(TC_SEQTERM);
1340                                 n = chain_loop(n3);
1341                                 n->l.n = n2;
1342                                 if (!n2)
1343                                         n->info = OC_EXEC;
1344                         }
1345                         break;
1346
1347                 case OC_PRINT:
1348                 case OC_PRINTF:
1349                         n = chain_node(t_info);
1350                         n->l.n = parse_expr(TC_OPTERM | TC_OUTRDR | TC_GRPTERM);
1351                         if (t_tclass & TC_OUTRDR) {
1352                                 n->info |= t_info;
1353                                 n->r.n = parse_expr(TC_OPTERM | TC_GRPTERM);
1354                         }
1355                         if (t_tclass & TC_GRPTERM)
1356                                 rollback_token();
1357                         break;
1358
1359                 case OC_BREAK:
1360                         n = chain_node(OC_EXEC);
1361                         n->a.n = break_ptr;
1362                         break;
1363
1364                 case OC_CONTINUE:
1365                         n = chain_node(OC_EXEC);
1366                         n->a.n = continue_ptr;
1367                         break;
1368
1369                 /* delete, next, nextfile, return, exit */
1370                 default:
1371                         chain_expr(t_info);
1372                 }
1373         }
1374 }
1375
1376 static void parse_program(char *p)
1377 {
1378         uint32_t tclass;
1379         node *cn;
1380         func *f;
1381         var *v;
1382
1383         g_pos = p;
1384         t_lineno = 1;
1385         while ((tclass = next_token(TC_EOF | TC_OPSEQ | TC_GRPSTART |
1386                         TC_OPTERM | TC_BEGIN | TC_END | TC_FUNCDECL)) != TC_EOF) {
1387
1388                 if (tclass & TC_OPTERM)
1389                         continue;
1390
1391                 seq = &mainseq;
1392                 if (tclass & TC_BEGIN) {
1393                         seq = &beginseq;
1394                         chain_group();
1395
1396                 } else if (tclass & TC_END) {
1397                         seq = &endseq;
1398                         chain_group();
1399
1400                 } else if (tclass & TC_FUNCDECL) {
1401                         next_token(TC_FUNCTION);
1402                         g_pos++;
1403                         f = newfunc(t_string);
1404                         f->body.first = NULL;
1405                         f->nargs = 0;
1406                         while (next_token(TC_VARIABLE | TC_SEQTERM) & TC_VARIABLE) {
1407                                 v = findvar(ahash, t_string);
1408                                 v->x.aidx = (f->nargs)++;
1409
1410                                 if (next_token(TC_COMMA | TC_SEQTERM) & TC_SEQTERM)
1411                                         break;
1412                         }
1413                         seq = &(f->body);
1414                         chain_group();
1415                         clear_array(ahash);
1416
1417                 } else if (tclass & TC_OPSEQ) {
1418                         rollback_token();
1419                         cn = chain_node(OC_TEST);
1420                         cn->l.n = parse_expr(TC_OPTERM | TC_EOF | TC_GRPSTART);
1421                         if (t_tclass & TC_GRPSTART) {
1422                                 rollback_token();
1423                                 chain_group();
1424                         } else {
1425                                 chain_node(OC_PRINT);
1426                         }
1427                         cn->r.n = mainseq.last;
1428
1429                 } else /* if (tclass & TC_GRPSTART) */ {
1430                         rollback_token();
1431                         chain_group();
1432                 }
1433         }
1434 }
1435
1436
1437 /* -------- program execution part -------- */
1438
1439 static node *mk_splitter(const char *s, tsplitter *spl)
1440 {
1441         regex_t *re, *ire;
1442         node *n;
1443
1444         re = &spl->re[0];
1445         ire = &spl->re[1];
1446         n = &spl->n;
1447         if ((n->info & OPCLSMASK) == OC_REGEXP) {
1448                 regfree(re);
1449                 regfree(ire); // TODO: nuke ire, use re+1?
1450         }
1451         if (strlen(s) > 1) {
1452                 mk_re_node(s, n, re);
1453         } else {
1454                 n->info = (uint32_t) *s;
1455         }
1456
1457         return n;
1458 }
1459
1460 /* use node as a regular expression. Supplied with node ptr and regex_t
1461  * storage space. Return ptr to regex (if result points to preg, it should
1462  * be later regfree'd manually
1463  */
1464 static regex_t *as_regex(node *op, regex_t *preg)
1465 {
1466         var *v;
1467         const char *s;
1468
1469         if ((op->info & OPCLSMASK) == OC_REGEXP) {
1470                 return icase ? op->r.ire : op->l.re;
1471         }
1472         v = nvalloc(1);
1473         s = getvar_s(evaluate(op, v));
1474         xregcomp(preg, s, icase ? REG_EXTENDED | REG_ICASE : REG_EXTENDED);
1475         nvfree(v);
1476         return preg;
1477 }
1478
/* Gradually increasing buffer: if *b is not allocated yet, or n does not
 * fit below the recorded *size, reallocate *b to n + n/2 + 80 bytes and
 * record that in *size. Otherwise nothing is done */
static void qrealloc(char **b, int n, int *size)
{
        if (*b != NULL && n < *size)
                return;

        *size = n + (n >> 1) + 80;
        *b = xrealloc(*b, *size);
}
1485
1486 /* resize field storage space */
1487 static void fsrealloc(int size)
1488 {
1489         int i;
1490
1491         if (size >= maxfields) {
1492                 i = maxfields;
1493                 maxfields = size + 16;
1494                 Fields = xrealloc(Fields, maxfields * sizeof(var));
1495                 for (; i < maxfields; i++) {
1496                         Fields[i].type = VF_SPECIAL;
1497                         Fields[i].string = NULL;
1498                 }
1499         }
1500
1501         if (size < nfields) {
1502                 for (i = size; i < nfields; i++) {
1503                         clrvar(Fields + i);
1504                 }
1505         }
1506         nfields = size;
1507 }
1508
/* Split string s into fields as directed by splitter node spl.
 * A buffer is allocated and stored in *slist; the fields are written into
 * it one after another, each terminated by NUL. Returns the field count.
 *
 * How to split is taken from spl->info:
 *  - opcode class OC_REGEXP: fields are delimited by matches of the regex
 *    (spl->r.ire if icase is set, spl->l.re otherwise);
 *  - otherwise spl->info converted to char is the separator: NUL makes
 *    every char a field of its own, ' ' splits on runs of whitespace,
 *    any other char is a single-character delimiter (with icase set,
 *    both its upper and lower case forms delimit).
 * If RS is the empty string, '\n' also ends a field in the regex and
 * single-character modes.
 */
1509 static int awk_split(const char *s, node *spl, char **slist)
1510 {
1511         int l, n = 0;
1512         char c[4];
1513         char *s1;
1514         regmatch_t pmatch[2]; // TODO: why [2]? [1] is enough...
1515
1516         /* in worst case, each char would be a separate field */
1517         *slist = s1 = xzalloc(strlen(s) * 2 + 3);
1518         strcpy(s1, s);
1519
             /* c is used in two ways: as the string c[0] c[1] [\n] of delimiter
              * chars for strpbrk() below, and, from c+2 on, as the string of
              * chars (either none or just \n) for strcspn() */
1520         c[0] = c[1] = (char)spl->info;
1521         c[2] = c[3] = '\0';
1522         if (*getvar_s(intvar[RS]) == '\0')
1523                 c[2] = '\n';
1524
1525         if ((spl->info & OPCLSMASK) == OC_REGEXP) {  /* regex split */
1526                 if (!*s)
1527                         return n; /* "": zero fields */
1528                 n++; /* at least one field will be there */
1529                 do {
1530                         l = strcspn(s, c+2); /* len till next NUL or \n */
1531                         if (regexec(icase ? spl->r.ire : spl->l.re, s, 1, pmatch, 0) == 0
1532                          && pmatch[0].rm_so <= l
1533                         ) {
                                     /* delimiter found: the field is what precedes it */
1534                                 l = pmatch[0].rm_so;
                                     /* empty match at the very start: take one char
                                      * as the field, so that s always advances */
1535                                 if (pmatch[0].rm_eo == 0) {
1536                                         l++;
1537                                         pmatch[0].rm_eo++;
1538                                 }
1539                                 n++; /* we saw yet another delimiter */
1540                         } else {
                                     /* no delimiter before the end of the string (or the \n):
                                      * the field is all of it; a \n, if present, is skipped.
                                      * NOTE(review): n is not incremented here even when
                                      * the field ends at a \n and more text follows -
                                      * confirm that this is intended */
1541                                 pmatch[0].rm_eo = l;
1542                                 if (s[l]) pmatch[0].rm_eo++;
1543                         }
                             /* store the field, then step over it and its delimiter */
1544                         memcpy(s1, s, l);
1545                         s1[l] = '\0';
1546                         nextword(&s1);
1547                         s += pmatch[0].rm_eo;
1548                 } while (*s);
1549                 return n;
1550         }
1551         if (c[0] == '\0') {  /* null split */
1552                 while (*s) {
1553                         *s1++ = *s++;
1554                         *s1++ = '\0';
1555                         n++;
1556                 }
1557                 return n;
1558         }
1559         if (c[0] != ' ') {  /* single-character split */
1560                 if (icase) {
1561                         c[0] = toupper(c[0]);
1562                         c[1] = tolower(c[1]);
1563                 }
                     /* works on the copy of s made above: every delimiter found
                      * in it is overwritten with NUL */
1564                 if (*s1) n++;
1565                 while ((s1 = strpbrk(s1, c))) {
1566                         *s1++ = '\0';
1567                         n++;
1568                 }
1569                 return n;
1570         }
1571         /* space split */
1572         while (*s) {
1573                 s = skip_whitespace(s);
1574                 if (!*s) break;
1575                 n++;
1576                 while (*s && !isspace(*s))
1577                         *s1++ = *s++;
1578                 *s1++ = '\0';
1579         }
1580         return n;
1581 }
1582
1583 static void split_f0(void)
1584 {
1585 /* static char *fstrings; */
1586 #define fstrings (G.split_f0__fstrings)
1587
1588         int i, n;
1589         char *s;
1590
1591         if (is_f0_split)
1592                 return;
1593
1594         is_f0_split = TRUE;
1595         free(fstrings);
1596         fsrealloc(0);
1597         n = awk_split(getvar_s(intvar[F0]), &fsplitter.n, &fstrings);
1598         fsrealloc(n);
1599         s = fstrings;
1600         for (i = 0; i < n; i++) {
1601                 Fields[i].string = nextword(&s);
1602                 Fields[i].type |= (VF_FSTR | VF_USER | VF_DIRTY);
1603         }
1604
1605         /* set NF manually to avoid side effects */
1606         clrvar(intvar[NF]);
1607         intvar[NF]->type = VF_NUMBER | VF_SPECIAL;
1608         intvar[NF]->number = nfields;
1609 #undef fstrings
1610 }
1611
1612 /* perform additional actions when some internal variables changed */
1613 static void handle_special(var *v)
1614 {
1615         int n;
1616         char *b;
1617         const char *sep, *s;
1618         int sl, l, len, i, bsize;
1619
1620         if (!(v->type & VF_SPECIAL))
1621                 return;
1622
1623         if (v == intvar[NF]) {
1624                 n = (int)getvar_i(v);
1625                 fsrealloc(n);
1626
1627                 /* recalculate $0 */
1628                 sep = getvar_s(intvar[OFS]);
1629                 sl = strlen(sep);
1630                 b = NULL;
1631                 len = 0;
1632                 for (i = 0; i < n; i++) {
1633                         s = getvar_s(&Fields[i]);
1634                         l = strlen(s);
1635                         if (b) {
1636                                 memcpy(b+len, sep, sl);
1637                                 len += sl;
1638                         }
1639                         qrealloc(&b, len+l+sl, &bsize);
1640                         memcpy(b+len, s, l);
1641                         len += l;
1642                 }
1643                 if (b)
1644                         b[len] = '\0';
1645                 setvar_p(intvar[F0], b);
1646                 is_f0_split = TRUE;
1647
1648         } else if (v == intvar[F0]) {
1649                 is_f0_split = FALSE;
1650
1651         } else if (v == intvar[FS]) {
1652                 mk_splitter(getvar_s(v), &fsplitter);
1653
1654         } else if (v == intvar[RS]) {
1655                 mk_splitter(getvar_s(v), &rsplitter);
1656
1657         } else if (v == intvar[IGNORECASE]) {
1658                 icase = istrue(v);
1659
1660         } else {                                /* $n */
1661                 n = getvar_i(intvar[NF]);
1662                 setvar_i(intvar[NF], n > v-Fields ? n : v-Fields+1);
1663                 /* right here v is invalid. Just to note... */
1664         }
1665 }
1666
1667 /* step through func/builtin/etc arguments */
1668 static node *nextarg(node **pn)
1669 {
1670         node *n;
1671
1672         n = *pn;
1673         if (n && (n->info & OPCLSMASK) == OC_COMMA) {
1674                 *pn = n->r.n;
1675                 n = n->l.n;
1676         } else {
1677                 *pn = NULL;
1678         }
1679         return n;
1680 }
1681
1682 static void hashwalk_init(var *v, xhash *array)
1683 {
1684         char **w;
1685         hash_item *hi;
1686         int i;
1687
1688         if (v->type & VF_WALK)
1689                 free(v->x.walker);
1690
1691         v->type |= VF_WALK;
1692         w = v->x.walker = xzalloc(2 + 2*sizeof(char *) + array->glen);
1693         w[0] = w[1] = (char *)(w + 2);
1694         for (i = 0; i < array->csize; i++) {
1695                 hi = array->items[i];
1696                 while (hi) {
1697                         strcpy(*w, hi->name);
1698                         nextword(w);
1699                         hi = hi->next;
1700                 }
1701         }
1702 }
1703
1704 static int hashwalk_next(var *v)
1705 {
1706         char **w;
1707
1708         w = v->x.walker;
1709         if (w[1] == w[0])
1710                 return FALSE;
1711
1712         setvar_s(v, nextword(w+1));
1713         return TRUE;
1714 }
1715
1716 /* evaluate node, return 1 when result is true, 0 otherwise */
1717 static int ptest(node *pattern)
1718 {
1719         /* ptest__v is "static": to save stack space? */
1720         return istrue(evaluate(pattern, &G.ptest__v));
1721 }
1722
1723 /* Read the next record from stream rsm into variable v.
 *
 * Input is pulled with safe_read() into a private buffer kept in rsm
 * (buffer/adv/pos/size), so bytes left over after the record terminator
 * are still available on the next call.  How a record ends is decided by
 * rsplitter:
 *   - OC_REGEXP node: the first regex match is the terminator;
 *   - single character c != '\0': that character (or a NUL byte in the
 *     data) is the terminator;
 *   - c == '\0': leading newlines are skipped and a run of two or more
 *     newlines is the terminator.
 *
 * On success v receives the record text (flagged VF_USER) and RT receives
 * the terminator text.
 *
 * Returns 1 when a record was stored, 0 when no data is left, and -1 when
 * safe_read() failed (ERRNO is set in that case).
 */
1724 static int awk_getline(rstream *rsm, var *v)
1725 {
1726         char *b;
1727         regmatch_t pmatch[2];
1728         int a, p, pp=0, size;
1729         int fd, so, eo, r, rp;
1730         char c, *m, *s;
1731
1732         /* we're using our own buffer since we need access to accumulating
1733          * characters
1734          */
1735         fd = fileno(rsm->F);
1736         m = rsm->buffer;        /* buffer base */
1737         a = rsm->adv;           /* offset of unconsumed data within m */
1738         p = rsm->pos;           /* number of unconsumed bytes at m+a */
1739         size = rsm->size;
1740         c = (char) rsplitter.n.info;    /* separator char for the single-char mode */
1741         rp = 0;                 /* offset of record start (moves only in "\n\n" mode) */
1742
1743         if (!m) qrealloc(&m, 256, &size);       /* no buffer yet: allocate one */
1744         do {
1745                 b = m + a;
1746                 so = eo = p;    /* default: no terminator, record runs to end of data */
1747                 r = 1;
1748                 if (p > 0) {    /* only scan when there is buffered data */
1749                         if ((rsplitter.n.info & OPCLSMASK) == OC_REGEXP) {
1750                                 if (regexec(icase ? rsplitter.n.r.ire : rsplitter.n.l.re,
1751                                                         b, 1, pmatch, 0) == 0) {
1752                                         so = pmatch[0].rm_so;
1753                                         eo = pmatch[0].rm_eo;
1754                                         if (b[eo] != '\0')      /* a match touching end of data is not accepted yet: read more */
1755                                                 break;
1756                                 }
1757                         } else if (c != '\0') {
1758                                 s = strchr(b+pp, c);    /* search starts at pp: bytes before it were scanned on the previous pass */
1759                                 if (!s) s = memchr(b+pp, '\0', p - pp);        /* a NUL byte inside the data also ends the record */
1760                                 if (s) {
1761                                         so = eo = s-b;
1762                                         eo++;
1763                                         break;
1764                                 }
1765                         } else {
1766                                 while (b[rp] == '\n')   /* skip newlines in front of the record */
1767                                         rp++;
1768                                 s = strstr(b+rp, "\n\n");
1769                                 if (s) {
1770                                         so = eo = s-b;
1771                                         while (b[eo] == '\n') eo++;     /* terminator is the whole newline run */
1772                                         if (b[eo] != '\0')      /* run touching end of data may continue: read more */
1773                                                 break;
1774                                 }
1775                         }
1776                 }
1777
1778                 if (a > 0) {    /* compact: move unconsumed data (and its NUL) to the buffer start */
1779                         memmove(m, (const void *)(m+a), p+1);
1780                         b = m;
1781                         a = 0;
1782                 }
1783
1784                 qrealloc(&m, a+p+128, &size);
1785                 b = m + a;
1786                 pp = p;
1787                 p += safe_read(fd, b+p, size-p-1);      /* a -1 return makes p < pp, handled just below */
1788                 if (p < pp) {
1789                         p = 0;
1790                         r = 0;          /* becomes -1 after the r-- below */
1791                         setvar_i(intvar[ERRNO], errno);
1792                 }
1793                 b[p] = '\0';
1794
1795         } while (p > pp);       /* stop once a read adds nothing (end of input or error) */
1796
1797         if (p == 0) {
1798                 r--;
1799         } else {
1800                 c = b[so]; b[so] = '\0';        /* temporarily NUL-terminate the record text */
1801                 setvar_s(v, b+rp);
1802                 v->type |= VF_USER;
1803                 b[so] = c;
1804                 c = b[eo]; b[eo] = '\0';        /* same trick for the terminator text */
1805                 setvar_s(intvar[RT], b+so);
1806                 b[eo] = c;
1807         }
1808
1809         rsm->buffer = m;
1810         rsm->adv = a + eo;      /* next call starts just past the terminator */
1811         rsm->pos = p - eo;      /* bytes still buffered after it */
1812         rsm->size = size;
1813
1814         return r;
1815 }
1816
1817 static int fmt_num(char *b, int size, const char *format, double n, int int_as_int)
1818 {
1819         int r = 0;
1820         char c;
1821         const char *s = format;
1822
1823         if (int_as_int && n == (int)n) {
1824                 r = snprintf(b, size, "%d", (int)n);
1825         } else {
1826                 do { c = *s; } while (c && *++s);
1827                 if (strchr("diouxX", c)) {
1828                         r = snprintf(b, size, format, (int)n);
1829                 } else if (strchr("eEfgG", c)) {
1830                         r = snprintf(b, size, format, n);
1831                 } else {
1832                         syntax_error(EMSG_INV_FMT);
1833                 }
1834         }
1835         return r;
1836 }
1837
1838
1839 /* formatted output into an allocated buffer, return ptr to buffer */
1840 static char *awk_printf(node *n)
1841 {
1842         char *b = NULL;
1843         char *fmt, *s, *f;
1844         const char *s1;
1845         int i, j, incr, bsize;
1846         char c, c1;
1847         var *v, *arg;
1848
1849         v = nvalloc(1);
1850         fmt = f = xstrdup(getvar_s(evaluate(nextarg(&n), v)));
1851
1852         i = 0;
1853         while (*f) {
1854                 s = f;
1855                 while (*f && (*f != '%' || *(++f) == '%'))
1856                         f++;
1857                 while (*f && !isalpha(*f)) {
1858                         if (*f == '*')
1859                                 syntax_error("%*x formats are not supported");
1860                         f++;
1861                 }
1862
1863                 incr = (f - s) + MAXVARFMT;
1864                 qrealloc(&b, incr + i, &bsize);
1865                 c = *f;
1866                 if (c != '\0') f++;
1867                 c1 = *f;
1868                 *f = '\0';
1869                 arg = evaluate(nextarg(&n), v);
1870
1871                 j = i;
1872                 if (c == 'c' || !c) {
1873                         i += sprintf(b+i, s, is_numeric(arg) ?
1874                                         (char)getvar_i(arg) : *getvar_s(arg));
1875                 } else if (c == 's') {
1876                         s1 = getvar_s(arg);
1877                         qrealloc(&b, incr+i+strlen(s1), &bsize);
1878                         i += sprintf(b+i, s, s1);
1879                 } else {
1880                         i += fmt_num(b+i, incr, s, getvar_i(arg), FALSE);
1881                 }
1882                 *f = c1;
1883
1884                 /* if there was an error while sprintf, return value is negative */
1885                 if (i < j) i = j;
1886         }
1887
1888         b = xrealloc(b, i + 1);
1889         free(fmt);
1890         nvfree(v);
1891         b[i] = '\0';
1892         return b;
1893 }
1894
1895 /* Common substitution routine.
 *
 * Scans the string value of src for matches of regex node rn and builds a
 * new string in which the nm-th match is replaced by repl; with nm == 0
 * every match is replaced.  The result is stored into dest.  A NULL src
 * or dest stands for $0.
 *
 * In repl, '&' preceded by an even number of backslashes (including none)
 * inserts the matched text.  If ex is TRUE, a digit preceded by an odd
 * number of backslashes (e.g. \1) inserts the corresponding subexpression.
 * A run of backslashes directly in front of such a character is halved.
 *
 * Returns the number of matches counted before scanning stopped; for
 * nm == 0 or nm == 1 this equals the number of substitutions made.
 */
1901 static int awk_sub(node *rn, const char *repl, int nm, var *src, var *dest, int ex)
1902 {
1903         char *ds = NULL;
1904         const char *s;
1905         const char *sp;
1906         int c, i, j, di, rl, so, eo, nbs, n, dssize;
1907         regmatch_t pmatch[10];
1908         regex_t sreg, *re;
1909
1910         re = as_regex(rn, &sreg);
1911         if (!src) src = intvar[F0];
1912         if (!dest) dest = intvar[F0];
1913
1914         i = di = 0;     /* i: matches seen so far, di: write offset in ds */
1915         sp = getvar_s(src);
1916         rl = strlen(repl);
1917         while (regexec(re, sp, 10, pmatch, sp==getvar_s(src) ? 0 : REG_NOTBOL) == 0) {        /* past the first position "^" must not match */
1918                 so = pmatch[0].rm_so;
1919                 eo = pmatch[0].rm_eo;
1920
1921                 qrealloc(&ds, di + eo + rl, &dssize);
1922                 memcpy(ds + di, sp, eo);        /* copy text up to and including the match */
1923                 di += eo;
1924                 if (++i >= nm) {
1925                         /* replace */
1926                         di -= (eo - so);        /* drop the matched text that was just copied */
1927                         nbs = 0;                /* length of the backslash run before the current char */
1928                         for (s = repl; *s; s++) {
1929                                 ds[di++] = c = *s;
1930                                 if (c == '\\') {
1931                                         nbs++;
1932                                         continue;
1933                                 }
1934                                 if (c == '&' || (ex && c >= '0' && c <= '9')) {
1935                                         di -= ((nbs + 3) >> 1); /* un-write c and half of the backslash run (rounded up) */
1936                                         j = 0;                  /* '&' refers to the whole match */
1937                                         if (c != '&') {
1938                                                 j = c - '0';
1939                                                 nbs++;          /* flips parity: a digit needs an odd run to be special */
1940                                         }
1941                                         if (nbs % 2) {
1942                                                 ds[di++] = c;   /* escaped: keep the character literally */
1943                                         } else {
1944                                                 n = pmatch[j].rm_eo - pmatch[j].rm_so;
1945                                                 qrealloc(&ds, di + rl + n, &dssize);
1946                                                 memcpy(ds + di, sp + pmatch[j].rm_so, n);
1947                                                 di += n;
1948                                         }
1949                                 }
1950                                 nbs = 0;
1951                         }
1952                 }
1953
1954                 sp += eo;
1955                 if (i == nm) break;     /* the requested match has been replaced */
1956                 if (eo == so) {         /* empty match: copy one char so the scan advances */
1957                         ds[di] = *sp++;
1958                         if (!ds[di++]) break;   /* that char was the terminating NUL */
1959                 }
1960         }
1961
1962         qrealloc(&ds, di + strlen(sp), &dssize);
1963         strcpy(ds + di, sp);    /* append the unscanned remainder */
1964         setvar_p(dest, ds);
1965         if (re == &sreg) regfree(re);   /* free only if as_regex() handed back our scratch regex */
1966         return i;
1967 }
1968
1969 static var *exec_builtin(node *op, var *res)
1970 {
1971 #define tspl (G.exec_builtin__tspl)
1972
1973         int (*to_xxx)(int);
1974         var *tv;
1975         node *an[4];
1976         var *av[4];
1977         const char *as[4];
1978         regmatch_t pmatch[2];
1979         regex_t sreg, *re;
1980         node *spl;
1981         uint32_t isr, info;
1982         int nargs;
1983         time_t tt;
1984         char *s, *s1;
1985         int i, l, ll, n;
1986
1987         tv = nvalloc(4);
1988         isr = info = op->info;
1989         op = op->l.n;
1990
1991         av[2] = av[3] = NULL;
1992         for (i = 0; i < 4 && op; i++) {
1993                 an[i] = nextarg(&op);
1994                 if (isr & 0x09000000) av[i] = evaluate(an[i], &tv[i]);
1995                 if (isr & 0x08000000) as[i] = getvar_s(av[i]);
1996                 isr >>= 1;
1997         }
1998
1999         nargs = i;
2000         if (nargs < (info >> 30))
2001                 syntax_error(EMSG_TOO_FEW_ARGS);
2002
2003         switch (info & OPNMASK) {
2004
2005         case B_a2:
2006 #if ENABLE_FEATURE_AWK_MATH
2007                 setvar_i(res, atan2(getvar_i(av[i]), getvar_i(av[1])));
2008 #else
2009                 syntax_error(EMSG_NO_MATH);
2010 #endif
2011                 break;
2012
2013         case B_sp:
2014                 if (nargs > 2) {
2015                         spl = (an[2]->info & OPCLSMASK) == OC_REGEXP ?
2016                                 an[2] : mk_splitter(getvar_s(evaluate(an[2], &tv[2])), &tspl);
2017                 } else {
2018                         spl = &fsplitter.n;
2019                 }
2020
2021                 n = awk_split(as[0], spl, &s);
2022                 s1 = s;
2023                 clear_array(iamarray(av[1]));
2024                 for (i=1; i<=n; i++)
2025                         setari_u(av[1], i, nextword(&s1));
2026                 free(s);
2027                 setvar_i(res, n);
2028                 break;
2029
2030         case B_ss:
2031                 l = strlen(as[0]);
2032                 i = getvar_i(av[1]) - 1;
2033                 if (i > l) i = l;
2034                 if (i < 0) i = 0;
2035                 n = (nargs > 2) ? getvar_i(av[2]) : l-i;
2036                 if (n < 0) n = 0;
2037                 s = xmalloc(n+1);
2038                 strncpy(s, as[0]+i, n);
2039                 s[n] = '\0';
2040                 setvar_p(res, s);
2041                 break;
2042
2043         case B_an:
2044                 setvar_i(res, (long)getvar_i(av[0]) & (long)getvar_i(av[1]));
2045                 break;
2046
2047         case B_co:
2048                 setvar_i(res, ~(long)getvar_i(av[0]));
2049                 break;
2050
2051         case B_ls:
2052                 setvar_i(res, (long)getvar_i(av[0]) << (long)getvar_i(av[1]));
2053                 break;
2054
2055         case B_or:
2056                 setvar_i(res, (long)getvar_i(av[0]) | (long)getvar_i(av[1]));
2057                 break;
2058
2059         case B_rs:
2060                 setvar_i(res, (long)((unsigned long)getvar_i(av[0]) >> (unsigned long)getvar_i(av[1])));
2061                 break;
2062
2063         case B_xo:
2064                 setvar_i(res, (long)getvar_i(av[0]) ^ (long)getvar_i(av[1]));
2065                 break;
2066
2067         case B_lo:
2068                 to_xxx = tolower;
2069                 goto lo_cont;
2070
2071         case B_up:
2072                 to_xxx = toupper;
2073  lo_cont:
2074                 s1 = s = xstrdup(as[0]);
2075                 while (*s1) {
2076                         *s1 = (*to_xxx)(*s1);
2077                         s1++;
2078                 }
2079                 setvar_p(res, s);
2080                 break;
2081
2082         case B_ix:
2083                 n = 0;
2084                 ll = strlen(as[1]);
2085                 l = strlen(as[0]) - ll;
2086                 if (ll > 0 && l >= 0) {
2087                         if (!icase) {
2088                                 s = strstr(as[0], as[1]);
2089                                 if (s) n = (s - as[0]) + 1;
2090                         } else {
2091                                 /* this piece of code is terribly slow and
2092                                  * really should be rewritten
2093                                  */
2094                                 for (i=0; i<=l; i++) {
2095                                         if (strncasecmp(as[0]+i, as[1], ll) == 0) {
2096                                                 n = i+1;
2097                                                 break;
2098                                         }
2099                                 }
2100                         }
2101                 }
2102                 setvar_i(res, n);
2103                 break;
2104
2105         case B_ti:
2106                 if (nargs > 1)
2107                         tt = getvar_i(av[1]);
2108                 else
2109                         time(&tt);
2110                 //s = (nargs > 0) ? as[0] : "%a %b %d %H:%M:%S %Z %Y";
2111                 i = strftime(g_buf, MAXVARFMT,
2112                         ((nargs > 0) ? as[0] : "%a %b %d %H:%M:%S %Z %Y"),
2113                         localtime(&tt));
2114                 g_buf[i] = '\0';
2115                 setvar_s(res, g_buf);
2116                 break;
2117
2118         case B_ma:
2119                 re = as_regex(an[1], &sreg);
2120                 n = regexec(re, as[0], 1, pmatch, 0);
2121                 if (n == 0) {
2122                         pmatch[0].rm_so++;
2123                         pmatch[0].rm_eo++;
2124                 } else {
2125                         pmatch[0].rm_so = 0;
2126                         pmatch[0].rm_eo = -1;
2127                 }
2128                 setvar_i(newvar("RSTART"), pmatch[0].rm_so);
2129                 setvar_i(newvar("RLENGTH"), pmatch[0].rm_eo - pmatch[0].rm_so);
2130                 setvar_i(res, pmatch[0].rm_so);
2131                 if (re == &sreg) regfree(re);
2132                 break;
2133
2134         case B_ge:
2135                 awk_sub(an[0], as[1], getvar_i(av[2]), av[3], res, TRUE);
2136                 break;
2137
2138         case B_gs:
2139                 setvar_i(res, awk_sub(an[0], as[1], 0, av[2], av[2], FALSE));
2140                 break;
2141
2142         case B_su:
2143                 setvar_i(res, awk_sub(an[0], as[1], 1, av[2], av[2], FALSE));
2144                 break;
2145         }
2146
2147         nvfree(tv);
2148         return res;
2149 #undef tspl
2150 }
2151
2152 /*
2153  * Evaluate node - the heart of the program. Supplied with subtree
2154  * and place where to store result. returns ptr to result.
2155  */
2156 #define XC(n) ((n) >> 8)  /* drops the low 8 bits of n; evaluate() applies it to its switch operand and to every case label */
2157
2158 static var *evaluate(node *op, var *res)
2159 {
2160 /* This procedure is recursive so we should count every byte */
2161 #define fnargs (G.evaluate__fnargs)
2162 /* seed is initialized to 1 */
2163 #define seed   (G.evaluate__seed)
2164 #define sreg   (G.evaluate__sreg)
2165
2166         node *op1;
2167         var *v1;
2168         union {
2169                 var *v;
2170                 const char *s;
2171                 double d;
2172                 int i;
2173         } L, R;
2174         uint32_t opinfo;
2175         int opn;
2176         union {
2177                 char *s;
2178                 rstream *rsm;
2179                 FILE *F;
2180                 var *v;
2181                 regex_t *re;
2182                 uint32_t info;
2183         } X;
2184
2185         if (!op)
2186                 return setvar_s(res, NULL);
2187
2188         v1 = nvalloc(2);
2189
2190         while (op) {
2191                 opinfo = op->info;
2192                 opn = (opinfo & OPNMASK);
2193                 g_lineno = op->lineno;
2194
2195                 /* execute inevitable things */
2196                 op1 = op->l.n;
2197                 if (opinfo & OF_RES1) X.v = L.v = evaluate(op1, v1);
2198                 if (opinfo & OF_RES2) R.v = evaluate(op->r.n, v1+1);
2199                 if (opinfo & OF_STR1) L.s = getvar_s(L.v);
2200                 if (opinfo & OF_STR2) R.s = getvar_s(R.v);
2201                 if (opinfo & OF_NUM1) L.d = getvar_i(L.v);
2202
2203                 switch (XC(opinfo & OPCLSMASK)) {
2204
2205                 /* -- iterative node type -- */
2206
2207                 /* test pattern */
2208                 case XC( OC_TEST ):
2209                         if ((op1->info & OPCLSMASK) == OC_COMMA) {
2210                                 /* it's range pattern */
2211                                 if ((opinfo & OF_CHECKED) || ptest(op1->l.n)) {
2212                                         op->info |= OF_CHECKED;
2213                                         if (ptest(op1->r.n))
2214                                                 op->info &= ~OF_CHECKED;
2215
2216                                         op = op->a.n;
2217                                 } else {
2218                                         op = op->r.n;
2219                                 }
2220                         } else {
2221                                 op = (ptest(op1)) ? op->a.n : op->r.n;
2222                         }
2223                         break;
2224
2225                 /* just evaluate an expression, also used as unconditional jump */
2226                 case XC( OC_EXEC ):
2227                         break;
2228
2229                 /* branch, used in if-else and various loops */
2230                 case XC( OC_BR ):
2231                         op = istrue(L.v) ? op->a.n : op->r.n;
2232                         break;
2233
2234                 /* initialize for-in loop */
2235                 case XC( OC_WALKINIT ):
2236                         hashwalk_init(L.v, iamarray(R.v));
2237                         break;
2238
2239                 /* get next array item */
2240                 case XC( OC_WALKNEXT ):
2241                         op = hashwalk_next(L.v) ? op->a.n : op->r.n;
2242                         break;
2243
2244                 case XC( OC_PRINT ):
2245                 case XC( OC_PRINTF ):
2246                         X.F = stdout;
2247                         if (op->r.n) {
2248                                 X.rsm = newfile(R.s);
2249                                 if (!X.rsm->F) {
2250                                         if (opn == '|') {
2251                                                 X.rsm->F = popen(R.s, "w");
2252                                                 if (X.rsm->F == NULL)
2253                                                         bb_perror_msg_and_die("popen");
2254                                                 X.rsm->is_pipe = 1;
2255                                         } else {
2256                                                 X.rsm->F = xfopen(R.s, opn=='w' ? "w" : "a");
2257                                         }
2258                                 }
2259                                 X.F = X.rsm->F;
2260                         }
2261
2262                         if ((opinfo & OPCLSMASK) == OC_PRINT) {
2263                                 if (!op1) {
2264                                         fputs(getvar_s(intvar[F0]), X.F);
2265                                 } else {
2266                                         while (op1) {
2267                                                 L.v = evaluate(nextarg(&op1), v1);
2268                                                 if (L.v->type & VF_NUMBER) {
2269                                                         fmt_num(g_buf, MAXVARFMT, getvar_s(intvar[OFMT]),
2270                                                                         getvar_i(L.v), TRUE);
2271                                                         fputs(g_buf, X.F);
2272                                                 } else {
2273                                                         fputs(getvar_s(L.v), X.F);
2274                                                 }
2275
2276                                                 if (op1) fputs(getvar_s(intvar[OFS]), X.F);
2277                                         }
2278                                 }
2279                                 fputs(getvar_s(intvar[ORS]), X.F);
2280
2281                         } else {        /* OC_PRINTF */
2282                                 L.s = awk_printf(op1);
2283                                 fputs(L.s, X.F);
2284                                 free((char*)L.s);
2285                         }
2286                         fflush(X.F);
2287                         break;
2288
2289                 case XC( OC_DELETE ):
2290                         X.info = op1->info & OPCLSMASK;
2291                         if (X.info == OC_VAR) {
2292                                 R.v = op1->l.v;
2293                         } else if (X.info == OC_FNARG) {
2294                                 R.v = &fnargs[op1->l.i];
2295                         } else {
2296                                 syntax_error(EMSG_NOT_ARRAY);
2297                         }
2298
2299                         if (op1->r.n) {
2300                                 clrvar(L.v);
2301                                 L.s = getvar_s(evaluate(op1->r.n, v1));
2302                                 hash_remove(iamarray(R.v), L.s);
2303                         } else {
2304                                 clear_array(iamarray(R.v));
2305                         }
2306                         break;
2307
2308                 case XC( OC_NEWSOURCE ):
2309                         g_progname = op->l.s;
2310                         break;
2311
2312                 case XC( OC_RETURN ):
2313                         copyvar(res, L.v);
2314                         break;
2315
2316                 case XC( OC_NEXTFILE ):
2317                         nextfile = TRUE;
2318                 case XC( OC_NEXT ):
2319                         nextrec = TRUE;
2320                 case XC( OC_DONE ):
2321                         clrvar(res);
2322                         break;
2323
2324                 case XC( OC_EXIT ):
2325                         awk_exit(L.d);
2326
2327                 /* -- recursive node type -- */
2328
2329                 case XC( OC_VAR ):
2330                         L.v = op->l.v;
2331                         if (L.v == intvar[NF])
2332                                 split_f0();
2333                         goto v_cont;
2334
2335                 case XC( OC_FNARG ):
2336                         L.v = &fnargs[op->l.i];
2337  v_cont:
2338                         res = op->r.n ? findvar(iamarray(L.v), R.s) : L.v;
2339                         break;
2340
2341                 case XC( OC_IN ):
2342                         setvar_i(res, hash_search(iamarray(R.v), L.s) ? 1 : 0);
2343                         break;
2344
2345                 case XC( OC_REGEXP ):
2346                         op1 = op;
2347                         L.s = getvar_s(intvar[F0]);
2348                         goto re_cont;
2349
2350                 case XC( OC_MATCH ):
2351                         op1 = op->r.n;
2352  re_cont:
2353                         X.re = as_regex(op1, &sreg);
2354                         R.i = regexec(X.re, L.s, 0, NULL, 0);
2355                         if (X.re == &sreg) regfree(X.re);
2356                         setvar_i(res, (R.i == 0 ? 1 : 0) ^ (opn == '!' ? 1 : 0));
2357                         break;
2358
2359                 case XC( OC_MOVE ):
2360                         /* if source is a temporary string, jusk relink it to dest */
2361                         if (R.v == v1+1 && R.v->string) {
2362                                 res = setvar_p(L.v, R.v->string);
2363                                 R.v->string = NULL;
2364                         } else {
2365                                 res = copyvar(L.v, R.v);
2366                         }
2367                         break;
2368
2369                 case XC( OC_TERNARY ):
2370                         if ((op->r.n->info & OPCLSMASK) != OC_COLON)
2371                                 syntax_error(EMSG_POSSIBLE_ERROR);
2372                         res = evaluate(istrue(L.v) ? op->r.n->l.n : op->r.n->r.n, res);
2373                         break;
2374
2375                 case XC( OC_FUNC ):
2376                         if (!op->r.f->body.first)
2377                                 syntax_error(EMSG_UNDEF_FUNC);
2378
2379                         X.v = R.v = nvalloc(op->r.f->nargs+1);
2380                         while (op1) {
2381                                 L.v = evaluate(nextarg(&op1), v1);
2382                                 copyvar(R.v, L.v);
2383                                 R.v->type |= VF_CHILD;
2384                                 R.v->x.parent = L.v;
2385                                 if (++R.v - X.v >= op->r.f->nargs)
2386                                         break;
2387                         }
2388
2389                         R.v = fnargs;
2390                         fnargs = X.v;
2391
2392                         L.s = g_progname;
2393                         res = evaluate(op->r.f->body.first, res);
2394                         g_progname = L.s;
2395
2396                         nvfree(fnargs);
2397                         fnargs = R.v;
2398                         break;
2399
2400                 case XC( OC_GETLINE ):
2401                 case XC( OC_PGETLINE ):
2402                         if (op1) {
2403                                 X.rsm = newfile(L.s);
2404                                 if (!X.rsm->F) {
2405                                         if ((opinfo & OPCLSMASK) == OC_PGETLINE) {
2406                                                 X.rsm->F = popen(L.s, "r");
2407                                                 X.rsm->is_pipe = TRUE;
2408                                         } else {
2409                                                 X.rsm->F = fopen(L.s, "r");             /* not xfopen! */
2410                                         }
2411                                 }
2412                         } else {
2413                                 if (!iF) iF = next_input_file();
2414                                 X.rsm = iF;
2415                         }
2416
2417                         if (!X.rsm->F) {
2418                                 setvar_i(intvar[ERRNO], errno);
2419                                 setvar_i(res, -1);
2420                                 break;
2421                         }
2422
2423                         if (!op->r.n)
2424                                 R.v = intvar[F0];
2425
2426                         L.i = awk_getline(X.rsm, R.v);
2427                         if (L.i > 0) {
2428                                 if (!op1) {
2429                                         incvar(intvar[FNR]);
2430                                         incvar(intvar[NR]);
2431                                 }
2432                         }
2433                         setvar_i(res, L.i);
2434                         break;
2435
2436                 /* simple builtins */
2437                 case XC( OC_FBLTIN ):
2438                         switch (opn) {
2439
2440                         case F_in:
2441                                 R.d = (int)L.d;
2442                                 break;
2443
2444                         case F_rn:
2445                                 R.d = (double)rand() / (double)RAND_MAX;
2446                                 break;
2447 #if ENABLE_FEATURE_AWK_MATH
2448                         case F_co:
2449                                 R.d = cos(L.d);
2450                                 break;
2451
2452                         case F_ex:
2453                                 R.d = exp(L.d);
2454                                 break;
2455
2456                         case F_lg:
2457                                 R.d = log(L.d);
2458                                 break;
2459
2460                         case F_si:
2461                                 R.d = sin(L.d);
2462                                 break;
2463
2464                         case F_sq:
2465                                 R.d = sqrt(L.d);
2466                                 break;
2467 #else
2468                         case F_co:
2469                         case F_ex:
2470                         case F_lg:
2471                         case F_si:
2472                         case F_sq:
2473                                 syntax_error(EMSG_NO_MATH);
2474                                 break;
2475 #endif
2476                         case F_sr:
2477                                 R.d = (double)seed;
2478                                 seed = op1 ? (unsigned)L.d : (unsigned)time(NULL);
2479                                 srand(seed);
2480                                 break;
2481
2482                         case F_ti:
2483                                 R.d = time(NULL);
2484                                 break;
2485
2486                         case F_le:
2487                                 if (!op1)
2488                                         L.s = getvar_s(intvar[F0]);
2489                                 R.d = strlen(L.s);
2490                                 break;
2491
2492                         case F_sy:
2493                                 fflush(NULL);
2494                                 R.d = (ENABLE_FEATURE_ALLOW_EXEC && L.s && *L.s)
2495                                                 ? (system(L.s) >> 8) : 0;
2496                                 break;
2497
2498                         case F_ff:
2499                                 if (!op1)
2500                                         fflush(stdout);
2501                                 else {
2502                                         if (L.s && *L.s) {
2503                                                 X.rsm = newfile(L.s);
2504                                                 fflush(X.rsm->F);
2505                                         } else {
2506                                                 fflush(NULL);
2507                                         }
2508                                 }
2509                                 break;
2510
2511                         case F_cl:
2512                                 X.rsm = (rstream *)hash_search(fdhash, L.s);
2513                                 if (X.rsm) {
2514                                         R.i = X.rsm->is_pipe ? pclose(X.rsm->F) : fclose(X.rsm->F);
2515                                         free(X.rsm->buffer);
2516                                         hash_remove(fdhash, L.s);
2517                                 }
2518                                 if (R.i != 0)
2519                                         setvar_i(intvar[ERRNO], errno);
2520                                 R.d = (double)R.i;
2521                                 break;
2522                         }
2523                         setvar_i(res, R.d);
2524                         break;
2525
2526                 case XC( OC_BUILTIN ):
2527                         res = exec_builtin(op, res);
2528                         break;
2529
2530                 case XC( OC_SPRINTF ):
2531                         setvar_p(res, awk_printf(op1));
2532                         break;
2533
2534                 case XC( OC_UNARY ):
2535                         X.v = R.v;
2536                         L.d = R.d = getvar_i(R.v);
2537                         switch (opn) {
2538                         case 'P':
2539                                 L.d = ++R.d;
2540                                 goto r_op_change;
2541                         case 'p':
2542                                 R.d++;
2543                                 goto r_op_change;
2544                         case 'M':
2545                                 L.d = --R.d;
2546                                 goto r_op_change;
2547                         case 'm':
2548                                 R.d--;
2549                                 goto r_op_change;
2550                         case '!':
2551                                 L.d = istrue(X.v) ? 0 : 1;
2552                                 break;
2553                         case '-':
2554                                 L.d = -R.d;
2555                                 break;
2556  r_op_change:
2557                                 setvar_i(X.v, R.d);
2558                         }
2559                         setvar_i(res, L.d);
2560                         break;
2561
2562                 case XC( OC_FIELD ):
2563                         R.i = (int)getvar_i(R.v);
2564                         if (R.i == 0) {
2565                                 res = intvar[F0];
2566                         } else {
2567                                 split_f0();
2568                                 if (R.i > nfields)
2569                                         fsrealloc(R.i);
2570                                 res = &Fields[R.i - 1];
2571                         }
2572                         break;
2573
2574                 /* concatenation (" ") and index joining (",") */
2575                 case XC( OC_CONCAT ):
2576                 case XC( OC_COMMA ):
2577                         opn = strlen(L.s) + strlen(R.s) + 2;
2578                         X.s = xmalloc(opn);
2579                         strcpy(X.s, L.s);
2580                         if ((opinfo & OPCLSMASK) == OC_COMMA) {
2581                                 L.s = getvar_s(intvar[SUBSEP]);
2582                                 X.s = xrealloc(X.s, opn + strlen(L.s));
2583                                 strcat(X.s, L.s);
2584                         }
2585                         strcat(X.s, R.s);
2586                         setvar_p(res, X.s);
2587                         break;
2588
2589                 case XC( OC_LAND ):
2590                         setvar_i(res, istrue(L.v) ? ptest(op->r.n) : 0);
2591                         break;
2592
2593                 case XC( OC_LOR ):
2594                         setvar_i(res, istrue(L.v) ? 1 : ptest(op->r.n));
2595                         break;
2596
2597                 case XC( OC_BINARY ):
2598                 case XC( OC_REPLACE ):
2599                         R.d = getvar_i(R.v);
2600                         switch (opn) {
2601                         case '+':
2602                                 L.d += R.d;
2603                                 break;
2604                         case '-':
2605                                 L.d -= R.d;
2606                                 break;
2607                         case '*':
2608                                 L.d *= R.d;
2609                                 break;
2610                         case '/':
2611                                 if (R.d == 0) syntax_error(EMSG_DIV_BY_ZERO);
2612                                 L.d /= R.d;
2613                                 break;
2614                         case '&':
2615 #if ENABLE_FEATURE_AWK_MATH
2616                                 L.d = pow(L.d, R.d);
2617 #else
2618                                 syntax_error(EMSG_NO_MATH);
2619 #endif
2620                                 break;
2621                         case '%':
2622                                 if (R.d == 0) syntax_error(EMSG_DIV_BY_ZERO);
2623                                 L.d -= (int)(L.d / R.d) * R.d;
2624                                 break;
2625                         }
2626                         res = setvar_i(((opinfo & OPCLSMASK) == OC_BINARY) ? res : X.v, L.d);
2627                         break;
2628
2629                 case XC( OC_COMPARE ):
2630                         if (is_numeric(L.v) && is_numeric(R.v)) {
2631                                 L.d = getvar_i(L.v) - getvar_i(R.v);
2632                         } else {
2633                                 L.s = getvar_s(L.v);
2634                                 R.s = getvar_s(R.v);
2635                                 L.d = icase ? strcasecmp(L.s, R.s) : strcmp(L.s, R.s);
2636                         }
2637                         switch (opn & 0xfe) {
2638                         case 0:
2639                                 R.i = (L.d > 0);
2640                                 break;
2641                         case 2:
2642                                 R.i = (L.d >= 0);
2643                                 break;
2644                         case 4:
2645                                 R.i = (L.d == 0);
2646                                 break;
2647                         }
2648                         setvar_i(res, (opn & 0x1 ? R.i : !R.i) ? 1 : 0);
2649                         break;
2650
2651                 default:
2652                         syntax_error(EMSG_POSSIBLE_ERROR);
2653                 }
2654                 if ((opinfo & OPCLSMASK) <= SHIFT_TIL_THIS)
2655                         op = op->a.n;
2656                 if ((opinfo & OPCLSMASK) >= RECUR_FROM_THIS)
2657                         break;
2658                 if (nextrec)
2659                         break;
2660         }
2661         nvfree(v1);
2662         return res;
2663 #undef fnargs
2664 #undef seed
2665 #undef sreg
2666 }
2667
2668
2669 /* -------- main & co. -------- */
2670
2671 static int awk_exit(int r)
2672 {
2673         var tv;
2674         unsigned i;
2675         hash_item *hi;
2676
2677         zero_out_var(&tv);
2678
2679         if (!exiting) {
2680                 exiting = TRUE;
2681                 nextrec = FALSE;
2682                 evaluate(endseq.first, &tv);
2683         }
2684
2685         /* waiting for children */
2686         for (i = 0; i < fdhash->csize; i++) {
2687                 hi = fdhash->items[i];
2688                 while (hi) {
2689                         if (hi->data.rs.F && hi->data.rs.is_pipe)
2690                                 pclose(hi->data.rs.F);
2691                         hi = hi->next;
2692                 }
2693         }
2694
2695         exit(r);
2696 }
2697
2698 /* if expr looks like "var=value", perform assignment and return 1,
2699  * otherwise return 0 */
2700 static int is_assignment(const char *expr)
2701 {
2702         char *exprc, *s, *s0, *s1;
2703
2704         exprc = xstrdup(expr);
2705         if (!isalnum_(*exprc) || (s = strchr(exprc, '=')) == NULL) {
2706                 free(exprc);
2707                 return FALSE;
2708         }
2709
2710         *(s++) = '\0';
2711         s0 = s1 = s;
2712         while (*s)
2713                 *(s1++) = nextchar(&s);
2714
2715         *s1 = '\0';
2716         setvar_u(newvar(exprc), s0);
2717         free(exprc);
2718         return TRUE;
2719 }
2720
2721 /* switch to next input file */
2722 static rstream *next_input_file(void)
2723 {
2724 #define rsm          (G.next_input_file__rsm)
2725 #define files_happen (G.next_input_file__files_happen)
2726
2727         FILE *F = NULL;
2728         const char *fname, *ind;
2729
2730         if (rsm.F) fclose(rsm.F);
2731         rsm.F = NULL;
2732         rsm.pos = rsm.adv = 0;
2733
2734         do {
2735                 if (getvar_i(intvar[ARGIND])+1 >= getvar_i(intvar[ARGC])) {
2736                         if (files_happen)
2737                                 return NULL;
2738                         fname = "-";
2739                         F = stdin;
2740                 } else {
2741                         ind = getvar_s(incvar(intvar[ARGIND]));
2742                         fname = getvar_s(findvar(iamarray(intvar[ARGV]), ind));
2743                         if (fname && *fname && !is_assignment(fname))
2744                                 F = afopen(fname, "r");
2745                 }
2746         } while (!F);
2747
2748         files_happen = TRUE;
2749         setvar_s(intvar[FILENAME], fname);
2750         rsm.F = F;
2751         return &rsm;
2752 #undef rsm
2753 #undef files_happen
2754 }
2755
2756 int awk_main(int argc, char **argv);
2757 int awk_main(int argc, char **argv)
2758 {
2759         unsigned opt;
2760         char *opt_F, *opt_W;
2761         llist_t *opt_v = NULL;
2762         int i, j, flen;
2763         var *v;
2764         var tv;
2765         char **envp;
2766         char *vnames = (char *)vNames; /* cheat */
2767         char *vvalues = (char *)vValues;
2768
2769         INIT_G();
2770
2771         /* Undo busybox.c, or else strtod may eat ','! This breaks parsing:
2772          * $1,$2 == '$1,' '$2', NOT '$1' ',' '$2' */
2773         if (ENABLE_LOCALE_SUPPORT)
2774                 setlocale(LC_NUMERIC, "C");
2775
2776         zero_out_var(&tv);
2777
2778         /* allocate global buffer */
2779         g_buf = xmalloc(MAXVARFMT + 1);
2780
2781         vhash = hash_init();
2782         ahash = hash_init();
2783         fdhash = hash_init();
2784         fnhash = hash_init();
2785
2786         /* initialize variables */
2787         for (i = 0; *vnames; i++) {
2788                 intvar[i] = v = newvar(nextword(&vnames));
2789                 if (*vvalues != '\377')
2790                         setvar_s(v, nextword(&vvalues));
2791                 else
2792                         setvar_i(v, 0);
2793
2794                 if (*vnames == '*') {
2795                         v->type |= VF_SPECIAL;
2796                         vnames++;
2797                 }
2798         }
2799
2800         handle_special(intvar[FS]);
2801         handle_special(intvar[RS]);
2802
2803         newfile("/dev/stdin")->F = stdin;
2804         newfile("/dev/stdout")->F = stdout;
2805         newfile("/dev/stderr")->F = stderr;
2806
2807         /* Huh, people report that sometimes environ is NULL. Oh well. */
2808         if (environ) for (envp = environ; *envp; envp++) {
2809                 /* environ is writable, thus we don't strdup it needlessly */
2810                 char *s = *envp;
2811                 char *s1 = strchr(s, '=');
2812                 if (s1) {
2813                         *s1 = '\0';
2814                         /* Both findvar and setvar_u take const char*
2815                          * as 2nd arg -> environment is not trashed */
2816                         setvar_u(findvar(iamarray(intvar[ENVIRON]), s), s1 + 1);
2817                         *s1 = '=';
2818                 }
2819         }
2820         opt_complementary = "v::";
2821         opt = getopt32(argc, argv, "F:v:f:W:", &opt_F, &opt_v, &g_progname, &opt_W);
2822         argv += optind;
2823         argc -= optind;
2824         if (opt & 0x1)
2825                 setvar_s(intvar[FS], opt_F); // -F
2826         while (opt_v) { /* -v */
2827                 if (!is_assignment(llist_pop(&opt_v)))
2828                         bb_show_usage();
2829         }
2830         if (opt & 0x4) { // -f
2831                 char *s = s; /* die, gcc, die */
2832                 FILE *from_file = afopen(g_progname, "r");
2833                 /* one byte is reserved for some trick in next_token */
2834                 if (fseek(from_file, 0, SEEK_END) == 0) {
2835                         flen = ftell(from_file);
2836                         s = xmalloc(flen + 4);
2837                         fseek(from_file, 0, SEEK_SET);
2838                         i = 1 + fread(s + 1, 1, flen, from_file);
2839                 } else {
2840                         for (i = j = 1; j > 0; i += j) {
2841                                 s = xrealloc(s, i + 4096);
2842                                 j = fread(s + i, 1, 4094, from_file);
2843                         }
2844                 }
2845                 s[i] = '\0';
2846                 fclose(from_file);
2847                 parse_program(s + 1);
2848                 free(s);
2849         } else { // no -f: take program from 1st parameter
2850                 if (!argc)
2851                         bb_show_usage();
2852                 g_progname = "cmd. line";
2853                 parse_program(*argv++);
2854                 argc--;
2855         }
2856         if (opt & 0x8) // -W
2857                 bb_error_msg("warning: unrecognized option '-W %s' ignored", opt_W);
2858
2859         /* fill in ARGV array */
2860         setvar_i(intvar[ARGC], argc + 1);
2861         setari_u(intvar[ARGV], 0, "awk");
2862         i = 0;
2863         while (*argv)
2864                 setari_u(intvar[ARGV], ++i, *argv++);
2865
2866         evaluate(beginseq.first, &tv);
2867         if (!mainseq.first && !endseq.first)
2868                 awk_exit(EXIT_SUCCESS);
2869
2870         /* input file could already be opened in BEGIN block */
2871         if (!iF) iF = next_input_file();
2872
2873         /* passing through input files */
2874         while (iF) {
2875                 nextfile = FALSE;
2876                 setvar_i(intvar[FNR], 0);
2877
2878                 while ((i = awk_getline(iF, intvar[F0])) > 0) {
2879                         nextrec = FALSE;
2880                         incvar(intvar[NR]);
2881                         incvar(intvar[FNR]);
2882                         evaluate(mainseq.first, &tv);
2883
2884                         if (nextfile)
2885                                 break;
2886                 }
2887
2888                 if (i < 0)
2889                         syntax_error(strerror(errno));
2890
2891                 iF = next_input_file();
2892         }
2893
2894         awk_exit(EXIT_SUCCESS);
2895         /*return 0;*/
2896 }