remove useless casts (type*) xzalloc(...)
[platform/upstream/busybox.git] / editors / awk.c
1 /* vi: set sw=4 ts=4: */
2 /*
3  * awk implementation for busybox
4  *
5  * Copyright (C) 2002 by Dmitry Zakharov <dmit@crp.bank.gov.ua>
6  *
7  * Licensed under the GPL v2 or later, see the file LICENSE in this tarball.
8  */
9
10 #include "busybox.h"
11 #include "xregex.h"
12 #include <math.h>
13
14
15 #define MAXVARFMT       240
16 #define MINNVBLOCK      64
17
18 /* variable flags */
19 #define VF_NUMBER       0x0001  /* 1 = primary type is number */
20 #define VF_ARRAY        0x0002  /* 1 = it's an array */
21
22 #define VF_CACHED       0x0100  /* 1 = num/str value has cached str/num eq */
23 #define VF_USER         0x0200  /* 1 = user input (may be numeric string) */
24 #define VF_SPECIAL      0x0400  /* 1 = requires extra handling when changed */
25 #define VF_WALK         0x0800  /* 1 = variable has alloc'd x.walker list */
26 #define VF_FSTR         0x1000  /* 1 = string points to fstring buffer */
27 #define VF_CHILD        0x2000  /* 1 = function arg; x.parent points to source */
28 #define VF_DIRTY        0x4000  /* 1 = variable was set explicitly */
29
30 /* these flags are static, don't change them when value is changed */
31 #define VF_DONTTOUCH (VF_ARRAY | VF_SPECIAL | VF_WALK | VF_CHILD | VF_DIRTY)
32
33 /* Variable */
34 typedef struct var_s {
35         unsigned short type;            /* flags */
36         double number;
37         char *string;
38         union {
39                 int aidx;                               /* func arg idx (for compilation stage) */
40                 struct xhash_s *array;  /* array ptr */
41                 struct var_s *parent;   /* for func args, ptr to actual parameter */
42                 char **walker;                  /* list of array elements (for..in) */
43         } x;
44 } var;
45
46 /* Node chain (pattern-action chain, BEGIN, END, function bodies) */
47 typedef struct chain_s {
48         struct node_s *first;
49         struct node_s *last;
50         char *programname;
51 } chain;
52
53 /* Function */
54 typedef struct func_s {
55         unsigned short nargs;
56         struct chain_s body;
57 } func;
58
59 /* I/O stream */
60 typedef struct rstream_s {
61         FILE *F;
62         char *buffer;
63         int adv;
64         int size;
65         int pos;
66         unsigned short is_pipe;
67 } rstream;
68
69 typedef struct hash_item_s {
70         union {
71                 struct var_s v;                 /* variable/array hash */
72                 struct rstream_s rs;    /* redirect streams hash */
73                 struct func_s f;                /* functions hash */
74         } data;
75         struct hash_item_s *next;       /* next in chain */
76         char name[1];                           /* really it's longer */
77 } hash_item;
78
79 typedef struct xhash_s {
80         unsigned int nel;                                       /* num of elements */
81         unsigned int csize;                                     /* current hash size */
82         unsigned int nprime;                            /* next hash size in PRIMES[] */
83         unsigned int glen;                                      /* summary length of item names */
84         struct hash_item_s **items;
85 } xhash;
86
87 /* Tree node */
88 typedef struct node_s {
89         uint32_t info;
90         unsigned short lineno;
91         union {
92                 struct node_s *n;
93                 var *v;
94                 int i;
95                 char *s;
96                 regex_t *re;
97         } l;
98         union {
99                 struct node_s *n;
100                 regex_t *ire;
101                 func *f;
102                 int argno;
103         } r;
104         union {
105                 struct node_s *n;
106         } a;
107 } node;
108
109 /* Block of temporary variables */
110 typedef struct nvblock_s {
111         int size;
112         var *pos;
113         struct nvblock_s *prev;
114         struct nvblock_s *next;
115         var nv[0];
116 } nvblock;
117
118 typedef struct tsplitter_s {
119         node n;
120         regex_t re[2];
121 } tsplitter;
122
123 /* simple token classes */
124 /* Order and hex values are very important!!!  See next_token() */
125 #define TC_SEQSTART      1                              /* ( */
126 #define TC_SEQTERM      (1 << 1)                /* ) */
127 #define TC_REGEXP       (1 << 2)                /* /.../ */
128 #define TC_OUTRDR       (1 << 3)                /* | > >> */
129 #define TC_UOPPOST      (1 << 4)                /* unary postfix operator */
130 #define TC_UOPPRE1      (1 << 5)                /* unary prefix operator */
131 #define TC_BINOPX       (1 << 6)                /* two-opnd operator */
132 #define TC_IN           (1 << 7)
133 #define TC_COMMA        (1 << 8)
134 #define TC_PIPE         (1 << 9)                /* input redirection pipe */
135 #define TC_UOPPRE2      (1 << 10)               /* unary prefix operator */
136 #define TC_ARRTERM      (1 << 11)               /* ] */
137 #define TC_GRPSTART     (1 << 12)               /* { */
138 #define TC_GRPTERM      (1 << 13)               /* } */
139 #define TC_SEMICOL      (1 << 14)
140 #define TC_NEWLINE      (1 << 15)
141 #define TC_STATX        (1 << 16)               /* ctl statement (for, next...) */
142 #define TC_WHILE        (1 << 17)
143 #define TC_ELSE         (1 << 18)
144 #define TC_BUILTIN      (1 << 19)
145 #define TC_GETLINE      (1 << 20)
146 #define TC_FUNCDECL     (1 << 21)               /* `function' `func' */
147 #define TC_BEGIN        (1 << 22)
148 #define TC_END          (1 << 23)
149 #define TC_EOF          (1 << 24)
150 #define TC_VARIABLE     (1 << 25)
151 #define TC_ARRAY        (1 << 26)
152 #define TC_FUNCTION     (1 << 27)
153 #define TC_STRING       (1 << 28)
154 #define TC_NUMBER       (1 << 29)
155
156 #define TC_UOPPRE       (TC_UOPPRE1 | TC_UOPPRE2)
157
158 /* combined token classes */
159 #define TC_BINOP        (TC_BINOPX | TC_COMMA | TC_PIPE | TC_IN)
160 #define TC_UNARYOP      (TC_UOPPRE | TC_UOPPOST)
161 #define TC_OPERAND      (TC_VARIABLE | TC_ARRAY | TC_FUNCTION | \
162         TC_BUILTIN | TC_GETLINE | TC_SEQSTART | TC_STRING | TC_NUMBER)
163
164 #define TC_STATEMNT     (TC_STATX | TC_WHILE)
165 #define TC_OPTERM       (TC_SEMICOL | TC_NEWLINE)
166
167 /* word tokens, cannot mean something else if not expected */
168 #define TC_WORD         (TC_IN | TC_STATEMNT | TC_ELSE | TC_BUILTIN | \
169         TC_GETLINE | TC_FUNCDECL | TC_BEGIN | TC_END)
170
171 /* discard newlines after these */
172 #define TC_NOTERM       (TC_COMMA | TC_GRPSTART | TC_GRPTERM | \
173         TC_BINOP | TC_OPTERM)
174
175 /* what can expression begin with */
176 #define TC_OPSEQ        (TC_OPERAND | TC_UOPPRE | TC_REGEXP)
177 /* what can group begin with */
178 #define TC_GRPSEQ       (TC_OPSEQ | TC_OPTERM | TC_STATEMNT | TC_GRPSTART)
179
180 /* if previous token class is CONCAT1 and next is CONCAT2, concatenation */
181 /* operator is inserted between them */
182 #define TC_CONCAT1      (TC_VARIABLE | TC_ARRTERM | TC_SEQTERM | \
183         TC_STRING | TC_NUMBER | TC_UOPPOST)
184 #define TC_CONCAT2      (TC_OPERAND | TC_UOPPRE)
185
186 #define OF_RES1         0x010000
187 #define OF_RES2         0x020000
188 #define OF_STR1         0x040000
189 #define OF_STR2         0x080000
190 #define OF_NUM1         0x100000
191 #define OF_CHECKED      0x200000
192
193 /* combined operator flags */
194 #define xx      0
195 #define xV      OF_RES2
196 #define xS      (OF_RES2 | OF_STR2)
197 #define Vx      OF_RES1
198 #define VV      (OF_RES1 | OF_RES2)
199 #define Nx      (OF_RES1 | OF_NUM1)
200 #define NV      (OF_RES1 | OF_NUM1 | OF_RES2)
201 #define Sx      (OF_RES1 | OF_STR1)
202 #define SV      (OF_RES1 | OF_STR1 | OF_RES2)
203 #define SS      (OF_RES1 | OF_STR1 | OF_RES2 | OF_STR2)
204
205 #define OPCLSMASK       0xFF00
206 #define OPNMASK         0x007F
207
208 /* operator priority is a highest byte (even: r->l, odd: l->r grouping)
209  * For builtins it has different meaning: n n s3 s2 s1 v3 v2 v1,
210  * n - min. number of args, vN - resolve Nth arg to var, sN - resolve to string
211  */
212 #define P(x)    (x << 24)
213 #define PRIMASK         0x7F000000
214 #define PRIMASK2        0x7E000000
215
216 /* Operation classes */
217
218 #define SHIFT_TIL_THIS  0x0600
219 #define RECUR_FROM_THIS 0x1000
220
221 enum {
222         OC_DELETE=0x0100,       OC_EXEC=0x0200,         OC_NEWSOURCE=0x0300,
223         OC_PRINT=0x0400,        OC_PRINTF=0x0500,       OC_WALKINIT=0x0600,
224
225         OC_BR=0x0700,           OC_BREAK=0x0800,        OC_CONTINUE=0x0900,
226         OC_EXIT=0x0a00,         OC_NEXT=0x0b00,         OC_NEXTFILE=0x0c00,
227         OC_TEST=0x0d00,         OC_WALKNEXT=0x0e00,
228
229         OC_BINARY=0x1000,       OC_BUILTIN=0x1100,      OC_COLON=0x1200,
230         OC_COMMA=0x1300,        OC_COMPARE=0x1400,      OC_CONCAT=0x1500,
231         OC_FBLTIN=0x1600,       OC_FIELD=0x1700,        OC_FNARG=0x1800,
232         OC_FUNC=0x1900,         OC_GETLINE=0x1a00,      OC_IN=0x1b00,
233         OC_LAND=0x1c00,         OC_LOR=0x1d00,          OC_MATCH=0x1e00,
234         OC_MOVE=0x1f00,         OC_PGETLINE=0x2000,     OC_REGEXP=0x2100,
235         OC_REPLACE=0x2200,      OC_RETURN=0x2300,       OC_SPRINTF=0x2400,
236         OC_TERNARY=0x2500,      OC_UNARY=0x2600,        OC_VAR=0x2700,
237         OC_DONE=0x2800,
238
239         ST_IF=0x3000,           ST_DO=0x3100,           ST_FOR=0x3200,
240         ST_WHILE=0x3300
241 };
242
243 /* simple builtins */
244 enum {
245         F_in=0, F_rn,   F_co,   F_ex,   F_lg,   F_si,   F_sq,   F_sr,
246         F_ti,   F_le,   F_sy,   F_ff,   F_cl
247 };
248
249 /* builtins */
250 enum {
251         B_a2=0, B_ix,   B_ma,   B_sp,   B_ss,   B_ti,   B_lo,   B_up,
252         B_ge,   B_gs,   B_su,
253         B_an,   B_co,   B_ls,   B_or,   B_rs,   B_xo,
254 };
255
256 /* tokens and their corresponding info values */
257
258 #define NTC             "\377"          /* switch to next token class (tc<<1) */
259 #define NTCC    '\377'
260
261 #define OC_B    OC_BUILTIN
262
263 static char * const tokenlist =
264         "\1("           NTC
265         "\1)"           NTC
266         "\1/"           NTC                                                                     /* REGEXP */
267         "\2>>"          "\1>"           "\1|"           NTC                     /* OUTRDR */
268         "\2++"          "\2--"          NTC                                             /* UOPPOST */
269         "\2++"          "\2--"          "\1$"           NTC                     /* UOPPRE1 */
270         "\2=="          "\1="           "\2+="          "\2-="          /* BINOPX */
271         "\2*="          "\2/="          "\2%="          "\2^="
272         "\1+"           "\1-"           "\3**="         "\2**"
273         "\1/"           "\1%"           "\1^"           "\1*"
274         "\2!="          "\2>="          "\2<="          "\1>"
275         "\1<"           "\2!~"          "\1~"           "\2&&"
276         "\2||"          "\1?"           "\1:"           NTC
277         "\2in"          NTC
278         "\1,"           NTC
279         "\1|"           NTC
280         "\1+"           "\1-"           "\1!"           NTC                     /* UOPPRE2 */
281         "\1]"           NTC
282         "\1{"           NTC
283         "\1}"           NTC
284         "\1;"           NTC
285         "\1\n"          NTC
286         "\2if"          "\2do"          "\3for"         "\5break"       /* STATX */
287         "\10continue"                   "\6delete"      "\5print"
288         "\6printf"      "\4next"        "\10nextfile"
289         "\6return"      "\4exit"        NTC
290         "\5while"       NTC
291         "\4else"        NTC
292
293         "\3and"         "\5compl"       "\6lshift"      "\2or"
294         "\6rshift"      "\3xor"
295         "\5close"       "\6system"      "\6fflush"      "\5atan2"       /* BUILTIN */
296         "\3cos"         "\3exp"         "\3int"         "\3log"
297         "\4rand"        "\3sin"         "\4sqrt"        "\5srand"
298         "\6gensub"      "\4gsub"        "\5index"       "\6length"
299         "\5match"       "\5split"       "\7sprintf"     "\3sub"
300         "\6substr"      "\7systime"     "\10strftime"
301         "\7tolower"     "\7toupper"     NTC
302         "\7getline"     NTC
303         "\4func"        "\10function"   NTC
304         "\5BEGIN"       NTC
305         "\3END"         "\0"
306         ;
307
308 static const uint32_t tokeninfo[] = {
309
310         0,
311         0,
312         OC_REGEXP,
313         xS|'a',         xS|'w',         xS|'|',
314         OC_UNARY|xV|P(9)|'p',           OC_UNARY|xV|P(9)|'m',
315         OC_UNARY|xV|P(9)|'P',           OC_UNARY|xV|P(9)|'M',
316                 OC_FIELD|xV|P(5),
317         OC_COMPARE|VV|P(39)|5,          OC_MOVE|VV|P(74),
318                 OC_REPLACE|NV|P(74)|'+',        OC_REPLACE|NV|P(74)|'-',
319         OC_REPLACE|NV|P(74)|'*',        OC_REPLACE|NV|P(74)|'/',
320                 OC_REPLACE|NV|P(74)|'%',        OC_REPLACE|NV|P(74)|'&',
321         OC_BINARY|NV|P(29)|'+',         OC_BINARY|NV|P(29)|'-',
322                 OC_REPLACE|NV|P(74)|'&',        OC_BINARY|NV|P(15)|'&',
323         OC_BINARY|NV|P(25)|'/',         OC_BINARY|NV|P(25)|'%',
324                 OC_BINARY|NV|P(15)|'&',         OC_BINARY|NV|P(25)|'*',
325         OC_COMPARE|VV|P(39)|4,          OC_COMPARE|VV|P(39)|3,
326                 OC_COMPARE|VV|P(39)|0,          OC_COMPARE|VV|P(39)|1,
327         OC_COMPARE|VV|P(39)|2,          OC_MATCH|Sx|P(45)|'!',
328                 OC_MATCH|Sx|P(45)|'~',          OC_LAND|Vx|P(55),
329         OC_LOR|Vx|P(59),                        OC_TERNARY|Vx|P(64)|'?',
330                 OC_COLON|xx|P(67)|':',
331         OC_IN|SV|P(49),
332         OC_COMMA|SS|P(80),
333         OC_PGETLINE|SV|P(37),
334         OC_UNARY|xV|P(19)|'+',          OC_UNARY|xV|P(19)|'-',
335                 OC_UNARY|xV|P(19)|'!',
336         0,
337         0,
338         0,
339         0,
340         0,
341         ST_IF,                  ST_DO,                  ST_FOR,                 OC_BREAK,
342         OC_CONTINUE,                                    OC_DELETE|Vx,   OC_PRINT,
343         OC_PRINTF,              OC_NEXT,                OC_NEXTFILE,
344         OC_RETURN|Vx,   OC_EXIT|Nx,
345         ST_WHILE,
346         0,
347
348         OC_B|B_an|P(0x83), OC_B|B_co|P(0x41), OC_B|B_ls|P(0x83), OC_B|B_or|P(0x83),
349         OC_B|B_rs|P(0x83), OC_B|B_xo|P(0x83),
350         OC_FBLTIN|Sx|F_cl, OC_FBLTIN|Sx|F_sy, OC_FBLTIN|Sx|F_ff, OC_B|B_a2|P(0x83),
351         OC_FBLTIN|Nx|F_co, OC_FBLTIN|Nx|F_ex, OC_FBLTIN|Nx|F_in, OC_FBLTIN|Nx|F_lg,
352         OC_FBLTIN|F_rn,    OC_FBLTIN|Nx|F_si, OC_FBLTIN|Nx|F_sq, OC_FBLTIN|Nx|F_sr,
353         OC_B|B_ge|P(0xd6), OC_B|B_gs|P(0xb6), OC_B|B_ix|P(0x9b), OC_FBLTIN|Sx|F_le,
354         OC_B|B_ma|P(0x89), OC_B|B_sp|P(0x8b), OC_SPRINTF,        OC_B|B_su|P(0xb6),
355         OC_B|B_ss|P(0x8f), OC_FBLTIN|F_ti,    OC_B|B_ti|P(0x0b),
356         OC_B|B_lo|P(0x49), OC_B|B_up|P(0x49),
357         OC_GETLINE|SV|P(0),
358         0,      0,
359         0,
360         0
361 };
362
363 /* internal variable names and their initial values       */
364 /* asterisk marks SPECIAL vars; $ is just no-named Field0 */
365 enum {
366         CONVFMT=0,      OFMT,           FS,                     OFS,
367         ORS,            RS,                     RT,                     FILENAME,
368         SUBSEP,         ARGIND,         ARGC,           ARGV,
369         ERRNO,          FNR,
370         NR,                     NF,                     IGNORECASE,
371         ENVIRON,        F0,                     _intvarcount_
372 };
373
374 static char * vNames =
375         "CONVFMT\0"     "OFMT\0"        "FS\0*"         "OFS\0"
376         "ORS\0"         "RS\0*"         "RT\0"          "FILENAME\0"
377         "SUBSEP\0"      "ARGIND\0"      "ARGC\0"        "ARGV\0"
378         "ERRNO\0"       "FNR\0"
379         "NR\0"          "NF\0*"         "IGNORECASE\0*"
380         "ENVIRON\0"     "$\0*"          "\0";
381
382 static char * vValues =
383         "%.6g\0"        "%.6g\0"        " \0"           " \0"
384         "\n\0"          "\n\0"          "\0"            "\0"
385         "\034\0"
386         "\377";
387
388 /* hash size may grow to these values */
389 #define FIRST_PRIME 61;
390 static const unsigned int PRIMES[] = { 251, 1021, 4093, 16381, 65521 };
391 enum { NPRIMES = sizeof(PRIMES) / sizeof(unsigned int) };
392
393 /* globals */
394
395 extern char **environ;
396
397 static var * V[_intvarcount_];
398 static chain beginseq, mainseq, endseq, *seq;
399 static int nextrec, nextfile;
400 static node *break_ptr, *continue_ptr;
401 static rstream *iF;
402 static xhash *vhash, *ahash, *fdhash, *fnhash;
403 static char *programname;
404 static short lineno;
405 static int is_f0_split;
406 static int nfields;
407 static var *Fields;
408 static tsplitter fsplitter, rsplitter;
409 static nvblock *cb;
410 static char *pos;
411 static char *buf;
412 static int icase;
413 static int exiting;
414
415 static struct {
416         uint32_t tclass;
417         uint32_t info;
418         char *string;
419         double number;
420         short lineno;
421         int rollback;
422 } t;
423
424 /* function prototypes */
425 static void handle_special(var *);
426 static node *parse_expr(uint32_t);
427 static void chain_group(void);
428 static var *evaluate(node *, var *);
429 static rstream *next_input_file(void);
430 static int fmt_num(char *, int, const char *, double, int);
431 static int awk_exit(int) ATTRIBUTE_NORETURN;
432
433 /* ---- error handling ---- */
434
435 static const char EMSG_INTERNAL_ERROR[] = "Internal error";
436 static const char EMSG_UNEXP_EOS[] = "Unexpected end of string";
437 static const char EMSG_UNEXP_TOKEN[] = "Unexpected token";
438 static const char EMSG_DIV_BY_ZERO[] = "Division by zero";
439 static const char EMSG_INV_FMT[] = "Invalid format specifier";
440 static const char EMSG_TOO_FEW_ARGS[] = "Too few arguments for builtin";
441 static const char EMSG_NOT_ARRAY[] = "Not an array";
442 static const char EMSG_POSSIBLE_ERROR[] = "Possible syntax error";
443 static const char EMSG_UNDEF_FUNC[] = "Call to undefined function";
444 #ifndef CONFIG_FEATURE_AWK_MATH
445 static const char EMSG_NO_MATH[] = "Math support is not compiled in";
446 #endif
447
448 static void syntax_error(const char * const message) ATTRIBUTE_NORETURN;
449 static void syntax_error(const char * const message)
450 {
451         bb_error_msg_and_die("%s:%i: %s", programname, lineno, message);
452 }
453
454 #define runtime_error(x) syntax_error(x)
455
456
457 /* ---- hash stuff ---- */
458
459 static unsigned int hashidx(const char *name)
460 {
461         unsigned int idx=0;
462
463         while (*name)  idx = *name++ + (idx << 6) - idx;
464         return idx;
465 }
466
467 /* create new hash */
468 static xhash *hash_init(void)
469 {
470         xhash *newhash;
471
472         newhash = xzalloc(sizeof(xhash));
473         newhash->csize = FIRST_PRIME;
474         newhash->items = xzalloc(newhash->csize * sizeof(hash_item *));
475
476         return newhash;
477 }
478
479 /* find item in hash, return ptr to data, NULL if not found */
480 static void *hash_search(xhash *hash, const char *name)
481 {
482         hash_item *hi;
483
484         hi = hash->items [ hashidx(name) % hash->csize ];
485         while (hi) {
486                 if (strcmp(hi->name, name) == 0)
487                         return &(hi->data);
488                 hi = hi->next;
489         }
490         return NULL;
491 }
492
493 /* grow hash if it becomes too big */
494 static void hash_rebuild(xhash *hash)
495 {
496         unsigned int newsize, i, idx;
497         hash_item **newitems, *hi, *thi;
498
499         if (hash->nprime == NPRIMES)
500                 return;
501
502         newsize = PRIMES[hash->nprime++];
503         newitems = xzalloc(newsize * sizeof(hash_item *));
504
505         for (i=0; i<hash->csize; i++) {
506                 hi = hash->items[i];
507                 while (hi) {
508                         thi = hi;
509                         hi = thi->next;
510                         idx = hashidx(thi->name) % newsize;
511                         thi->next = newitems[idx];
512                         newitems[idx] = thi;
513                 }
514         }
515
516         free(hash->items);
517         hash->csize = newsize;
518         hash->items = newitems;
519 }
520
521 /* find item in hash, add it if necessary. Return ptr to data */
522 static void *hash_find(xhash *hash, const char *name)
523 {
524         hash_item *hi;
525         unsigned int idx;
526         int l;
527
528         hi = hash_search(hash, name);
529         if (! hi) {
530                 if (++hash->nel / hash->csize > 10)
531                         hash_rebuild(hash);
532
533                 l = strlen(name) + 1;
534                 hi = xzalloc(sizeof(hash_item) + l);
535                 memcpy(hi->name, name, l);
536
537                 idx = hashidx(name) % hash->csize;
538                 hi->next = hash->items[idx];
539                 hash->items[idx] = hi;
540                 hash->glen += l;
541         }
542         return &(hi->data);
543 }
544
545 #define findvar(hash, name) (var *) hash_find ( (hash) , (name) )
546 #define newvar(name) (var *) hash_find ( vhash , (name) )
547 #define newfile(name) (rstream *) hash_find ( fdhash , (name) )
548 #define newfunc(name) (func *) hash_find ( fnhash , (name) )
549
550 static void hash_remove(xhash *hash, const char *name)
551 {
552         hash_item *hi, **phi;
553
554         phi = &(hash->items[ hashidx(name) % hash->csize ]);
555         while (*phi) {
556                 hi = *phi;
557                 if (strcmp(hi->name, name) == 0) {
558                         hash->glen -= (strlen(name) + 1);
559                         hash->nel--;
560                         *phi = hi->next;
561                         free(hi);
562                         break;
563                 }
564                 phi = &(hi->next);
565         }
566 }
567
568 /* ------ some useful functions ------ */
569
570 static void skip_spaces(char **s)
571 {
572         char *p = *s;
573
574         while(*p == ' ' || *p == '\t' ||
575                         (*p == '\\' && *(p+1) == '\n' && (++p, ++t.lineno))) {
576                 p++;
577         }
578         *s = p;
579 }
580
581 static char *nextword(char **s)
582 {
583         char *p = *s;
584
585         while (*(*s)++) ;
586
587         return p;
588 }
589
590 static char nextchar(char **s)
591 {
592         char c, *pps;
593
594         c = *((*s)++);
595         pps = *s;
596         if (c == '\\') c = bb_process_escape_sequence((const char**)s);
597         if (c == '\\' && *s == pps) c = *((*s)++);
598         return c;
599 }
600
601 static int ATTRIBUTE_ALWAYS_INLINE isalnum_(int c)
602 {
603         return (isalnum(c) || c == '_');
604 }
605
606 static FILE *afopen(const char *path, const char *mode)
607 {
608         return (*path == '-' && *(path+1) == '\0') ? stdin : xfopen(path, mode);
609 }
610
611 /* -------- working with variables (set/get/copy/etc) -------- */
612
613 static xhash *iamarray(var *v)
614 {
615         var *a = v;
616
617         while (a->type & VF_CHILD)
618                 a = a->x.parent;
619
620         if (! (a->type & VF_ARRAY)) {
621                 a->type |= VF_ARRAY;
622                 a->x.array = hash_init();
623         }
624         return a->x.array;
625 }
626
627 static void clear_array(xhash *array)
628 {
629         unsigned int i;
630         hash_item *hi, *thi;
631
632         for (i=0; i<array->csize; i++) {
633                 hi = array->items[i];
634                 while (hi) {
635                         thi = hi;
636                         hi = hi->next;
637                         free(thi->data.v.string);
638                         free(thi);
639                 }
640                 array->items[i] = NULL;
641         }
642         array->glen = array->nel = 0;
643 }
644
645 /* clear a variable */
646 static var *clrvar(var *v)
647 {
648         if (!(v->type & VF_FSTR))
649                 free(v->string);
650
651         v->type &= VF_DONTTOUCH;
652         v->type |= VF_DIRTY;
653         v->string = NULL;
654         return v;
655 }
656
657 /* assign string value to variable */
658 static var *setvar_p(var *v, char *value)
659 {
660         clrvar(v);
661         v->string = value;
662         handle_special(v);
663
664         return v;
665 }
666
667 /* same as setvar_p but make a copy of string */
668 static var *setvar_s(var *v, const char *value)
669 {
670         return setvar_p(v, (value && *value) ? xstrdup(value) : NULL);
671 }
672
673 /* same as setvar_s but set USER flag */
674 static var *setvar_u(var *v, const char *value)
675 {
676         setvar_s(v, value);
677         v->type |= VF_USER;
678         return v;
679 }
680
681 /* set array element to user string */
682 static void setari_u(var *a, int idx, const char *s)
683 {
684         var *v;
685         static char sidx[12];
686
687         sprintf(sidx, "%d", idx);
688         v = findvar(iamarray(a), sidx);
689         setvar_u(v, s);
690 }
691
692 /* assign numeric value to variable */
693 static var *setvar_i(var *v, double value)
694 {
695         clrvar(v);
696         v->type |= VF_NUMBER;
697         v->number = value;
698         handle_special(v);
699         return v;
700 }
701
702 static char *getvar_s(var *v)
703 {
704         /* if v is numeric and has no cached string, convert it to string */
705         if ((v->type & (VF_NUMBER | VF_CACHED)) == VF_NUMBER) {
706                 fmt_num(buf, MAXVARFMT, getvar_s(V[CONVFMT]), v->number, TRUE);
707                 v->string = xstrdup(buf);
708                 v->type |= VF_CACHED;
709         }
710         return (v->string == NULL) ? "" : v->string;
711 }
712
713 static double getvar_i(var *v)
714 {
715         char *s;
716
717         if ((v->type & (VF_NUMBER | VF_CACHED)) == 0) {
718                 v->number = 0;
719                 s = v->string;
720                 if (s && *s) {
721                         v->number = strtod(s, &s);
722                         if (v->type & VF_USER) {
723                                 skip_spaces(&s);
724                                 if (*s != '\0')
725                                         v->type &= ~VF_USER;
726                         }
727                 } else {
728                         v->type &= ~VF_USER;
729                 }
730                 v->type |= VF_CACHED;
731         }
732         return v->number;
733 }
734
735 static var *copyvar(var *dest, const var *src)
736 {
737         if (dest != src) {
738                 clrvar(dest);
739                 dest->type |= (src->type & ~VF_DONTTOUCH);
740                 dest->number = src->number;
741                 if (src->string)
742                         dest->string = xstrdup(src->string);
743         }
744         handle_special(dest);
745         return dest;
746 }
747
748 static var *incvar(var *v)
749 {
750         return setvar_i(v, getvar_i(v)+1.);
751 }
752
753 /* return true if v is number or numeric string */
754 static int is_numeric(var *v)
755 {
756         getvar_i(v);
757         return ((v->type ^ VF_DIRTY) & (VF_NUMBER | VF_USER | VF_DIRTY));
758 }
759
760 /* return 1 when value of v corresponds to true, 0 otherwise */
761 static int istrue(var *v)
762 {
763         if (is_numeric(v))
764                 return (v->number == 0) ? 0 : 1;
765         else
766                 return (v->string && *(v->string)) ? 1 : 0;
767 }
768
769 /* temporary variables allocator. Last allocated should be first freed */
770 static var *nvalloc(int n)
771 {
772         nvblock *pb = NULL;
773         var *v, *r;
774         int size;
775
776         while (cb) {
777                 pb = cb;
778                 if ((cb->pos - cb->nv) + n <= cb->size) break;
779                 cb = cb->next;
780         }
781
782         if (! cb) {
783                 size = (n <= MINNVBLOCK) ? MINNVBLOCK : n;
784                 cb = xmalloc(sizeof(nvblock) + size * sizeof(var));
785                 cb->size = size;
786                 cb->pos = cb->nv;
787                 cb->prev = pb;
788                 cb->next = NULL;
789                 if (pb) pb->next = cb;
790         }
791
792         v = r = cb->pos;
793         cb->pos += n;
794
795         while (v < cb->pos) {
796                 v->type = 0;
797                 v->string = NULL;
798                 v++;
799         }
800
801         return r;
802 }
803
804 static void nvfree(var *v)
805 {
806         var *p;
807
808         if (v < cb->nv || v >= cb->pos)
809                 runtime_error(EMSG_INTERNAL_ERROR);
810
811         for (p=v; p<cb->pos; p++) {
812                 if ((p->type & (VF_ARRAY|VF_CHILD)) == VF_ARRAY) {
813                         clear_array(iamarray(p));
814                         free(p->x.array->items);
815                         free(p->x.array);
816                 }
817                 if (p->type & VF_WALK)
818                         free(p->x.walker);
819
820                 clrvar(p);
821         }
822
823         cb->pos = v;
824         while (cb->prev && cb->pos == cb->nv) {
825                 cb = cb->prev;
826         }
827 }
828
829 /* ------- awk program text parsing ------- */
830
831 /* Parse next token pointed by global pos, place results into global t.
832  * If token isn't expected, give away. Return token class
833  */
834 static uint32_t next_token(uint32_t expected)
835 {
836         char *p, *pp, *s;
837         char *tl;
838         uint32_t tc;
839         const uint32_t *ti;
840         int l;
841         static int concat_inserted;
842         static uint32_t save_tclass, save_info;
843         static uint32_t ltclass = TC_OPTERM;
844
845         if (t.rollback) {
846
847                 t.rollback = FALSE;
848
849         } else if (concat_inserted) {
850
851                 concat_inserted = FALSE;
852                 t.tclass = save_tclass;
853                 t.info = save_info;
854
855         } else {
856
857                 p = pos;
858
859         readnext:
860                 skip_spaces(&p);
861                 lineno = t.lineno;
862                 if (*p == '#')
863                         while (*p != '\n' && *p != '\0') p++;
864
865                 if (*p == '\n')
866                         t.lineno++;
867
868                 if (*p == '\0') {
869                         tc = TC_EOF;
870
871                 } else if (*p == '\"') {
872                         /* it's a string */
873                         t.string = s = ++p;
874                         while (*p != '\"') {
875                                 if (*p == '\0' || *p == '\n')
876                                         syntax_error(EMSG_UNEXP_EOS);
877                                 *(s++) = nextchar(&p);
878                         }
879                         p++;
880                         *s = '\0';
881                         tc = TC_STRING;
882
883                 } else if ((expected & TC_REGEXP) && *p == '/') {
884                         /* it's regexp */
885                         t.string = s = ++p;
886                         while (*p != '/') {
887                                 if (*p == '\0' || *p == '\n')
888                                         syntax_error(EMSG_UNEXP_EOS);
889                                 if ((*s++ = *p++) == '\\') {
890                                         pp = p;
891                                         *(s-1) = bb_process_escape_sequence((const char **)&p);
892                                         if (*pp == '\\') *s++ = '\\';
893                                         if (p == pp) *s++ = *p++;
894                                 }
895                         }
896                         p++;
897                         *s = '\0';
898                         tc = TC_REGEXP;
899
900                 } else if (*p == '.' || isdigit(*p)) {
901                         /* it's a number */
902                         t.number = strtod(p, &p);
903                         if (*p == '.')
904                                 syntax_error(EMSG_UNEXP_TOKEN);
905                         tc = TC_NUMBER;
906
907                 } else {
908                         /* search for something known */
909                         tl = tokenlist;
910                         tc = 0x00000001;
911                         ti = tokeninfo;
912                         while (*tl) {
913                                 l = *(tl++);
914                                 if (l == NTCC) {
915                                         tc <<= 1;
916                                         continue;
917                                 }
918                                 /* if token class is expected, token
919                                  * matches and it's not a longer word,
920                                  * then this is what we are looking for
921                                  */
922                                 if ((tc & (expected | TC_WORD | TC_NEWLINE)) &&
923                                 *tl == *p && strncmp(p, tl, l) == 0 &&
924                                 !((tc & TC_WORD) && isalnum_(*(p + l)))) {
925                                         t.info = *ti;
926                                         p += l;
927                                         break;
928                                 }
929                                 ti++;
930                                 tl += l;
931                         }
932
933                         if (! *tl) {
934                                 /* it's a name (var/array/function),
935                                  * otherwise it's something wrong
936                                  */
937                                 if (! isalnum_(*p))
938                                         syntax_error(EMSG_UNEXP_TOKEN);
939
940                                 t.string = --p;
941                                 while(isalnum_(*(++p))) {
942                                         *(p-1) = *p;
943                                 }
944                                 *(p-1) = '\0';
945                                 tc = TC_VARIABLE;
946                                 /* also consume whitespace between functionname and bracket */
947                                 if (! (expected & TC_VARIABLE)) skip_spaces(&p);
948                                 if (*p == '(') {
949                                         tc = TC_FUNCTION;
950                                 } else {
951                                         if (*p == '[') {
952                                                 p++;
953                                                 tc = TC_ARRAY;
954                                         }
955                                 }
956                         }
957                 }
958                 pos = p;
959
960                 /* skipping newlines in some cases */
961                 if ((ltclass & TC_NOTERM) && (tc & TC_NEWLINE))
962                         goto readnext;
963
964                 /* insert concatenation operator when needed */
965                 if ((ltclass&TC_CONCAT1) && (tc&TC_CONCAT2) && (expected&TC_BINOP)) {
966                         concat_inserted = TRUE;
967                         save_tclass = tc;
968                         save_info = t.info;
969                         tc = TC_BINOP;
970                         t.info = OC_CONCAT | SS | P(35);
971                 }
972
973                 t.tclass = tc;
974         }
975         ltclass = t.tclass;
976
977         /* Are we ready for this? */
978         if (! (ltclass & expected))
979                 syntax_error((ltclass & (TC_NEWLINE | TC_EOF)) ?
980                                                                 EMSG_UNEXP_EOS : EMSG_UNEXP_TOKEN);
981
982         return ltclass;
983 }
984
985 static void rollback_token(void) { t.rollback = TRUE; }
986
987 static node *new_node(uint32_t info)
988 {
989         node *n;
990
991         n = xzalloc(sizeof(node));
992         n->info = info;
993         n->lineno = lineno;
994         return n;
995 }
996
997 static node *mk_re_node(char *s, node *n, regex_t *re)
998 {
999         n->info = OC_REGEXP;
1000         n->l.re = re;
1001         n->r.ire = re + 1;
1002         xregcomp(re, s, REG_EXTENDED);
1003         xregcomp(re+1, s, REG_EXTENDED | REG_ICASE);
1004
1005         return n;
1006 }
1007
1008 static node *condition(void)
1009 {
1010         next_token(TC_SEQSTART);
1011         return parse_expr(TC_SEQTERM);
1012 }
1013
1014 /* parse expression terminated by given argument, return ptr
1015  * to built subtree. Terminator is eaten by parse_expr */
1016 static node *parse_expr(uint32_t iexp)
1017 {
1018         node sn;
1019         node *cn = &sn;
1020         node *vn, *glptr;
1021         uint32_t tc, xtc;
1022         var *v;
1023
1024         sn.info = PRIMASK;
1025         sn.r.n = glptr = NULL;
1026         xtc = TC_OPERAND | TC_UOPPRE | TC_REGEXP | iexp;
1027
1028         while (! ((tc = next_token(xtc)) & iexp)) {
1029                 if (glptr && (t.info == (OC_COMPARE|VV|P(39)|2))) {
1030                         /* input redirection (<) attached to glptr node */
1031                         cn = glptr->l.n = new_node(OC_CONCAT|SS|P(37));
1032                         cn->a.n = glptr;
1033                         xtc = TC_OPERAND | TC_UOPPRE;
1034                         glptr = NULL;
1035
1036                 } else if (tc & (TC_BINOP | TC_UOPPOST)) {
1037                         /* for binary and postfix-unary operators, jump back over
1038                          * previous operators with higher priority */
1039                         vn = cn;
1040                         while ( ((t.info & PRIMASK) > (vn->a.n->info & PRIMASK2)) ||
1041                           ((t.info == vn->info) && ((t.info & OPCLSMASK) == OC_COLON)) )
1042                                 vn = vn->a.n;
1043                         if ((t.info & OPCLSMASK) == OC_TERNARY)
1044                                 t.info += P(6);
1045                         cn = vn->a.n->r.n = new_node(t.info);
1046                         cn->a.n = vn->a.n;
1047                         if (tc & TC_BINOP) {
1048                                 cn->l.n = vn;
1049                                 xtc = TC_OPERAND | TC_UOPPRE | TC_REGEXP;
1050                                 if ((t.info & OPCLSMASK) == OC_PGETLINE) {
1051                                         /* it's a pipe */
1052                                         next_token(TC_GETLINE);
1053                                         /* give maximum priority to this pipe */
1054                                         cn->info &= ~PRIMASK;
1055                                         xtc = TC_OPERAND | TC_UOPPRE | TC_BINOP | iexp;
1056                                 }
1057                         } else {
1058                                 cn->r.n = vn;
1059                                 xtc = TC_OPERAND | TC_UOPPRE | TC_BINOP | iexp;
1060                         }
1061                         vn->a.n = cn;
1062
1063                 } else {
1064                         /* for operands and prefix-unary operators, attach them
1065                          * to last node */
1066                         vn = cn;
1067                         cn = vn->r.n = new_node(t.info);
1068                         cn->a.n = vn;
1069                         xtc = TC_OPERAND | TC_UOPPRE | TC_REGEXP;
1070                         if (tc & (TC_OPERAND | TC_REGEXP)) {
1071                                 xtc = TC_UOPPRE | TC_UOPPOST | TC_BINOP | TC_OPERAND | iexp;
1072                                 /* one should be very careful with switch on tclass -
1073                                  * only simple tclasses should be used! */
1074                                 switch (tc) {
1075                                   case TC_VARIABLE:
1076                                   case TC_ARRAY:
1077                                         cn->info = OC_VAR;
1078                                         if ((v = hash_search(ahash, t.string)) != NULL) {
1079                                                 cn->info = OC_FNARG;
1080                                                 cn->l.i = v->x.aidx;
1081                                         } else {
1082                                                 cn->l.v = newvar(t.string);
1083                                         }
1084                                         if (tc & TC_ARRAY) {
1085                                                 cn->info |= xS;
1086                                                 cn->r.n = parse_expr(TC_ARRTERM);
1087                                         }
1088                                         break;
1089
1090                                   case TC_NUMBER:
1091                                   case TC_STRING:
1092                                         cn->info = OC_VAR;
1093                                         v = cn->l.v = xzalloc(sizeof(var));
1094                                         if (tc & TC_NUMBER)
1095                                                 setvar_i(v, t.number);
1096                                         else
1097                                                 setvar_s(v, t.string);
1098                                         break;
1099
1100                                   case TC_REGEXP:
1101                                         mk_re_node(t.string, cn, xzalloc(sizeof(regex_t)*2));
1102                                         break;
1103
1104                                   case TC_FUNCTION:
1105                                         cn->info = OC_FUNC;
1106                                         cn->r.f = newfunc(t.string);
1107                                         cn->l.n = condition();
1108                                         break;
1109
1110                                   case TC_SEQSTART:
1111                                         cn = vn->r.n = parse_expr(TC_SEQTERM);
1112                                         cn->a.n = vn;
1113                                         break;
1114
1115                                   case TC_GETLINE:
1116                                         glptr = cn;
1117                                         xtc = TC_OPERAND | TC_UOPPRE | TC_BINOP | iexp;
1118                                         break;
1119
1120                                   case TC_BUILTIN:
1121                                         cn->l.n = condition();
1122                                         break;
1123                                 }
1124                         }
1125                 }
1126         }
1127         return sn.r.n;
1128 }
1129
1130 /* add node to chain. Return ptr to alloc'd node */
1131 static node *chain_node(uint32_t info)
1132 {
1133         node *n;
1134
1135         if (! seq->first)
1136                 seq->first = seq->last = new_node(0);
1137
1138         if (seq->programname != programname) {
1139                 seq->programname = programname;
1140                 n = chain_node(OC_NEWSOURCE);
1141                 n->l.s = xstrdup(programname);
1142         }
1143
1144         n = seq->last;
1145         n->info = info;
1146         seq->last = n->a.n = new_node(OC_DONE);
1147
1148         return n;
1149 }
1150
1151 static void chain_expr(uint32_t info)
1152 {
1153         node *n;
1154
1155         n = chain_node(info);
1156         n->l.n = parse_expr(TC_OPTERM | TC_GRPTERM);
1157         if (t.tclass & TC_GRPTERM)
1158                 rollback_token();
1159 }
1160
1161 static node *chain_loop(node *nn)
1162 {
1163         node *n, *n2, *save_brk, *save_cont;
1164
1165         save_brk = break_ptr;
1166         save_cont = continue_ptr;
1167
1168         n = chain_node(OC_BR | Vx);
1169         continue_ptr = new_node(OC_EXEC);
1170         break_ptr = new_node(OC_EXEC);
1171         chain_group();
1172         n2 = chain_node(OC_EXEC | Vx);
1173         n2->l.n = nn;
1174         n2->a.n = n;
1175         continue_ptr->a.n = n2;
1176         break_ptr->a.n = n->r.n = seq->last;
1177
1178         continue_ptr = save_cont;
1179         break_ptr = save_brk;
1180
1181         return n;
1182 }
1183
1184 /* parse group and attach it to chain */
1185 static void chain_group(void)
1186 {
1187         uint32_t c;
1188         node *n, *n2, *n3;
1189
1190         do {
1191                 c = next_token(TC_GRPSEQ);
1192         } while (c & TC_NEWLINE);
1193
1194         if (c & TC_GRPSTART) {
1195                 while(next_token(TC_GRPSEQ | TC_GRPTERM) != TC_GRPTERM) {
1196                         if (t.tclass & TC_NEWLINE) continue;
1197                         rollback_token();
1198                         chain_group();
1199                 }
1200         } else if (c & (TC_OPSEQ | TC_OPTERM)) {
1201                 rollback_token();
1202                 chain_expr(OC_EXEC | Vx);
1203         } else {                                                /* TC_STATEMNT */
1204                 switch (t.info & OPCLSMASK) {
1205                         case ST_IF:
1206                                 n = chain_node(OC_BR | Vx);
1207                                 n->l.n = condition();
1208                                 chain_group();
1209                                 n2 = chain_node(OC_EXEC);
1210                                 n->r.n = seq->last;
1211                                 if (next_token(TC_GRPSEQ | TC_GRPTERM | TC_ELSE)==TC_ELSE) {
1212                                         chain_group();
1213                                         n2->a.n = seq->last;
1214                                 } else {
1215                                         rollback_token();
1216                                 }
1217                                 break;
1218
1219                         case ST_WHILE:
1220                                 n2 = condition();
1221                                 n = chain_loop(NULL);
1222                                 n->l.n = n2;
1223                                 break;
1224
1225                         case ST_DO:
1226                                 n2 = chain_node(OC_EXEC);
1227                                 n = chain_loop(NULL);
1228                                 n2->a.n = n->a.n;
1229                                 next_token(TC_WHILE);
1230                                 n->l.n = condition();
1231                                 break;
1232
1233                         case ST_FOR:
1234                                 next_token(TC_SEQSTART);
1235                                 n2 = parse_expr(TC_SEMICOL | TC_SEQTERM);
1236                                 if (t.tclass & TC_SEQTERM) {                            /* for-in */
1237                                         if ((n2->info & OPCLSMASK) != OC_IN)
1238                                                 syntax_error(EMSG_UNEXP_TOKEN);
1239                                         n = chain_node(OC_WALKINIT | VV);
1240                                         n->l.n = n2->l.n;
1241                                         n->r.n = n2->r.n;
1242                                         n = chain_loop(NULL);
1243                                         n->info = OC_WALKNEXT | Vx;
1244                                         n->l.n = n2->l.n;
1245                                 } else {                                                                        /* for(;;) */
1246                                         n = chain_node(OC_EXEC | Vx);
1247                                         n->l.n = n2;
1248                                         n2 = parse_expr(TC_SEMICOL);
1249                                         n3 = parse_expr(TC_SEQTERM);
1250                                         n = chain_loop(n3);
1251                                         n->l.n = n2;
1252                                         if (! n2)
1253                                                 n->info = OC_EXEC;
1254                                 }
1255                                 break;
1256
1257                         case OC_PRINT:
1258                         case OC_PRINTF:
1259                                 n = chain_node(t.info);
1260                                 n->l.n = parse_expr(TC_OPTERM | TC_OUTRDR | TC_GRPTERM);
1261                                 if (t.tclass & TC_OUTRDR) {
1262                                         n->info |= t.info;
1263                                         n->r.n = parse_expr(TC_OPTERM | TC_GRPTERM);
1264                                 }
1265                                 if (t.tclass & TC_GRPTERM)
1266                                         rollback_token();
1267                                 break;
1268
1269                         case OC_BREAK:
1270                                 n = chain_node(OC_EXEC);
1271                                 n->a.n = break_ptr;
1272                                 break;
1273
1274                         case OC_CONTINUE:
1275                                 n = chain_node(OC_EXEC);
1276                                 n->a.n = continue_ptr;
1277                                 break;
1278
1279                         /* delete, next, nextfile, return, exit */
1280                         default:
1281                                 chain_expr(t.info);
1282
1283                 }
1284         }
1285 }
1286
1287 static void parse_program(char *p)
1288 {
1289         uint32_t tclass;
1290         node *cn;
1291         func *f;
1292         var *v;
1293
1294         pos = p;
1295         t.lineno = 1;
1296         while((tclass = next_token(TC_EOF | TC_OPSEQ | TC_GRPSTART |
1297                                 TC_OPTERM | TC_BEGIN | TC_END | TC_FUNCDECL)) != TC_EOF) {
1298
1299                 if (tclass & TC_OPTERM)
1300                         continue;
1301
1302                 seq = &mainseq;
1303                 if (tclass & TC_BEGIN) {
1304                         seq = &beginseq;
1305                         chain_group();
1306
1307                 } else if (tclass & TC_END) {
1308                         seq = &endseq;
1309                         chain_group();
1310
1311                 } else if (tclass & TC_FUNCDECL) {
1312                         next_token(TC_FUNCTION);
1313                         pos++;
1314                         f = newfunc(t.string);
1315                         f->body.first = NULL;
1316                         f->nargs = 0;
1317                         while(next_token(TC_VARIABLE | TC_SEQTERM) & TC_VARIABLE) {
1318                                 v = findvar(ahash, t.string);
1319                                 v->x.aidx = (f->nargs)++;
1320
1321                                 if (next_token(TC_COMMA | TC_SEQTERM) & TC_SEQTERM)
1322                                         break;
1323                         }
1324                         seq = &(f->body);
1325                         chain_group();
1326                         clear_array(ahash);
1327
1328                 } else if (tclass & TC_OPSEQ) {
1329                         rollback_token();
1330                         cn = chain_node(OC_TEST);
1331                         cn->l.n = parse_expr(TC_OPTERM | TC_EOF | TC_GRPSTART);
1332                         if (t.tclass & TC_GRPSTART) {
1333                                 rollback_token();
1334                                 chain_group();
1335                         } else {
1336                                 chain_node(OC_PRINT);
1337                         }
1338                         cn->r.n = mainseq.last;
1339
1340                 } else /* if (tclass & TC_GRPSTART) */ {
1341                         rollback_token();
1342                         chain_group();
1343                 }
1344         }
1345 }
1346
1347
1348 /* -------- program execution part -------- */
1349
1350 static node *mk_splitter(char *s, tsplitter *spl)
1351 {
1352         regex_t *re, *ire;
1353         node *n;
1354
1355         re = &spl->re[0];
1356         ire = &spl->re[1];
1357         n = &spl->n;
1358         if ((n->info & OPCLSMASK) == OC_REGEXP) {
1359                 regfree(re);
1360                 regfree(ire);
1361         }
1362         if (strlen(s) > 1) {
1363                 mk_re_node(s, n, re);
1364         } else {
1365                 n->info = (uint32_t) *s;
1366         }
1367
1368         return n;
1369 }
1370
1371 /* use node as a regular expression. Supplied with node ptr and regex_t
1372  * storage space. Return ptr to regex (if result points to preg, it should
1373  * be later regfree'd manually
1374  */
1375 static regex_t *as_regex(node *op, regex_t *preg)
1376 {
1377         var *v;
1378         char *s;
1379
1380         if ((op->info & OPCLSMASK) == OC_REGEXP) {
1381                 return icase ? op->r.ire : op->l.re;
1382         } else {
1383                 v = nvalloc(1);
1384                 s = getvar_s(evaluate(op, v));
1385                 xregcomp(preg, s, icase ? REG_EXTENDED | REG_ICASE : REG_EXTENDED);
1386                 nvfree(v);
1387                 return preg;
1388         }
1389 }
1390
1391 /* gradually increasing buffer */
1392 static void qrealloc(char **b, int n, int *size)
1393 {
1394         if (! *b || n >= *size)
1395                 *b = xrealloc(*b, *size = n + (n>>1) + 80);
1396 }
1397
1398 /* resize field storage space */
1399 static void fsrealloc(int size)
1400 {
1401         static int maxfields = 0;
1402         int i;
1403
1404         if (size >= maxfields) {
1405                 i = maxfields;
1406                 maxfields = size + 16;
1407                 Fields = (var *)xrealloc(Fields, maxfields * sizeof(var));
1408                 for (; i<maxfields; i++) {
1409                         Fields[i].type = VF_SPECIAL;
1410                         Fields[i].string = NULL;
1411                 }
1412         }
1413
1414         if (size < nfields) {
1415                 for (i=size; i<nfields; i++) {
1416                         clrvar(Fields+i);
1417                 }
1418         }
1419         nfields = size;
1420 }
1421
1422 static int awk_split(char *s, node *spl, char **slist)
1423 {
1424         int l, n=0;
1425         char c[4];
1426         char *s1;
1427         regmatch_t pmatch[2];
1428
1429         /* in worst case, each char would be a separate field */
1430         *slist = s1 = xstrndup(s, strlen(s) * 2 + 3);
1431
1432         c[0] = c[1] = (char)spl->info;
1433         c[2] = c[3] = '\0';
1434         if (*getvar_s(V[RS]) == '\0') c[2] = '\n';
1435
1436         if ((spl->info & OPCLSMASK) == OC_REGEXP) {             /* regex split */
1437                 while (*s) {
1438                         l = strcspn(s, c+2);
1439                         if (regexec(icase ? spl->r.ire : spl->l.re, s, 1, pmatch, 0) == 0 &&
1440                         pmatch[0].rm_so <= l) {
1441                                 l = pmatch[0].rm_so;
1442                                 if (pmatch[0].rm_eo == 0) { l++; pmatch[0].rm_eo++; }
1443                         } else {
1444                                 pmatch[0].rm_eo = l;
1445                                 if (*(s+l)) pmatch[0].rm_eo++;
1446                         }
1447
1448                         memcpy(s1, s, l);
1449                         *(s1+l) = '\0';
1450                         nextword(&s1);
1451                         s += pmatch[0].rm_eo;
1452                         n++;
1453                 }
1454         } else if (c[0] == '\0') {              /* null split */
1455                 while(*s) {
1456                         *(s1++) = *(s++);
1457                         *(s1++) = '\0';
1458                         n++;
1459                 }
1460         } else if (c[0] != ' ') {               /* single-character split */
1461                 if (icase) {
1462                         c[0] = toupper(c[0]);
1463                         c[1] = tolower(c[1]);
1464                 }
1465                 if (*s1) n++;
1466                 while ((s1 = strpbrk(s1, c))) {
1467                         *(s1++) = '\0';
1468                         n++;
1469                 }
1470         } else {                                /* space split */
1471                 while (*s) {
1472                         s = skip_whitespace(s);
1473                         if (! *s) break;
1474                         n++;
1475                         while (*s && !isspace(*s))
1476                                 *(s1++) = *(s++);
1477                         *(s1++) = '\0';
1478                 }
1479         }
1480         return n;
1481 }
1482
1483 static void split_f0(void)
1484 {
1485         static char *fstrings = NULL;
1486         int i, n;
1487         char *s;
1488
1489         if (is_f0_split)
1490                 return;
1491
1492         is_f0_split = TRUE;
1493         free(fstrings);
1494         fsrealloc(0);
1495         n = awk_split(getvar_s(V[F0]), &fsplitter.n, &fstrings);
1496         fsrealloc(n);
1497         s = fstrings;
1498         for (i=0; i<n; i++) {
1499                 Fields[i].string = nextword(&s);
1500                 Fields[i].type |= (VF_FSTR | VF_USER | VF_DIRTY);
1501         }
1502
1503         /* set NF manually to avoid side effects */
1504         clrvar(V[NF]);
1505         V[NF]->type = VF_NUMBER | VF_SPECIAL;
1506         V[NF]->number = nfields;
1507 }
1508
1509 /* perform additional actions when some internal variables changed */
1510 static void handle_special(var *v)
1511 {
1512         int n;
1513         char *b, *sep, *s;
1514         int sl, l, len, i, bsize;
1515
1516         if (! (v->type & VF_SPECIAL))
1517                 return;
1518
1519         if (v == V[NF]) {
1520                 n = (int)getvar_i(v);
1521                 fsrealloc(n);
1522
1523                 /* recalculate $0 */
1524                 sep = getvar_s(V[OFS]);
1525                 sl = strlen(sep);
1526                 b = NULL;
1527                 len = 0;
1528                 for (i=0; i<n; i++) {
1529                         s = getvar_s(&Fields[i]);
1530                         l = strlen(s);
1531                         if (b) {
1532                                 memcpy(b+len, sep, sl);
1533                                 len += sl;
1534                         }
1535                         qrealloc(&b, len+l+sl, &bsize);
1536                         memcpy(b+len, s, l);
1537                         len += l;
1538                 }
1539                 if (b) b[len] = '\0';
1540                 setvar_p(V[F0], b);
1541                 is_f0_split = TRUE;
1542
1543         } else if (v == V[F0]) {
1544                 is_f0_split = FALSE;
1545
1546         } else if (v == V[FS]) {
1547                 mk_splitter(getvar_s(v), &fsplitter);
1548
1549         } else if (v == V[RS]) {
1550                 mk_splitter(getvar_s(v), &rsplitter);
1551
1552         } else if (v == V[IGNORECASE]) {
1553                 icase = istrue(v);
1554
1555         } else {                                                /* $n */
1556                 n = getvar_i(V[NF]);
1557                 setvar_i(V[NF], n > v-Fields ? n : v-Fields+1);
1558                 /* right here v is invalid. Just to note... */
1559         }
1560 }
1561
1562 /* step through func/builtin/etc arguments */
1563 static node *nextarg(node **pn)
1564 {
1565         node *n;
1566
1567         n = *pn;
1568         if (n && (n->info & OPCLSMASK) == OC_COMMA) {
1569                 *pn = n->r.n;
1570                 n = n->l.n;
1571         } else {
1572                 *pn = NULL;
1573         }
1574         return n;
1575 }
1576
1577 static void hashwalk_init(var *v, xhash *array)
1578 {
1579         char **w;
1580         hash_item *hi;
1581         int i;
1582
1583         if (v->type & VF_WALK)
1584                 free(v->x.walker);
1585
1586         v->type |= VF_WALK;
1587         w = v->x.walker = xzalloc(2 + 2*sizeof(char *) + array->glen);
1588         *w = *(w+1) = (char *)(w + 2);
1589         for (i=0; i<array->csize; i++) {
1590                 hi = array->items[i];
1591                 while(hi) {
1592                         strcpy(*w, hi->name);
1593                         nextword(w);
1594                         hi = hi->next;
1595                 }
1596         }
1597 }
1598
1599 static int hashwalk_next(var *v)
1600 {
1601         char **w;
1602
1603         w = v->x.walker;
1604         if (*(w+1) == *w)
1605                 return FALSE;
1606
1607         setvar_s(v, nextword(w+1));
1608         return TRUE;
1609 }
1610
1611 /* evaluate node, return 1 when result is true, 0 otherwise */
1612 static int ptest(node *pattern)
1613 {
1614         static var v;
1615         return istrue(evaluate(pattern, &v));
1616 }
1617
1618 /* read next record from stream rsm into a variable v */
1619 static int awk_getline(rstream *rsm, var *v)
1620 {
1621         char *b;
1622         regmatch_t pmatch[2];
1623         int a, p, pp=0, size;
1624         int fd, so, eo, r, rp;
1625         char c, *m, *s;
1626
1627         /* we're using our own buffer since we need access to accumulating
1628          * characters
1629          */
1630         fd = fileno(rsm->F);
1631         m = rsm->buffer;
1632         a = rsm->adv;
1633         p = rsm->pos;
1634         size = rsm->size;
1635         c = (char) rsplitter.n.info;
1636         rp = 0;
1637
1638         if (! m) qrealloc(&m, 256, &size);
1639         do {
1640                 b = m + a;
1641                 so = eo = p;
1642                 r = 1;
1643                 if (p > 0) {
1644                         if ((rsplitter.n.info & OPCLSMASK) == OC_REGEXP) {
1645                                 if (regexec(icase ? rsplitter.n.r.ire : rsplitter.n.l.re,
1646                                                                                                 b, 1, pmatch, 0) == 0) {
1647                                         so = pmatch[0].rm_so;
1648                                         eo = pmatch[0].rm_eo;
1649                                         if (b[eo] != '\0')
1650                                                 break;
1651                                 }
1652                         } else if (c != '\0') {
1653                                 s = strchr(b+pp, c);
1654                                 if (! s) s = memchr(b+pp, '\0', p - pp);
1655                                 if (s) {
1656                                         so = eo = s-b;
1657                                         eo++;
1658                                         break;
1659                                 }
1660                         } else {
1661                                 while (b[rp] == '\n')
1662                                         rp++;
1663                                 s = strstr(b+rp, "\n\n");
1664                                 if (s) {
1665                                         so = eo = s-b;
1666                                         while (b[eo] == '\n') eo++;
1667                                         if (b[eo] != '\0')
1668                                                 break;
1669                                 }
1670                         }
1671                 }
1672
1673                 if (a > 0) {
1674                         memmove(m, (const void *)(m+a), p+1);
1675                         b = m;
1676                         a = 0;
1677                 }
1678
1679                 qrealloc(&m, a+p+128, &size);
1680                 b = m + a;
1681                 pp = p;
1682                 p += safe_read(fd, b+p, size-p-1);
1683                 if (p < pp) {
1684                         p = 0;
1685                         r = 0;
1686                         setvar_i(V[ERRNO], errno);
1687                 }
1688                 b[p] = '\0';
1689
1690         } while (p > pp);
1691
1692         if (p == 0) {
1693                 r--;
1694         } else {
1695                 c = b[so]; b[so] = '\0';
1696                 setvar_s(v, b+rp);
1697                 v->type |= VF_USER;
1698                 b[so] = c;
1699                 c = b[eo]; b[eo] = '\0';
1700                 setvar_s(V[RT], b+so);
1701                 b[eo] = c;
1702         }
1703
1704         rsm->buffer = m;
1705         rsm->adv = a + eo;
1706         rsm->pos = p - eo;
1707         rsm->size = size;
1708
1709         return r;
1710 }
1711
1712 static int fmt_num(char *b, int size, const char *format, double n, int int_as_int)
1713 {
1714         int r=0;
1715         char c;
1716         const char *s=format;
1717
1718         if (int_as_int && n == (int)n) {
1719                 r = snprintf(b, size, "%d", (int)n);
1720         } else {
1721                 do { c = *s; } while (*s && *++s);
1722                 if (strchr("diouxX", c)) {
1723                         r = snprintf(b, size, format, (int)n);
1724                 } else if (strchr("eEfgG", c)) {
1725                         r = snprintf(b, size, format, n);
1726                 } else {
1727                         runtime_error(EMSG_INV_FMT);
1728                 }
1729         }
1730         return r;
1731 }
1732
1733
1734 /* formatted output into an allocated buffer, return ptr to buffer */
1735 static char *awk_printf(node *n)
1736 {
1737         char *b = NULL;
1738         char *fmt, *s, *s1, *f;
1739         int i, j, incr, bsize;
1740         char c, c1;
1741         var *v, *arg;
1742
1743         v = nvalloc(1);
1744         fmt = f = xstrdup(getvar_s(evaluate(nextarg(&n), v)));
1745
1746         i = 0;
1747         while (*f) {
1748                 s = f;
1749                 while (*f && (*f != '%' || *(++f) == '%'))
1750                         f++;
1751                 while (*f && !isalpha(*f))
1752                         f++;
1753
1754                 incr = (f - s) + MAXVARFMT;
1755                 qrealloc(&b, incr+i, &bsize);
1756                 c = *f; if (c != '\0') f++;
1757                 c1 = *f ; *f = '\0';
1758                 arg = evaluate(nextarg(&n), v);
1759
1760                 j = i;
1761                 if (c == 'c' || !c) {
1762                         i += sprintf(b+i, s,
1763                                         is_numeric(arg) ? (char)getvar_i(arg) : *getvar_s(arg));
1764
1765                 } else if (c == 's') {
1766                         s1 = getvar_s(arg);
1767                         qrealloc(&b, incr+i+strlen(s1), &bsize);
1768                         i += sprintf(b+i, s, s1);
1769
1770                 } else {
1771                         i += fmt_num(b+i, incr, s, getvar_i(arg), FALSE);
1772                 }
1773                 *f = c1;
1774
1775                 /* if there was an error while sprintf, return value is negative */
1776                 if (i < j) i = j;
1777
1778         }
1779
1780         b = xrealloc(b, i+1);
1781         free(fmt);
1782         nvfree(v);
1783         b[i] = '\0';
1784         return b;
1785 }
1786
1787 /* common substitution routine
1788  * replace (nm) substring of (src) that match (n) with (repl), store
1789  * result into (dest), return number of substitutions. If nm=0, replace
1790  * all matches. If src or dst is NULL, use $0. If ex=TRUE, enable
1791  * subexpression matching (\1-\9)
1792  */
1793 static int awk_sub(node *rn, char *repl, int nm, var *src, var *dest, int ex)
1794 {
1795         char *ds = NULL;
1796         char *sp, *s;
1797         int c, i, j, di, rl, so, eo, nbs, n, dssize;
1798         regmatch_t pmatch[10];
1799         regex_t sreg, *re;
1800
1801         re = as_regex(rn, &sreg);
1802         if (! src) src = V[F0];
1803         if (! dest) dest = V[F0];
1804
1805         i = di = 0;
1806         sp = getvar_s(src);
1807         rl = strlen(repl);
1808         while (regexec(re, sp, 10, pmatch, sp==getvar_s(src) ? 0:REG_NOTBOL) == 0) {
1809                 so = pmatch[0].rm_so;
1810                 eo = pmatch[0].rm_eo;
1811
1812                 qrealloc(&ds, di + eo + rl, &dssize);
1813                 memcpy(ds + di, sp, eo);
1814                 di += eo;
1815                 if (++i >= nm) {
1816                         /* replace */
1817                         di -= (eo - so);
1818                         nbs = 0;
1819                         for (s = repl; *s; s++) {
1820                                 ds[di++] = c = *s;
1821                                 if (c == '\\') {
1822                                         nbs++;
1823                                         continue;
1824                                 }
1825                                 if (c == '&' || (ex && c >= '0' && c <= '9')) {
1826                                         di -= ((nbs + 3) >> 1);
1827                                         j = 0;
1828                                         if (c != '&') {
1829                                                 j = c - '0';
1830                                                 nbs++;
1831                                         }
1832                                         if (nbs % 2) {
1833                                                 ds[di++] = c;
1834                                         } else {
1835                                                 n = pmatch[j].rm_eo - pmatch[j].rm_so;
1836                                                 qrealloc(&ds, di + rl + n, &dssize);
1837                                                 memcpy(ds + di, sp + pmatch[j].rm_so, n);
1838                                                 di += n;
1839                                         }
1840                                 }
1841                                 nbs = 0;
1842                         }
1843                 }
1844
1845                 sp += eo;
1846                 if (i == nm) break;
1847                 if (eo == so) {
1848                         if (! (ds[di++] = *sp++)) break;
1849                 }
1850         }
1851
1852         qrealloc(&ds, di + strlen(sp), &dssize);
1853         strcpy(ds + di, sp);
1854         setvar_p(dest, ds);
1855         if (re == &sreg) regfree(re);
1856         return i;
1857 }
1858
1859 static var *exec_builtin(node *op, var *res)
1860 {
1861         int (*to_xxx)(int);
1862         var *tv;
1863         node *an[4];
1864         var  *av[4];
1865         char *as[4];
1866         regmatch_t pmatch[2];
1867         regex_t sreg, *re;
1868         static tsplitter tspl;
1869         node *spl;
1870         uint32_t isr, info;
1871         int nargs;
1872         time_t tt;
1873         char *s, *s1;
1874         int i, l, ll, n;
1875
1876         tv = nvalloc(4);
1877         isr = info = op->info;
1878         op = op->l.n;
1879
1880         av[2] = av[3] = NULL;
1881         for (i=0 ; i<4 && op ; i++) {
1882                 an[i] = nextarg(&op);
1883                 if (isr & 0x09000000) av[i] = evaluate(an[i], &tv[i]);
1884                 if (isr & 0x08000000) as[i] = getvar_s(av[i]);
1885                 isr >>= 1;
1886         }
1887
1888         nargs = i;
1889         if (nargs < (info >> 30))
1890                 runtime_error(EMSG_TOO_FEW_ARGS);
1891
1892         switch (info & OPNMASK) {
1893
1894           case B_a2:
1895 #ifdef CONFIG_FEATURE_AWK_MATH
1896                 setvar_i(res, atan2(getvar_i(av[i]), getvar_i(av[1])));
1897 #else
1898                 runtime_error(EMSG_NO_MATH);
1899 #endif
1900                 break;
1901
1902           case B_sp:
1903                 if (nargs > 2) {
1904                         spl = (an[2]->info & OPCLSMASK) == OC_REGEXP ?
1905                                 an[2] : mk_splitter(getvar_s(evaluate(an[2], &tv[2])), &tspl);
1906                 } else {
1907                         spl = &fsplitter.n;
1908                 }
1909
1910                 n = awk_split(as[0], spl, &s);
1911                 s1 = s;
1912                 clear_array(iamarray(av[1]));
1913                 for (i=1; i<=n; i++)
1914                         setari_u(av[1], i, nextword(&s1));
1915                 free(s);
1916                 setvar_i(res, n);
1917                 break;
1918
1919           case B_ss:
1920                 l = strlen(as[0]);
1921                 i = getvar_i(av[1]) - 1;
1922                 if (i>l) i=l; if (i<0) i=0;
1923                 n = (nargs > 2) ? getvar_i(av[2]) : l-i;
1924                 if (n<0) n=0;
1925                 s = xmalloc(n+1);
1926                 strncpy(s, as[0]+i, n);
1927                 s[n] = '\0';
1928                 setvar_p(res, s);
1929                 break;
1930                 
1931          case B_an:
1932                 setvar_i(res, (long)getvar_i(av[0]) & (long)getvar_i(av[1]));
1933                 break;
1934                 
1935          case B_co:
1936                 setvar_i(res, ~(long)getvar_i(av[0]));
1937                 break;
1938
1939          case B_ls:
1940                 setvar_i(res, (long)getvar_i(av[0]) << (long)getvar_i(av[1]));
1941                 break;
1942
1943          case B_or:
1944                 setvar_i(res, (long)getvar_i(av[0]) | (long)getvar_i(av[1]));
1945                 break;
1946
1947          case B_rs:
1948                 setvar_i(res, (long)((unsigned long)getvar_i(av[0]) >> (unsigned long)getvar_i(av[1])));
1949                 break;
1950
1951          case B_xo:
1952                 setvar_i(res, (long)getvar_i(av[0]) ^ (long)getvar_i(av[1]));
1953                 break;
1954
1955           case B_lo:
1956                 to_xxx = tolower;
1957                 goto lo_cont;
1958
1959           case B_up:
1960                 to_xxx = toupper;
1961 lo_cont:
1962                 s1 = s = xstrdup(as[0]);
1963                 while (*s1) {
1964                         *s1 = (*to_xxx)(*s1);
1965                         s1++;
1966                 }
1967                 setvar_p(res, s);
1968                 break;
1969
1970           case B_ix:
1971                 n = 0;
1972                 ll = strlen(as[1]);
1973                 l = strlen(as[0]) - ll;
1974                 if (ll > 0 && l >= 0) {
1975                         if (! icase) {
1976                                 s = strstr(as[0], as[1]);
1977                                 if (s) n = (s - as[0]) + 1;
1978                         } else {
1979                                 /* this piece of code is terribly slow and
1980                                  * really should be rewritten
1981                                  */
1982                                 for (i=0; i<=l; i++) {
1983                                         if (strncasecmp(as[0]+i, as[1], ll) == 0) {
1984                                                 n = i+1;
1985                                                 break;
1986                                         }
1987                                 }
1988                         }
1989                 }
1990                 setvar_i(res, n);
1991                 break;
1992
1993           case B_ti:
1994                 if (nargs > 1)
1995                         tt = getvar_i(av[1]);
1996                 else
1997                         time(&tt);
1998                 s = (nargs > 0) ? as[0] : "%a %b %d %H:%M:%S %Z %Y";
1999                 i = strftime(buf, MAXVARFMT, s, localtime(&tt));
2000                 buf[i] = '\0';
2001                 setvar_s(res, buf);
2002                 break;
2003
2004           case B_ma:
2005                 re = as_regex(an[1], &sreg);
2006                 n = regexec(re, as[0], 1, pmatch, 0);
2007                 if (n == 0) {
2008                         pmatch[0].rm_so++;
2009                         pmatch[0].rm_eo++;
2010                 } else {
2011                         pmatch[0].rm_so = 0;
2012                         pmatch[0].rm_eo = -1;
2013                 }
2014                 setvar_i(newvar("RSTART"), pmatch[0].rm_so);
2015                 setvar_i(newvar("RLENGTH"), pmatch[0].rm_eo - pmatch[0].rm_so);
2016                 setvar_i(res, pmatch[0].rm_so);
2017                 if (re == &sreg) regfree(re);
2018                 break;
2019
2020           case B_ge:
2021                 awk_sub(an[0], as[1], getvar_i(av[2]), av[3], res, TRUE);
2022                 break;
2023
2024           case B_gs:
2025                 setvar_i(res, awk_sub(an[0], as[1], 0, av[2], av[2], FALSE));
2026                 break;
2027
2028           case B_su:
2029                 setvar_i(res, awk_sub(an[0], as[1], 1, av[2], av[2], FALSE));
2030                 break;
2031         }
2032
2033         nvfree(tv);
2034         return res;
2035 }
2036
2037 /*
2038  * Evaluate node - the heart of the program. Supplied with subtree
2039  * and place where to store result. returns ptr to result.
2040  */
2041 #define XC(n) ((n) >> 8)
2042
2043 static var *evaluate(node *op, var *res)
2044 {
2045         /* This procedure is recursive so we should count every byte */
2046         static var *fnargs = NULL;
2047         static unsigned int seed = 1;
2048         static regex_t sreg;
2049         node *op1;
2050         var *v1;
2051         union {
2052                 var *v;
2053                 char *s;
2054                 double d;
2055                 int i;
2056         } L, R;
2057         uint32_t opinfo;
2058         short opn;
2059         union {
2060                 char *s;
2061                 rstream *rsm;
2062                 FILE *F;
2063                 var *v;
2064                 regex_t *re;
2065                 uint32_t info;
2066         } X;
2067
2068         if (! op)
2069                 return setvar_s(res, NULL);
2070
2071         v1 = nvalloc(2);
2072
2073         while (op) {
2074
2075                 opinfo = op->info;
2076                 opn = (short)(opinfo & OPNMASK);
2077                 lineno = op->lineno;
2078
2079                 /* execute inevitable things */
2080                 op1 = op->l.n;
2081                 if (opinfo & OF_RES1) X.v = L.v = evaluate(op1, v1);
2082                 if (opinfo & OF_RES2) R.v = evaluate(op->r.n, v1+1);
2083                 if (opinfo & OF_STR1) L.s = getvar_s(L.v);
2084                 if (opinfo & OF_STR2) R.s = getvar_s(R.v);
2085                 if (opinfo & OF_NUM1) L.d = getvar_i(L.v);
2086
2087                 switch (XC(opinfo & OPCLSMASK)) {
2088
2089                   /* -- iterative node type -- */
2090
2091                   /* test pattern */
2092                   case XC( OC_TEST ):
2093                         if ((op1->info & OPCLSMASK) == OC_COMMA) {
2094                                 /* it's range pattern */
2095                                 if ((opinfo & OF_CHECKED) || ptest(op1->l.n)) {
2096                                         op->info |= OF_CHECKED;
2097                                         if (ptest(op1->r.n))
2098                                                 op->info &= ~OF_CHECKED;
2099
2100                                         op = op->a.n;
2101                                 } else {
2102                                         op = op->r.n;
2103                                 }
2104                         } else {
2105                                 op = (ptest(op1)) ? op->a.n : op->r.n;
2106                         }
2107                         break;
2108
2109                   /* just evaluate an expression, also used as unconditional jump */
2110                   case XC( OC_EXEC ):
2111                         break;
2112
2113                   /* branch, used in if-else and various loops */
2114                   case XC( OC_BR ):
2115                         op = istrue(L.v) ? op->a.n : op->r.n;
2116                         break;
2117
2118                   /* initialize for-in loop */
2119                   case XC( OC_WALKINIT ):
2120                         hashwalk_init(L.v, iamarray(R.v));
2121                         break;
2122
2123                   /* get next array item */
2124                   case XC( OC_WALKNEXT ):
2125                         op = hashwalk_next(L.v) ? op->a.n : op->r.n;
2126                         break;
2127
2128                   case XC( OC_PRINT ):
2129                   case XC( OC_PRINTF ):
2130                         X.F = stdout;
2131                         if (op->r.n) {
2132                                 X.rsm = newfile(R.s);
2133                                 if (! X.rsm->F) {
2134                                         if (opn == '|') {
2135                                                 if((X.rsm->F = popen(R.s, "w")) == NULL)
2136                                                         bb_perror_msg_and_die("popen");
2137                                                 X.rsm->is_pipe = 1;
2138                                         } else {
2139                                                 X.rsm->F = xfopen(R.s, opn=='w' ? "w" : "a");
2140                                         }
2141                                 }
2142                                 X.F = X.rsm->F;
2143                         }
2144
2145                         if ((opinfo & OPCLSMASK) == OC_PRINT) {
2146                                 if (! op1) {
2147                                         fputs(getvar_s(V[F0]), X.F);
2148                                 } else {
2149                                         while (op1) {
2150                                                 L.v = evaluate(nextarg(&op1), v1);
2151                                                 if (L.v->type & VF_NUMBER) {
2152                                                         fmt_num(buf, MAXVARFMT, getvar_s(V[OFMT]),
2153                                                                         getvar_i(L.v), TRUE);
2154                                                         fputs(buf, X.F);
2155                                                 } else {
2156                                                         fputs(getvar_s(L.v), X.F);
2157                                                 }
2158
2159                                                 if (op1) fputs(getvar_s(V[OFS]), X.F);
2160                                         }
2161                                 }
2162                                 fputs(getvar_s(V[ORS]), X.F);
2163
2164                         } else {        /* OC_PRINTF */
2165                                 L.s = awk_printf(op1);
2166                                 fputs(L.s, X.F);
2167                                 free(L.s);
2168                         }
2169                         fflush(X.F);
2170                         break;
2171
2172                   case XC( OC_DELETE ):
2173                         X.info = op1->info & OPCLSMASK;
2174                         if (X.info == OC_VAR) {
2175                                 R.v = op1->l.v;
2176                         } else if (X.info == OC_FNARG) {
2177                                 R.v = &fnargs[op1->l.i];
2178                         } else {
2179                                 runtime_error(EMSG_NOT_ARRAY);
2180                         }
2181
2182                         if (op1->r.n) {
2183                                 clrvar(L.v);
2184                                 L.s = getvar_s(evaluate(op1->r.n, v1));
2185                                 hash_remove(iamarray(R.v), L.s);
2186                         } else {
2187                                 clear_array(iamarray(R.v));
2188                         }
2189                         break;
2190
2191                   case XC( OC_NEWSOURCE ):
2192                         programname = op->l.s;
2193                         break;
2194
2195                   case XC( OC_RETURN ):
2196                         copyvar(res, L.v);
2197                         break;
2198
2199                   case XC( OC_NEXTFILE ):
2200                         nextfile = TRUE;
2201                   case XC( OC_NEXT ):
2202                         nextrec = TRUE;
2203                   case XC( OC_DONE ):
2204                         clrvar(res);
2205                         break;
2206
2207                   case XC( OC_EXIT ):
2208                         awk_exit(L.d);
2209
2210                   /* -- recursive node type -- */
2211
2212                   case XC( OC_VAR ):
2213                         L.v = op->l.v;
2214                         if (L.v == V[NF])
2215                                 split_f0();
2216                         goto v_cont;
2217
2218                   case XC( OC_FNARG ):
2219                         L.v = &fnargs[op->l.i];
2220
2221 v_cont:
2222                         res = (op->r.n) ? findvar(iamarray(L.v), R.s) : L.v;
2223                         break;
2224
2225                   case XC( OC_IN ):
2226                         setvar_i(res, hash_search(iamarray(R.v), L.s) ? 1 : 0);
2227                         break;
2228
2229                   case XC( OC_REGEXP ):
2230                         op1 = op;
2231                         L.s = getvar_s(V[F0]);
2232                         goto re_cont;
2233
2234                   case XC( OC_MATCH ):
2235                         op1 = op->r.n;
2236 re_cont:
2237                         X.re = as_regex(op1, &sreg);
2238                         R.i = regexec(X.re, L.s, 0, NULL, 0);
2239                         if (X.re == &sreg) regfree(X.re);
2240                         setvar_i(res, (R.i == 0 ? 1 : 0) ^ (opn == '!' ? 1 : 0));
2241                         break;
2242
2243                   case XC( OC_MOVE ):
2244                         /* if source is a temporary string, jusk relink it to dest */
2245                         if (R.v == v1+1 && R.v->string) {
2246                                 res = setvar_p(L.v, R.v->string);
2247                                 R.v->string = NULL;
2248                         } else {
2249                                 res = copyvar(L.v, R.v);
2250                         }
2251                         break;
2252
2253                   case XC( OC_TERNARY ):
2254                         if ((op->r.n->info & OPCLSMASK) != OC_COLON)
2255                                 runtime_error(EMSG_POSSIBLE_ERROR);
2256                         res = evaluate(istrue(L.v) ? op->r.n->l.n : op->r.n->r.n, res);
2257                         break;
2258
2259                   case XC( OC_FUNC ):
2260                         if (! op->r.f->body.first)
2261                                 runtime_error(EMSG_UNDEF_FUNC);
2262
2263                         X.v = R.v = nvalloc(op->r.f->nargs+1);
2264                         while (op1) {
2265                                 L.v = evaluate(nextarg(&op1), v1);
2266                                 copyvar(R.v, L.v);
2267                                 R.v->type |= VF_CHILD;
2268                                 R.v->x.parent = L.v;
2269                                 if (++R.v - X.v >= op->r.f->nargs)
2270                                         break;
2271                         }
2272
2273                         R.v = fnargs;
2274                         fnargs = X.v;
2275
2276                         L.s = programname;
2277                         res = evaluate(op->r.f->body.first, res);
2278                         programname = L.s;
2279
2280                         nvfree(fnargs);
2281                         fnargs = R.v;
2282                         break;
2283
2284                   case XC( OC_GETLINE ):
2285                   case XC( OC_PGETLINE ):
2286                         if (op1) {
2287                                 X.rsm = newfile(L.s);
2288                                 if (! X.rsm->F) {
2289                                         if ((opinfo & OPCLSMASK) == OC_PGETLINE) {
2290                                                 X.rsm->F = popen(L.s, "r");
2291                                                 X.rsm->is_pipe = TRUE;
2292                                         } else {
2293                                                 X.rsm->F = fopen(L.s, "r");             /* not xfopen! */
2294                                         }
2295                                 }
2296                         } else {
2297                                 if (! iF) iF = next_input_file();
2298                                 X.rsm = iF;
2299                         }
2300
2301                         if (! X.rsm->F) {
2302                                 setvar_i(V[ERRNO], errno);
2303                                 setvar_i(res, -1);
2304                                 break;
2305                         }
2306
2307                         if (! op->r.n)
2308                                 R.v = V[F0];
2309
2310                         L.i = awk_getline(X.rsm, R.v);
2311                         if (L.i > 0) {
2312                                 if (! op1) {
2313                                         incvar(V[FNR]);
2314                                         incvar(V[NR]);
2315                                 }
2316                         }
2317                         setvar_i(res, L.i);
2318                         break;
2319
2320                   /* simple builtins */
2321                   case XC( OC_FBLTIN ):
2322                         switch (opn) {
2323
2324                           case F_in:
2325                                 R.d = (int)L.d;
2326                                 break;
2327
2328                           case F_rn:
2329                                 R.d =  (double)rand() / (double)RAND_MAX;
2330                                 break;
2331
2332 #ifdef CONFIG_FEATURE_AWK_MATH
2333                           case F_co:
2334                                 R.d = cos(L.d);
2335                                 break;
2336
2337                           case F_ex:
2338                                 R.d = exp(L.d);
2339                                 break;
2340
2341                           case F_lg:
2342                                 R.d = log(L.d);
2343                                 break;
2344
2345                           case F_si:
2346                                 R.d = sin(L.d);
2347                                 break;
2348
2349                           case F_sq:
2350                                 R.d = sqrt(L.d);
2351                                 break;
2352 #else
2353                           case F_co:
2354                           case F_ex:
2355                           case F_lg:
2356                           case F_si:
2357                           case F_sq:
2358                                 runtime_error(EMSG_NO_MATH);
2359                                 break;
2360 #endif
2361
2362                           case F_sr:
2363                                 R.d = (double)seed;
2364                                 seed = op1 ? (unsigned int)L.d : (unsigned int)time(NULL);
2365                                 srand(seed);
2366                                 break;
2367
2368                           case F_ti:
2369                                 R.d = time(NULL);
2370                                 break;
2371
2372                           case F_le:
2373                                 if (! op1)
2374                                         L.s = getvar_s(V[F0]);
2375                                 R.d = strlen(L.s);
2376                                 break;
2377
2378                           case F_sy:
2379                                 fflush(NULL);
2380                                 R.d = (ENABLE_FEATURE_ALLOW_EXEC && L.s && *L.s)
2381                                                 ? (system(L.s) >> 8) : 0;
2382                                 break;
2383
2384                           case F_ff:
2385                                 if (! op1)
2386                                         fflush(stdout);
2387                                 else {
2388                                         if (L.s && *L.s) {
2389                                                 X.rsm = newfile(L.s);
2390                                                 fflush(X.rsm->F);
2391                                         } else {
2392                                                 fflush(NULL);
2393                                         }
2394                                 }
2395                                 break;
2396
2397                           case F_cl:
2398                                 X.rsm = (rstream *)hash_search(fdhash, L.s);
2399                                 if (X.rsm) {
2400                                         R.i = X.rsm->is_pipe ? pclose(X.rsm->F) : fclose(X.rsm->F);
2401                                         free(X.rsm->buffer);
2402                                         hash_remove(fdhash, L.s);
2403                                 }
2404                                 if (R.i != 0)
2405                                         setvar_i(V[ERRNO], errno);
2406                                 R.d = (double)R.i;
2407                                 break;
2408                         }
2409                         setvar_i(res, R.d);
2410                         break;
2411
2412                   case XC( OC_BUILTIN ):
2413                         res = exec_builtin(op, res);
2414                         break;
2415
2416                   case XC( OC_SPRINTF ):
2417                         setvar_p(res, awk_printf(op1));
2418                         break;
2419
2420                   case XC( OC_UNARY ):
2421                         X.v = R.v;
2422                         L.d = R.d = getvar_i(R.v);
2423                         switch (opn) {
2424                           case 'P':
2425                                 L.d = ++R.d;
2426                                 goto r_op_change;
2427                           case 'p':
2428                                 R.d++;
2429                                 goto r_op_change;
2430                           case 'M':
2431                                 L.d = --R.d;
2432                                 goto r_op_change;
2433                           case 'm':
2434                                 R.d--;
2435                                 goto r_op_change;
2436                           case '!':
2437                                 L.d = istrue(X.v) ? 0 : 1;
2438                                 break;
2439                           case '-':
2440                                 L.d = -R.d;
2441                                 break;
2442                         r_op_change:
2443                                 setvar_i(X.v, R.d);
2444                         }
2445                         setvar_i(res, L.d);
2446                         break;
2447
2448                   case XC( OC_FIELD ):
2449                         R.i = (int)getvar_i(R.v);
2450                         if (R.i == 0) {
2451                                 res = V[F0];
2452                         } else {
2453                                 split_f0();
2454                                 if (R.i > nfields)
2455                                         fsrealloc(R.i);
2456
2457                                 res = &Fields[R.i-1];
2458                         }
2459                         break;
2460
2461                   /* concatenation (" ") and index joining (",") */
2462                   case XC( OC_CONCAT ):
2463                   case XC( OC_COMMA ):
2464                         opn = strlen(L.s) + strlen(R.s) + 2;
2465                         X.s = xmalloc(opn);
2466                         strcpy(X.s, L.s);
2467                         if ((opinfo & OPCLSMASK) == OC_COMMA) {
2468                                 L.s = getvar_s(V[SUBSEP]);
2469                                 X.s = (char *)xrealloc(X.s, opn + strlen(L.s));
2470                                 strcat(X.s, L.s);
2471                         }
2472                         strcat(X.s, R.s);
2473                         setvar_p(res, X.s);
2474                         break;
2475
2476                   case XC( OC_LAND ):
2477                         setvar_i(res, istrue(L.v) ? ptest(op->r.n) : 0);
2478                         break;
2479
2480                   case XC( OC_LOR ):
2481                         setvar_i(res, istrue(L.v) ? 1 : ptest(op->r.n));
2482                         break;
2483
2484                   case XC( OC_BINARY ):
2485                   case XC( OC_REPLACE ):
2486                         R.d = getvar_i(R.v);
2487                         switch (opn) {
2488                           case '+':
2489                                 L.d += R.d;
2490                                 break;
2491                           case '-':
2492                                 L.d -= R.d;
2493                                 break;
2494                           case '*':
2495                                 L.d *= R.d;
2496                                 break;
2497                           case '/':
2498                                 if (R.d == 0) runtime_error(EMSG_DIV_BY_ZERO);
2499                                 L.d /= R.d;
2500                                 break;
2501                           case '&':
2502 #ifdef CONFIG_FEATURE_AWK_MATH
2503                                 L.d = pow(L.d, R.d);
2504 #else
2505                                 runtime_error(EMSG_NO_MATH);
2506 #endif
2507                                 break;
2508                           case '%':
2509                                 if (R.d == 0) runtime_error(EMSG_DIV_BY_ZERO);
2510                                 L.d -= (int)(L.d / R.d) * R.d;
2511                                 break;
2512                         }
2513                         res = setvar_i(((opinfo&OPCLSMASK) == OC_BINARY) ? res : X.v, L.d);
2514                         break;
2515
2516                   case XC( OC_COMPARE ):
2517                         if (is_numeric(L.v) && is_numeric(R.v)) {
2518                                 L.d = getvar_i(L.v) - getvar_i(R.v);
2519                         } else {
2520                                 L.s = getvar_s(L.v);
2521                                 R.s = getvar_s(R.v);
2522                                 L.d = icase ? strcasecmp(L.s, R.s) : strcmp(L.s, R.s);
2523                         }
2524                         switch (opn & 0xfe) {
2525                           case 0:
2526                                 R.i = (L.d > 0);
2527                                 break;
2528                           case 2:
2529                                 R.i = (L.d >= 0);
2530                                 break;
2531                           case 4:
2532                                 R.i = (L.d == 0);
2533                                 break;
2534                         }
2535                         setvar_i(res, (opn & 0x1 ? R.i : !R.i) ? 1 : 0);
2536                         break;
2537
2538                   default:
2539                         runtime_error(EMSG_POSSIBLE_ERROR);
2540                 }
2541                 if ((opinfo & OPCLSMASK) <= SHIFT_TIL_THIS)
2542                         op = op->a.n;
2543                 if ((opinfo & OPCLSMASK) >= RECUR_FROM_THIS)
2544                         break;
2545                 if (nextrec)
2546                         break;
2547         }
2548         nvfree(v1);
2549         return res;
2550 }
2551
2552
2553 /* -------- main & co. -------- */
2554
2555 static int awk_exit(int r)
2556 {
2557         unsigned int i;
2558         hash_item *hi;
2559         static var tv;
2560
2561         if (! exiting) {
2562                 exiting = TRUE;
2563                 nextrec = FALSE;
2564                 evaluate(endseq.first, &tv);
2565         }
2566
2567         /* waiting for children */
2568         for (i=0; i<fdhash->csize; i++) {
2569                 hi = fdhash->items[i];
2570                 while(hi) {
2571                         if (hi->data.rs.F && hi->data.rs.is_pipe)
2572                                 pclose(hi->data.rs.F);
2573                         hi = hi->next;
2574                 }
2575         }
2576
2577         exit(r);
2578 }
2579
2580 /* if expr looks like "var=value", perform assignment and return 1,
2581  * otherwise return 0 */
2582 static int is_assignment(const char *expr)
2583 {
2584         char *exprc, *s, *s0, *s1;
2585
2586         exprc = xstrdup(expr);
2587         if (!isalnum_(*exprc) || (s = strchr(exprc, '=')) == NULL) {
2588                 free(exprc);
2589                 return FALSE;
2590         }
2591
2592         *(s++) = '\0';
2593         s0 = s1 = s;
2594         while (*s)
2595                 *(s1++) = nextchar(&s);
2596
2597         *s1 = '\0';
2598         setvar_u(newvar(exprc), s0);
2599         free(exprc);
2600         return TRUE;
2601 }
2602
2603 /* switch to next input file */
2604 static rstream *next_input_file(void)
2605 {
2606         static rstream rsm;
2607         FILE *F = NULL;
2608         char *fname, *ind;
2609         static int files_happen = FALSE;
2610
2611         if (rsm.F) fclose(rsm.F);
2612         rsm.F = NULL;
2613         rsm.pos = rsm.adv = 0;
2614
2615         do {
2616                 if (getvar_i(V[ARGIND])+1 >= getvar_i(V[ARGC])) {
2617                         if (files_happen)
2618                                 return NULL;
2619                         fname = "-";
2620                         F = stdin;
2621                 } else {
2622                         ind = getvar_s(incvar(V[ARGIND]));
2623                         fname = getvar_s(findvar(iamarray(V[ARGV]), ind));
2624                         if (fname && *fname && !is_assignment(fname))
2625                                 F = afopen(fname, "r");
2626                 }
2627         } while (!F);
2628
2629         files_happen = TRUE;
2630         setvar_s(V[FILENAME], fname);
2631         rsm.F = F;
2632         return &rsm;
2633 }
2634
2635 int awk_main(int argc, char **argv)
2636 {
2637         unsigned opt;
2638         char *opt_F, *opt_v, *opt_W;
2639         char *s, *s1;
2640         int i, j, c, flen;
2641         var *v;
2642         static var tv;
2643         char **envp;
2644         static int from_file = FALSE;
2645         rstream *rsm;
2646         FILE *F, *stdfiles[3];
2647         static char * stdnames = "/dev/stdin\0/dev/stdout\0/dev/stderr";
2648
2649         /* allocate global buffer */
2650         buf = xmalloc(MAXVARFMT+1);
2651
2652         vhash = hash_init();
2653         ahash = hash_init();
2654         fdhash = hash_init();
2655         fnhash = hash_init();
2656
2657         /* initialize variables */
2658         for (i=0;  *vNames;  i++) {
2659                 V[i] = v = newvar(nextword(&vNames));
2660                 if (*vValues != '\377')
2661                         setvar_s(v, nextword(&vValues));
2662                 else
2663                         setvar_i(v, 0);
2664
2665                 if (*vNames == '*') {
2666                         v->type |= VF_SPECIAL;
2667                         vNames++;
2668                 }
2669         }
2670
2671         handle_special(V[FS]);
2672         handle_special(V[RS]);
2673
2674         stdfiles[0] = stdin;
2675         stdfiles[1] = stdout;
2676         stdfiles[2] = stderr;
2677         for (i=0; i<3; i++) {
2678                 rsm = newfile(nextword(&stdnames));
2679                 rsm->F = stdfiles[i];
2680         }
2681
2682         for (envp=environ; *envp; envp++) {
2683                 s = xstrdup(*envp);
2684                 s1 = strchr(s, '=');
2685                 if (!s1) {
2686                         goto keep_going;
2687                 }
2688                 *(s1++) = '\0';
2689                 setvar_u(findvar(iamarray(V[ENVIRON]), s), s1);
2690 keep_going:
2691                 free(s);
2692         }
2693
2694         opt = getopt32(argc, argv, "F:v:f:W:", &opt_F, &opt_v, &programname, &opt_W);
2695         if (opt & 0x1) setvar_s(V[FS], opt_F); // -F
2696         if (opt & 0x2) if (!is_assignment(opt_v)) bb_show_usage(); // -v
2697         if (opt & 0x4) { // -f
2698                 from_file = TRUE;
2699                 F = afopen(programname, "r");
2700                 s = NULL;
2701                 /* one byte is reserved for some trick in next_token */
2702                 if (fseek(F, 0, SEEK_END) == 0) {
2703                         flen = ftell(F);
2704                         s = xmalloc(flen+4);
2705                         fseek(F, 0, SEEK_SET);
2706                         i = 1 + fread(s+1, 1, flen, F);
2707                 } else {
2708                         for (i=j=1; j>0; i+=j) {
2709                                 s = (char *)xrealloc(s, i+4096);
2710                                 j = fread(s+i, 1, 4094, F);
2711                         }
2712                 }
2713                 s[i] = '\0';
2714                 fclose(F);
2715                 parse_program(s+1);
2716                 free(s);
2717         }
2718         if (opt & 0x8) // -W
2719                 bb_error_msg("warning: unrecognized option '-W %s' ignored", opt_W);
2720
2721         if (!from_file) {
2722                 if (argc == optind)
2723                         bb_show_usage();
2724                 programname = "cmd. line";
2725                 parse_program(argv[optind++]);
2726
2727         }
2728
2729         /* fill in ARGV array */
2730         setvar_i(V[ARGC], argc - optind + 1);
2731         setari_u(V[ARGV], 0, "awk");
2732         for(i=optind; i < argc; i++)
2733                 setari_u(V[ARGV], i+1-optind, argv[i]);
2734
2735         evaluate(beginseq.first, &tv);
2736         if (! mainseq.first && ! endseq.first)
2737                 awk_exit(EXIT_SUCCESS);
2738
2739         /* input file could already be opened in BEGIN block */
2740         if (! iF) iF = next_input_file();
2741
2742         /* passing through input files */
2743         while (iF) {
2744
2745                 nextfile = FALSE;
2746                 setvar_i(V[FNR], 0);
2747
2748                 while ((c = awk_getline(iF, V[F0])) > 0) {
2749
2750                         nextrec = FALSE;
2751                         incvar(V[NR]);
2752                         incvar(V[FNR]);
2753                         evaluate(mainseq.first, &tv);
2754
2755                         if (nextfile)
2756                                 break;
2757                 }
2758
2759                 if (c < 0)
2760                         runtime_error(strerror(errno));
2761
2762                 iF = next_input_file();
2763
2764         }
2765
2766         awk_exit(EXIT_SUCCESS);
2767
2768         return 0;
2769 }