awk: style fixes
[platform/upstream/busybox.git] / editors / awk.c
1 /* vi: set sw=4 ts=4: */
2 /*
3  * awk implementation for busybox
4  *
5  * Copyright (C) 2002 by Dmitry Zakharov <dmit@crp.bank.gov.ua>
6  *
7  * Licensed under the GPL v2 or later, see the file LICENSE in this tarball.
8  */
9
10 #include "busybox.h"
11 #include "xregex.h"
12 #include <math.h>
13
14
15 #define MAXVARFMT       240
16 #define MINNVBLOCK      64
17
18 /* variable flags */
19 #define VF_NUMBER       0x0001  /* 1 = primary type is number */
20 #define VF_ARRAY        0x0002  /* 1 = it's an array */
21
22 #define VF_CACHED       0x0100  /* 1 = num/str value has cached str/num eq */
23 #define VF_USER         0x0200  /* 1 = user input (may be numeric string) */
24 #define VF_SPECIAL      0x0400  /* 1 = requires extra handling when changed */
25 #define VF_WALK         0x0800  /* 1 = variable has alloc'd x.walker list */
26 #define VF_FSTR         0x1000  /* 1 = string points to fstring buffer */
27 #define VF_CHILD        0x2000  /* 1 = function arg; x.parent points to source */
28 #define VF_DIRTY        0x4000  /* 1 = variable was set explicitly */
29
30 /* these flags are static, don't change them when value is changed */
31 #define VF_DONTTOUCH (VF_ARRAY | VF_SPECIAL | VF_WALK | VF_CHILD | VF_DIRTY)
32
33 /* Variable */
34 typedef struct var_s {
35         unsigned short type;            /* flags */
36         double number;
37         char *string;
38         union {
39                 int aidx;                               /* func arg idx (for compilation stage) */
40                 struct xhash_s *array;  /* array ptr */
41                 struct var_s *parent;   /* for func args, ptr to actual parameter */
42                 char **walker;                  /* list of array elements (for..in) */
43         } x;
44 } var;
45
46 /* Node chain (pattern-action chain, BEGIN, END, function bodies) */
47 typedef struct chain_s {
48         struct node_s *first;
49         struct node_s *last;
50         char *programname;
51 } chain;
52
53 /* Function */
54 typedef struct func_s {
55         unsigned short nargs;
56         struct chain_s body;
57 } func;
58
59 /* I/O stream */
60 typedef struct rstream_s {
61         FILE *F;
62         char *buffer;
63         int adv;
64         int size;
65         int pos;
66         unsigned short is_pipe;
67 } rstream;
68
69 typedef struct hash_item_s {
70         union {
71                 struct var_s v;                 /* variable/array hash */
72                 struct rstream_s rs;    /* redirect streams hash */
73                 struct func_s f;                /* functions hash */
74         } data;
75         struct hash_item_s *next;       /* next in chain */
76         char name[1];                           /* really it's longer */
77 } hash_item;
78
79 typedef struct xhash_s {
80         unsigned nel;                                   /* num of elements */
81         unsigned csize;                                 /* current hash size */
82         unsigned nprime;                                /* next hash size in PRIMES[] */
83         unsigned glen;                                  /* summary length of item names */
84         struct hash_item_s **items;
85 } xhash;
86
87 /* Tree node */
88 typedef struct node_s {
89         uint32_t info;
90         unsigned short lineno;
91         union {
92                 struct node_s *n;
93                 var *v;
94                 int i;
95                 char *s;
96                 regex_t *re;
97         } l;
98         union {
99                 struct node_s *n;
100                 regex_t *ire;
101                 func *f;
102                 int argno;
103         } r;
104         union {
105                 struct node_s *n;
106         } a;
107 } node;
108
109 /* Block of temporary variables */
110 typedef struct nvblock_s {
111         int size;
112         var *pos;
113         struct nvblock_s *prev;
114         struct nvblock_s *next;
115         var nv[0];
116 } nvblock;
117
118 typedef struct tsplitter_s {
119         node n;
120         regex_t re[2];
121 } tsplitter;
122
123 /* simple token classes */
124 /* Order and hex values are very important!!!  See next_token() */
125 #define TC_SEQSTART      1                              /* ( */
126 #define TC_SEQTERM      (1 << 1)                /* ) */
127 #define TC_REGEXP       (1 << 2)                /* /.../ */
128 #define TC_OUTRDR       (1 << 3)                /* | > >> */
129 #define TC_UOPPOST      (1 << 4)                /* unary postfix operator */
130 #define TC_UOPPRE1      (1 << 5)                /* unary prefix operator */
131 #define TC_BINOPX       (1 << 6)                /* two-opnd operator */
132 #define TC_IN           (1 << 7)
133 #define TC_COMMA        (1 << 8)
134 #define TC_PIPE         (1 << 9)                /* input redirection pipe */
135 #define TC_UOPPRE2      (1 << 10)               /* unary prefix operator */
136 #define TC_ARRTERM      (1 << 11)               /* ] */
137 #define TC_GRPSTART     (1 << 12)               /* { */
138 #define TC_GRPTERM      (1 << 13)               /* } */
139 #define TC_SEMICOL      (1 << 14)
140 #define TC_NEWLINE      (1 << 15)
141 #define TC_STATX        (1 << 16)               /* ctl statement (for, next...) */
142 #define TC_WHILE        (1 << 17)
143 #define TC_ELSE         (1 << 18)
144 #define TC_BUILTIN      (1 << 19)
145 #define TC_GETLINE      (1 << 20)
146 #define TC_FUNCDECL     (1 << 21)               /* `function' `func' */
147 #define TC_BEGIN        (1 << 22)
148 #define TC_END          (1 << 23)
149 #define TC_EOF          (1 << 24)
150 #define TC_VARIABLE     (1 << 25)
151 #define TC_ARRAY        (1 << 26)
152 #define TC_FUNCTION     (1 << 27)
153 #define TC_STRING       (1 << 28)
154 #define TC_NUMBER       (1 << 29)
155
156 #define TC_UOPPRE       (TC_UOPPRE1 | TC_UOPPRE2)
157
158 /* combined token classes */
159 #define TC_BINOP        (TC_BINOPX | TC_COMMA | TC_PIPE | TC_IN)
160 #define TC_UNARYOP      (TC_UOPPRE | TC_UOPPOST)
161 #define TC_OPERAND      (TC_VARIABLE | TC_ARRAY | TC_FUNCTION | \
162         TC_BUILTIN | TC_GETLINE | TC_SEQSTART | TC_STRING | TC_NUMBER)
163
164 #define TC_STATEMNT     (TC_STATX | TC_WHILE)
165 #define TC_OPTERM       (TC_SEMICOL | TC_NEWLINE)
166
167 /* word tokens, cannot mean something else if not expected */
168 #define TC_WORD         (TC_IN | TC_STATEMNT | TC_ELSE | TC_BUILTIN | \
169         TC_GETLINE | TC_FUNCDECL | TC_BEGIN | TC_END)
170
171 /* discard newlines after these */
172 #define TC_NOTERM       (TC_COMMA | TC_GRPSTART | TC_GRPTERM | \
173         TC_BINOP | TC_OPTERM)
174
175 /* what can expression begin with */
176 #define TC_OPSEQ        (TC_OPERAND | TC_UOPPRE | TC_REGEXP)
177 /* what can group begin with */
178 #define TC_GRPSEQ       (TC_OPSEQ | TC_OPTERM | TC_STATEMNT | TC_GRPSTART)
179
180 /* if previous token class is CONCAT1 and next is CONCAT2, concatenation */
181 /* operator is inserted between them */
182 #define TC_CONCAT1      (TC_VARIABLE | TC_ARRTERM | TC_SEQTERM | \
183         TC_STRING | TC_NUMBER | TC_UOPPOST)
184 #define TC_CONCAT2      (TC_OPERAND | TC_UOPPRE)
185
186 #define OF_RES1         0x010000
187 #define OF_RES2         0x020000
188 #define OF_STR1         0x040000
189 #define OF_STR2         0x080000
190 #define OF_NUM1         0x100000
191 #define OF_CHECKED      0x200000
192
193 /* combined operator flags */
194 #define xx      0
195 #define xV      OF_RES2
196 #define xS      (OF_RES2 | OF_STR2)
197 #define Vx      OF_RES1
198 #define VV      (OF_RES1 | OF_RES2)
199 #define Nx      (OF_RES1 | OF_NUM1)
200 #define NV      (OF_RES1 | OF_NUM1 | OF_RES2)
201 #define Sx      (OF_RES1 | OF_STR1)
202 #define SV      (OF_RES1 | OF_STR1 | OF_RES2)
203 #define SS      (OF_RES1 | OF_STR1 | OF_RES2 | OF_STR2)
204
205 #define OPCLSMASK       0xFF00
206 #define OPNMASK         0x007F
207
208 /* operator priority is a highest byte (even: r->l, odd: l->r grouping)
209  * For builtins it has different meaning: n n s3 s2 s1 v3 v2 v1,
210  * n - min. number of args, vN - resolve Nth arg to var, sN - resolve to string
211  */
212 #define P(x)    (x << 24)
213 #define PRIMASK         0x7F000000
214 #define PRIMASK2        0x7E000000
215
216 /* Operation classes */
217
218 #define SHIFT_TIL_THIS  0x0600
219 #define RECUR_FROM_THIS 0x1000
220
221 enum {
222         OC_DELETE=0x0100,       OC_EXEC=0x0200,         OC_NEWSOURCE=0x0300,
223         OC_PRINT=0x0400,        OC_PRINTF=0x0500,       OC_WALKINIT=0x0600,
224
225         OC_BR=0x0700,           OC_BREAK=0x0800,        OC_CONTINUE=0x0900,
226         OC_EXIT=0x0a00,         OC_NEXT=0x0b00,         OC_NEXTFILE=0x0c00,
227         OC_TEST=0x0d00,         OC_WALKNEXT=0x0e00,
228
229         OC_BINARY=0x1000,       OC_BUILTIN=0x1100,      OC_COLON=0x1200,
230         OC_COMMA=0x1300,        OC_COMPARE=0x1400,      OC_CONCAT=0x1500,
231         OC_FBLTIN=0x1600,       OC_FIELD=0x1700,        OC_FNARG=0x1800,
232         OC_FUNC=0x1900,         OC_GETLINE=0x1a00,      OC_IN=0x1b00,
233         OC_LAND=0x1c00,         OC_LOR=0x1d00,          OC_MATCH=0x1e00,
234         OC_MOVE=0x1f00,         OC_PGETLINE=0x2000,     OC_REGEXP=0x2100,
235         OC_REPLACE=0x2200,      OC_RETURN=0x2300,       OC_SPRINTF=0x2400,
236         OC_TERNARY=0x2500,      OC_UNARY=0x2600,        OC_VAR=0x2700,
237         OC_DONE=0x2800,
238
239         ST_IF=0x3000,           ST_DO=0x3100,           ST_FOR=0x3200,
240         ST_WHILE=0x3300
241 };
242
243 /* simple builtins */
244 enum {
245         F_in=0, F_rn,   F_co,   F_ex,   F_lg,   F_si,   F_sq,   F_sr,
246         F_ti,   F_le,   F_sy,   F_ff,   F_cl
247 };
248
249 /* builtins */
250 enum {
251         B_a2=0, B_ix,   B_ma,   B_sp,   B_ss,   B_ti,   B_lo,   B_up,
252         B_ge,   B_gs,   B_su,
253         B_an,   B_co,   B_ls,   B_or,   B_rs,   B_xo,
254 };
255
256 /* tokens and their corresponding info values */
257
258 #define NTC             "\377"          /* switch to next token class (tc<<1) */
259 #define NTCC    '\377'
260
261 #define OC_B    OC_BUILTIN
262
263 static const char tokenlist[] =
264         "\1("       NTC
265         "\1)"       NTC
266         "\1/"       NTC                                 /* REGEXP */
267         "\2>>"      "\1>"       "\1|"       NTC         /* OUTRDR */
268         "\2++"      "\2--"      NTC                     /* UOPPOST */
269         "\2++"      "\2--"      "\1$"       NTC         /* UOPPRE1 */
270         "\2=="      "\1="       "\2+="      "\2-="      /* BINOPX */
271         "\2*="      "\2/="      "\2%="      "\2^="
272         "\1+"       "\1-"       "\3**="     "\2**"
273         "\1/"       "\1%"       "\1^"       "\1*"
274         "\2!="      "\2>="      "\2<="      "\1>"
275         "\1<"       "\2!~"      "\1~"       "\2&&"
276         "\2||"      "\1?"       "\1:"       NTC
277         "\2in"      NTC
278         "\1,"       NTC
279         "\1|"       NTC
280         "\1+"       "\1-"       "\1!"       NTC         /* UOPPRE2 */
281         "\1]"       NTC
282         "\1{"       NTC
283         "\1}"       NTC
284         "\1;"       NTC
285         "\1\n"      NTC
286         "\2if"      "\2do"      "\3for"     "\5break"   /* STATX */
287         "\10continue"           "\6delete"  "\5print"
288         "\6printf"  "\4next"    "\10nextfile"
289         "\6return"  "\4exit"    NTC
290         "\5while"   NTC
291         "\4else"    NTC
292
293         "\3and"     "\5compl"   "\6lshift"  "\2or"
294         "\6rshift"  "\3xor"
295         "\5close"   "\6system"  "\6fflush"  "\5atan2"   /* BUILTIN */
296         "\3cos"     "\3exp"     "\3int"     "\3log"
297         "\4rand"    "\3sin"     "\4sqrt"    "\5srand"
298         "\6gensub"  "\4gsub"    "\5index"   "\6length"
299         "\5match"   "\5split"   "\7sprintf" "\3sub"
300         "\6substr"  "\7systime" "\10strftime"
301         "\7tolower" "\7toupper" NTC
302         "\7getline" NTC
303         "\4func"    "\10function"   NTC
304         "\5BEGIN"   NTC
305         "\3END"     "\0"
306         ;
307
308 static const uint32_t tokeninfo[] = {
309         0,
310         0,
311         OC_REGEXP,
312         xS|'a',     xS|'w',     xS|'|',
313         OC_UNARY|xV|P(9)|'p',       OC_UNARY|xV|P(9)|'m',
314         OC_UNARY|xV|P(9)|'P',       OC_UNARY|xV|P(9)|'M',
315             OC_FIELD|xV|P(5),
316         OC_COMPARE|VV|P(39)|5,      OC_MOVE|VV|P(74),
317             OC_REPLACE|NV|P(74)|'+',    OC_REPLACE|NV|P(74)|'-',
318         OC_REPLACE|NV|P(74)|'*',    OC_REPLACE|NV|P(74)|'/',
319             OC_REPLACE|NV|P(74)|'%',    OC_REPLACE|NV|P(74)|'&',
320         OC_BINARY|NV|P(29)|'+',     OC_BINARY|NV|P(29)|'-',
321             OC_REPLACE|NV|P(74)|'&',    OC_BINARY|NV|P(15)|'&',
322         OC_BINARY|NV|P(25)|'/',     OC_BINARY|NV|P(25)|'%',
323             OC_BINARY|NV|P(15)|'&',     OC_BINARY|NV|P(25)|'*',
324         OC_COMPARE|VV|P(39)|4,      OC_COMPARE|VV|P(39)|3,
325             OC_COMPARE|VV|P(39)|0,      OC_COMPARE|VV|P(39)|1,
326         OC_COMPARE|VV|P(39)|2,      OC_MATCH|Sx|P(45)|'!',
327             OC_MATCH|Sx|P(45)|'~',      OC_LAND|Vx|P(55),
328         OC_LOR|Vx|P(59),            OC_TERNARY|Vx|P(64)|'?',
329             OC_COLON|xx|P(67)|':',
330         OC_IN|SV|P(49),
331         OC_COMMA|SS|P(80),
332         OC_PGETLINE|SV|P(37),
333         OC_UNARY|xV|P(19)|'+',      OC_UNARY|xV|P(19)|'-',
334             OC_UNARY|xV|P(19)|'!',
335         0,
336         0,
337         0,
338         0,
339         0,
340         ST_IF,          ST_DO,          ST_FOR,         OC_BREAK,
341         OC_CONTINUE,                    OC_DELETE|Vx,   OC_PRINT,
342         OC_PRINTF,      OC_NEXT,        OC_NEXTFILE,
343         OC_RETURN|Vx,   OC_EXIT|Nx,
344         ST_WHILE,
345         0,
346
347         OC_B|B_an|P(0x83), OC_B|B_co|P(0x41), OC_B|B_ls|P(0x83), OC_B|B_or|P(0x83),
348         OC_B|B_rs|P(0x83), OC_B|B_xo|P(0x83),
349         OC_FBLTIN|Sx|F_cl, OC_FBLTIN|Sx|F_sy, OC_FBLTIN|Sx|F_ff, OC_B|B_a2|P(0x83),
350         OC_FBLTIN|Nx|F_co, OC_FBLTIN|Nx|F_ex, OC_FBLTIN|Nx|F_in, OC_FBLTIN|Nx|F_lg,
351         OC_FBLTIN|F_rn,    OC_FBLTIN|Nx|F_si, OC_FBLTIN|Nx|F_sq, OC_FBLTIN|Nx|F_sr,
352         OC_B|B_ge|P(0xd6), OC_B|B_gs|P(0xb6), OC_B|B_ix|P(0x9b), OC_FBLTIN|Sx|F_le,
353         OC_B|B_ma|P(0x89), OC_B|B_sp|P(0x8b), OC_SPRINTF,        OC_B|B_su|P(0xb6),
354         OC_B|B_ss|P(0x8f), OC_FBLTIN|F_ti,    OC_B|B_ti|P(0x0b),
355         OC_B|B_lo|P(0x49), OC_B|B_up|P(0x49),
356         OC_GETLINE|SV|P(0),
357         0,      0,
358         0,
359         0
360 };
361
362 /* internal variable names and their initial values       */
363 /* asterisk marks SPECIAL vars; $ is just no-named Field0 */
364 enum {
365         CONVFMT=0,  OFMT,       FS,         OFS,
366         ORS,        RS,         RT,         FILENAME,
367         SUBSEP,     ARGIND,     ARGC,       ARGV,
368         ERRNO,      FNR,
369         NR,         NF,         IGNORECASE,
370         ENVIRON,    F0,         _intvarcount_
371 };
372
373 static const char vNames[] =
374         "CONVFMT\0" "OFMT\0"    "FS\0*"     "OFS\0"
375         "ORS\0"     "RS\0*"     "RT\0"      "FILENAME\0"
376         "SUBSEP\0"  "ARGIND\0"  "ARGC\0"    "ARGV\0"
377         "ERRNO\0"   "FNR\0"
378         "NR\0"      "NF\0*"     "IGNORECASE\0*"
379         "ENVIRON\0" "$\0*"      "\0";
380
381 static const char vValues[] =
382         "%.6g\0"    "%.6g\0"    " \0"       " \0"
383         "\n\0"      "\n\0"      "\0"        "\0"
384         "\034\0"
385         "\377";
386
387 /* hash size may grow to these values */
388 #define FIRST_PRIME 61;
389 static const unsigned PRIMES[] = { 251, 1021, 4093, 16381, 65521 };
390 enum { NPRIMES = sizeof(PRIMES) / sizeof(unsigned) };
391
392 /* globals */
393
394 extern char **environ;
395
396 static var * V[_intvarcount_];
397 static chain beginseq, mainseq, endseq, *seq;
398 static int nextrec, nextfile;
399 static node *break_ptr, *continue_ptr;
400 static rstream *iF;
401 static xhash *vhash, *ahash, *fdhash, *fnhash;
402 static char *programname;
403 static short lineno;
404 static int is_f0_split;
405 static int nfields;
406 static var *Fields;
407 static tsplitter fsplitter, rsplitter;
408 static nvblock *cb;
409 static char *pos;
410 static char *buf;
411 static int icase;
412 static int exiting;
413
414 static struct {
415         uint32_t tclass;
416         uint32_t info;
417         char *string;
418         double number;
419         short lineno;
420         int rollback;
421 } t;
422
423 /* function prototypes */
424 static void handle_special(var *);
425 static node *parse_expr(uint32_t);
426 static void chain_group(void);
427 static var *evaluate(node *, var *);
428 static rstream *next_input_file(void);
429 static int fmt_num(char *, int, const char *, double, int);
430 static int awk_exit(int) ATTRIBUTE_NORETURN;
431
432 /* ---- error handling ---- */
433
434 static const char EMSG_INTERNAL_ERROR[] = "Internal error";
435 static const char EMSG_UNEXP_EOS[] = "Unexpected end of string";
436 static const char EMSG_UNEXP_TOKEN[] = "Unexpected token";
437 static const char EMSG_DIV_BY_ZERO[] = "Division by zero";
438 static const char EMSG_INV_FMT[] = "Invalid format specifier";
439 static const char EMSG_TOO_FEW_ARGS[] = "Too few arguments for builtin";
440 static const char EMSG_NOT_ARRAY[] = "Not an array";
441 static const char EMSG_POSSIBLE_ERROR[] = "Possible syntax error";
442 static const char EMSG_UNDEF_FUNC[] = "Call to undefined function";
443 #if !ENABLE_FEATURE_AWK_MATH
444 static const char EMSG_NO_MATH[] = "Math support is not compiled in";
445 #endif
446
447 static void zero_out_var(var * vp)
448 {
449         memset(vp, 0, sizeof(*vp));
450 }
451
452 static void syntax_error(const char * const message) ATTRIBUTE_NORETURN;
453 static void syntax_error(const char * const message)
454 {
455         bb_error_msg_and_die("%s:%i: %s", programname, lineno, message);
456 }
457
458 #define runtime_error(x) syntax_error(x)
459
460
461 /* ---- hash stuff ---- */
462
463 static unsigned hashidx(const char *name)
464 {
465         unsigned idx = 0;
466
467         while (*name) idx = *name++ + (idx << 6) - idx;
468         return idx;
469 }
470
471 /* create new hash */
472 static xhash *hash_init(void)
473 {
474         xhash *newhash;
475
476         newhash = xzalloc(sizeof(xhash));
477         newhash->csize = FIRST_PRIME;
478         newhash->items = xzalloc(newhash->csize * sizeof(hash_item *));
479
480         return newhash;
481 }
482
483 /* find item in hash, return ptr to data, NULL if not found */
484 static void *hash_search(xhash *hash, const char *name)
485 {
486         hash_item *hi;
487
488         hi = hash->items [ hashidx(name) % hash->csize ];
489         while (hi) {
490                 if (strcmp(hi->name, name) == 0)
491                         return &(hi->data);
492                 hi = hi->next;
493         }
494         return NULL;
495 }
496
497 /* grow hash if it becomes too big */
498 static void hash_rebuild(xhash *hash)
499 {
500         unsigned newsize, i, idx;
501         hash_item **newitems, *hi, *thi;
502
503         if (hash->nprime == NPRIMES)
504                 return;
505
506         newsize = PRIMES[hash->nprime++];
507         newitems = xzalloc(newsize * sizeof(hash_item *));
508
509         for (i=0; i<hash->csize; i++) {
510                 hi = hash->items[i];
511                 while (hi) {
512                         thi = hi;
513                         hi = thi->next;
514                         idx = hashidx(thi->name) % newsize;
515                         thi->next = newitems[idx];
516                         newitems[idx] = thi;
517                 }
518         }
519
520         free(hash->items);
521         hash->csize = newsize;
522         hash->items = newitems;
523 }
524
525 /* find item in hash, add it if necessary. Return ptr to data */
526 static void *hash_find(xhash *hash, const char *name)
527 {
528         hash_item *hi;
529         unsigned idx;
530         int l;
531
532         hi = hash_search(hash, name);
533         if (! hi) {
534                 if (++hash->nel / hash->csize > 10)
535                         hash_rebuild(hash);
536
537                 l = strlen(name) + 1;
538                 hi = xzalloc(sizeof(hash_item) + l);
539                 memcpy(hi->name, name, l);
540
541                 idx = hashidx(name) % hash->csize;
542                 hi->next = hash->items[idx];
543                 hash->items[idx] = hi;
544                 hash->glen += l;
545         }
546         return &(hi->data);
547 }
548
549 #define findvar(hash, name) ((var*)    hash_find((hash) , (name)))
550 #define newvar(name)        ((var*)    hash_find(vhash , (name)))
551 #define newfile(name)       ((rstream*)hash_find(fdhash ,(name)))
552 #define newfunc(name)       ((func*)   hash_find(fnhash , (name)))
553
554 static void hash_remove(xhash *hash, const char *name)
555 {
556         hash_item *hi, **phi;
557
558         phi = &(hash->items[ hashidx(name) % hash->csize ]);
559         while (*phi) {
560                 hi = *phi;
561                 if (strcmp(hi->name, name) == 0) {
562                         hash->glen -= (strlen(name) + 1);
563                         hash->nel--;
564                         *phi = hi->next;
565                         free(hi);
566                         break;
567                 }
568                 phi = &(hi->next);
569         }
570 }
571
572 /* ------ some useful functions ------ */
573
574 static void skip_spaces(char **s)
575 {
576         char *p = *s;
577
578         while (*p == ' ' || *p == '\t' ||
579                         (*p == '\\' && *(p+1) == '\n' && (++p, ++t.lineno))) {
580                 p++;
581         }
582         *s = p;
583 }
584
585 static char *nextword(char **s)
586 {
587         char *p = *s;
588
589         while (*(*s)++) /* */;
590
591         return p;
592 }
593
594 static char nextchar(char **s)
595 {
596         char c, *pps;
597
598         c = *((*s)++);
599         pps = *s;
600         if (c == '\\') c = bb_process_escape_sequence((const char**)s);
601         if (c == '\\' && *s == pps) c = *((*s)++);
602         return c;
603 }
604
605 static int ATTRIBUTE_ALWAYS_INLINE isalnum_(int c)
606 {
607         return (isalnum(c) || c == '_');
608 }
609
610 static FILE *afopen(const char *path, const char *mode)
611 {
612         return (*path == '-' && *(path+1) == '\0') ? stdin : xfopen(path, mode);
613 }
614
615 /* -------- working with variables (set/get/copy/etc) -------- */
616
617 static xhash *iamarray(var *v)
618 {
619         var *a = v;
620
621         while (a->type & VF_CHILD)
622                 a = a->x.parent;
623
624         if (! (a->type & VF_ARRAY)) {
625                 a->type |= VF_ARRAY;
626                 a->x.array = hash_init();
627         }
628         return a->x.array;
629 }
630
631 static void clear_array(xhash *array)
632 {
633         unsigned i;
634         hash_item *hi, *thi;
635
636         for (i=0; i<array->csize; i++) {
637                 hi = array->items[i];
638                 while (hi) {
639                         thi = hi;
640                         hi = hi->next;
641                         free(thi->data.v.string);
642                         free(thi);
643                 }
644                 array->items[i] = NULL;
645         }
646         array->glen = array->nel = 0;
647 }
648
649 /* clear a variable */
650 static var *clrvar(var *v)
651 {
652         if (!(v->type & VF_FSTR))
653                 free(v->string);
654
655         v->type &= VF_DONTTOUCH;
656         v->type |= VF_DIRTY;
657         v->string = NULL;
658         return v;
659 }
660
661 /* assign string value to variable */
662 static var *setvar_p(var *v, char *value)
663 {
664         clrvar(v);
665         v->string = value;
666         handle_special(v);
667
668         return v;
669 }
670
671 /* same as setvar_p but make a copy of string */
672 static var *setvar_s(var *v, const char *value)
673 {
674         return setvar_p(v, (value && *value) ? xstrdup(value) : NULL);
675 }
676
677 /* same as setvar_s but set USER flag */
678 static var *setvar_u(var *v, const char *value)
679 {
680         setvar_s(v, value);
681         v->type |= VF_USER;
682         return v;
683 }
684
685 /* set array element to user string */
686 static void setari_u(var *a, int idx, const char *s)
687 {
688         var *v;
689         static char sidx[12];
690
691         sprintf(sidx, "%d", idx);
692         v = findvar(iamarray(a), sidx);
693         setvar_u(v, s);
694 }
695
696 /* assign numeric value to variable */
697 static var *setvar_i(var *v, double value)
698 {
699         clrvar(v);
700         v->type |= VF_NUMBER;
701         v->number = value;
702         handle_special(v);
703         return v;
704 }
705
706 static char *getvar_s(var *v)
707 {
708         /* if v is numeric and has no cached string, convert it to string */
709         if ((v->type & (VF_NUMBER | VF_CACHED)) == VF_NUMBER) {
710                 fmt_num(buf, MAXVARFMT, getvar_s(V[CONVFMT]), v->number, TRUE);
711                 v->string = xstrdup(buf);
712                 v->type |= VF_CACHED;
713         }
714         return (v->string == NULL) ? "" : v->string;
715 }
716
717 static double getvar_i(var *v)
718 {
719         char *s;
720
721         if ((v->type & (VF_NUMBER | VF_CACHED)) == 0) {
722                 v->number = 0;
723                 s = v->string;
724                 if (s && *s) {
725                         v->number = strtod(s, &s);
726                         if (v->type & VF_USER) {
727                                 skip_spaces(&s);
728                                 if (*s != '\0')
729                                         v->type &= ~VF_USER;
730                         }
731                 } else {
732                         v->type &= ~VF_USER;
733                 }
734                 v->type |= VF_CACHED;
735         }
736         return v->number;
737 }
738
739 static var *copyvar(var *dest, const var *src)
740 {
741         if (dest != src) {
742                 clrvar(dest);
743                 dest->type |= (src->type & ~VF_DONTTOUCH);
744                 dest->number = src->number;
745                 if (src->string)
746                         dest->string = xstrdup(src->string);
747         }
748         handle_special(dest);
749         return dest;
750 }
751
752 static var *incvar(var *v)
753 {
754         return setvar_i(v, getvar_i(v)+1.);
755 }
756
757 /* return true if v is number or numeric string */
758 static int is_numeric(var *v)
759 {
760         getvar_i(v);
761         return ((v->type ^ VF_DIRTY) & (VF_NUMBER | VF_USER | VF_DIRTY));
762 }
763
764 /* return 1 when value of v corresponds to true, 0 otherwise */
765 static int istrue(var *v)
766 {
767         if (is_numeric(v))
768                 return (v->number == 0) ? 0 : 1;
769         else
770                 return (v->string && *(v->string)) ? 1 : 0;
771 }
772
773 /* temporary variables allocator. Last allocated should be first freed */
774 static var *nvalloc(int n)
775 {
776         nvblock *pb = NULL;
777         var *v, *r;
778         int size;
779
780         while (cb) {
781                 pb = cb;
782                 if ((cb->pos - cb->nv) + n <= cb->size) break;
783                 cb = cb->next;
784         }
785
786         if (! cb) {
787                 size = (n <= MINNVBLOCK) ? MINNVBLOCK : n;
788                 cb = xmalloc(sizeof(nvblock) + size * sizeof(var));
789                 cb->size = size;
790                 cb->pos = cb->nv;
791                 cb->prev = pb;
792                 cb->next = NULL;
793                 if (pb) pb->next = cb;
794         }
795
796         v = r = cb->pos;
797         cb->pos += n;
798
799         while (v < cb->pos) {
800                 v->type = 0;
801                 v->string = NULL;
802                 v++;
803         }
804
805         return r;
806 }
807
808 static void nvfree(var *v)
809 {
810         var *p;
811
812         if (v < cb->nv || v >= cb->pos)
813                 runtime_error(EMSG_INTERNAL_ERROR);
814
815         for (p=v; p<cb->pos; p++) {
816                 if ((p->type & (VF_ARRAY|VF_CHILD)) == VF_ARRAY) {
817                         clear_array(iamarray(p));
818                         free(p->x.array->items);
819                         free(p->x.array);
820                 }
821                 if (p->type & VF_WALK)
822                         free(p->x.walker);
823
824                 clrvar(p);
825         }
826
827         cb->pos = v;
828         while (cb->prev && cb->pos == cb->nv) {
829                 cb = cb->prev;
830         }
831 }
832
833 /* ------- awk program text parsing ------- */
834
835 /* Parse next token pointed by global pos, place results into global t.
836  * If token isn't expected, give away. Return token class
837  */
838 static uint32_t next_token(uint32_t expected)
839 {
840         static int concat_inserted;
841         static uint32_t save_tclass, save_info;
842         static uint32_t ltclass = TC_OPTERM;
843
844         char *p, *pp, *s;
845         const char *tl;
846         uint32_t tc;
847         const uint32_t *ti;
848         int l;
849
850         if (t.rollback) {
851                 t.rollback = FALSE;
852
853         } else if (concat_inserted) {
854                 concat_inserted = FALSE;
855                 t.tclass = save_tclass;
856                 t.info = save_info;
857
858         } else {
859                 p = pos;
860  readnext:
861                 skip_spaces(&p);
862                 lineno = t.lineno;
863                 if (*p == '#')
864                         while (*p != '\n' && *p != '\0') p++;
865
866                 if (*p == '\n')
867                         t.lineno++;
868
869                 if (*p == '\0') {
870                         tc = TC_EOF;
871
872                 } else if (*p == '\"') {
873                         /* it's a string */
874                         t.string = s = ++p;
875                         while (*p != '\"') {
876                                 if (*p == '\0' || *p == '\n')
877                                         syntax_error(EMSG_UNEXP_EOS);
878                                 *(s++) = nextchar(&p);
879                         }
880                         p++;
881                         *s = '\0';
882                         tc = TC_STRING;
883
884                 } else if ((expected & TC_REGEXP) && *p == '/') {
885                         /* it's regexp */
886                         t.string = s = ++p;
887                         while (*p != '/') {
888                                 if (*p == '\0' || *p == '\n')
889                                         syntax_error(EMSG_UNEXP_EOS);
890                                 if ((*s++ = *p++) == '\\') {
891                                         pp = p;
892                                         *(s-1) = bb_process_escape_sequence((const char **)&p);
893                                         if (*pp == '\\') *s++ = '\\';
894                                         if (p == pp) *s++ = *p++;
895                                 }
896                         }
897                         p++;
898                         *s = '\0';
899                         tc = TC_REGEXP;
900
901                 } else if (*p == '.' || isdigit(*p)) {
902                         /* it's a number */
903                         t.number = strtod(p, &p);
904                         if (*p == '.')
905                                 syntax_error(EMSG_UNEXP_TOKEN);
906                         tc = TC_NUMBER;
907
908                 } else {
909                         /* search for something known */
910                         tl = tokenlist;
911                         tc = 0x00000001;
912                         ti = tokeninfo;
913                         while (*tl) {
914                                 l = *(tl++);
915                                 if (l == NTCC) {
916                                         tc <<= 1;
917                                         continue;
918                                 }
919                                 /* if token class is expected, token
920                                  * matches and it's not a longer word,
921                                  * then this is what we are looking for
922                                  */
923                                 if ((tc & (expected | TC_WORD | TC_NEWLINE)) &&
924                                 *tl == *p && strncmp(p, tl, l) == 0 &&
925                                 !((tc & TC_WORD) && isalnum_(*(p + l)))) {
926                                         t.info = *ti;
927                                         p += l;
928                                         break;
929                                 }
930                                 ti++;
931                                 tl += l;
932                         }
933
934                         if (!*tl) {
935                                 /* it's a name (var/array/function),
936                                  * otherwise it's something wrong
937                                  */
938                                 if (!isalnum_(*p))
939                                         syntax_error(EMSG_UNEXP_TOKEN);
940
941                                 t.string = --p;
942                                 while (isalnum_(*(++p))) {
943                                         *(p-1) = *p;
944                                 }
945                                 *(p-1) = '\0';
946                                 tc = TC_VARIABLE;
947                                 /* also consume whitespace between functionname and bracket */
948                                 if (!(expected & TC_VARIABLE)) skip_spaces(&p);
949                                 if (*p == '(') {
950                                         tc = TC_FUNCTION;
951                                 } else {
952                                         if (*p == '[') {
953                                                 p++;
954                                                 tc = TC_ARRAY;
955                                         }
956                                 }
957                         }
958                 }
959                 pos = p;
960
961                 /* skipping newlines in some cases */
962                 if ((ltclass & TC_NOTERM) && (tc & TC_NEWLINE))
963                         goto readnext;
964
965                 /* insert concatenation operator when needed */
966                 if ((ltclass&TC_CONCAT1) && (tc&TC_CONCAT2) && (expected&TC_BINOP)) {
967                         concat_inserted = TRUE;
968                         save_tclass = tc;
969                         save_info = t.info;
970                         tc = TC_BINOP;
971                         t.info = OC_CONCAT | SS | P(35);
972                 }
973
974                 t.tclass = tc;
975         }
976         ltclass = t.tclass;
977
978         /* Are we ready for this? */
979         if (! (ltclass & expected))
980                 syntax_error((ltclass & (TC_NEWLINE | TC_EOF)) ?
981                                                                 EMSG_UNEXP_EOS : EMSG_UNEXP_TOKEN);
982
983         return ltclass;
984 }
985
986 static void rollback_token(void) { t.rollback = TRUE; }
987
988 static node *new_node(uint32_t info)
989 {
990         node *n;
991
992         n = xzalloc(sizeof(node));
993         n->info = info;
994         n->lineno = lineno;
995         return n;
996 }
997
998 static node *mk_re_node(char *s, node *n, regex_t *re)
999 {
1000         n->info = OC_REGEXP;
1001         n->l.re = re;
1002         n->r.ire = re + 1;
1003         xregcomp(re, s, REG_EXTENDED);
1004         xregcomp(re+1, s, REG_EXTENDED | REG_ICASE);
1005
1006         return n;
1007 }
1008
1009 static node *condition(void)
1010 {
1011         next_token(TC_SEQSTART);
1012         return parse_expr(TC_SEQTERM);
1013 }
1014
1015 /* parse expression terminated by given argument, return ptr
1016  * to built subtree. Terminator is eaten by parse_expr */
1017 static node *parse_expr(uint32_t iexp)
1018 {
1019         node sn;
1020         node *cn = &sn;
1021         node *vn, *glptr;
1022         uint32_t tc, xtc;
1023         var *v;
1024
1025         sn.info = PRIMASK;
1026         sn.r.n = glptr = NULL;
1027         xtc = TC_OPERAND | TC_UOPPRE | TC_REGEXP | iexp;
1028
1029         while (! ((tc = next_token(xtc)) & iexp)) {
1030                 if (glptr && (t.info == (OC_COMPARE|VV|P(39)|2))) {
1031                         /* input redirection (<) attached to glptr node */
1032                         cn = glptr->l.n = new_node(OC_CONCAT|SS|P(37));
1033                         cn->a.n = glptr;
1034                         xtc = TC_OPERAND | TC_UOPPRE;
1035                         glptr = NULL;
1036
1037                 } else if (tc & (TC_BINOP | TC_UOPPOST)) {
1038                         /* for binary and postfix-unary operators, jump back over
1039                          * previous operators with higher priority */
1040                         vn = cn;
1041                         while ( ((t.info & PRIMASK) > (vn->a.n->info & PRIMASK2)) ||
1042                           ((t.info == vn->info) && ((t.info & OPCLSMASK) == OC_COLON)) )
1043                                 vn = vn->a.n;
1044                         if ((t.info & OPCLSMASK) == OC_TERNARY)
1045                                 t.info += P(6);
1046                         cn = vn->a.n->r.n = new_node(t.info);
1047                         cn->a.n = vn->a.n;
1048                         if (tc & TC_BINOP) {
1049                                 cn->l.n = vn;
1050                                 xtc = TC_OPERAND | TC_UOPPRE | TC_REGEXP;
1051                                 if ((t.info & OPCLSMASK) == OC_PGETLINE) {
1052                                         /* it's a pipe */
1053                                         next_token(TC_GETLINE);
1054                                         /* give maximum priority to this pipe */
1055                                         cn->info &= ~PRIMASK;
1056                                         xtc = TC_OPERAND | TC_UOPPRE | TC_BINOP | iexp;
1057                                 }
1058                         } else {
1059                                 cn->r.n = vn;
1060                                 xtc = TC_OPERAND | TC_UOPPRE | TC_BINOP | iexp;
1061                         }
1062                         vn->a.n = cn;
1063
1064                 } else {
1065                         /* for operands and prefix-unary operators, attach them
1066                          * to last node */
1067                         vn = cn;
1068                         cn = vn->r.n = new_node(t.info);
1069                         cn->a.n = vn;
1070                         xtc = TC_OPERAND | TC_UOPPRE | TC_REGEXP;
1071                         if (tc & (TC_OPERAND | TC_REGEXP)) {
1072                                 xtc = TC_UOPPRE | TC_UOPPOST | TC_BINOP | TC_OPERAND | iexp;
1073                                 /* one should be very careful with switch on tclass -
1074                                  * only simple tclasses should be used! */
1075                                 switch (tc) {
1076                                 case TC_VARIABLE:
1077                                 case TC_ARRAY:
1078                                         cn->info = OC_VAR;
1079                                         if ((v = hash_search(ahash, t.string)) != NULL) {
1080                                                 cn->info = OC_FNARG;
1081                                                 cn->l.i = v->x.aidx;
1082                                         } else {
1083                                                 cn->l.v = newvar(t.string);
1084                                         }
1085                                         if (tc & TC_ARRAY) {
1086                                                 cn->info |= xS;
1087                                                 cn->r.n = parse_expr(TC_ARRTERM);
1088                                         }
1089                                         break;
1090
1091                                 case TC_NUMBER:
1092                                 case TC_STRING:
1093                                         cn->info = OC_VAR;
1094                                         v = cn->l.v = xzalloc(sizeof(var));
1095                                         if (tc & TC_NUMBER)
1096                                                 setvar_i(v, t.number);
1097                                         else
1098                                                 setvar_s(v, t.string);
1099                                         break;
1100
1101                                 case TC_REGEXP:
1102                                         mk_re_node(t.string, cn, xzalloc(sizeof(regex_t)*2));
1103                                         break;
1104
1105                                 case TC_FUNCTION:
1106                                         cn->info = OC_FUNC;
1107                                         cn->r.f = newfunc(t.string);
1108                                         cn->l.n = condition();
1109                                         break;
1110
1111                                 case TC_SEQSTART:
1112                                         cn = vn->r.n = parse_expr(TC_SEQTERM);
1113                                         cn->a.n = vn;
1114                                         break;
1115
1116                                 case TC_GETLINE:
1117                                         glptr = cn;
1118                                         xtc = TC_OPERAND | TC_UOPPRE | TC_BINOP | iexp;
1119                                         break;
1120
1121                                 case TC_BUILTIN:
1122                                         cn->l.n = condition();
1123                                         break;
1124                                 }
1125                         }
1126                 }
1127         }
1128         return sn.r.n;
1129 }
1130
1131 /* add node to chain. Return ptr to alloc'd node */
1132 static node *chain_node(uint32_t info)
1133 {
1134         node *n;
1135
1136         if (! seq->first)
1137                 seq->first = seq->last = new_node(0);
1138
1139         if (seq->programname != programname) {
1140                 seq->programname = programname;
1141                 n = chain_node(OC_NEWSOURCE);
1142                 n->l.s = xstrdup(programname);
1143         }
1144
1145         n = seq->last;
1146         n->info = info;
1147         seq->last = n->a.n = new_node(OC_DONE);
1148
1149         return n;
1150 }
1151
1152 static void chain_expr(uint32_t info)
1153 {
1154         node *n;
1155
1156         n = chain_node(info);
1157         n->l.n = parse_expr(TC_OPTERM | TC_GRPTERM);
1158         if (t.tclass & TC_GRPTERM)
1159                 rollback_token();
1160 }
1161
1162 static node *chain_loop(node *nn)
1163 {
1164         node *n, *n2, *save_brk, *save_cont;
1165
1166         save_brk = break_ptr;
1167         save_cont = continue_ptr;
1168
1169         n = chain_node(OC_BR | Vx);
1170         continue_ptr = new_node(OC_EXEC);
1171         break_ptr = new_node(OC_EXEC);
1172         chain_group();
1173         n2 = chain_node(OC_EXEC | Vx);
1174         n2->l.n = nn;
1175         n2->a.n = n;
1176         continue_ptr->a.n = n2;
1177         break_ptr->a.n = n->r.n = seq->last;
1178
1179         continue_ptr = save_cont;
1180         break_ptr = save_brk;
1181
1182         return n;
1183 }
1184
1185 /* parse group and attach it to chain */
1186 static void chain_group(void)
1187 {
1188         uint32_t c;
1189         node *n, *n2, *n3;
1190
1191         do {
1192                 c = next_token(TC_GRPSEQ);
1193         } while (c & TC_NEWLINE);
1194
1195         if (c & TC_GRPSTART) {
1196                 while (next_token(TC_GRPSEQ | TC_GRPTERM) != TC_GRPTERM) {
1197                         if (t.tclass & TC_NEWLINE) continue;
1198                         rollback_token();
1199                         chain_group();
1200                 }
1201         } else if (c & (TC_OPSEQ | TC_OPTERM)) {
1202                 rollback_token();
1203                 chain_expr(OC_EXEC | Vx);
1204         } else {                                                /* TC_STATEMNT */
1205                 switch (t.info & OPCLSMASK) {
1206                         case ST_IF:
1207                                 n = chain_node(OC_BR | Vx);
1208                                 n->l.n = condition();
1209                                 chain_group();
1210                                 n2 = chain_node(OC_EXEC);
1211                                 n->r.n = seq->last;
1212                                 if (next_token(TC_GRPSEQ | TC_GRPTERM | TC_ELSE)==TC_ELSE) {
1213                                         chain_group();
1214                                         n2->a.n = seq->last;
1215                                 } else {
1216                                         rollback_token();
1217                                 }
1218                                 break;
1219
1220                         case ST_WHILE:
1221                                 n2 = condition();
1222                                 n = chain_loop(NULL);
1223                                 n->l.n = n2;
1224                                 break;
1225
1226                         case ST_DO:
1227                                 n2 = chain_node(OC_EXEC);
1228                                 n = chain_loop(NULL);
1229                                 n2->a.n = n->a.n;
1230                                 next_token(TC_WHILE);
1231                                 n->l.n = condition();
1232                                 break;
1233
1234                         case ST_FOR:
1235                                 next_token(TC_SEQSTART);
1236                                 n2 = parse_expr(TC_SEMICOL | TC_SEQTERM);
1237                                 if (t.tclass & TC_SEQTERM) {    /* for-in */
1238                                         if ((n2->info & OPCLSMASK) != OC_IN)
1239                                                 syntax_error(EMSG_UNEXP_TOKEN);
1240                                         n = chain_node(OC_WALKINIT | VV);
1241                                         n->l.n = n2->l.n;
1242                                         n->r.n = n2->r.n;
1243                                         n = chain_loop(NULL);
1244                                         n->info = OC_WALKNEXT | Vx;
1245                                         n->l.n = n2->l.n;
1246                                 } else {                        /* for (;;) */
1247                                         n = chain_node(OC_EXEC | Vx);
1248                                         n->l.n = n2;
1249                                         n2 = parse_expr(TC_SEMICOL);
1250                                         n3 = parse_expr(TC_SEQTERM);
1251                                         n = chain_loop(n3);
1252                                         n->l.n = n2;
1253                                         if (! n2)
1254                                                 n->info = OC_EXEC;
1255                                 }
1256                                 break;
1257
1258                         case OC_PRINT:
1259                         case OC_PRINTF:
1260                                 n = chain_node(t.info);
1261                                 n->l.n = parse_expr(TC_OPTERM | TC_OUTRDR | TC_GRPTERM);
1262                                 if (t.tclass & TC_OUTRDR) {
1263                                         n->info |= t.info;
1264                                         n->r.n = parse_expr(TC_OPTERM | TC_GRPTERM);
1265                                 }
1266                                 if (t.tclass & TC_GRPTERM)
1267                                         rollback_token();
1268                                 break;
1269
1270                         case OC_BREAK:
1271                                 n = chain_node(OC_EXEC);
1272                                 n->a.n = break_ptr;
1273                                 break;
1274
1275                         case OC_CONTINUE:
1276                                 n = chain_node(OC_EXEC);
1277                                 n->a.n = continue_ptr;
1278                                 break;
1279
1280                         /* delete, next, nextfile, return, exit */
1281                         default:
1282                                 chain_expr(t.info);
1283                 }
1284         }
1285 }
1286
1287 static void parse_program(char *p)
1288 {
1289         uint32_t tclass;
1290         node *cn;
1291         func *f;
1292         var *v;
1293
1294         pos = p;
1295         t.lineno = 1;
1296         while ((tclass = next_token(TC_EOF | TC_OPSEQ | TC_GRPSTART |
1297                                 TC_OPTERM | TC_BEGIN | TC_END | TC_FUNCDECL)) != TC_EOF) {
1298
1299                 if (tclass & TC_OPTERM)
1300                         continue;
1301
1302                 seq = &mainseq;
1303                 if (tclass & TC_BEGIN) {
1304                         seq = &beginseq;
1305                         chain_group();
1306
1307                 } else if (tclass & TC_END) {
1308                         seq = &endseq;
1309                         chain_group();
1310
1311                 } else if (tclass & TC_FUNCDECL) {
1312                         next_token(TC_FUNCTION);
1313                         pos++;
1314                         f = newfunc(t.string);
1315                         f->body.first = NULL;
1316                         f->nargs = 0;
1317                         while (next_token(TC_VARIABLE | TC_SEQTERM) & TC_VARIABLE) {
1318                                 v = findvar(ahash, t.string);
1319                                 v->x.aidx = (f->nargs)++;
1320
1321                                 if (next_token(TC_COMMA | TC_SEQTERM) & TC_SEQTERM)
1322                                         break;
1323                         }
1324                         seq = &(f->body);
1325                         chain_group();
1326                         clear_array(ahash);
1327
1328                 } else if (tclass & TC_OPSEQ) {
1329                         rollback_token();
1330                         cn = chain_node(OC_TEST);
1331                         cn->l.n = parse_expr(TC_OPTERM | TC_EOF | TC_GRPSTART);
1332                         if (t.tclass & TC_GRPSTART) {
1333                                 rollback_token();
1334                                 chain_group();
1335                         } else {
1336                                 chain_node(OC_PRINT);
1337                         }
1338                         cn->r.n = mainseq.last;
1339
1340                 } else /* if (tclass & TC_GRPSTART) */ {
1341                         rollback_token();
1342                         chain_group();
1343                 }
1344         }
1345 }
1346
1347
1348 /* -------- program execution part -------- */
1349
1350 static node *mk_splitter(char *s, tsplitter *spl)
1351 {
1352         regex_t *re, *ire;
1353         node *n;
1354
1355         re = &spl->re[0];
1356         ire = &spl->re[1];
1357         n = &spl->n;
1358         if ((n->info & OPCLSMASK) == OC_REGEXP) {
1359                 regfree(re);
1360                 regfree(ire);
1361         }
1362         if (strlen(s) > 1) {
1363                 mk_re_node(s, n, re);
1364         } else {
1365                 n->info = (uint32_t) *s;
1366         }
1367
1368         return n;
1369 }
1370
1371 /* use node as a regular expression. Supplied with node ptr and regex_t
1372  * storage space. Return ptr to regex (if result points to preg, it should
1373  * be later regfree'd manually
1374  */
1375 static regex_t *as_regex(node *op, regex_t *preg)
1376 {
1377         var *v;
1378         char *s;
1379
1380         if ((op->info & OPCLSMASK) == OC_REGEXP) {
1381                 return icase ? op->r.ire : op->l.re;
1382         } else {
1383                 v = nvalloc(1);
1384                 s = getvar_s(evaluate(op, v));
1385                 xregcomp(preg, s, icase ? REG_EXTENDED | REG_ICASE : REG_EXTENDED);
1386                 nvfree(v);
1387                 return preg;
1388         }
1389 }
1390
1391 /* gradually increasing buffer */
1392 static void qrealloc(char **b, int n, int *size)
1393 {
1394         if (!*b || n >= *size)
1395                 *b = xrealloc(*b, *size = n + (n>>1) + 80);
1396 }
1397
1398 /* resize field storage space */
1399 static void fsrealloc(int size)
1400 {
1401         static int maxfields; /* = 0;*/
1402         int i;
1403
1404         if (size >= maxfields) {
1405                 i = maxfields;
1406                 maxfields = size + 16;
1407                 Fields = xrealloc(Fields, maxfields * sizeof(var));
1408                 for (; i < maxfields; i++) {
1409                         Fields[i].type = VF_SPECIAL;
1410                         Fields[i].string = NULL;
1411                 }
1412         }
1413
1414         if (size < nfields) {
1415                 for (i = size; i < nfields; i++) {
1416                         clrvar(Fields + i);
1417                 }
1418         }
1419         nfields = size;
1420 }
1421
1422 static int awk_split(char *s, node *spl, char **slist)
1423 {
1424         int l, n = 0;
1425         char c[4];
1426         char *s1;
1427         regmatch_t pmatch[2];
1428
1429         /* in worst case, each char would be a separate field */
1430         *slist = s1 = xstrndup(s, strlen(s) * 2 + 3);
1431
1432         c[0] = c[1] = (char)spl->info;
1433         c[2] = c[3] = '\0';
1434         if (*getvar_s(V[RS]) == '\0') c[2] = '\n';
1435
1436         if ((spl->info & OPCLSMASK) == OC_REGEXP) {             /* regex split */
1437                 while (*s) {
1438                         l = strcspn(s, c+2);
1439                         if (regexec(icase ? spl->r.ire : spl->l.re, s, 1, pmatch, 0) == 0 &&
1440                         pmatch[0].rm_so <= l) {
1441                                 l = pmatch[0].rm_so;
1442                                 if (pmatch[0].rm_eo == 0) { l++; pmatch[0].rm_eo++; }
1443                         } else {
1444                                 pmatch[0].rm_eo = l;
1445                                 if (s[l]) pmatch[0].rm_eo++;
1446                         }
1447
1448                         memcpy(s1, s, l);
1449                         s1[l] = '\0';
1450                         nextword(&s1);
1451                         s += pmatch[0].rm_eo;
1452                         n++;
1453                 }
1454         } else if (c[0] == '\0') {              /* null split */
1455                 while (*s) {
1456                         *s1++ = *s++;
1457                         *s1++ = '\0';
1458                         n++;
1459                 }
1460         } else if (c[0] != ' ') {               /* single-character split */
1461                 if (icase) {
1462                         c[0] = toupper(c[0]);
1463                         c[1] = tolower(c[1]);
1464                 }
1465                 if (*s1) n++;
1466                 while ((s1 = strpbrk(s1, c))) {
1467                         *s1++ = '\0';
1468                         n++;
1469                 }
1470         } else {                                /* space split */
1471                 while (*s) {
1472                         s = skip_whitespace(s);
1473                         if (!*s) break;
1474                         n++;
1475                         while (*s && !isspace(*s))
1476                                 *s1++ = *s++;
1477                         *s1++ = '\0';
1478                 }
1479         }
1480         return n;
1481 }
1482
1483 static void split_f0(void)
1484 {
1485         static char *fstrings = NULL;
1486         int i, n;
1487         char *s;
1488
1489         if (is_f0_split)
1490                 return;
1491
1492         is_f0_split = TRUE;
1493         free(fstrings);
1494         fsrealloc(0);
1495         n = awk_split(getvar_s(V[F0]), &fsplitter.n, &fstrings);
1496         fsrealloc(n);
1497         s = fstrings;
1498         for (i = 0; i < n; i++) {
1499                 Fields[i].string = nextword(&s);
1500                 Fields[i].type |= (VF_FSTR | VF_USER | VF_DIRTY);
1501         }
1502
1503         /* set NF manually to avoid side effects */
1504         clrvar(V[NF]);
1505         V[NF]->type = VF_NUMBER | VF_SPECIAL;
1506         V[NF]->number = nfields;
1507 }
1508
1509 /* perform additional actions when some internal variables changed */
1510 static void handle_special(var *v)
1511 {
1512         int n;
1513         char *b, *sep, *s;
1514         int sl, l, len, i, bsize;
1515
1516         if (!(v->type & VF_SPECIAL))
1517                 return;
1518
1519         if (v == V[NF]) {
1520                 n = (int)getvar_i(v);
1521                 fsrealloc(n);
1522
1523                 /* recalculate $0 */
1524                 sep = getvar_s(V[OFS]);
1525                 sl = strlen(sep);
1526                 b = NULL;
1527                 len = 0;
1528                 for (i=0; i<n; i++) {
1529                         s = getvar_s(&Fields[i]);
1530                         l = strlen(s);
1531                         if (b) {
1532                                 memcpy(b+len, sep, sl);
1533                                 len += sl;
1534                         }
1535                         qrealloc(&b, len+l+sl, &bsize);
1536                         memcpy(b+len, s, l);
1537                         len += l;
1538                 }
1539                 if (b)
1540                         b[len] = '\0';
1541                 setvar_p(V[F0], b);
1542                 is_f0_split = TRUE;
1543
1544         } else if (v == V[F0]) {
1545                 is_f0_split = FALSE;
1546
1547         } else if (v == V[FS]) {
1548                 mk_splitter(getvar_s(v), &fsplitter);
1549
1550         } else if (v == V[RS]) {
1551                 mk_splitter(getvar_s(v), &rsplitter);
1552
1553         } else if (v == V[IGNORECASE]) {
1554                 icase = istrue(v);
1555
1556         } else {                                /* $n */
1557                 n = getvar_i(V[NF]);
1558                 setvar_i(V[NF], n > v-Fields ? n : v-Fields+1);
1559                 /* right here v is invalid. Just to note... */
1560         }
1561 }
1562
1563 /* step through func/builtin/etc arguments */
1564 static node *nextarg(node **pn)
1565 {
1566         node *n;
1567
1568         n = *pn;
1569         if (n && (n->info & OPCLSMASK) == OC_COMMA) {
1570                 *pn = n->r.n;
1571                 n = n->l.n;
1572         } else {
1573                 *pn = NULL;
1574         }
1575         return n;
1576 }
1577
1578 static void hashwalk_init(var *v, xhash *array)
1579 {
1580         char **w;
1581         hash_item *hi;
1582         int i;
1583
1584         if (v->type & VF_WALK)
1585                 free(v->x.walker);
1586
1587         v->type |= VF_WALK;
1588         w = v->x.walker = xzalloc(2 + 2*sizeof(char *) + array->glen);
1589         *w = *(w+1) = (char *)(w + 2);
1590         for (i=0; i<array->csize; i++) {
1591                 hi = array->items[i];
1592                 while (hi) {
1593                         strcpy(*w, hi->name);
1594                         nextword(w);
1595                         hi = hi->next;
1596                 }
1597         }
1598 }
1599
1600 static int hashwalk_next(var *v)
1601 {
1602         char **w;
1603
1604         w = v->x.walker;
1605         if (*(w+1) == *w)
1606                 return FALSE;
1607
1608         setvar_s(v, nextword(w+1));
1609         return TRUE;
1610 }
1611
1612 /* evaluate node, return 1 when result is true, 0 otherwise */
1613 static int ptest(node *pattern)
1614 {
1615         static var v; /* static: to save stack space? */
1616
1617         return istrue(evaluate(pattern, &v));
1618 }
1619
1620 /* read next record from stream rsm into a variable v */
1621 static int awk_getline(rstream *rsm, var *v)
1622 {
1623         char *b;
1624         regmatch_t pmatch[2];
1625         int a, p, pp=0, size;
1626         int fd, so, eo, r, rp;
1627         char c, *m, *s;
1628
1629         /* we're using our own buffer since we need access to accumulating
1630          * characters
1631          */
1632         fd = fileno(rsm->F);
1633         m = rsm->buffer;
1634         a = rsm->adv;
1635         p = rsm->pos;
1636         size = rsm->size;
1637         c = (char) rsplitter.n.info;
1638         rp = 0;
1639
1640         if (! m) qrealloc(&m, 256, &size);
1641         do {
1642                 b = m + a;
1643                 so = eo = p;
1644                 r = 1;
1645                 if (p > 0) {
1646                         if ((rsplitter.n.info & OPCLSMASK) == OC_REGEXP) {
1647                                 if (regexec(icase ? rsplitter.n.r.ire : rsplitter.n.l.re,
1648                                                                                                 b, 1, pmatch, 0) == 0) {
1649                                         so = pmatch[0].rm_so;
1650                                         eo = pmatch[0].rm_eo;
1651                                         if (b[eo] != '\0')
1652                                                 break;
1653                                 }
1654                         } else if (c != '\0') {
1655                                 s = strchr(b+pp, c);
1656                                 if (! s) s = memchr(b+pp, '\0', p - pp);
1657                                 if (s) {
1658                                         so = eo = s-b;
1659                                         eo++;
1660                                         break;
1661                                 }
1662                         } else {
1663                                 while (b[rp] == '\n')
1664                                         rp++;
1665                                 s = strstr(b+rp, "\n\n");
1666                                 if (s) {
1667                                         so = eo = s-b;
1668                                         while (b[eo] == '\n') eo++;
1669                                         if (b[eo] != '\0')
1670                                                 break;
1671                                 }
1672                         }
1673                 }
1674
1675                 if (a > 0) {
1676                         memmove(m, (const void *)(m+a), p+1);
1677                         b = m;
1678                         a = 0;
1679                 }
1680
1681                 qrealloc(&m, a+p+128, &size);
1682                 b = m + a;
1683                 pp = p;
1684                 p += safe_read(fd, b+p, size-p-1);
1685                 if (p < pp) {
1686                         p = 0;
1687                         r = 0;
1688                         setvar_i(V[ERRNO], errno);
1689                 }
1690                 b[p] = '\0';
1691
1692         } while (p > pp);
1693
1694         if (p == 0) {
1695                 r--;
1696         } else {
1697                 c = b[so]; b[so] = '\0';
1698                 setvar_s(v, b+rp);
1699                 v->type |= VF_USER;
1700                 b[so] = c;
1701                 c = b[eo]; b[eo] = '\0';
1702                 setvar_s(V[RT], b+so);
1703                 b[eo] = c;
1704         }
1705
1706         rsm->buffer = m;
1707         rsm->adv = a + eo;
1708         rsm->pos = p - eo;
1709         rsm->size = size;
1710
1711         return r;
1712 }
1713
1714 static int fmt_num(char *b, int size, const char *format, double n, int int_as_int)
1715 {
1716         int r = 0;
1717         char c;
1718         const char *s = format;
1719
1720         if (int_as_int && n == (int)n) {
1721                 r = snprintf(b, size, "%d", (int)n);
1722         } else {
1723                 do { c = *s; } while (c && *++s);
1724                 if (strchr("diouxX", c)) {
1725                         r = snprintf(b, size, format, (int)n);
1726                 } else if (strchr("eEfgG", c)) {
1727                         r = snprintf(b, size, format, n);
1728                 } else {
1729                         runtime_error(EMSG_INV_FMT);
1730                 }
1731         }
1732         return r;
1733 }
1734
1735
1736 /* formatted output into an allocated buffer, return ptr to buffer */
1737 static char *awk_printf(node *n)
1738 {
1739         char *b = NULL;
1740         char *fmt, *s, *s1, *f;
1741         int i, j, incr, bsize;
1742         char c, c1;
1743         var *v, *arg;
1744
1745         v = nvalloc(1);
1746         fmt = f = xstrdup(getvar_s(evaluate(nextarg(&n), v)));
1747
1748         i = 0;
1749         while (*f) {
1750                 s = f;
1751                 while (*f && (*f != '%' || *(++f) == '%'))
1752                         f++;
1753                 while (*f && !isalpha(*f))
1754                         f++;
1755
1756                 incr = (f - s) + MAXVARFMT;
1757                 qrealloc(&b, incr + i, &bsize);
1758                 c = *f;
1759                 if (c != '\0') f++;
1760                 c1 = *f;
1761                 *f = '\0';
1762                 arg = evaluate(nextarg(&n), v);
1763
1764                 j = i;
1765                 if (c == 'c' || !c) {
1766                         i += sprintf(b+i, s, is_numeric(arg) ?
1767                                         (char)getvar_i(arg) : *getvar_s(arg));
1768
1769                 } else if (c == 's') {
1770                         s1 = getvar_s(arg);
1771                         qrealloc(&b, incr+i+strlen(s1), &bsize);
1772                         i += sprintf(b+i, s, s1);
1773
1774                 } else {
1775                         i += fmt_num(b+i, incr, s, getvar_i(arg), FALSE);
1776                 }
1777                 *f = c1;
1778
1779                 /* if there was an error while sprintf, return value is negative */
1780                 if (i < j) i = j;
1781         }
1782
1783         b = xrealloc(b, i + 1);
1784         free(fmt);
1785         nvfree(v);
1786         b[i] = '\0';
1787         return b;
1788 }
1789
1790 /* common substitution routine
1791  * replace (nm) substring of (src) that match (n) with (repl), store
1792  * result into (dest), return number of substitutions. If nm=0, replace
1793  * all matches. If src or dst is NULL, use $0. If ex=TRUE, enable
1794  * subexpression matching (\1-\9)
1795  */
1796 static int awk_sub(node *rn, char *repl, int nm, var *src, var *dest, int ex)
1797 {
1798         char *ds = NULL;
1799         char *sp, *s;
1800         int c, i, j, di, rl, so, eo, nbs, n, dssize;
1801         regmatch_t pmatch[10];
1802         regex_t sreg, *re;
1803
1804         re = as_regex(rn, &sreg);
1805         if (! src) src = V[F0];
1806         if (! dest) dest = V[F0];
1807
1808         i = di = 0;
1809         sp = getvar_s(src);
1810         rl = strlen(repl);
1811         while (regexec(re, sp, 10, pmatch, sp==getvar_s(src) ? 0:REG_NOTBOL) == 0) {
1812                 so = pmatch[0].rm_so;
1813                 eo = pmatch[0].rm_eo;
1814
1815                 qrealloc(&ds, di + eo + rl, &dssize);
1816                 memcpy(ds + di, sp, eo);
1817                 di += eo;
1818                 if (++i >= nm) {
1819                         /* replace */
1820                         di -= (eo - so);
1821                         nbs = 0;
1822                         for (s = repl; *s; s++) {
1823                                 ds[di++] = c = *s;
1824                                 if (c == '\\') {
1825                                         nbs++;
1826                                         continue;
1827                                 }
1828                                 if (c == '&' || (ex && c >= '0' && c <= '9')) {
1829                                         di -= ((nbs + 3) >> 1);
1830                                         j = 0;
1831                                         if (c != '&') {
1832                                                 j = c - '0';
1833                                                 nbs++;
1834                                         }
1835                                         if (nbs % 2) {
1836                                                 ds[di++] = c;
1837                                         } else {
1838                                                 n = pmatch[j].rm_eo - pmatch[j].rm_so;
1839                                                 qrealloc(&ds, di + rl + n, &dssize);
1840                                                 memcpy(ds + di, sp + pmatch[j].rm_so, n);
1841                                                 di += n;
1842                                         }
1843                                 }
1844                                 nbs = 0;
1845                         }
1846                 }
1847
1848                 sp += eo;
1849                 if (i == nm) break;
1850                 if (eo == so) {
1851                         if (! (ds[di++] = *sp++)) break;
1852                 }
1853         }
1854
1855         qrealloc(&ds, di + strlen(sp), &dssize);
1856         strcpy(ds + di, sp);
1857         setvar_p(dest, ds);
1858         if (re == &sreg) regfree(re);
1859         return i;
1860 }
1861
1862 static var *exec_builtin(node *op, var *res)
1863 {
1864         int (*to_xxx)(int);
1865         var *tv;
1866         node *an[4];
1867         var  *av[4];
1868         char *as[4];
1869         regmatch_t pmatch[2];
1870         regex_t sreg, *re;
1871         static tsplitter tspl;
1872         node *spl;
1873         uint32_t isr, info;
1874         int nargs;
1875         time_t tt;
1876         char *s, *s1;
1877         int i, l, ll, n;
1878
1879         tv = nvalloc(4);
1880         isr = info = op->info;
1881         op = op->l.n;
1882
1883         av[2] = av[3] = NULL;
1884         for (i=0 ; i<4 && op ; i++) {
1885                 an[i] = nextarg(&op);
1886                 if (isr & 0x09000000) av[i] = evaluate(an[i], &tv[i]);
1887                 if (isr & 0x08000000) as[i] = getvar_s(av[i]);
1888                 isr >>= 1;
1889         }
1890
1891         nargs = i;
1892         if (nargs < (info >> 30))
1893                 runtime_error(EMSG_TOO_FEW_ARGS);
1894
1895         switch (info & OPNMASK) {
1896
1897         case B_a2:
1898 #if ENABLE_FEATURE_AWK_MATH
1899                 setvar_i(res, atan2(getvar_i(av[i]), getvar_i(av[1])));
1900 #else
1901                 runtime_error(EMSG_NO_MATH);
1902 #endif
1903                 break;
1904
1905         case B_sp:
1906                 if (nargs > 2) {
1907                         spl = (an[2]->info & OPCLSMASK) == OC_REGEXP ?
1908                                 an[2] : mk_splitter(getvar_s(evaluate(an[2], &tv[2])), &tspl);
1909                 } else {
1910                         spl = &fsplitter.n;
1911                 }
1912
1913                 n = awk_split(as[0], spl, &s);
1914                 s1 = s;
1915                 clear_array(iamarray(av[1]));
1916                 for (i=1; i<=n; i++)
1917                         setari_u(av[1], i, nextword(&s1));
1918                 free(s);
1919                 setvar_i(res, n);
1920                 break;
1921
1922         case B_ss:
1923                 l = strlen(as[0]);
1924                 i = getvar_i(av[1]) - 1;
1925                 if (i>l) i=l; if (i<0) i=0;
1926                 n = (nargs > 2) ? getvar_i(av[2]) : l-i;
1927                 if (n<0) n=0;
1928                 s = xmalloc(n+1);
1929                 strncpy(s, as[0]+i, n);
1930                 s[n] = '\0';
1931                 setvar_p(res, s);
1932                 break;
1933                 
1934         case B_an:
1935                 setvar_i(res, (long)getvar_i(av[0]) & (long)getvar_i(av[1]));
1936                 break;
1937                 
1938         case B_co:
1939                 setvar_i(res, ~(long)getvar_i(av[0]));
1940                 break;
1941
1942         case B_ls:
1943                 setvar_i(res, (long)getvar_i(av[0]) << (long)getvar_i(av[1]));
1944                 break;
1945
1946         case B_or:
1947                 setvar_i(res, (long)getvar_i(av[0]) | (long)getvar_i(av[1]));
1948                 break;
1949
1950         case B_rs:
1951                 setvar_i(res, (long)((unsigned long)getvar_i(av[0]) >> (unsigned long)getvar_i(av[1])));
1952                 break;
1953
1954         case B_xo:
1955                 setvar_i(res, (long)getvar_i(av[0]) ^ (long)getvar_i(av[1]));
1956                 break;
1957
1958         case B_lo:
1959                 to_xxx = tolower;
1960                 goto lo_cont;
1961
1962         case B_up:
1963                 to_xxx = toupper;
1964  lo_cont:
1965                 s1 = s = xstrdup(as[0]);
1966                 while (*s1) {
1967                         *s1 = (*to_xxx)(*s1);
1968                         s1++;
1969                 }
1970                 setvar_p(res, s);
1971                 break;
1972
1973         case B_ix:
1974                 n = 0;
1975                 ll = strlen(as[1]);
1976                 l = strlen(as[0]) - ll;
1977                 if (ll > 0 && l >= 0) {
1978                         if (! icase) {
1979                                 s = strstr(as[0], as[1]);
1980                                 if (s) n = (s - as[0]) + 1;
1981                         } else {
1982                                 /* this piece of code is terribly slow and
1983                                  * really should be rewritten
1984                                  */
1985                                 for (i=0; i<=l; i++) {
1986                                         if (strncasecmp(as[0]+i, as[1], ll) == 0) {
1987                                                 n = i+1;
1988                                                 break;
1989                                         }
1990                                 }
1991                         }
1992                 }
1993                 setvar_i(res, n);
1994                 break;
1995
1996         case B_ti:
1997                 if (nargs > 1)
1998                         tt = getvar_i(av[1]);
1999                 else
2000                         time(&tt);
2001                 s = (nargs > 0) ? as[0] : "%a %b %d %H:%M:%S %Z %Y";
2002                 i = strftime(buf, MAXVARFMT, s, localtime(&tt));
2003                 buf[i] = '\0';
2004                 setvar_s(res, buf);
2005                 break;
2006
2007         case B_ma:
2008                 re = as_regex(an[1], &sreg);
2009                 n = regexec(re, as[0], 1, pmatch, 0);
2010                 if (n == 0) {
2011                         pmatch[0].rm_so++;
2012                         pmatch[0].rm_eo++;
2013                 } else {
2014                         pmatch[0].rm_so = 0;
2015                         pmatch[0].rm_eo = -1;
2016                 }
2017                 setvar_i(newvar("RSTART"), pmatch[0].rm_so);
2018                 setvar_i(newvar("RLENGTH"), pmatch[0].rm_eo - pmatch[0].rm_so);
2019                 setvar_i(res, pmatch[0].rm_so);
2020                 if (re == &sreg) regfree(re);
2021                 break;
2022
2023         case B_ge:
2024                 awk_sub(an[0], as[1], getvar_i(av[2]), av[3], res, TRUE);
2025                 break;
2026
2027         case B_gs:
2028                 setvar_i(res, awk_sub(an[0], as[1], 0, av[2], av[2], FALSE));
2029                 break;
2030
2031         case B_su:
2032                 setvar_i(res, awk_sub(an[0], as[1], 1, av[2], av[2], FALSE));
2033                 break;
2034         }
2035
2036         nvfree(tv);
2037         return res;
2038 }
2039
2040 /*
2041  * Evaluate node - the heart of the program. Supplied with subtree
2042  * and place where to store result. returns ptr to result.
2043  */
2044 #define XC(n) ((n) >> 8)
2045
2046 static var *evaluate(node *op, var *res)
2047 {
2048         /* This procedure is recursive so we should count every byte */
2049         static var *fnargs = NULL;
2050         static unsigned seed = 1;
2051         static regex_t sreg;
2052
2053         node *op1;
2054         var *v1;
2055         union {
2056                 var *v;
2057                 char *s;
2058                 double d;
2059                 int i;
2060         } L, R;
2061         uint32_t opinfo;
2062         short opn;
2063         union {
2064                 char *s;
2065                 rstream *rsm;
2066                 FILE *F;
2067                 var *v;
2068                 regex_t *re;
2069                 uint32_t info;
2070         } X;
2071
2072         if (!op)
2073                 return setvar_s(res, NULL);
2074
2075         v1 = nvalloc(2);
2076
2077         while (op) {
2078
2079                 opinfo = op->info;
2080                 opn = (short)(opinfo & OPNMASK);
2081                 lineno = op->lineno;
2082
2083                 /* execute inevitable things */
2084                 op1 = op->l.n;
2085                 if (opinfo & OF_RES1) X.v = L.v = evaluate(op1, v1);
2086                 if (opinfo & OF_RES2) R.v = evaluate(op->r.n, v1+1);
2087                 if (opinfo & OF_STR1) L.s = getvar_s(L.v);
2088                 if (opinfo & OF_STR2) R.s = getvar_s(R.v);
2089                 if (opinfo & OF_NUM1) L.d = getvar_i(L.v);
2090
2091                 switch (XC(opinfo & OPCLSMASK)) {
2092
2093                   /* -- iterative node type -- */
2094
2095                   /* test pattern */
2096                 case XC( OC_TEST ):
2097                         if ((op1->info & OPCLSMASK) == OC_COMMA) {
2098                                 /* it's range pattern */
2099                                 if ((opinfo & OF_CHECKED) || ptest(op1->l.n)) {
2100                                         op->info |= OF_CHECKED;
2101                                         if (ptest(op1->r.n))
2102                                                 op->info &= ~OF_CHECKED;
2103
2104                                         op = op->a.n;
2105                                 } else {
2106                                         op = op->r.n;
2107                                 }
2108                         } else {
2109                                 op = (ptest(op1)) ? op->a.n : op->r.n;
2110                         }
2111                         break;
2112
2113                   /* just evaluate an expression, also used as unconditional jump */
2114                 case XC( OC_EXEC ):
2115                         break;
2116
2117                   /* branch, used in if-else and various loops */
2118                 case XC( OC_BR ):
2119                         op = istrue(L.v) ? op->a.n : op->r.n;
2120                         break;
2121
2122                   /* initialize for-in loop */
2123                 case XC( OC_WALKINIT ):
2124                         hashwalk_init(L.v, iamarray(R.v));
2125                         break;
2126
2127                   /* get next array item */
2128                 case XC( OC_WALKNEXT ):
2129                         op = hashwalk_next(L.v) ? op->a.n : op->r.n;
2130                         break;
2131
2132                 case XC( OC_PRINT ):
2133                 case XC( OC_PRINTF ):
2134                         X.F = stdout;
2135                         if (op->r.n) {
2136                                 X.rsm = newfile(R.s);
2137                                 if (! X.rsm->F) {
2138                                         if (opn == '|') {
2139                                                 if((X.rsm->F = popen(R.s, "w")) == NULL)
2140                                                         bb_perror_msg_and_die("popen");
2141                                                 X.rsm->is_pipe = 1;
2142                                         } else {
2143                                                 X.rsm->F = xfopen(R.s, opn=='w' ? "w" : "a");
2144                                         }
2145                                 }
2146                                 X.F = X.rsm->F;
2147                         }
2148
2149                         if ((opinfo & OPCLSMASK) == OC_PRINT) {
2150                                 if (! op1) {
2151                                         fputs(getvar_s(V[F0]), X.F);
2152                                 } else {
2153                                         while (op1) {
2154                                                 L.v = evaluate(nextarg(&op1), v1);
2155                                                 if (L.v->type & VF_NUMBER) {
2156                                                         fmt_num(buf, MAXVARFMT, getvar_s(V[OFMT]),
2157                                                                         getvar_i(L.v), TRUE);
2158                                                         fputs(buf, X.F);
2159                                                 } else {
2160                                                         fputs(getvar_s(L.v), X.F);
2161                                                 }
2162
2163                                                 if (op1) fputs(getvar_s(V[OFS]), X.F);
2164                                         }
2165                                 }
2166                                 fputs(getvar_s(V[ORS]), X.F);
2167
2168                         } else {        /* OC_PRINTF */
2169                                 L.s = awk_printf(op1);
2170                                 fputs(L.s, X.F);
2171                                 free(L.s);
2172                         }
2173                         fflush(X.F);
2174                         break;
2175
2176                 case XC( OC_DELETE ):
2177                         X.info = op1->info & OPCLSMASK;
2178                         if (X.info == OC_VAR) {
2179                                 R.v = op1->l.v;
2180                         } else if (X.info == OC_FNARG) {
2181                                 R.v = &fnargs[op1->l.i];
2182                         } else {
2183                                 runtime_error(EMSG_NOT_ARRAY);
2184                         }
2185
2186                         if (op1->r.n) {
2187                                 clrvar(L.v);
2188                                 L.s = getvar_s(evaluate(op1->r.n, v1));
2189                                 hash_remove(iamarray(R.v), L.s);
2190                         } else {
2191                                 clear_array(iamarray(R.v));
2192                         }
2193                         break;
2194
2195                 case XC( OC_NEWSOURCE ):
2196                         programname = op->l.s;
2197                         break;
2198
2199                 case XC( OC_RETURN ):
2200                         copyvar(res, L.v);
2201                         break;
2202
2203                 case XC( OC_NEXTFILE ):
2204                         nextfile = TRUE;
2205                 case XC( OC_NEXT ):
2206                         nextrec = TRUE;
2207                 case XC( OC_DONE ):
2208                         clrvar(res);
2209                         break;
2210
2211                 case XC( OC_EXIT ):
2212                         awk_exit(L.d);
2213
2214                   /* -- recursive node type -- */
2215
2216                 case XC( OC_VAR ):
2217                         L.v = op->l.v;
2218                         if (L.v == V[NF])
2219                                 split_f0();
2220                         goto v_cont;
2221
2222                 case XC( OC_FNARG ):
2223                         L.v = &fnargs[op->l.i];
2224  v_cont:
2225                         res = op->r.n ? findvar(iamarray(L.v), R.s) : L.v;
2226                         break;
2227
2228                 case XC( OC_IN ):
2229                         setvar_i(res, hash_search(iamarray(R.v), L.s) ? 1 : 0);
2230                         break;
2231
2232                 case XC( OC_REGEXP ):
2233                         op1 = op;
2234                         L.s = getvar_s(V[F0]);
2235                         goto re_cont;
2236
2237                 case XC( OC_MATCH ):
2238                         op1 = op->r.n;
2239  re_cont:
2240                         X.re = as_regex(op1, &sreg);
2241                         R.i = regexec(X.re, L.s, 0, NULL, 0);
2242                         if (X.re == &sreg) regfree(X.re);
2243                         setvar_i(res, (R.i == 0 ? 1 : 0) ^ (opn == '!' ? 1 : 0));
2244                         break;
2245
2246                 case XC( OC_MOVE ):
2247                         /* if source is a temporary string, jusk relink it to dest */
2248                         if (R.v == v1+1 && R.v->string) {
2249                                 res = setvar_p(L.v, R.v->string);
2250                                 R.v->string = NULL;
2251                         } else {
2252                                 res = copyvar(L.v, R.v);
2253                         }
2254                         break;
2255
2256                 case XC( OC_TERNARY ):
2257                         if ((op->r.n->info & OPCLSMASK) != OC_COLON)
2258                                 runtime_error(EMSG_POSSIBLE_ERROR);
2259                         res = evaluate(istrue(L.v) ? op->r.n->l.n : op->r.n->r.n, res);
2260                         break;
2261
2262                 case XC( OC_FUNC ):
2263                         if (! op->r.f->body.first)
2264                                 runtime_error(EMSG_UNDEF_FUNC);
2265
2266                         X.v = R.v = nvalloc(op->r.f->nargs+1);
2267                         while (op1) {
2268                                 L.v = evaluate(nextarg(&op1), v1);
2269                                 copyvar(R.v, L.v);
2270                                 R.v->type |= VF_CHILD;
2271                                 R.v->x.parent = L.v;
2272                                 if (++R.v - X.v >= op->r.f->nargs)
2273                                         break;
2274                         }
2275
2276                         R.v = fnargs;
2277                         fnargs = X.v;
2278
2279                         L.s = programname;
2280                         res = evaluate(op->r.f->body.first, res);
2281                         programname = L.s;
2282
2283                         nvfree(fnargs);
2284                         fnargs = R.v;
2285                         break;
2286
2287                 case XC( OC_GETLINE ):
2288                 case XC( OC_PGETLINE ):
2289                         if (op1) {
2290                                 X.rsm = newfile(L.s);
2291                                 if (! X.rsm->F) {
2292                                         if ((opinfo & OPCLSMASK) == OC_PGETLINE) {
2293                                                 X.rsm->F = popen(L.s, "r");
2294                                                 X.rsm->is_pipe = TRUE;
2295                                         } else {
2296                                                 X.rsm->F = fopen(L.s, "r");             /* not xfopen! */
2297                                         }
2298                                 }
2299                         } else {
2300                                 if (! iF) iF = next_input_file();
2301                                 X.rsm = iF;
2302                         }
2303
2304                         if (! X.rsm->F) {
2305                                 setvar_i(V[ERRNO], errno);
2306                                 setvar_i(res, -1);
2307                                 break;
2308                         }
2309
2310                         if (! op->r.n)
2311                                 R.v = V[F0];
2312
2313                         L.i = awk_getline(X.rsm, R.v);
2314                         if (L.i > 0) {
2315                                 if (! op1) {
2316                                         incvar(V[FNR]);
2317                                         incvar(V[NR]);
2318                                 }
2319                         }
2320                         setvar_i(res, L.i);
2321                         break;
2322
2323                   /* simple builtins */
2324                 case XC( OC_FBLTIN ):
2325                         switch (opn) {
2326
2327                         case F_in:
2328                                 R.d = (int)L.d;
2329                                 break;
2330
2331                         case F_rn:
2332                                 R.d = (double)rand() / (double)RAND_MAX;
2333                                 break;
2334
2335 #if ENABLE_FEATURE_AWK_MATH
2336                         case F_co:
2337                                 R.d = cos(L.d);
2338                                 break;
2339
2340                         case F_ex:
2341                                 R.d = exp(L.d);
2342                                 break;
2343
2344                         case F_lg:
2345                                 R.d = log(L.d);
2346                                 break;
2347
2348                         case F_si:
2349                                 R.d = sin(L.d);
2350                                 break;
2351
2352                         case F_sq:
2353                                 R.d = sqrt(L.d);
2354                                 break;
2355 #else
2356                         case F_co:
2357                         case F_ex:
2358                         case F_lg:
2359                         case F_si:
2360                         case F_sq:
2361                                 runtime_error(EMSG_NO_MATH);
2362                                 break;
2363 #endif
2364
2365                         case F_sr:
2366                                 R.d = (double)seed;
2367                                 seed = op1 ? (unsigned)L.d : (unsigned)time(NULL);
2368                                 srand(seed);
2369                                 break;
2370
2371                         case F_ti:
2372                                 R.d = time(NULL);
2373                                 break;
2374
2375                         case F_le:
2376                                 if (! op1)
2377                                         L.s = getvar_s(V[F0]);
2378                                 R.d = strlen(L.s);
2379                                 break;
2380
2381                         case F_sy:
2382                                 fflush(NULL);
2383                                 R.d = (ENABLE_FEATURE_ALLOW_EXEC && L.s && *L.s)
2384                                                 ? (system(L.s) >> 8) : 0;
2385                                 break;
2386
2387                         case F_ff:
2388                                 if (! op1)
2389                                         fflush(stdout);
2390                                 else {
2391                                         if (L.s && *L.s) {
2392                                                 X.rsm = newfile(L.s);
2393                                                 fflush(X.rsm->F);
2394                                         } else {
2395                                                 fflush(NULL);
2396                                         }
2397                                 }
2398                                 break;
2399
2400                         case F_cl:
2401                                 X.rsm = (rstream *)hash_search(fdhash, L.s);
2402                                 if (X.rsm) {
2403                                         R.i = X.rsm->is_pipe ? pclose(X.rsm->F) : fclose(X.rsm->F);
2404                                         free(X.rsm->buffer);
2405                                         hash_remove(fdhash, L.s);
2406                                 }
2407                                 if (R.i != 0)
2408                                         setvar_i(V[ERRNO], errno);
2409                                 R.d = (double)R.i;
2410                                 break;
2411                         }
2412                         setvar_i(res, R.d);
2413                         break;
2414
2415                 case XC( OC_BUILTIN ):
2416                         res = exec_builtin(op, res);
2417                         break;
2418
2419                 case XC( OC_SPRINTF ):
2420                         setvar_p(res, awk_printf(op1));
2421                         break;
2422
2423                 case XC( OC_UNARY ):
2424                         X.v = R.v;
2425                         L.d = R.d = getvar_i(R.v);
2426                         switch (opn) {
2427                         case 'P':
2428                                 L.d = ++R.d;
2429                                 goto r_op_change;
2430                         case 'p':
2431                                 R.d++;
2432                                 goto r_op_change;
2433                         case 'M':
2434                                 L.d = --R.d;
2435                                 goto r_op_change;
2436                         case 'm':
2437                                 R.d--;
2438                                 goto r_op_change;
2439                         case '!':
2440                                 L.d = istrue(X.v) ? 0 : 1;
2441                                 break;
2442                         case '-':
2443                                 L.d = -R.d;
2444                                 break;
2445  r_op_change:
2446                                 setvar_i(X.v, R.d);
2447                         }
2448                         setvar_i(res, L.d);
2449                         break;
2450
2451                 case XC( OC_FIELD ):
2452                         R.i = (int)getvar_i(R.v);
2453                         if (R.i == 0) {
2454                                 res = V[F0];
2455                         } else {
2456                                 split_f0();
2457                                 if (R.i > nfields)
2458                                         fsrealloc(R.i);
2459
2460                                 res = &Fields[R.i-1];
2461                         }
2462                         break;
2463
2464                   /* concatenation (" ") and index joining (",") */
2465                 case XC( OC_CONCAT ):
2466                 case XC( OC_COMMA ):
2467                         opn = strlen(L.s) + strlen(R.s) + 2;
2468                         X.s = xmalloc(opn);
2469                         strcpy(X.s, L.s);
2470                         if ((opinfo & OPCLSMASK) == OC_COMMA) {
2471                                 L.s = getvar_s(V[SUBSEP]);
2472                                 X.s = xrealloc(X.s, opn + strlen(L.s));
2473                                 strcat(X.s, L.s);
2474                         }
2475                         strcat(X.s, R.s);
2476                         setvar_p(res, X.s);
2477                         break;
2478
2479                 case XC( OC_LAND ):
2480                         setvar_i(res, istrue(L.v) ? ptest(op->r.n) : 0);
2481                         break;
2482
2483                 case XC( OC_LOR ):
2484                         setvar_i(res, istrue(L.v) ? 1 : ptest(op->r.n));
2485                         break;
2486
2487                 case XC( OC_BINARY ):
2488                 case XC( OC_REPLACE ):
2489                         R.d = getvar_i(R.v);
2490                         switch (opn) {
2491                         case '+':
2492                                 L.d += R.d;
2493                                 break;
2494                         case '-':
2495                                 L.d -= R.d;
2496                                 break;
2497                         case '*':
2498                                 L.d *= R.d;
2499                                 break;
2500                         case '/':
2501                                 if (R.d == 0) runtime_error(EMSG_DIV_BY_ZERO);
2502                                 L.d /= R.d;
2503                                 break;
2504                         case '&':
2505 #if ENABLE_FEATURE_AWK_MATH
2506                                 L.d = pow(L.d, R.d);
2507 #else
2508                                 runtime_error(EMSG_NO_MATH);
2509 #endif
2510                                 break;
2511                         case '%':
2512                                 if (R.d == 0) runtime_error(EMSG_DIV_BY_ZERO);
2513                                 L.d -= (int)(L.d / R.d) * R.d;
2514                                 break;
2515                         }
2516                         res = setvar_i(((opinfo&OPCLSMASK) == OC_BINARY) ? res : X.v, L.d);
2517                         break;
2518
2519                 case XC( OC_COMPARE ):
2520                         if (is_numeric(L.v) && is_numeric(R.v)) {
2521                                 L.d = getvar_i(L.v) - getvar_i(R.v);
2522                         } else {
2523                                 L.s = getvar_s(L.v);
2524                                 R.s = getvar_s(R.v);
2525                                 L.d = icase ? strcasecmp(L.s, R.s) : strcmp(L.s, R.s);
2526                         }
2527                         switch (opn & 0xfe) {
2528                         case 0:
2529                                 R.i = (L.d > 0);
2530                                 break;
2531                         case 2:
2532                                 R.i = (L.d >= 0);
2533                                 break;
2534                         case 4:
2535                                 R.i = (L.d == 0);
2536                                 break;
2537                         }
2538                         setvar_i(res, (opn & 0x1 ? R.i : !R.i) ? 1 : 0);
2539                         break;
2540
2541                 default:
2542                         runtime_error(EMSG_POSSIBLE_ERROR);
2543                 }
2544                 if ((opinfo & OPCLSMASK) <= SHIFT_TIL_THIS)
2545                         op = op->a.n;
2546                 if ((opinfo & OPCLSMASK) >= RECUR_FROM_THIS)
2547                         break;
2548                 if (nextrec)
2549                         break;
2550         }
2551         nvfree(v1);
2552         return res;
2553 }
2554
2555
2556 /* -------- main & co. -------- */
2557
2558 static int awk_exit(int r)
2559 {
2560         var tv;
2561         unsigned i;
2562         hash_item *hi;
2563
2564         zero_out_var(&tv);
2565
2566         if (!exiting) {
2567                 exiting = TRUE;
2568                 nextrec = FALSE;
2569                 evaluate(endseq.first, &tv);
2570         }
2571
2572         /* waiting for children */
2573         for (i = 0; i < fdhash->csize; i++) {
2574                 hi = fdhash->items[i];
2575                 while (hi) {
2576                         if (hi->data.rs.F && hi->data.rs.is_pipe)
2577                                 pclose(hi->data.rs.F);
2578                         hi = hi->next;
2579                 }
2580         }
2581
2582         exit(r);
2583 }
2584
2585 /* if expr looks like "var=value", perform assignment and return 1,
2586  * otherwise return 0 */
2587 static int is_assignment(const char *expr)
2588 {
2589         char *exprc, *s, *s0, *s1;
2590
2591         exprc = xstrdup(expr);
2592         if (!isalnum_(*exprc) || (s = strchr(exprc, '=')) == NULL) {
2593                 free(exprc);
2594                 return FALSE;
2595         }
2596
2597         *(s++) = '\0';
2598         s0 = s1 = s;
2599         while (*s)
2600                 *(s1++) = nextchar(&s);
2601
2602         *s1 = '\0';
2603         setvar_u(newvar(exprc), s0);
2604         free(exprc);
2605         return TRUE;
2606 }
2607
2608 /* switch to next input file */
2609 static rstream *next_input_file(void)
2610 {
2611         static rstream rsm;
2612         FILE *F = NULL;
2613         char *fname, *ind;
2614         static int files_happen = FALSE;
2615
2616         if (rsm.F) fclose(rsm.F);
2617         rsm.F = NULL;
2618         rsm.pos = rsm.adv = 0;
2619
2620         do {
2621                 if (getvar_i(V[ARGIND])+1 >= getvar_i(V[ARGC])) {
2622                         if (files_happen)
2623                                 return NULL;
2624                         fname = "-";
2625                         F = stdin;
2626                 } else {
2627                         ind = getvar_s(incvar(V[ARGIND]));
2628                         fname = getvar_s(findvar(iamarray(V[ARGV]), ind));
2629                         if (fname && *fname && !is_assignment(fname))
2630                                 F = afopen(fname, "r");
2631                 }
2632         } while (!F);
2633
2634         files_happen = TRUE;
2635         setvar_s(V[FILENAME], fname);
2636         rsm.F = F;
2637         return &rsm;
2638 }
2639
2640 int awk_main(int argc, char **argv)
2641 {
2642         unsigned opt;
2643         char *opt_F, *opt_v, *opt_W;
2644         int i, j, flen;
2645         var *v;
2646         var tv;
2647         char **envp;
2648         char *vnames = (char *)vNames; /* cheat */
2649         char *vvalues = (char *)vValues;
2650
2651         /* Undo busybox.c, or else strtod may eat ','! This breaks parsing:
2652          * $1,$2 == '$1,' '$2', NOT '$1' ',' '$2' */
2653         if (ENABLE_LOCALE_SUPPORT)
2654                 setlocale(LC_NUMERIC, "C");
2655
2656         zero_out_var(&tv);
2657
2658         /* allocate global buffer */
2659         buf = xmalloc(MAXVARFMT + 1);
2660
2661         vhash = hash_init();
2662         ahash = hash_init();
2663         fdhash = hash_init();
2664         fnhash = hash_init();
2665
2666         /* initialize variables */
2667         for (i = 0; *vnames; i++) {
2668                 V[i] = v = newvar(nextword(&vnames));
2669                 if (*vvalues != '\377')
2670                         setvar_s(v, nextword(&vvalues));
2671                 else
2672                         setvar_i(v, 0);
2673
2674                 if (*vnames == '*') {
2675                         v->type |= VF_SPECIAL;
2676                         vnames++;
2677                 }
2678         }
2679
2680         handle_special(V[FS]);
2681         handle_special(V[RS]);
2682
2683         newfile("/dev/stdin")->F = stdin;
2684         newfile("/dev/stdout")->F = stdout;
2685         newfile("/dev/stderr")->F = stderr;
2686
2687         for (envp = environ; *envp; envp++) {
2688                 char *s = xstrdup(*envp);
2689                 char *s1 = strchr(s, '=');
2690                 if (s1) {
2691                         *s1++ = '\0';
2692                         setvar_u(findvar(iamarray(V[ENVIRON]), s), s1);
2693                 }
2694                 free(s);
2695         }
2696
2697         opt = getopt32(argc, argv, "F:v:f:W:", &opt_F, &opt_v, &programname, &opt_W);
2698         argv += optind;
2699         argc -= optind;
2700         if (opt & 0x1) setvar_s(V[FS], opt_F); // -F
2701         if (opt & 0x2) if (!is_assignment(opt_v)) bb_show_usage(); // -v
2702         if (opt & 0x4) { // -f
2703                 char *s = s; /* die, gcc, die */
2704                 FILE *from_file = afopen(programname, "r");
2705                 /* one byte is reserved for some trick in next_token */
2706                 if (fseek(from_file, 0, SEEK_END) == 0) {
2707                         flen = ftell(from_file);
2708                         s = xmalloc(flen + 4);
2709                         fseek(from_file, 0, SEEK_SET);
2710                         i = 1 + fread(s + 1, 1, flen, from_file);
2711                 } else {
2712                         for (i = j = 1; j > 0; i += j) {
2713                                 s = xrealloc(s, i + 4096);
2714                                 j = fread(s + i, 1, 4094, from_file);
2715                         }
2716                 }
2717                 s[i] = '\0';
2718                 fclose(from_file);
2719                 parse_program(s + 1);
2720                 free(s);
2721         } else { // no -f: take program from 1st parameter
2722                 if (!argc)
2723                         bb_show_usage();
2724                 programname = "cmd. line";
2725                 parse_program(*argv++);
2726                 argc--;
2727         }
2728         if (opt & 0x8) // -W
2729                 bb_error_msg("warning: unrecognized option '-W %s' ignored", opt_W);
2730
2731         /* fill in ARGV array */
2732         setvar_i(V[ARGC], argc + 1);
2733         setari_u(V[ARGV], 0, "awk");
2734         i = 0;
2735         while (*argv)
2736                 setari_u(V[ARGV], ++i, *argv++);
2737
2738         evaluate(beginseq.first, &tv);
2739         if (!mainseq.first && !endseq.first)
2740                 awk_exit(EXIT_SUCCESS);
2741
2742         /* input file could already be opened in BEGIN block */
2743         if (!iF) iF = next_input_file();
2744
2745         /* passing through input files */
2746         while (iF) {
2747                 nextfile = FALSE;
2748                 setvar_i(V[FNR], 0);
2749
2750                 while ((i = awk_getline(iF, V[F0])) > 0) {
2751                         nextrec = FALSE;
2752                         incvar(V[NR]);
2753                         incvar(V[FNR]);
2754                         evaluate(mainseq.first, &tv);
2755
2756                         if (nextfile)
2757                                 break;
2758                 }
2759
2760                 if (i < 0)
2761                         runtime_error(strerror(errno));
2762
2763                 iF = next_input_file();
2764         }
2765
2766         awk_exit(EXIT_SUCCESS);
2767         /*return 0;*/
2768 }