*: remove "Options:" string from help texts
[platform/upstream/busybox.git] / editors / awk.c
1 /* vi: set sw=4 ts=4: */
2 /*
3  * awk implementation for busybox
4  *
5  * Copyright (C) 2002 by Dmitry Zakharov <dmit@crp.bank.gov.ua>
6  *
7  * Licensed under GPLv2 or later, see file LICENSE in this source tree.
8  */
9
10 //usage:#define awk_trivial_usage
11 //usage:       "[OPTIONS] [AWK_PROGRAM] [FILE]..."
12 //usage:#define awk_full_usage "\n\n"
13 //usage:       "        -v VAR=VAL      Set variable"
14 //usage:     "\n        -F SEP          Use SEP as field separator"
15 //usage:     "\n        -f FILE         Read program from FILE"
16
17 #include "libbb.h"
18 #include "xregex.h"
19 #include <math.h>
20
21 /* This is a NOEXEC applet. Be very careful! */
22
23
/* Debug tracing hooks. As written, both macros are defined as no-ops.
 * If you comment out one of the two definitions right below, the
 * matching #ifndef further down defines it to fprintf() to stderr: */
#define debug_printf_walker(...)  do {} while (0)
#define debug_printf_eval(...)  do {} while (0)

/* Fallbacks: only take effect when the no-op definition above is removed */
#ifndef debug_printf_walker
# define debug_printf_walker(...) (fprintf(stderr, __VA_ARGS__))
#endif
#ifndef debug_printf_eval
# define debug_printf_eval(...) (fprintf(stderr, __VA_ARGS__))
#endif
35
36
37
#define MAXVARFMT       240     /* size passed to fmt_num() when getvar_s() formats a number */
#define MINNVBLOCK      64      /* smallest number of vars in one nvblock, see nvalloc() */

/* variable flags (stored in var.type) */
#define VF_NUMBER       0x0001  /* 1 = primary type is number */
#define VF_ARRAY        0x0002  /* 1 = it's an array */

#define VF_CACHED       0x0100  /* 1 = num/str value has cached str/num eq */
#define VF_USER         0x0200  /* 1 = user input (may be numeric string) */
#define VF_SPECIAL      0x0400  /* 1 = requires extra handling when changed */
#define VF_WALK         0x0800  /* 1 = variable has alloc'd x.walker list */
#define VF_FSTR         0x1000  /* 1 = var::string points to fstring buffer */
#define VF_CHILD        0x2000  /* 1 = function arg; x.parent points to source */
#define VF_DIRTY        0x4000  /* 1 = variable was set explicitly */

/* these flags are static, don't change them when value is changed */
/* (clrvar() keeps exactly these bits and drops all others) */
#define VF_DONTTOUCH    (VF_ARRAY | VF_SPECIAL | VF_WALK | VF_CHILD | VF_DIRTY)
55
/* List node reachable through var.x.walker (variable flagged VF_WALK).
 * NOTE(review): cur/end presumably delimit the not-yet-visited part of
 * wbuf - confirm against the for..in implementation. */
typedef struct walker_list {
        char *end;
        char *cur;
        struct walker_list *prev;       /* link to another walker_list */
        char wbuf[1];                   /* trailing buffer */
} walker_list;
62
/* Variable: holds a numeric and/or string value plus VF_xxx state flags */
typedef struct var_s {
        unsigned type;            /* flags (VF_xxx) */
        double number;            /* numeric value, see getvar_i() */
        char *string;             /* string value; NULL is read as "" by getvar_s() */
        union {
                int aidx;               /* func arg idx (for compilation stage) */
                struct xhash_s *array;  /* array ptr */
                struct var_s *parent;   /* for func args, ptr to actual parameter */
                walker_list *walker;    /* list of array elements (for..in) */
        } x;
} var;
75
/* Node chain (pattern-action chain, BEGIN, END, function bodies) */
typedef struct chain_s {
        struct node_s *first;           /* first node of the chain */
        struct node_s *last;            /* last node of the chain */
        const char *programname;
} chain;
82
/* Function: argument count plus the node chain of its body */
typedef struct func_s {
        unsigned nargs;
        struct chain_s body;
} func;
88
/* I/O stream */
/* NOTE(review): the meaning of adv/size/pos is not visible in this part
 * of the file; presumably they track the read position inside buffer -
 * confirm against the record-reading code. */
typedef struct rstream_s {
        FILE *F;
        char *buffer;
        int adv;
        int size;
        int pos;
        smallint is_pipe;
} rstream;
98
/* One entry of an xhash bucket chain.
 * 'data' must stay the first member: hash_find() stores the pointer
 * returned by hash_search() (which is &item->data) in a hash_item pointer. */
typedef struct hash_item_s {
        union {
                struct var_s v;         /* variable/array hash */
                struct rstream_s rs;    /* redirect streams hash */
                struct func_s f;        /* functions hash */
        } data;
        struct hash_item_s *next;       /* next in chain */
        char name[1];                   /* really it's longer */
                                        /* (hash_find() allocates room for the whole name) */
} hash_item;
108
/* Chained hash table keyed by NUL-terminated name */
typedef struct xhash_s {
        unsigned nel;           /* num of elements */
        unsigned csize;         /* current hash size (number of buckets in items[]) */
        unsigned nprime;        /* next hash size in PRIMES[] (index used by hash_rebuild) */
        unsigned glen;          /* summary length of item names (each counted with its NUL) */
        struct hash_item_s **items;     /* bucket array, csize entries */
} xhash;
116
/* Tree node */
/* NOTE(review): which member of l/r/a is valid presumably depends on the
 * operation class stored in 'info' - confirm against the parser/evaluator. */
typedef struct node_s {
        uint32_t info;          /* same word type as the tokeninfo[] entries */
        unsigned lineno;
        union {
                struct node_s *n;
                var *v;
                int aidx;
                char *new_progname;
                regex_t *re;
        } l;
        union {
                struct node_s *n;
                regex_t *ire;
                func *f;
        } r;
        union {
                struct node_s *n;
        } a;
} node;
137
/* Block of temporary variables (allocated and linked by nvalloc()) */
typedef struct nvblock_s {
        int size;                       /* capacity of nv[], in vars */
        var *pos;                       /* next free var inside nv[] */
        struct nvblock_s *prev;         /* previous block in the list */
        struct nvblock_s *next;         /* next block in the list */
        var nv[];                       /* the variables themselves */
} nvblock;
146
/* A node bundled with storage for two compiled regexes.
 * NOTE(review): used for the FS/RS splitters (see fsplitter/rsplitter in
 * struct globals2); the role of each re[] slot is not visible here. */
typedef struct tsplitter_s {
        node n;
        regex_t re[2];
} tsplitter;
151
/* simple token classes */
/* Order and hex values are very important!!!  See next_token() */
/* Each class is a single bit, so classes can be OR-ed into masks.
 * The bit order matches the order of the class sections in tokenlist[]. */
#define TC_SEQSTART      1                              /* ( */
#define TC_SEQTERM      (1 << 1)                /* ) */
#define TC_REGEXP       (1 << 2)                /* /.../ */
#define TC_OUTRDR       (1 << 3)                /* | > >> */
#define TC_UOPPOST      (1 << 4)                /* unary postfix operator */
#define TC_UOPPRE1      (1 << 5)                /* unary prefix operator */
#define TC_BINOPX       (1 << 6)                /* two-opnd operator */
#define TC_IN           (1 << 7)
#define TC_COMMA        (1 << 8)
#define TC_PIPE         (1 << 9)                /* input redirection pipe */
#define TC_UOPPRE2      (1 << 10)               /* unary prefix operator */
#define TC_ARRTERM      (1 << 11)               /* ] */
#define TC_GRPSTART     (1 << 12)               /* { */
#define TC_GRPTERM      (1 << 13)               /* } */
#define TC_SEMICOL      (1 << 14)
#define TC_NEWLINE      (1 << 15)
#define TC_STATX        (1 << 16)               /* ctl statement (for, next...) */
#define TC_WHILE        (1 << 17)
#define TC_ELSE         (1 << 18)
#define TC_BUILTIN      (1 << 19)
#define TC_GETLINE      (1 << 20)
#define TC_FUNCDECL     (1 << 21)               /* `function' `func' */
#define TC_BEGIN        (1 << 22)
#define TC_END          (1 << 23)
#define TC_EOF          (1 << 24)
#define TC_VARIABLE     (1 << 25)
#define TC_ARRAY        (1 << 26)
#define TC_FUNCTION     (1 << 27)
#define TC_STRING       (1 << 28)
#define TC_NUMBER       (1 << 29)

/* both groups of unary prefix operators */
#define TC_UOPPRE  (TC_UOPPRE1 | TC_UOPPRE2)

/* combined token classes */
#define TC_BINOP   (TC_BINOPX | TC_COMMA | TC_PIPE | TC_IN)
#define TC_UNARYOP (TC_UOPPRE | TC_UOPPOST)
#define TC_OPERAND (TC_VARIABLE | TC_ARRAY | TC_FUNCTION \
                   | TC_BUILTIN | TC_GETLINE | TC_SEQSTART | TC_STRING | TC_NUMBER)

/* statement keywords */
#define TC_STATEMNT (TC_STATX | TC_WHILE)
/* statement terminators */
#define TC_OPTERM  (TC_SEMICOL | TC_NEWLINE)

/* word tokens, cannot mean something else if not expected */
#define TC_WORD    (TC_IN | TC_STATEMNT | TC_ELSE | TC_BUILTIN \
                   | TC_GETLINE | TC_FUNCDECL | TC_BEGIN | TC_END)

/* discard newlines after these */
#define TC_NOTERM  (TC_COMMA | TC_GRPSTART | TC_GRPTERM \
                   | TC_BINOP | TC_OPTERM)

/* what can expression begin with */
#define TC_OPSEQ   (TC_OPERAND | TC_UOPPRE | TC_REGEXP)
/* what can group begin with */
#define TC_GRPSEQ  (TC_OPSEQ | TC_OPTERM | TC_STATEMNT | TC_GRPSTART)

/* if previous token class is CONCAT1 and next is CONCAT2, concatenation */
/* operator is inserted between them */
#define TC_CONCAT1 (TC_VARIABLE | TC_ARRTERM | TC_SEQTERM \
                   | TC_STRING | TC_NUMBER | TC_UOPPOST)
#define TC_CONCAT2 (TC_OPERAND | TC_UOPPRE)
214
/* Operator flags, OR-ed into tokeninfo[] words (bits 16..21).
 * NOTE(review): presumably RESn = evaluate operand n, STRn = take it as
 * a string, NUM1 = take operand 1 as a number - confirm in evaluate(). */
#define OF_RES1    0x010000
#define OF_RES2    0x020000
#define OF_STR1    0x040000
#define OF_STR2    0x080000
#define OF_NUM1    0x100000
#define OF_CHECKED 0x200000

/* combined operator flags */
/* two-letter names: first letter describes operand 1, second operand 2 */
#define xx      0
#define xV      OF_RES2
#define xS      (OF_RES2 | OF_STR2)
#define Vx      OF_RES1
#define VV      (OF_RES1 | OF_RES2)
#define Nx      (OF_RES1 | OF_NUM1)
#define NV      (OF_RES1 | OF_NUM1 | OF_RES2)
#define Sx      (OF_RES1 | OF_STR1)
#define SV      (OF_RES1 | OF_STR1 | OF_RES2)
#define SS      (OF_RES1 | OF_STR1 | OF_RES2 | OF_STR2)

#define OPCLSMASK 0xFF00        /* selects the OC_xxx / ST_xxx class bits of an info word */
#define OPNMASK   0x007F        /* selects the low 7 bits of an info word */
236
/* operator priority is a highest byte (even: r->l, odd: l->r grouping)
 * For builtins it has different meaning: n n s3 s2 s1 v3 v2 v1,
 * n - min. number of args, vN - resolve Nth arg to var, sN - resolve to string
 *
 * P() places its argument into that highest byte. The argument is
 * converted to uint32_t before shifting: builtin values such as 0x83
 * would otherwise be shifted into the sign bit of a signed int.
 * The argument is parenthesized so that any expression can be passed.
 */
#define P(x)      ((uint32_t)(x) << 24)
#define PRIMASK   0x7F000000
#define PRIMASK2  0x7E000000
244
/* Operation classes */

/* NOTE(review): both values coincide with a class below
 * (SHIFT_TIL_THIS == OC_WALKINIT, RECUR_FROM_THIS == OC_BINARY);
 * presumably they mark range boundaries in the class numbering -
 * confirm at the places where they are used. */
#define SHIFT_TIL_THIS  0x0600
#define RECUR_FROM_THIS 0x1000

/* Class values occupy the bits selected by OPCLSMASK (0xFF00) */
enum {
        OC_DELETE = 0x0100,     OC_EXEC = 0x0200,       OC_NEWSOURCE = 0x0300,
        OC_PRINT = 0x0400,      OC_PRINTF = 0x0500,     OC_WALKINIT = 0x0600,

        OC_BR = 0x0700,         OC_BREAK = 0x0800,      OC_CONTINUE = 0x0900,
        OC_EXIT = 0x0a00,       OC_NEXT = 0x0b00,       OC_NEXTFILE = 0x0c00,
        OC_TEST = 0x0d00,       OC_WALKNEXT = 0x0e00,

        OC_BINARY = 0x1000,     OC_BUILTIN = 0x1100,    OC_COLON = 0x1200,
        OC_COMMA = 0x1300,      OC_COMPARE = 0x1400,    OC_CONCAT = 0x1500,
        OC_FBLTIN = 0x1600,     OC_FIELD = 0x1700,      OC_FNARG = 0x1800,
        OC_FUNC = 0x1900,       OC_GETLINE = 0x1a00,    OC_IN = 0x1b00,
        OC_LAND = 0x1c00,       OC_LOR = 0x1d00,        OC_MATCH = 0x1e00,
        OC_MOVE = 0x1f00,       OC_PGETLINE = 0x2000,   OC_REGEXP = 0x2100,
        OC_REPLACE = 0x2200,    OC_RETURN = 0x2300,     OC_SPRINTF = 0x2400,
        OC_TERNARY = 0x2500,    OC_UNARY = 0x2600,      OC_VAR = 0x2700,
        OC_DONE = 0x2800,

        /* statement keywords (if/do/for/while entries of tokeninfo[]) */
        ST_IF = 0x3000,         ST_DO = 0x3100,         ST_FOR = 0x3200,
        ST_WHILE = 0x3300
};
271
/* simple builtins */
/* Combined with OC_FBLTIN in tokeninfo[]. Per the tokenlist[]/tokeninfo[]
 * pairing: in=int rn=rand co=cos ex=exp lg=log si=sin sq=sqrt sr=srand
 * ti=systime le=length sy=system ff=fflush cl=close */
enum {
        F_in,   F_rn,   F_co,   F_ex,   F_lg,   F_si,   F_sq,   F_sr,
        F_ti,   F_le,   F_sy,   F_ff,   F_cl
};

/* builtins */
/* Combined with OC_BUILTIN in tokeninfo[]. Per the tokenlist[]/tokeninfo[]
 * pairing: a2=atan2 ix=index ma=match sp=split ss=substr ti=strftime
 * mt=mktime lo=tolower up=toupper ge=gensub gs=gsub su=sub
 * an=and co=compl ls=lshift or=or rs=rshift xo=xor */
enum {
        B_a2,   B_ix,   B_ma,   B_sp,   B_ss,   B_ti,   B_mt,   B_lo,   B_up,
        B_ge,   B_gs,   B_su,
        B_an,   B_co,   B_ls,   B_or,   B_rs,   B_xo,
};
284
/* tokens and their corresponding info values */

#define NTC     "\377"  /* switch to next token class (tc<<1) */
#define NTCC    '\377'

#define OC_B  OC_BUILTIN

/* Layout: every entry is one length byte (octal escape) followed by that
 * many characters of token text. NTC ends a token class; the classes
 * appear in the same order as the TC_xxx bits. Each entry has a matching
 * word at the same position in tokeninfo[]. */
static const char tokenlist[] ALIGN1 =
        "\1("         NTC
        "\1)"         NTC
        "\1/"         NTC                                   /* REGEXP */
        "\2>>"        "\1>"         "\1|"       NTC         /* OUTRDR */
        "\2++"        "\2--"        NTC                     /* UOPPOST */
        "\2++"        "\2--"        "\1$"       NTC         /* UOPPRE1 */
        "\2=="        "\1="         "\2+="      "\2-="      /* BINOPX */
        "\2*="        "\2/="        "\2%="      "\2^="
        "\1+"         "\1-"         "\3**="     "\2**"
        "\1/"         "\1%"         "\1^"       "\1*"
        "\2!="        "\2>="        "\2<="      "\1>"
        "\1<"         "\2!~"        "\1~"       "\2&&"
        "\2||"        "\1?"         "\1:"       NTC
        "\2in"        NTC
        "\1,"         NTC
        "\1|"         NTC
        "\1+"         "\1-"         "\1!"       NTC         /* UOPPRE2 */
        "\1]"         NTC
        "\1{"         NTC
        "\1}"         NTC
        "\1;"         NTC
        "\1\n"        NTC
        "\2if"        "\2do"        "\3for"     "\5break"   /* STATX */
        "\10continue" "\6delete"    "\5print"
        "\6printf"    "\4next"      "\10nextfile"
        "\6return"    "\4exit"      NTC
        "\5while"     NTC
        "\4else"      NTC

        "\3and"       "\5compl"     "\6lshift"  "\2or"
        "\6rshift"    "\3xor"
        "\5close"     "\6system"    "\6fflush"  "\5atan2"   /* BUILTIN */
        "\3cos"       "\3exp"       "\3int"     "\3log"
        "\4rand"      "\3sin"       "\4sqrt"    "\5srand"
        "\6gensub"    "\4gsub"      "\5index"   "\6length"
        "\5match"     "\5split"     "\7sprintf" "\3sub"
        "\6substr"    "\7systime"   "\10strftime" "\6mktime"
        "\7tolower"   "\7toupper"   NTC
        "\7getline"   NTC
        "\4func"      "\10function" NTC
        "\5BEGIN"     NTC
        "\3END"
        /* compiler adds trailing "\0" */
        ;
337
/* One info word per tokenlist[] entry, in the same order.
 * A word combines an OC_xxx/ST_xxx class, operand flags (xV, NV, ...),
 * a P() priority byte and a low-byte operation code. */
static const uint32_t tokeninfo[] = {
        0, /* ( */
        0, /* ) */
        OC_REGEXP,
        /* OUTRDR: >> > | */
        xS|'a',                  xS|'w',                  xS|'|',
        /* UOPPOST: ++ -- */
        OC_UNARY|xV|P(9)|'p',    OC_UNARY|xV|P(9)|'m',
        /* UOPPRE1: ++ -- $ */
        OC_UNARY|xV|P(9)|'P',    OC_UNARY|xV|P(9)|'M',    OC_FIELD|xV|P(5),
        /* BINOPX, same order as in tokenlist[] */
        OC_COMPARE|VV|P(39)|5,   OC_MOVE|VV|P(74),        OC_REPLACE|NV|P(74)|'+', OC_REPLACE|NV|P(74)|'-',
        OC_REPLACE|NV|P(74)|'*', OC_REPLACE|NV|P(74)|'/', OC_REPLACE|NV|P(74)|'%', OC_REPLACE|NV|P(74)|'&',
        OC_BINARY|NV|P(29)|'+',  OC_BINARY|NV|P(29)|'-',  OC_REPLACE|NV|P(74)|'&', OC_BINARY|NV|P(15)|'&',
        OC_BINARY|NV|P(25)|'/',  OC_BINARY|NV|P(25)|'%',  OC_BINARY|NV|P(15)|'&',  OC_BINARY|NV|P(25)|'*',
        OC_COMPARE|VV|P(39)|4,   OC_COMPARE|VV|P(39)|3,   OC_COMPARE|VV|P(39)|0,   OC_COMPARE|VV|P(39)|1,
        OC_COMPARE|VV|P(39)|2,   OC_MATCH|Sx|P(45)|'!',   OC_MATCH|Sx|P(45)|'~',   OC_LAND|Vx|P(55),
        OC_LOR|Vx|P(59),         OC_TERNARY|Vx|P(64)|'?', OC_COLON|xx|P(67)|':',
        OC_IN|SV|P(49), /* in */
        OC_COMMA|SS|P(80),
        OC_PGETLINE|SV|P(37),
        /* UOPPRE2: + - ! */
        OC_UNARY|xV|P(19)|'+',   OC_UNARY|xV|P(19)|'-',   OC_UNARY|xV|P(19)|'!',
        0, /* ] */
        0, /* { */
        0, /* } */
        0, /* ; */
        0, /* \n */
        /* STATX, same order as in tokenlist[] */
        ST_IF,        ST_DO,        ST_FOR,      OC_BREAK,
        OC_CONTINUE,  OC_DELETE|Vx, OC_PRINT,
        OC_PRINTF,    OC_NEXT,      OC_NEXTFILE,
        OC_RETURN|Vx, OC_EXIT|Nx,
        ST_WHILE,
        0, /* else */

        /* BUILTIN, same order as in tokenlist[] */
        OC_B|B_an|P(0x83), OC_B|B_co|P(0x41), OC_B|B_ls|P(0x83), OC_B|B_or|P(0x83),
        OC_B|B_rs|P(0x83), OC_B|B_xo|P(0x83),
        OC_FBLTIN|Sx|F_cl, OC_FBLTIN|Sx|F_sy, OC_FBLTIN|Sx|F_ff, OC_B|B_a2|P(0x83),
        OC_FBLTIN|Nx|F_co, OC_FBLTIN|Nx|F_ex, OC_FBLTIN|Nx|F_in, OC_FBLTIN|Nx|F_lg,
        OC_FBLTIN|F_rn,    OC_FBLTIN|Nx|F_si, OC_FBLTIN|Nx|F_sq, OC_FBLTIN|Nx|F_sr,
        OC_B|B_ge|P(0xd6), OC_B|B_gs|P(0xb6), OC_B|B_ix|P(0x9b), OC_FBLTIN|Sx|F_le,
        OC_B|B_ma|P(0x89), OC_B|B_sp|P(0x8b), OC_SPRINTF,        OC_B|B_su|P(0xb6),
        OC_B|B_ss|P(0x8f), OC_FBLTIN|F_ti,    OC_B|B_ti|P(0x0b), OC_B|B_mt|P(0x0b),
        OC_B|B_lo|P(0x49), OC_B|B_up|P(0x49),
        OC_GETLINE|SV|P(0),
        0,                 0, /* func, function */
        0, /* BEGIN */
        0 /* END */
};
382
/* internal variable names and their initial values       */
/* asterisk marks SPECIAL vars; $ is just no-named Field0 */
/* The enum order must match the order of names in vNames[];
 * the values are used as indexes into intvar[]. */
enum {
        CONVFMT,    OFMT,       FS,         OFS,
        ORS,        RS,         RT,         FILENAME,
        SUBSEP,     F0,         ARGIND,     ARGC,
        ARGV,       ERRNO,      FNR,        NR,
        NF,         IGNORECASE, ENVIRON,    NUM_INTERNAL_VARS
};

/* NUL-separated names. A '*' right after a name's NUL marks that name
 * (FS, RS, $, NF, IGNORECASE) as SPECIAL. An empty name ends the list. */
static const char vNames[] ALIGN1 =
        "CONVFMT\0" "OFMT\0"    "FS\0*"     "OFS\0"
        "ORS\0"     "RS\0*"     "RT\0"      "FILENAME\0"
        "SUBSEP\0"  "$\0*"      "ARGIND\0"  "ARGC\0"
        "ARGV\0"    "ERRNO\0"   "FNR\0"     "NR\0"
        "NF\0*"     "IGNORECASE\0*" "ENVIRON\0" "\0";

/* NUL-separated initial string values for CONVFMT..F0, in vNames[] order.
 * NOTE(review): '\377' presumably tells the init code that the remaining
 * variables get no string value - confirm where vValues is consumed. */
static const char vValues[] ALIGN1 =
        "%.6g\0"    "%.6g\0"    " \0"       " \0"
        "\n\0"      "\n\0"      "\0"        "\0"
        "\034\0"    "\0"        "\377";
404
/* hash size may grow to these values */
/* FIRST_PRIME is the bucket count set by hash_init(); hash_rebuild()
 * then steps through PRIMES[] one entry per rebuild and stops growing
 * after the last entry. */
#define FIRST_PRIME 61
static const uint16_t PRIMES[] ALIGN2 = { 251, 1021, 4093, 16381, 65521 };
408
409
/* Globals. Split in two parts so that first one is addressed
 * with (mostly short) negative offsets.
 * NB: it's unsafe to put members of type "double"
 * into globals2 (gcc may fail to align them).
 */
/* This first part is reached through the G1 macro and sits directly in
 * front of struct globals2 in the block allocated by INIT_G(). */
struct globals {
        double t_double;
        chain beginseq, mainseq, endseq;
        chain *seq;
        node *break_ptr, *continue_ptr;
        rstream *iF;
        xhash *vhash, *ahash, *fdhash, *fnhash; /* vhash: variables (newvar), fdhash: streams (newfile), fnhash: functions (newfunc) */
        const char *g_progname; /* printed by syntax_error() */
        int g_lineno;           /* printed by syntax_error() */
        int nfields;
        int maxfields; /* used in fsrealloc() only */
        var *Fields;
        nvblock *g_cb;          /* current block of temporaries, see nvalloc() */
        char *g_pos;
        char *g_buf;            /* scratch buffer; getvar_s() formats numbers into it */
        smallint icase;
        smallint exiting;
        smallint nextrec;
        smallint nextfile;
        smallint is_f0_split;
};
/* Second part of the globals, reached through the G macro
 * (ptr_to_globals points at the start of this struct). */
struct globals2 {
        uint32_t t_info; /* often used */
        uint32_t t_tclass;
        char *t_string;
        int t_lineno;           /* incremented by skip_spaces() on backslash-newline */
        int t_rollback;

        var *intvar[NUM_INTERNAL_VARS]; /* often used */
                                        /* indexed by CONVFMT, OFMT, ... enum values */

        /* former statics from various functions */
        /* (member names are <function>__<variable>) */
        char *split_f0__fstrings;

        uint32_t next_token__save_tclass;
        uint32_t next_token__save_info;
        uint32_t next_token__ltclass;   /* INIT_G() sets it to TC_OPTERM */
        smallint next_token__concat_inserted;

        smallint next_input_file__files_happen;
        rstream next_input_file__rsm;

        var *evaluate__fnargs;
        unsigned evaluate__seed;        /* INIT_G() sets it to 1 */
        regex_t evaluate__sreg;

        var ptest__v;

        tsplitter exec_builtin__tspl;

        /* biggest and least used members go last */
        tsplitter fsplitter, rsplitter;
};
/* G1 is the struct globals that lies just before the address held in
 * ptr_to_globals; G is the struct globals2 that starts at that address.
 * INIT_G() sets up this layout. */
#define G1 (ptr_to_globals[-1])
#define G (*(struct globals2 *)ptr_to_globals)
/* For debug. nm --size-sort awk.o | grep -vi ' [tr] ' */
/*char G1size[sizeof(G1)]; - 0x74 */
/*char Gsize[sizeof(G)]; - 0x1c4 */
/* Trying to keep most of members accessible with short offsets: */
/*char Gofs_seed[offsetof(struct globals2, evaluate__seed)]; - 0x90 */
/* Short names for the members, so the code can use them like plain globals */
#define t_double     (G1.t_double    )
#define beginseq     (G1.beginseq    )
#define mainseq      (G1.mainseq     )
#define endseq       (G1.endseq      )
#define seq          (G1.seq         )
#define break_ptr    (G1.break_ptr   )
#define continue_ptr (G1.continue_ptr)
#define iF           (G1.iF          )
#define vhash        (G1.vhash       )
#define ahash        (G1.ahash       )
#define fdhash       (G1.fdhash      )
#define fnhash       (G1.fnhash      )
#define g_progname   (G1.g_progname  )
#define g_lineno     (G1.g_lineno    )
#define nfields      (G1.nfields     )
#define maxfields    (G1.maxfields   )
#define Fields       (G1.Fields      )
#define g_cb         (G1.g_cb        )
#define g_pos        (G1.g_pos       )
#define g_buf        (G1.g_buf       )
#define icase        (G1.icase       )
#define exiting      (G1.exiting     )
#define nextrec      (G1.nextrec     )
#define nextfile     (G1.nextfile    )
#define is_f0_split  (G1.is_f0_split )
#define t_info       (G.t_info      )
#define t_tclass     (G.t_tclass    )
#define t_string     (G.t_string    )
#define t_lineno     (G.t_lineno    )
#define t_rollback   (G.t_rollback  )
#define intvar       (G.intvar      )
#define fsplitter    (G.fsplitter   )
#define rsplitter    (G.rsplitter   )
/* Allocate one zero-filled block of sizeof(G1)+sizeof(G) bytes and point
 * ptr_to_globals sizeof(G1) bytes into it, so that G1 occupies the front
 * part and G the rest. Then set the two members whose initial value is
 * not zero. */
#define INIT_G() do { \
        SET_PTR_TO_GLOBALS((char*)xzalloc(sizeof(G1)+sizeof(G)) + sizeof(G1)); \
        G.next_token__ltclass = TC_OPTERM; \
        G.evaluate__seed = 1; \
} while (0)
512
513
/* function prototypes */
/* Forward declarations of functions that are called before they are defined */
static void handle_special(var *);
static node *parse_expr(uint32_t);
static void chain_group(void);
static var *evaluate(node *, var *);
static rstream *next_input_file(void);
static int fmt_num(char *, int, const char *, double, int);
static int awk_exit(int) NORETURN;
522
523 /* ---- error handling ---- */
524
/* Error message texts, kept as named constants so each string exists once */
static const char EMSG_INTERNAL_ERROR[] ALIGN1 = "Internal error";
static const char EMSG_UNEXP_EOS[] ALIGN1 = "Unexpected end of string";
static const char EMSG_UNEXP_TOKEN[] ALIGN1 = "Unexpected token";
static const char EMSG_DIV_BY_ZERO[] ALIGN1 = "Division by zero";
static const char EMSG_INV_FMT[] ALIGN1 = "Invalid format specifier";
static const char EMSG_TOO_FEW_ARGS[] ALIGN1 = "Too few arguments for builtin";
static const char EMSG_NOT_ARRAY[] ALIGN1 = "Not an array";
static const char EMSG_POSSIBLE_ERROR[] ALIGN1 = "Possible syntax error";
static const char EMSG_UNDEF_FUNC[] ALIGN1 = "Call to undefined function";
static const char EMSG_NO_MATH[] ALIGN1 = "Math support is not compiled in";
535
536 static void zero_out_var(var *vp)
537 {
538         memset(vp, 0, sizeof(*vp));
539 }
540
541 static void syntax_error(const char *message) NORETURN;
542 static void syntax_error(const char *message)
543 {
544         bb_error_msg_and_die("%s:%i: %s", g_progname, g_lineno, message);
545 }
546
547 /* ---- hash stuff ---- */
548
/* Compute the hash of a NUL-terminated name.
 * For every character c: h = c + h * 63 (written as shift and subtract),
 * in unsigned arithmetic. The caller reduces the result modulo table size. */
static unsigned hashidx(const char *name)
{
        unsigned h = 0;
        const char *p;

        for (p = name; *p != '\0'; p++)
                h = *p + (h << 6) - h;
        return h;
}
557
558 /* create new hash */
559 static xhash *hash_init(void)
560 {
561         xhash *newhash;
562
563         newhash = xzalloc(sizeof(*newhash));
564         newhash->csize = FIRST_PRIME;
565         newhash->items = xzalloc(FIRST_PRIME * sizeof(newhash->items[0]));
566
567         return newhash;
568 }
569
570 /* find item in hash, return ptr to data, NULL if not found */
571 static void *hash_search(xhash *hash, const char *name)
572 {
573         hash_item *hi;
574
575         hi = hash->items[hashidx(name) % hash->csize];
576         while (hi) {
577                 if (strcmp(hi->name, name) == 0)
578                         return &hi->data;
579                 hi = hi->next;
580         }
581         return NULL;
582 }
583
584 /* grow hash if it becomes too big */
585 static void hash_rebuild(xhash *hash)
586 {
587         unsigned newsize, i, idx;
588         hash_item **newitems, *hi, *thi;
589
590         if (hash->nprime == ARRAY_SIZE(PRIMES))
591                 return;
592
593         newsize = PRIMES[hash->nprime++];
594         newitems = xzalloc(newsize * sizeof(newitems[0]));
595
596         for (i = 0; i < hash->csize; i++) {
597                 hi = hash->items[i];
598                 while (hi) {
599                         thi = hi;
600                         hi = thi->next;
601                         idx = hashidx(thi->name) % newsize;
602                         thi->next = newitems[idx];
603                         newitems[idx] = thi;
604                 }
605         }
606
607         free(hash->items);
608         hash->csize = newsize;
609         hash->items = newitems;
610 }
611
612 /* find item in hash, add it if necessary. Return ptr to data */
613 static void *hash_find(xhash *hash, const char *name)
614 {
615         hash_item *hi;
616         unsigned idx;
617         int l;
618
619         hi = hash_search(hash, name);
620         if (!hi) {
621                 if (++hash->nel / hash->csize > 10)
622                         hash_rebuild(hash);
623
624                 l = strlen(name) + 1;
625                 hi = xzalloc(sizeof(*hi) + l);
626                 strcpy(hi->name, name);
627
628                 idx = hashidx(name) % hash->csize;
629                 hi->next = hash->items[idx];
630                 hash->items[idx] = hi;
631                 hash->glen += l;
632         }
633         return &hi->data;
634 }
635
/* Typed wrappers around hash_find(): look the name up in the given
 * (or the implied global) hash, creating the entry if it is missing */
#define findvar(hash, name) ((var*)    hash_find((hash), (name)))
#define newvar(name)        ((var*)    hash_find(vhash, (name)))
#define newfile(name)       ((rstream*)hash_find(fdhash, (name)))
#define newfunc(name)       ((func*)   hash_find(fnhash, (name)))
640
641 static void hash_remove(xhash *hash, const char *name)
642 {
643         hash_item *hi, **phi;
644
645         phi = &hash->items[hashidx(name) % hash->csize];
646         while (*phi) {
647                 hi = *phi;
648                 if (strcmp(hi->name, name) == 0) {
649                         hash->glen -= (strlen(name) + 1);
650                         hash->nel--;
651                         *phi = hi->next;
652                         free(hi);
653                         break;
654                 }
655                 phi = &hi->next;
656         }
657 }
658
659 /* ------ some useful functions ------ */
660
661 static char *skip_spaces(char *p)
662 {
663         while (1) {
664                 if (*p == '\\' && p[1] == '\n') {
665                         p++;
666                         t_lineno++;
667                 } else if (*p != ' ' && *p != '\t') {
668                         break;
669                 }
670                 p++;
671         }
672         return p;
673 }
674
/* Return the word *s points at and move *s to the character that
 * follows the word's terminating NUL */
static char *nextword(char **s)
{
        char *start = *s;
        char *cursor = start;

        while (*cursor != '\0')
                cursor++;
        *s = cursor + 1;
        return start;
}
683
/* Fetch one character from *s, decoding a backslash escape through
 * bb_process_escape_sequence(), and advance *s past what was consumed.
 * NOTE(review): relies on bb_process_escape_sequence() returning '\\'
 * and leaving *s untouched for an escape it does not recognize. */
static char nextchar(char **s)
{
        char ch = **s;
        char *after;

        (*s)++;
        after = *s;
        if (ch == '\\') {
                ch = bb_process_escape_sequence((const char**)s);
                if (ch == '\\' && *s == after) {
                        /* unrecognized \z: yield z itself and step over
                         * it, unless z is the terminating NUL */
                        ch = **s;
                        if (ch != '\0')
                                (*s)++;
                }
        }
        return ch;
}
699
700 static ALWAYS_INLINE int isalnum_(int c)
701 {
702         return (isalnum(c) || c == '_');
703 }
704
705 static double my_strtod(char **pp)
706 {
707         char *cp = *pp;
708         if (ENABLE_DESKTOP && cp[0] == '0') {
709                 /* Might be hex or octal integer: 0x123abc or 07777 */
710                 char c = (cp[1] | 0x20);
711                 if (c == 'x' || isdigit(cp[1])) {
712                         unsigned long long ull = strtoull(cp, pp, 0);
713                         if (c == 'x')
714                                 return ull;
715                         c = **pp;
716                         if (!isdigit(c) && c != '.')
717                                 return ull;
718                         /* else: it may be a floating number. Examples:
719                          * 009.123 (*pp points to '9')
720                          * 000.123 (*pp points to '.')
721                          * fall through to strtod.
722                          */
723                 }
724         }
725         return strtod(cp, pp);
726 }
727
728 /* -------- working with variables (set/get/copy/etc) -------- */
729
730 static xhash *iamarray(var *v)
731 {
732         var *a = v;
733
734         while (a->type & VF_CHILD)
735                 a = a->x.parent;
736
737         if (!(a->type & VF_ARRAY)) {
738                 a->type |= VF_ARRAY;
739                 a->x.array = hash_init();
740         }
741         return a->x.array;
742 }
743
744 static void clear_array(xhash *array)
745 {
746         unsigned i;
747         hash_item *hi, *thi;
748
749         for (i = 0; i < array->csize; i++) {
750                 hi = array->items[i];
751                 while (hi) {
752                         thi = hi;
753                         hi = hi->next;
754                         free(thi->data.v.string);
755                         free(thi);
756                 }
757                 array->items[i] = NULL;
758         }
759         array->glen = array->nel = 0;
760 }
761
762 /* clear a variable */
763 static var *clrvar(var *v)
764 {
765         if (!(v->type & VF_FSTR))
766                 free(v->string);
767
768         v->type &= VF_DONTTOUCH;
769         v->type |= VF_DIRTY;
770         v->string = NULL;
771         return v;
772 }
773
774 /* assign string value to variable */
775 static var *setvar_p(var *v, char *value)
776 {
777         clrvar(v);
778         v->string = value;
779         handle_special(v);
780         return v;
781 }
782
783 /* same as setvar_p but make a copy of string */
784 static var *setvar_s(var *v, const char *value)
785 {
786         return setvar_p(v, (value && *value) ? xstrdup(value) : NULL);
787 }
788
789 /* same as setvar_s but sets USER flag */
790 static var *setvar_u(var *v, const char *value)
791 {
792         v = setvar_s(v, value);
793         v->type |= VF_USER;
794         return v;
795 }
796
797 /* set array element to user string */
798 static void setari_u(var *a, int idx, const char *s)
799 {
800         var *v;
801
802         v = findvar(iamarray(a), itoa(idx));
803         setvar_u(v, s);
804 }
805
806 /* assign numeric value to variable */
807 static var *setvar_i(var *v, double value)
808 {
809         clrvar(v);
810         v->type |= VF_NUMBER;
811         v->number = value;
812         handle_special(v);
813         return v;
814 }
815
816 static const char *getvar_s(var *v)
817 {
818         /* if v is numeric and has no cached string, convert it to string */
819         if ((v->type & (VF_NUMBER | VF_CACHED)) == VF_NUMBER) {
820                 fmt_num(g_buf, MAXVARFMT, getvar_s(intvar[CONVFMT]), v->number, TRUE);
821                 v->string = xstrdup(g_buf);
822                 v->type |= VF_CACHED;
823         }
824         return (v->string == NULL) ? "" : v->string;
825 }
826
827 static double getvar_i(var *v)
828 {
829         char *s;
830
831         if ((v->type & (VF_NUMBER | VF_CACHED)) == 0) {
832                 v->number = 0;
833                 s = v->string;
834                 if (s && *s) {
835                         debug_printf_eval("getvar_i: '%s'->", s);
836                         v->number = my_strtod(&s);
837                         debug_printf_eval("%f (s:'%s')\n", v->number, s);
838                         if (v->type & VF_USER) {
839                                 s = skip_spaces(s);
840                                 if (*s != '\0')
841                                         v->type &= ~VF_USER;
842                         }
843                 } else {
844                         debug_printf_eval("getvar_i: '%s'->zero\n", s);
845                         v->type &= ~VF_USER;
846                 }
847                 v->type |= VF_CACHED;
848         }
849         debug_printf_eval("getvar_i: %f\n", v->number);
850         return v->number;
851 }
852
853 /* Used for operands of bitwise ops */
854 static unsigned long getvar_i_int(var *v)
855 {
856         double d = getvar_i(v);
857
858         /* Casting doubles to longs is undefined for values outside
859          * of target type range. Try to widen it as much as possible */
860         if (d >= 0)
861                 return (unsigned long)d;
862         /* Why? Think about d == -4294967295.0 (assuming 32bit longs) */
863         return - (long) (unsigned long) (-d);
864 }
865
866 static var *copyvar(var *dest, const var *src)
867 {
868         if (dest != src) {
869                 clrvar(dest);
870                 dest->type |= (src->type & ~(VF_DONTTOUCH | VF_FSTR));
871                 debug_printf_eval("copyvar: number:%f string:'%s'\n", src->number, src->string);
872                 dest->number = src->number;
873                 if (src->string)
874                         dest->string = xstrdup(src->string);
875         }
876         handle_special(dest);
877         return dest;
878 }
879
880 static var *incvar(var *v)
881 {
882         return setvar_i(v, getvar_i(v) + 1.0);
883 }
884
885 /* return true if v is number or numeric string */
886 static int is_numeric(var *v)
887 {
888         getvar_i(v);
889         return ((v->type ^ VF_DIRTY) & (VF_NUMBER | VF_USER | VF_DIRTY));
890 }
891
892 /* return 1 when value of v corresponds to true, 0 otherwise */
893 static int istrue(var *v)
894 {
895         if (is_numeric(v))
896                 return (v->number != 0);
897         return (v->string && v->string[0]);
898 }
899
900 /* temporary variables allocator. Last allocated should be first freed */
901 static var *nvalloc(int n)
902 {
903         nvblock *pb = NULL;
904         var *v, *r;
905         int size;
906
907         while (g_cb) {
908                 pb = g_cb;
909                 if ((g_cb->pos - g_cb->nv) + n <= g_cb->size)
910                         break;
911                 g_cb = g_cb->next;
912         }
913
914         if (!g_cb) {
915                 size = (n <= MINNVBLOCK) ? MINNVBLOCK : n;
916                 g_cb = xzalloc(sizeof(nvblock) + size * sizeof(var));
917                 g_cb->size = size;
918                 g_cb->pos = g_cb->nv;
919                 g_cb->prev = pb;
920                 /*g_cb->next = NULL; - xzalloc did it */
921                 if (pb)
922                         pb->next = g_cb;
923         }
924
925         v = r = g_cb->pos;
926         g_cb->pos += n;
927
928         while (v < g_cb->pos) {
929                 v->type = 0;
930                 v->string = NULL;
931                 v++;
932         }
933
934         return r;
935 }
936
937 static void nvfree(var *v)
938 {
939         var *p;
940
941         if (v < g_cb->nv || v >= g_cb->pos)
942                 syntax_error(EMSG_INTERNAL_ERROR);
943
944         for (p = v; p < g_cb->pos; p++) {
945                 if ((p->type & (VF_ARRAY | VF_CHILD)) == VF_ARRAY) {
946                         clear_array(iamarray(p));
947                         free(p->x.array->items);
948                         free(p->x.array);
949                 }
950                 if (p->type & VF_WALK) {
951                         walker_list *n;
952                         walker_list *w = p->x.walker;
953                         debug_printf_walker("nvfree: freeing walker @%p\n", &p->x.walker);
954                         p->x.walker = NULL;
955                         while (w) {
956                                 n = w->prev;
957                                 debug_printf_walker(" free(%p)\n", w);
958                                 free(w);
959                                 w = n;
960                         }
961                 }
962                 clrvar(p);
963         }
964
965         g_cb->pos = v;
966         while (g_cb->prev && g_cb->pos == g_cb->nv) {
967                 g_cb = g_cb->prev;
968         }
969 }
970
971 /* ------- awk program text parsing ------- */
972
/* Parse the next token at global position g_pos and store the result in
 * the global token state (t_tclass, t_info, t_string, t_double).
 * 'expected' is a bitmask of acceptable token classes; if the token found
 * is not in it, a syntax error is raised. Returns the token class.
 *
 * Note: string, regexp and name tokens are decoded in place, i.e. the
 * program text buffer itself is modified and t_string points into it.
 */
static uint32_t next_token(uint32_t expected)
{
#define concat_inserted (G.next_token__concat_inserted)
#define save_tclass     (G.next_token__save_tclass)
#define save_info       (G.next_token__save_info)
/* Class of the previously returned token. Initialized to TC_OPTERM: */
#define ltclass         (G.next_token__ltclass)

        char *p, *s;
        const char *tl;
        uint32_t tc;
        const uint32_t *ti;

        if (t_rollback) {
                /* rollback_token() was called: deliver the current token again */
                t_rollback = FALSE;

        } else if (concat_inserted) {
                /* previous call returned a synthesized concatenation operator;
                 * now deliver the real token which was put aside */
                concat_inserted = FALSE;
                t_tclass = save_tclass;
                t_info = save_info;

        } else {
                p = g_pos;
 readnext:
                p = skip_spaces(p);
                g_lineno = t_lineno;
                /* skip a '#' comment up to (not including) end of line */
                if (*p == '#')
                        while (*p != '\n' && *p != '\0')
                                p++;

                if (*p == '\n')
                        t_lineno++;

                if (*p == '\0') {
                        tc = TC_EOF;

                } else if (*p == '\"') {
                        /* it's a string: decode it in place via nextchar(),
                         * s is the write position, p the read position */
                        t_string = s = ++p;
                        while (*p != '\"') {
                                char *pp;
                                if (*p == '\0' || *p == '\n')
                                        syntax_error(EMSG_UNEXP_EOS);
                                pp = p;
                                *s++ = nextchar(&pp);
                                p = pp;
                        }
                        p++;
                        *s = '\0';
                        tc = TC_STRING;

                } else if ((expected & TC_REGEXP) && *p == '/') {
                        /* it's regexp ('/' is only taken as a regexp start
                         * when the caller expects one); copied in place */
                        t_string = s = ++p;
                        while (*p != '/') {
                                if (*p == '\0' || *p == '\n')
                                        syntax_error(EMSG_UNEXP_EOS);
                                *s = *p++;
                                if (*s++ == '\\') {
                                        /* backslash: let bb_process_escape_sequence()
                                         * look at what follows; its result replaces
                                         * the backslash just stored */
                                        char *pp = p;
                                        s[-1] = bb_process_escape_sequence((const char **)&pp);
                                        /* backslash followed by backslash: store one more */
                                        if (*p == '\\')
                                                *s++ = '\\';
                                        /* pp not advanced: copy the next char as is,
                                         * otherwise skip the consumed escape */
                                        if (pp == p)
                                                *s++ = *p++;
                                        else
                                                p = pp;
                                }
                        }
                        p++;
                        *s = '\0';
                        tc = TC_REGEXP;

                } else if (*p == '.' || isdigit(*p)) {
                        /* it's a number */
                        char *pp = p;
                        t_double = my_strtod(&pp);
                        p = pp;
                        /* a '.' right after the number is rejected */
                        if (*p == '.')
                                syntax_error(EMSG_UNEXP_TOKEN);
                        tc = TC_NUMBER;

                } else {
                        /* search for something known.
                         * tokenlist is a sequence of entries, each one a length
                         * byte followed by that many characters; an NTCC byte
                         * switches to the next token class bit. tokeninfo runs
                         * in parallel with the token entries. */
                        tl = tokenlist;
                        tc = 0x00000001;
                        ti = tokeninfo;
                        while (*tl) {
                                int l = (unsigned char) *tl++;
                                if (l == (unsigned char) NTCC) {
                                        tc <<= 1;
                                        continue;
                                }
                                /* if token class is expected,
                                 * token matches,
                                 * and it's not a longer word,
                                 */
                                if ((tc & (expected | TC_WORD | TC_NEWLINE))
                                 && strncmp(p, tl, l) == 0
                                 && !((tc & TC_WORD) && isalnum_(p[l]))
                                ) {
                                        /* then this is what we are looking for */
                                        t_info = *ti;
                                        p += l;
                                        goto token_found;
                                }
                                ti++;
                                tl += l;
                        }
                        /* not a known token */

                        /* is it a name? (var/array/function) */
                        if (!isalnum_(*p))
                                syntax_error(EMSG_UNEXP_TOKEN); /* no */
                        /* yes: shift the name one char to the left, in place,
                         * which frees one byte after it for the terminating NUL */
                        t_string = --p;
                        while (isalnum_(*++p)) {
                                p[-1] = *p;
                        }
                        p[-1] = '\0';
                        tc = TC_VARIABLE;
                        /* also consume whitespace between functionname and bracket */
                        if (!(expected & TC_VARIABLE) || (expected & TC_ARRAY))
                                p = skip_spaces(p);
                        if (*p == '(') {
                                /* note: the '(' itself is not consumed here */
                                tc = TC_FUNCTION;
                        } else {
                                if (*p == '[') {
                                        p++;
                                        tc = TC_ARRAY;
                                }
                        }
 token_found: ;
                }
                g_pos = p;

                /* skipping newlines in some cases: a newline directly after
                 * a TC_NOTERM class token is not returned as a token */
                if ((ltclass & TC_NOTERM) && (tc & TC_NEWLINE))
                        goto readnext;

                /* insert concatenation operator when needed: return it now,
                 * and keep the token just read for the next call */
                if ((ltclass & TC_CONCAT1) && (tc & TC_CONCAT2) && (expected & TC_BINOP)) {
                        concat_inserted = TRUE;
                        save_tclass = tc;
                        save_info = t_info;
                        tc = TC_BINOP;
                        t_info = OC_CONCAT | SS | P(35);
                }

                t_tclass = tc;
        }
        ltclass = t_tclass;

        /* Are we ready for this? */
        if (!(ltclass & expected))
                syntax_error((ltclass & (TC_NEWLINE | TC_EOF)) ?
                                EMSG_UNEXP_EOS : EMSG_UNEXP_TOKEN);

        return ltclass;
#undef concat_inserted
#undef save_tclass
#undef save_info
#undef ltclass
}
1140
/* Push the current token back: the next call to next_token() will not
 * scan new input but report the current token class again */
static void rollback_token(void)
{
        t_rollback = TRUE;
}
1145
1146 static node *new_node(uint32_t info)
1147 {
1148         node *n;
1149
1150         n = xzalloc(sizeof(node));
1151         n->info = info;
1152         n->lineno = g_lineno;
1153         return n;
1154 }
1155
1156 static void mk_re_node(const char *s, node *n, regex_t *re)
1157 {
1158         n->info = OC_REGEXP;
1159         n->l.re = re;
1160         n->r.ire = re + 1;
1161         xregcomp(re, s, REG_EXTENDED);
1162         xregcomp(re + 1, s, REG_EXTENDED | REG_ICASE);
1163 }
1164
1165 static node *condition(void)
1166 {
1167         next_token(TC_SEQSTART);
1168         return parse_expr(TC_SEQTERM);
1169 }
1170
/* Parse an expression terminated by one of the token classes in iexp,
 * return ptr to the built subtree (NULL if the expression is empty).
 * The terminator is eaten by parse_expr.
 *
 * The tree is built incrementally: cn is the most recently added node,
 * xtc is the set of token classes acceptable next, and each node's a.n
 * links back to the node it hangs from. sn is a dummy root living on
 * the stack; the real tree is its right child.
 */
static node *parse_expr(uint32_t iexp)
{
        node sn;
        node *cn = &sn;
        node *vn, *glptr;
        uint32_t tc, xtc;
        var *v;

        /* PRIMASK in the dummy root stops the upward priority walk below */
        sn.info = PRIMASK;
        sn.r.n = glptr = NULL;
        xtc = TC_OPERAND | TC_UOPPRE | TC_REGEXP | iexp;

        while (!((tc = next_token(xtc)) & iexp)) {

                if (glptr && (t_info == (OC_COMPARE | VV | P(39) | 2))) {
                        /* input redirection (<) attached to glptr node,
                         * glptr being the most recent getline node */
                        cn = glptr->l.n = new_node(OC_CONCAT | SS | P(37));
                        cn->a.n = glptr;
                        xtc = TC_OPERAND | TC_UOPPRE;
                        glptr = NULL;

                } else if (tc & (TC_BINOP | TC_UOPPOST)) {
                        /* for binary and postfix-unary operators, jump back over
                         * previous operators with higher priority */
                        vn = cn;
                        while (((t_info & PRIMASK) > (vn->a.n->info & PRIMASK2))
                            || ((t_info == vn->info) && ((t_info & OPCLSMASK) == OC_COLON))
                        ) {
                                vn = vn->a.n;
                        }
                        if ((t_info & OPCLSMASK) == OC_TERNARY)
                                t_info += P(6);
                        /* splice the new operator node in between vn and
                         * the node vn was hanging from */
                        cn = vn->a.n->r.n = new_node(t_info);
                        cn->a.n = vn->a.n;
                        if (tc & TC_BINOP) {
                                /* binary: what was parsed so far becomes the left operand */
                                cn->l.n = vn;
                                xtc = TC_OPERAND | TC_UOPPRE | TC_REGEXP;
                                if ((t_info & OPCLSMASK) == OC_PGETLINE) {
                                        /* it's a pipe */
                                        next_token(TC_GETLINE);
                                        /* give maximum priority to this pipe */
                                        cn->info &= ~PRIMASK;
                                        xtc = TC_OPERAND | TC_UOPPRE | TC_BINOP | iexp;
                                }
                        } else {
                                /* postfix-unary: the operand goes to the right */
                                cn->r.n = vn;
                                xtc = TC_OPERAND | TC_UOPPRE | TC_BINOP | iexp;
                        }
                        vn->a.n = cn;

                } else {
                        /* for operands and prefix-unary operators, attach them
                         * to last node */
                        vn = cn;
                        cn = vn->r.n = new_node(t_info);
                        cn->a.n = vn;
                        xtc = TC_OPERAND | TC_UOPPRE | TC_REGEXP;
                        if (tc & (TC_OPERAND | TC_REGEXP)) {
                                xtc = TC_UOPPRE | TC_UOPPOST | TC_BINOP | TC_OPERAND | iexp;
                                /* one should be very careful with switch on tclass -
                                 * only simple tclasses should be used! */
                                switch (tc) {
                                case TC_VARIABLE:
                                case TC_ARRAY:
                                        cn->info = OC_VAR;
                                        /* a name found in ahash becomes OC_FNARG and is
                                         * referenced by index, any other name by its var */
                                        v = hash_search(ahash, t_string);
                                        if (v != NULL) {
                                                cn->info = OC_FNARG;
                                                cn->l.aidx = v->x.aidx;
                                        } else {
                                                cn->l.v = newvar(t_string);
                                        }
                                        if (tc & TC_ARRAY) {
                                                /* subscript expression goes to the right */
                                                cn->info |= xS;
                                                cn->r.n = parse_expr(TC_ARRTERM);
                                        }
                                        break;

                                case TC_NUMBER:
                                case TC_STRING:
                                        /* constants are stored as anonymous variables */
                                        cn->info = OC_VAR;
                                        v = cn->l.v = xzalloc(sizeof(var));
                                        if (tc & TC_NUMBER)
                                                setvar_i(v, t_double);
                                        else
                                                setvar_s(v, t_string);
                                        break;

                                case TC_REGEXP:
                                        /* storage for two regex_t: see mk_re_node() */
                                        mk_re_node(t_string, cn, xzalloc(sizeof(regex_t)*2));
                                        break;

                                case TC_FUNCTION:
                                        cn->info = OC_FUNC;
                                        cn->r.f = newfunc(t_string);
                                        cn->l.n = condition();
                                        break;

                                case TC_SEQSTART:
                                        /* parenthesized subexpression replaces the node
                                         * which was just created for the token */
                                        cn = vn->r.n = parse_expr(TC_SEQTERM);
                                        cn->a.n = vn;
                                        break;

                                case TC_GETLINE:
                                        /* remember it: a following '<' is a redirection */
                                        glptr = cn;
                                        xtc = TC_OPERAND | TC_UOPPRE | TC_BINOP | iexp;
                                        break;

                                case TC_BUILTIN:
                                        cn->l.n = condition();
                                        break;
                                }
                        }
                }
        }
        return sn.r.n;
}
1290
1291 /* add node to chain. Return ptr to alloc'd node */
1292 static node *chain_node(uint32_t info)
1293 {
1294         node *n;
1295
1296         if (!seq->first)
1297                 seq->first = seq->last = new_node(0);
1298
1299         if (seq->programname != g_progname) {
1300                 seq->programname = g_progname;
1301                 n = chain_node(OC_NEWSOURCE);
1302                 n->l.new_progname = xstrdup(g_progname);
1303         }
1304
1305         n = seq->last;
1306         n->info = info;
1307         seq->last = n->a.n = new_node(OC_DONE);
1308
1309         return n;
1310 }
1311
1312 static void chain_expr(uint32_t info)
1313 {
1314         node *n;
1315
1316         n = chain_node(info);
1317         n->l.n = parse_expr(TC_OPTERM | TC_GRPTERM);
1318         if (t_tclass & TC_GRPTERM)
1319                 rollback_token();
1320 }
1321
1322 static node *chain_loop(node *nn)
1323 {
1324         node *n, *n2, *save_brk, *save_cont;
1325
1326         save_brk = break_ptr;
1327         save_cont = continue_ptr;
1328
1329         n = chain_node(OC_BR | Vx);
1330         continue_ptr = new_node(OC_EXEC);
1331         break_ptr = new_node(OC_EXEC);
1332         chain_group();
1333         n2 = chain_node(OC_EXEC | Vx);
1334         n2->l.n = nn;
1335         n2->a.n = n;
1336         continue_ptr->a.n = n2;
1337         break_ptr->a.n = n->r.n = seq->last;
1338
1339         continue_ptr = save_cont;
1340         break_ptr = save_brk;
1341
1342         return n;
1343 }
1344
/* Parse one group and attach it to the current chain (seq).
 * A group is either a brace-enclosed list of groups, a plain
 * expression statement, or a keyword statement (if/while/do/for/
 * print/printf/break/continue/...). Leading newlines are skipped.
 */
static void chain_group(void)
{
        uint32_t c;
        node *n, *n2, *n3;

        do {
                c = next_token(TC_GRPSEQ);
        } while (c & TC_NEWLINE);

        if (c & TC_GRPSTART) {
                /* block: parse nested groups until the group terminator */
                while (next_token(TC_GRPSEQ | TC_GRPTERM) != TC_GRPTERM) {
                        if (t_tclass & TC_NEWLINE)
                                continue;
                        rollback_token();
                        chain_group();
                }
        } else if (c & (TC_OPSEQ | TC_OPTERM)) {
                /* expression statement (possibly empty) */
                rollback_token();
                chain_expr(OC_EXEC | Vx);
        } else {                                                /* TC_STATEMNT */
                switch (t_info & OPCLSMASK) {
                case ST_IF:
                        /* n: branch on condition; n2: node chained after the
                         * "then" part, used to jump over the "else" part */
                        n = chain_node(OC_BR | Vx);
                        n->l.n = condition();
                        chain_group();
                        n2 = chain_node(OC_EXEC);
                        n->r.n = seq->last;
                        if (next_token(TC_GRPSEQ | TC_GRPTERM | TC_ELSE) == TC_ELSE) {
                                chain_group();
                                n2->a.n = seq->last;
                        } else {
                                rollback_token();
                        }
                        break;

                case ST_WHILE:
                        /* condition is parsed first, then attached to the loop head */
                        n2 = condition();
                        n = chain_loop(NULL);
                        n->l.n = n2;
                        break;

                case ST_DO:
                        /* n2 jumps straight into the body, so that the
                         * condition is not tested before the first pass */
                        n2 = chain_node(OC_EXEC);
                        n = chain_loop(NULL);
                        n2->a.n = n->a.n;
                        next_token(TC_WHILE);
                        n->l.n = condition();
                        break;

                case ST_FOR:
                        next_token(TC_SEQSTART);
                        n2 = parse_expr(TC_SEMICOL | TC_SEQTERM);
                        if (t_tclass & TC_SEQTERM) {    /* for-in */
                                /* the only thing allowed in parens is "x in arr" */
                                if ((n2->info & OPCLSMASK) != OC_IN)
                                        syntax_error(EMSG_UNEXP_TOKEN);
                                n = chain_node(OC_WALKINIT | VV);
                                n->l.n = n2->l.n;
                                n->r.n = n2->r.n;
                                n = chain_loop(NULL);
                                n->info = OC_WALKNEXT | Vx;
                                n->l.n = n2->l.n;
                        } else {                        /* for (;;) */
                                /* n2 is the init expression here... */
                                n = chain_node(OC_EXEC | Vx);
                                n->l.n = n2;
                                /* ...then n2 is the condition, n3 the step expression */
                                n2 = parse_expr(TC_SEMICOL);
                                n3 = parse_expr(TC_SEQTERM);
                                n = chain_loop(n3);
                                n->l.n = n2;
                                /* empty condition: loop head is a plain OC_EXEC */
                                if (!n2)
                                        n->info = OC_EXEC;
                        }
                        break;

                case OC_PRINT:
                case OC_PRINTF:
                        n = chain_node(t_info);
                        n->l.n = parse_expr(TC_OPTERM | TC_OUTRDR | TC_GRPTERM);
                        if (t_tclass & TC_OUTRDR) {
                                /* output redirection: merge its info into the node,
                                 * the target expression goes to the right */
                                n->info |= t_info;
                                n->r.n = parse_expr(TC_OPTERM | TC_GRPTERM);
                        }
                        if (t_tclass & TC_GRPTERM)
                                rollback_token();
                        break;

                case OC_BREAK:
                        n = chain_node(OC_EXEC);
                        n->a.n = break_ptr;
                        break;

                case OC_CONTINUE:
                        n = chain_node(OC_EXEC);
                        n->a.n = continue_ptr;
                        break;

                /* delete, next, nextfile, return, exit */
                default:
                        chain_expr(t_info);
                }
        }
}
1447
/* Parse the whole awk program text p (which is modified in place by the
 * tokenizer) and chain the resulting nodes into beginseq, endseq,
 * mainseq, or the body of the function being defined.
 */
static void parse_program(char *p)
{
        uint32_t tclass;
        node *cn;
        func *f;
        var *v;

        g_pos = p;
        t_lineno = 1;
        while ((tclass = next_token(TC_EOF | TC_OPSEQ | TC_GRPSTART |
                        TC_OPTERM | TC_BEGIN | TC_END | TC_FUNCDECL)) != TC_EOF) {

                /* stray statement terminators between rules are ignored */
                if (tclass & TC_OPTERM)
                        continue;

                seq = &mainseq;
                if (tclass & TC_BEGIN) {
                        seq = &beginseq;
                        chain_group();

                } else if (tclass & TC_END) {
                        seq = &endseq;
                        chain_group();

                } else if (tclass & TC_FUNCDECL) {
                        next_token(TC_FUNCTION);
                        /* next_token() stops at '(' for TC_FUNCTION: step over it */
                        g_pos++;
                        f = newfunc(t_string);
                        f->body.first = NULL;
                        f->nargs = 0;
                        /* register each parameter name in ahash with its index */
                        while (next_token(TC_VARIABLE | TC_SEQTERM) & TC_VARIABLE) {
                                v = findvar(ahash, t_string);
                                v->x.aidx = f->nargs++;

                                if (next_token(TC_COMMA | TC_SEQTERM) & TC_SEQTERM)
                                        break;
                        }
                        seq = &f->body;
                        chain_group();
                        /* parameter names are only valid inside this function */
                        clear_array(ahash);

                } else if (tclass & TC_OPSEQ) {
                        /* pattern, optionally followed by an action group */
                        rollback_token();
                        cn = chain_node(OC_TEST);
                        cn->l.n = parse_expr(TC_OPTERM | TC_EOF | TC_GRPSTART);
                        if (t_tclass & TC_GRPSTART) {
                                rollback_token();
                                chain_group();
                        } else {
                                /* pattern without action: chain a print */
                                chain_node(OC_PRINT);
                        }
                        cn->r.n = mainseq.last;

                } else /* if (tclass & TC_GRPSTART) */ {
                        /* action group without a pattern */
                        rollback_token();
                        chain_group();
                }
        }
}
1507
1508
1509 /* -------- program execution part -------- */
1510
1511 static node *mk_splitter(const char *s, tsplitter *spl)
1512 {
1513         regex_t *re, *ire;
1514         node *n;
1515
1516         re = &spl->re[0];
1517         ire = &spl->re[1];
1518         n = &spl->n;
1519         if ((n->info & OPCLSMASK) == OC_REGEXP) {
1520                 regfree(re);
1521                 regfree(ire); // TODO: nuke ire, use re+1?
1522         }
1523         if (s[0] && s[1]) { /* strlen(s) > 1 */
1524                 mk_re_node(s, n, re);
1525         } else {
1526                 n->info = (uint32_t) s[0];
1527         }
1528
1529         return n;
1530 }
1531
1532 /* use node as a regular expression. Supplied with node ptr and regex_t
1533  * storage space. Return ptr to regex (if result points to preg, it should
1534  * be later regfree'd manually
1535  */
1536 static regex_t *as_regex(node *op, regex_t *preg)
1537 {
1538         int cflags;
1539         var *v;
1540         const char *s;
1541
1542         if ((op->info & OPCLSMASK) == OC_REGEXP) {
1543                 return icase ? op->r.ire : op->l.re;
1544         }
1545         v = nvalloc(1);
1546         s = getvar_s(evaluate(op, v));
1547
1548         cflags = icase ? REG_EXTENDED | REG_ICASE : REG_EXTENDED;
1549         /* Testcase where REG_EXTENDED fails (unpaired '{'):
1550          * echo Hi | awk 'gsub("@(samp|code|file)\{","");'
1551          * gawk 3.1.5 eats this. We revert to ~REG_EXTENDED
1552          * (maybe gsub is not supposed to use REG_EXTENDED?).
1553          */
1554         if (regcomp(preg, s, cflags)) {
1555                 cflags &= ~REG_EXTENDED;
1556                 xregcomp(preg, s, cflags);
1557         }
1558         nvfree(v);
1559         return preg;
1560 }
1561
/* Gradually growing buffer: make sure b can hold more than n bytes.
 * *size is the currently allocated size and is updated on growth.
 * Since reallocation happens even when n == *size, there is always
 * at least one allocated byte beyond n.
 */
static char* qrealloc(char *b, int n, int *size)
{
        if (b != NULL && n < *size)
                return b;

        *size = n + (n>>1) + 80;
        return xrealloc(b, *size);
}
1574
1575 /* resize field storage space */
1576 static void fsrealloc(int size)
1577 {
1578         int i;
1579
1580         if (size >= maxfields) {
1581                 i = maxfields;
1582                 maxfields = size + 16;
1583                 Fields = xrealloc(Fields, maxfields * sizeof(Fields[0]));
1584                 for (; i < maxfields; i++) {
1585                         Fields[i].type = VF_SPECIAL;
1586                         Fields[i].string = NULL;
1587                 }
1588         }
1589         /* if size < nfields, clear extra field variables */
1590         for (i = size; i < nfields; i++) {
1591                 clrvar(Fields + i);
1592         }
1593         nfields = size;
1594 }
1595
1596 static int awk_split(const char *s, node *spl, char **slist)
1597 {
1598         int l, n;
1599         char c[4];
1600         char *s1;
1601         regmatch_t pmatch[2]; // TODO: why [2]? [1] is enough...
1602
1603         /* in worst case, each char would be a separate field */
1604         *slist = s1 = xzalloc(strlen(s) * 2 + 3);
1605         strcpy(s1, s);
1606
1607         c[0] = c[1] = (char)spl->info;
1608         c[2] = c[3] = '\0';
1609         if (*getvar_s(intvar[RS]) == '\0')
1610                 c[2] = '\n';
1611
1612         n = 0;
1613         if ((spl->info & OPCLSMASK) == OC_REGEXP) {  /* regex split */
1614                 if (!*s)
1615                         return n; /* "": zero fields */
1616                 n++; /* at least one field will be there */
1617                 do {
1618                         l = strcspn(s, c+2); /* len till next NUL or \n */
1619                         if (regexec(icase ? spl->r.ire : spl->l.re, s, 1, pmatch, 0) == 0
1620                          && pmatch[0].rm_so <= l
1621                         ) {
1622                                 l = pmatch[0].rm_so;
1623                                 if (pmatch[0].rm_eo == 0) {
1624                                         l++;
1625                                         pmatch[0].rm_eo++;
1626                                 }
1627                                 n++; /* we saw yet another delimiter */
1628                         } else {
1629                                 pmatch[0].rm_eo = l;
1630                                 if (s[l])
1631                                         pmatch[0].rm_eo++;
1632                         }
1633                         memcpy(s1, s, l);
1634                         /* make sure we remove *all* of the separator chars */
1635                         do {
1636                                 s1[l] = '\0';
1637                         } while (++l < pmatch[0].rm_eo);
1638                         nextword(&s1);
1639                         s += pmatch[0].rm_eo;
1640                 } while (*s);
1641                 return n;
1642         }
1643         if (c[0] == '\0') {  /* null split */
1644                 while (*s) {
1645                         *s1++ = *s++;
1646                         *s1++ = '\0';
1647                         n++;
1648                 }
1649                 return n;
1650         }
1651         if (c[0] != ' ') {  /* single-character split */
1652                 if (icase) {
1653                         c[0] = toupper(c[0]);
1654                         c[1] = tolower(c[1]);
1655                 }
1656                 if (*s1)
1657                         n++;
1658                 while ((s1 = strpbrk(s1, c)) != NULL) {
1659                         *s1++ = '\0';
1660                         n++;
1661                 }
1662                 return n;
1663         }
1664         /* space split */
1665         while (*s) {
1666                 s = skip_whitespace(s);
1667                 if (!*s)
1668                         break;
1669                 n++;
1670                 while (*s && !isspace(*s))
1671                         *s1++ = *s++;
1672                 *s1++ = '\0';
1673         }
1674         return n;
1675 }
1676
1677 static void split_f0(void)
1678 {
1679 /* static char *fstrings; */
1680 #define fstrings (G.split_f0__fstrings)
1681
1682         int i, n;
1683         char *s;
1684
1685         if (is_f0_split)
1686                 return;
1687
1688         is_f0_split = TRUE;
1689         free(fstrings);
1690         fsrealloc(0);
1691         n = awk_split(getvar_s(intvar[F0]), &fsplitter.n, &fstrings);
1692         fsrealloc(n);
1693         s = fstrings;
1694         for (i = 0; i < n; i++) {
1695                 Fields[i].string = nextword(&s);
1696                 Fields[i].type |= (VF_FSTR | VF_USER | VF_DIRTY);
1697         }
1698
1699         /* set NF manually to avoid side effects */
1700         clrvar(intvar[NF]);
1701         intvar[NF]->type = VF_NUMBER | VF_SPECIAL;
1702         intvar[NF]->number = nfields;
1703 #undef fstrings
1704 }
1705
1706 /* perform additional actions when some internal variables changed */
1707 static void handle_special(var *v)
1708 {
1709         int n;
1710         char *b;
1711         const char *sep, *s;
1712         int sl, l, len, i, bsize;
1713
1714         if (!(v->type & VF_SPECIAL))
1715                 return;
1716
1717         if (v == intvar[NF]) {
1718                 n = (int)getvar_i(v);
1719                 fsrealloc(n);
1720
1721                 /* recalculate $0 */
1722                 sep = getvar_s(intvar[OFS]);
1723                 sl = strlen(sep);
1724                 b = NULL;
1725                 len = 0;
1726                 for (i = 0; i < n; i++) {
1727                         s = getvar_s(&Fields[i]);
1728                         l = strlen(s);
1729                         if (b) {
1730                                 memcpy(b+len, sep, sl);
1731                                 len += sl;
1732                         }
1733                         b = qrealloc(b, len+l+sl, &bsize);
1734                         memcpy(b+len, s, l);
1735                         len += l;
1736                 }
1737                 if (b)
1738                         b[len] = '\0';
1739                 setvar_p(intvar[F0], b);
1740                 is_f0_split = TRUE;
1741
1742         } else if (v == intvar[F0]) {
1743                 is_f0_split = FALSE;
1744
1745         } else if (v == intvar[FS]) {
1746                 mk_splitter(getvar_s(v), &fsplitter);
1747
1748         } else if (v == intvar[RS]) {
1749                 mk_splitter(getvar_s(v), &rsplitter);
1750
1751         } else if (v == intvar[IGNORECASE]) {
1752                 icase = istrue(v);
1753
1754         } else {                                /* $n */
1755                 n = getvar_i(intvar[NF]);
1756                 setvar_i(intvar[NF], n > v-Fields ? n : v-Fields+1);
1757                 /* right here v is invalid. Just to note... */
1758         }
1759 }
1760
1761 /* step through func/builtin/etc arguments */
1762 static node *nextarg(node **pn)
1763 {
1764         node *n;
1765
1766         n = *pn;
1767         if (n && (n->info & OPCLSMASK) == OC_COMMA) {
1768                 *pn = n->r.n;
1769                 n = n->l.n;
1770         } else {
1771                 *pn = NULL;
1772         }
1773         return n;
1774 }
1775
1776 static void hashwalk_init(var *v, xhash *array)
1777 {
1778         hash_item *hi;
1779         unsigned i;
1780         walker_list *w;
1781         walker_list *prev_walker;
1782
1783         if (v->type & VF_WALK) {
1784                 prev_walker = v->x.walker;
1785         } else {
1786                 v->type |= VF_WALK;
1787                 prev_walker = NULL;
1788         }
1789         debug_printf_walker("hashwalk_init: prev_walker:%p\n", prev_walker);
1790
1791         w = v->x.walker = xzalloc(sizeof(*w) + array->glen + 1); /* why + 1? */
1792         debug_printf_walker(" walker@%p=%p\n", &v->x.walker, w);
1793         w->cur = w->end = w->wbuf;
1794         w->prev = prev_walker;
1795         for (i = 0; i < array->csize; i++) {
1796                 hi = array->items[i];
1797                 while (hi) {
1798                         strcpy(w->end, hi->name);
1799                         nextword(&w->end);
1800                         hi = hi->next;
1801                 }
1802         }
1803 }
1804
1805 static int hashwalk_next(var *v)
1806 {
1807         walker_list *w = v->x.walker;
1808
1809         if (w->cur >= w->end) {
1810                 walker_list *prev_walker = w->prev;
1811
1812                 debug_printf_walker("end of iteration, free(walker@%p:%p), prev_walker:%p\n", &v->x.walker, w, prev_walker);
1813                 free(w);
1814                 v->x.walker = prev_walker;
1815                 return FALSE;
1816         }
1817
1818         setvar_s(v, nextword(&w->cur));
1819         return TRUE;
1820 }
1821
1822 /* evaluate node, return 1 when result is true, 0 otherwise */
1823 static int ptest(node *pattern)
1824 {
1825         /* ptest__v is "static": to save stack space? */
1826         return istrue(evaluate(pattern, &G.ptest__v));
1827 }
1828
1829 /* read next record from stream rsm into a variable v */
1830 static int awk_getline(rstream *rsm, var *v)
1831 {
1832         char *b;
1833         regmatch_t pmatch[2];
1834         int size, a, p, pp = 0;
1835         int fd, so, eo, r, rp;
1836         char c, *m, *s;
1837
1838         debug_printf_eval("entered %s()\n", __func__);
1839
1840         /* we're using our own buffer since we need access to accumulating
1841          * characters
1842          */
1843         fd = fileno(rsm->F);
1844         m = rsm->buffer;
1845         a = rsm->adv;
1846         p = rsm->pos;
1847         size = rsm->size;
1848         c = (char) rsplitter.n.info;
1849         rp = 0;
1850
1851         if (!m)
1852                 m = qrealloc(m, 256, &size);
1853
1854         do {
1855                 b = m + a;
1856                 so = eo = p;
1857                 r = 1;
1858                 if (p > 0) {
1859                         if ((rsplitter.n.info & OPCLSMASK) == OC_REGEXP) {
1860                                 if (regexec(icase ? rsplitter.n.r.ire : rsplitter.n.l.re,
1861                                                         b, 1, pmatch, 0) == 0) {
1862                                         so = pmatch[0].rm_so;
1863                                         eo = pmatch[0].rm_eo;
1864                                         if (b[eo] != '\0')
1865                                                 break;
1866                                 }
1867                         } else if (c != '\0') {
1868                                 s = strchr(b+pp, c);
1869                                 if (!s)
1870                                         s = memchr(b+pp, '\0', p - pp);
1871                                 if (s) {
1872                                         so = eo = s-b;
1873                                         eo++;
1874                                         break;
1875                                 }
1876                         } else {
1877                                 while (b[rp] == '\n')
1878                                         rp++;
1879                                 s = strstr(b+rp, "\n\n");
1880                                 if (s) {
1881                                         so = eo = s-b;
1882                                         while (b[eo] == '\n')
1883                                                 eo++;
1884                                         if (b[eo] != '\0')
1885                                                 break;
1886                                 }
1887                         }
1888                 }
1889
1890                 if (a > 0) {
1891                         memmove(m, m+a, p+1);
1892                         b = m;
1893                         a = 0;
1894                 }
1895
1896                 m = qrealloc(m, a+p+128, &size);
1897                 b = m + a;
1898                 pp = p;
1899                 p += safe_read(fd, b+p, size-p-1);
1900                 if (p < pp) {
1901                         p = 0;
1902                         r = 0;
1903                         setvar_i(intvar[ERRNO], errno);
1904                 }
1905                 b[p] = '\0';
1906
1907         } while (p > pp);
1908
1909         if (p == 0) {
1910                 r--;
1911         } else {
1912                 c = b[so]; b[so] = '\0';
1913                 setvar_s(v, b+rp);
1914                 v->type |= VF_USER;
1915                 b[so] = c;
1916                 c = b[eo]; b[eo] = '\0';
1917                 setvar_s(intvar[RT], b+so);
1918                 b[eo] = c;
1919         }
1920
1921         rsm->buffer = m;
1922         rsm->adv = a + eo;
1923         rsm->pos = p - eo;
1924         rsm->size = size;
1925
1926         debug_printf_eval("returning from %s(): %d\n", __func__, r);
1927
1928         return r;
1929 }
1930
1931 static int fmt_num(char *b, int size, const char *format, double n, int int_as_int)
1932 {
1933         int r = 0;
1934         char c;
1935         const char *s = format;
1936
1937         if (int_as_int && n == (int)n) {
1938                 r = snprintf(b, size, "%d", (int)n);
1939         } else {
1940                 do { c = *s; } while (c && *++s);
1941                 if (strchr("diouxX", c)) {
1942                         r = snprintf(b, size, format, (int)n);
1943                 } else if (strchr("eEfgG", c)) {
1944                         r = snprintf(b, size, format, n);
1945                 } else {
1946                         syntax_error(EMSG_INV_FMT);
1947                 }
1948         }
1949         return r;
1950 }
1951
1952 /* formatted output into an allocated buffer, return ptr to buffer */
1953 static char *awk_printf(node *n)
1954 {
1955         char *b = NULL;
1956         char *fmt, *s, *f;
1957         const char *s1;
1958         int i, j, incr, bsize;
1959         char c, c1;
1960         var *v, *arg;
1961
1962         v = nvalloc(1);
1963         fmt = f = xstrdup(getvar_s(evaluate(nextarg(&n), v)));
1964
1965         i = 0;
1966         while (*f) {
1967                 s = f;
1968                 while (*f && (*f != '%' || *++f == '%'))
1969                         f++;
1970                 while (*f && !isalpha(*f)) {
1971                         if (*f == '*')
1972                                 syntax_error("%*x formats are not supported");
1973                         f++;
1974                 }
1975
1976                 incr = (f - s) + MAXVARFMT;
1977                 b = qrealloc(b, incr + i, &bsize);
1978                 c = *f;
1979                 if (c != '\0')
1980                         f++;
1981                 c1 = *f;
1982                 *f = '\0';
1983                 arg = evaluate(nextarg(&n), v);
1984
1985                 j = i;
1986                 if (c == 'c' || !c) {
1987                         i += sprintf(b+i, s, is_numeric(arg) ?
1988                                         (char)getvar_i(arg) : *getvar_s(arg));
1989                 } else if (c == 's') {
1990                         s1 = getvar_s(arg);
1991                         b = qrealloc(b, incr+i+strlen(s1), &bsize);
1992                         i += sprintf(b+i, s, s1);
1993                 } else {
1994                         i += fmt_num(b+i, incr, s, getvar_i(arg), FALSE);
1995                 }
1996                 *f = c1;
1997
1998                 /* if there was an error while sprintf, return value is negative */
1999                 if (i < j)
2000                         i = j;
2001         }
2002
2003         free(fmt);
2004         nvfree(v);
2005         b = xrealloc(b, i + 1);
2006         b[i] = '\0';
2007         return b;
2008 }
2009
2010 /* Common substitution routine.
2011  * Replace (nm)'th substring of (src) that matches (rn) with (repl),
2012  * store result into (dest), return number of substitutions.
2013  * If nm = 0, replace all matches.
2014  * If src or dst is NULL, use $0.
2015  * If subexp != 0, enable subexpression matching (\1-\9).
2016  */
2017 static int awk_sub(node *rn, const char *repl, int nm, var *src, var *dest, int subexp)
2018 {
2019         char *resbuf;
2020         const char *sp;
2021         int match_no, residx, replen, resbufsize;
2022         int regexec_flags;
2023         regmatch_t pmatch[10];
2024         regex_t sreg, *regex;
2025
2026         resbuf = NULL;
2027         residx = 0;
2028         match_no = 0;
2029         regexec_flags = 0;
2030         regex = as_regex(rn, &sreg);
2031         sp = getvar_s(src ? src : intvar[F0]);
2032         replen = strlen(repl);
2033         while (regexec(regex, sp, 10, pmatch, regexec_flags) == 0) {
2034                 int so = pmatch[0].rm_so;
2035                 int eo = pmatch[0].rm_eo;
2036
2037                 //bb_error_msg("match %u: [%u,%u] '%s'%p", match_no+1, so, eo, sp,sp);
2038                 resbuf = qrealloc(resbuf, residx + eo + replen, &resbufsize);
2039                 memcpy(resbuf + residx, sp, eo);
2040                 residx += eo;
2041                 if (++match_no >= nm) {
2042                         const char *s;
2043                         int nbs;
2044
2045                         /* replace */
2046                         residx -= (eo - so);
2047                         nbs = 0;
2048                         for (s = repl; *s; s++) {
2049                                 char c = resbuf[residx++] = *s;
2050                                 if (c == '\\') {
2051                                         nbs++;
2052                                         continue;
2053                                 }
2054                                 if (c == '&' || (subexp && c >= '0' && c <= '9')) {
2055                                         int j;
2056                                         residx -= ((nbs + 3) >> 1);
2057                                         j = 0;
2058                                         if (c != '&') {
2059                                                 j = c - '0';
2060                                                 nbs++;
2061                                         }
2062                                         if (nbs % 2) {
2063                                                 resbuf[residx++] = c;
2064                                         } else {
2065                                                 int n = pmatch[j].rm_eo - pmatch[j].rm_so;
2066                                                 resbuf = qrealloc(resbuf, residx + replen + n, &resbufsize);
2067                                                 memcpy(resbuf + residx, sp + pmatch[j].rm_so, n);
2068                                                 residx += n;
2069                                         }
2070                                 }
2071                                 nbs = 0;
2072                         }
2073                 }
2074
2075                 regexec_flags = REG_NOTBOL;
2076                 sp += eo;
2077                 if (match_no == nm)
2078                         break;
2079                 if (eo == so) {
2080                         /* Empty match (e.g. "b*" will match anywhere).
2081                          * Advance by one char. */
2082 //BUG (bug 1333):
2083 //gsub(/\<b*/,"") on "abc" will reach this point, advance to "bc"
2084 //... and will erroneously match "b" even though it is NOT at the word start.
2085 //we need REG_NOTBOW but it does not exist...
2086 //TODO: if EXTRA_COMPAT=y, use GNU matching and re_search,
2087 //it should be able to do it correctly.
2088                         /* Subtle: this is safe only because
2089                          * qrealloc allocated at least one extra byte */
2090                         resbuf[residx] = *sp;
2091                         if (*sp == '\0')
2092                                 goto ret;
2093                         sp++;
2094                         residx++;
2095                 }
2096         }
2097
2098         resbuf = qrealloc(resbuf, residx + strlen(sp), &resbufsize);
2099         strcpy(resbuf + residx, sp);
2100  ret:
2101         //bb_error_msg("end sp:'%s'%p", sp,sp);
2102         setvar_p(dest ? dest : intvar[F0], resbuf);
2103         if (regex == &sreg)
2104                 regfree(regex);
2105         return match_no;
2106 }
2107
2108 static NOINLINE int do_mktime(const char *ds)
2109 {
2110         struct tm then;
2111         int count;
2112
2113         /*memset(&then, 0, sizeof(then)); - not needed */
2114         then.tm_isdst = -1; /* default is unknown */
2115
2116         /* manpage of mktime says these fields are ints,
2117          * so we can sscanf stuff directly into them */
2118         count = sscanf(ds, "%u %u %u %u %u %u %d",
2119                 &then.tm_year, &then.tm_mon, &then.tm_mday,
2120                 &then.tm_hour, &then.tm_min, &then.tm_sec,
2121                 &then.tm_isdst);
2122
2123         if (count < 6
2124          || (unsigned)then.tm_mon < 1
2125          || (unsigned)then.tm_year < 1900
2126         ) {
2127                 return -1;
2128         }
2129
2130         then.tm_mon -= 1;
2131         then.tm_year -= 1900;
2132
2133         return mktime(&then);
2134 }
2135
2136 static NOINLINE var *exec_builtin(node *op, var *res)
2137 {
2138 #define tspl (G.exec_builtin__tspl)
2139
2140         var *tv;
2141         node *an[4];
2142         var *av[4];
2143         const char *as[4];
2144         regmatch_t pmatch[2];
2145         regex_t sreg, *re;
2146         node *spl;
2147         uint32_t isr, info;
2148         int nargs;
2149         time_t tt;
2150         int i, l, ll, n;
2151
2152         tv = nvalloc(4);
2153         isr = info = op->info;
2154         op = op->l.n;
2155
2156         av[2] = av[3] = NULL;
2157         for (i = 0; i < 4 && op; i++) {
2158                 an[i] = nextarg(&op);
2159                 if (isr & 0x09000000)
2160                         av[i] = evaluate(an[i], &tv[i]);
2161                 if (isr & 0x08000000)
2162                         as[i] = getvar_s(av[i]);
2163                 isr >>= 1;
2164         }
2165
2166         nargs = i;
2167         if ((uint32_t)nargs < (info >> 30))
2168                 syntax_error(EMSG_TOO_FEW_ARGS);
2169
2170         info &= OPNMASK;
2171         switch (info) {
2172
2173         case B_a2:
2174                 if (ENABLE_FEATURE_AWK_LIBM)
2175                         setvar_i(res, atan2(getvar_i(av[0]), getvar_i(av[1])));
2176                 else
2177                         syntax_error(EMSG_NO_MATH);
2178                 break;
2179
2180         case B_sp: {
2181                 char *s, *s1;
2182
2183                 if (nargs > 2) {
2184                         spl = (an[2]->info & OPCLSMASK) == OC_REGEXP ?
2185                                 an[2] : mk_splitter(getvar_s(evaluate(an[2], &tv[2])), &tspl);
2186                 } else {
2187                         spl = &fsplitter.n;
2188                 }
2189
2190                 n = awk_split(as[0], spl, &s);
2191                 s1 = s;
2192                 clear_array(iamarray(av[1]));
2193                 for (i = 1; i <= n; i++)
2194                         setari_u(av[1], i, nextword(&s));
2195                 free(s1);
2196                 setvar_i(res, n);
2197                 break;
2198         }
2199
2200         case B_ss: {
2201                 char *s;
2202
2203                 l = strlen(as[0]);
2204                 i = getvar_i(av[1]) - 1;
2205                 if (i > l)
2206                         i = l;
2207                 if (i < 0)
2208                         i = 0;
2209                 n = (nargs > 2) ? getvar_i(av[2]) : l-i;
2210                 if (n < 0)
2211                         n = 0;
2212                 s = xstrndup(as[0]+i, n);
2213                 setvar_p(res, s);
2214                 break;
2215         }
2216
2217         /* Bitwise ops must assume that operands are unsigned. GNU Awk 3.1.5:
2218          * awk '{ print or(-1,1) }' gives "4.29497e+09", not "-2.xxxe+09" */
2219         case B_an:
2220                 setvar_i(res, getvar_i_int(av[0]) & getvar_i_int(av[1]));
2221                 break;
2222
2223         case B_co:
2224                 setvar_i(res, ~getvar_i_int(av[0]));
2225                 break;
2226
2227         case B_ls:
2228                 setvar_i(res, getvar_i_int(av[0]) << getvar_i_int(av[1]));
2229                 break;
2230
2231         case B_or:
2232                 setvar_i(res, getvar_i_int(av[0]) | getvar_i_int(av[1]));
2233                 break;
2234
2235         case B_rs:
2236                 setvar_i(res, getvar_i_int(av[0]) >> getvar_i_int(av[1]));
2237                 break;
2238
2239         case B_xo:
2240                 setvar_i(res, getvar_i_int(av[0]) ^ getvar_i_int(av[1]));
2241                 break;
2242
2243         case B_lo:
2244         case B_up: {
2245                 char *s, *s1;
2246                 s1 = s = xstrdup(as[0]);
2247                 while (*s1) {
2248                         //*s1 = (info == B_up) ? toupper(*s1) : tolower(*s1);
2249                         if ((unsigned char)((*s1 | 0x20) - 'a') <= ('z' - 'a'))
2250                                 *s1 = (info == B_up) ? (*s1 & 0xdf) : (*s1 | 0x20);
2251                         s1++;
2252                 }
2253                 setvar_p(res, s);
2254                 break;
2255         }
2256
2257         case B_ix:
2258                 n = 0;
2259                 ll = strlen(as[1]);
2260                 l = strlen(as[0]) - ll;
2261                 if (ll > 0 && l >= 0) {
2262                         if (!icase) {
2263                                 char *s = strstr(as[0], as[1]);
2264                                 if (s)
2265                                         n = (s - as[0]) + 1;
2266                         } else {
2267                                 /* this piece of code is terribly slow and
2268                                  * really should be rewritten
2269                                  */
2270                                 for (i = 0; i <= l; i++) {
2271                                         if (strncasecmp(as[0]+i, as[1], ll) == 0) {
2272                                                 n = i+1;
2273                                                 break;
2274                                         }
2275                                 }
2276                         }
2277                 }
2278                 setvar_i(res, n);
2279                 break;
2280
2281         case B_ti:
2282                 if (nargs > 1)
2283                         tt = getvar_i(av[1]);
2284                 else
2285                         time(&tt);
2286                 //s = (nargs > 0) ? as[0] : "%a %b %d %H:%M:%S %Z %Y";
2287                 i = strftime(g_buf, MAXVARFMT,
2288                         ((nargs > 0) ? as[0] : "%a %b %d %H:%M:%S %Z %Y"),
2289                         localtime(&tt));
2290                 g_buf[i] = '\0';
2291                 setvar_s(res, g_buf);
2292                 break;
2293
2294         case B_mt:
2295                 setvar_i(res, do_mktime(as[0]));
2296                 break;
2297
2298         case B_ma:
2299                 re = as_regex(an[1], &sreg);
2300                 n = regexec(re, as[0], 1, pmatch, 0);
2301                 if (n == 0) {
2302                         pmatch[0].rm_so++;
2303                         pmatch[0].rm_eo++;
2304                 } else {
2305                         pmatch[0].rm_so = 0;
2306                         pmatch[0].rm_eo = -1;
2307                 }
2308                 setvar_i(newvar("RSTART"), pmatch[0].rm_so);
2309                 setvar_i(newvar("RLENGTH"), pmatch[0].rm_eo - pmatch[0].rm_so);
2310                 setvar_i(res, pmatch[0].rm_so);
2311                 if (re == &sreg)
2312                         regfree(re);
2313                 break;
2314
2315         case B_ge:
2316                 awk_sub(an[0], as[1], getvar_i(av[2]), av[3], res, TRUE);
2317                 break;
2318
2319         case B_gs:
2320                 setvar_i(res, awk_sub(an[0], as[1], 0, av[2], av[2], FALSE));
2321                 break;
2322
2323         case B_su:
2324                 setvar_i(res, awk_sub(an[0], as[1], 1, av[2], av[2], FALSE));
2325                 break;
2326         }
2327
2328         nvfree(tv);
2329         return res;
2330 #undef tspl
2331 }
2332
2333 /*
2334  * Evaluate node - the heart of the program. Supplied with subtree
2335  * and place where to store result. returns ptr to result.
2336  */
/* XC(n) drops the low 8 bits of n. evaluate() applies it both to the
 * switch expression (opinfo & OPCLSMASK) and to every OC_xxx case label
 * - presumably to keep the case values small; confirm against the
 * OC_xxx definitions. */
#define XC(n) ((n) >> 8)
2338
2339 static var *evaluate(node *op, var *res)
2340 {
2341 /* This procedure is recursive so we should count every byte */
2342 #define fnargs (G.evaluate__fnargs)
2343 /* seed is initialized to 1 */
2344 #define seed   (G.evaluate__seed)
2345 #define sreg   (G.evaluate__sreg)
2346
2347         var *v1;
2348
2349         if (!op)
2350                 return setvar_s(res, NULL);
2351
2352         debug_printf_eval("entered %s()\n", __func__);
2353
2354         v1 = nvalloc(2);
2355
2356         while (op) {
2357                 struct {
2358                         var *v;
2359                         const char *s;
2360                 } L = L; /* for compiler */
2361                 struct {
2362                         var *v;
2363                         const char *s;
2364                 } R = R;
2365                 double L_d = L_d;
2366                 uint32_t opinfo;
2367                 int opn;
2368                 node *op1;
2369
2370                 opinfo = op->info;
2371                 opn = (opinfo & OPNMASK);
2372                 g_lineno = op->lineno;
2373                 op1 = op->l.n;
2374                 debug_printf_eval("opinfo:%08x opn:%08x\n", opinfo, opn);
2375
2376                 /* execute inevitable things */
2377                 if (opinfo & OF_RES1)
2378                         L.v = evaluate(op1, v1);
2379                 if (opinfo & OF_RES2)
2380                         R.v = evaluate(op->r.n, v1+1);
2381                 if (opinfo & OF_STR1) {
2382                         L.s = getvar_s(L.v);
2383                         debug_printf_eval("L.s:'%s'\n", L.s);
2384                 }
2385                 if (opinfo & OF_STR2) {
2386                         R.s = getvar_s(R.v);
2387                         debug_printf_eval("R.s:'%s'\n", R.s);
2388                 }
2389                 if (opinfo & OF_NUM1) {
2390                         L_d = getvar_i(L.v);
2391                         debug_printf_eval("L_d:%f\n", L_d);
2392                 }
2393
2394                 debug_printf_eval("switch(0x%x)\n", XC(opinfo & OPCLSMASK));
2395                 switch (XC(opinfo & OPCLSMASK)) {
2396
2397                 /* -- iterative node type -- */
2398
2399                 /* test pattern */
2400                 case XC( OC_TEST ):
2401                         if ((op1->info & OPCLSMASK) == OC_COMMA) {
2402                                 /* it's range pattern */
2403                                 if ((opinfo & OF_CHECKED) || ptest(op1->l.n)) {
2404                                         op->info |= OF_CHECKED;
2405                                         if (ptest(op1->r.n))
2406                                                 op->info &= ~OF_CHECKED;
2407                                         op = op->a.n;
2408                                 } else {
2409                                         op = op->r.n;
2410                                 }
2411                         } else {
2412                                 op = ptest(op1) ? op->a.n : op->r.n;
2413                         }
2414                         break;
2415
2416                 /* just evaluate an expression, also used as unconditional jump */
2417                 case XC( OC_EXEC ):
2418                         break;
2419
2420                 /* branch, used in if-else and various loops */
2421                 case XC( OC_BR ):
2422                         op = istrue(L.v) ? op->a.n : op->r.n;
2423                         break;
2424
2425                 /* initialize for-in loop */
2426                 case XC( OC_WALKINIT ):
2427                         hashwalk_init(L.v, iamarray(R.v));
2428                         break;
2429
2430                 /* get next array item */
2431                 case XC( OC_WALKNEXT ):
2432                         op = hashwalk_next(L.v) ? op->a.n : op->r.n;
2433                         break;
2434
2435                 case XC( OC_PRINT ):
2436                 case XC( OC_PRINTF ): {
2437                         FILE *F = stdout;
2438
2439                         if (op->r.n) {
2440                                 rstream *rsm = newfile(R.s);
2441                                 if (!rsm->F) {
2442                                         if (opn == '|') {
2443                                                 rsm->F = popen(R.s, "w");
2444                                                 if (rsm->F == NULL)
2445                                                         bb_perror_msg_and_die("popen");
2446                                                 rsm->is_pipe = 1;
2447                                         } else {
2448                                                 rsm->F = xfopen(R.s, opn=='w' ? "w" : "a");
2449                                         }
2450                                 }
2451                                 F = rsm->F;
2452                         }
2453
2454                         if ((opinfo & OPCLSMASK) == OC_PRINT) {
2455                                 if (!op1) {
2456                                         fputs(getvar_s(intvar[F0]), F);
2457                                 } else {
2458                                         while (op1) {
2459                                                 var *v = evaluate(nextarg(&op1), v1);
2460                                                 if (v->type & VF_NUMBER) {
2461                                                         fmt_num(g_buf, MAXVARFMT, getvar_s(intvar[OFMT]),
2462                                                                         getvar_i(v), TRUE);
2463                                                         fputs(g_buf, F);
2464                                                 } else {
2465                                                         fputs(getvar_s(v), F);
2466                                                 }
2467
2468                                                 if (op1)
2469                                                         fputs(getvar_s(intvar[OFS]), F);
2470                                         }
2471                                 }
2472                                 fputs(getvar_s(intvar[ORS]), F);
2473
2474                         } else {        /* OC_PRINTF */
2475                                 char *s = awk_printf(op1);
2476                                 fputs(s, F);
2477                                 free(s);
2478                         }
2479                         fflush(F);
2480                         break;
2481                 }
2482
2483                 case XC( OC_DELETE ): {
2484                         uint32_t info = op1->info & OPCLSMASK;
2485                         var *v;
2486
2487                         if (info == OC_VAR) {
2488                                 v = op1->l.v;
2489                         } else if (info == OC_FNARG) {
2490                                 v = &fnargs[op1->l.aidx];
2491                         } else {
2492                                 syntax_error(EMSG_NOT_ARRAY);
2493                         }
2494
2495                         if (op1->r.n) {
2496                                 const char *s;
2497                                 clrvar(L.v);
2498                                 s = getvar_s(evaluate(op1->r.n, v1));
2499                                 hash_remove(iamarray(v), s);
2500                         } else {
2501                                 clear_array(iamarray(v));
2502                         }
2503                         break;
2504                 }
2505
2506                 case XC( OC_NEWSOURCE ):
2507                         g_progname = op->l.new_progname;
2508                         break;
2509
2510                 case XC( OC_RETURN ):
2511                         copyvar(res, L.v);
2512                         break;
2513
2514                 case XC( OC_NEXTFILE ):
2515                         nextfile = TRUE;
2516                 case XC( OC_NEXT ):
2517                         nextrec = TRUE;
2518                 case XC( OC_DONE ):
2519                         clrvar(res);
2520                         break;
2521
2522                 case XC( OC_EXIT ):
2523                         awk_exit(L_d);
2524
2525                 /* -- recursive node type -- */
2526
2527                 case XC( OC_VAR ):
2528                         L.v = op->l.v;
2529                         if (L.v == intvar[NF])
2530                                 split_f0();
2531                         goto v_cont;
2532
2533                 case XC( OC_FNARG ):
2534                         L.v = &fnargs[op->l.aidx];
2535  v_cont:
2536                         res = op->r.n ? findvar(iamarray(L.v), R.s) : L.v;
2537                         break;
2538
2539                 case XC( OC_IN ):
2540                         setvar_i(res, hash_search(iamarray(R.v), L.s) ? 1 : 0);
2541                         break;
2542
2543                 case XC( OC_REGEXP ):
2544                         op1 = op;
2545                         L.s = getvar_s(intvar[F0]);
2546                         goto re_cont;
2547
2548                 case XC( OC_MATCH ):
2549                         op1 = op->r.n;
2550  re_cont:
2551                         {
2552                                 regex_t *re = as_regex(op1, &sreg);
2553                                 int i = regexec(re, L.s, 0, NULL, 0);
2554                                 if (re == &sreg)
2555                                         regfree(re);
2556                                 setvar_i(res, (i == 0) ^ (opn == '!'));
2557                         }
2558                         break;
2559
2560                 case XC( OC_MOVE ):
2561                         debug_printf_eval("MOVE\n");
2562                         /* if source is a temporary string, just relink it to dest */
2563 //Disabled: if R.v is numeric but happens to have cached R.v->string,
2564 //then L.v ends up being a string, which is wrong
2565 //                      if (R.v == v1+1 && R.v->string) {
2566 //                              res = setvar_p(L.v, R.v->string);
2567 //                              R.v->string = NULL;
2568 //                      } else {
2569                                 res = copyvar(L.v, R.v);
2570 //                      }
2571                         break;
2572
2573                 case XC( OC_TERNARY ):
2574                         if ((op->r.n->info & OPCLSMASK) != OC_COLON)
2575                                 syntax_error(EMSG_POSSIBLE_ERROR);
2576                         res = evaluate(istrue(L.v) ? op->r.n->l.n : op->r.n->r.n, res);
2577                         break;
2578
2579                 case XC( OC_FUNC ): {
2580                         var *vbeg, *v;
2581                         const char *sv_progname;
2582
2583                         if (!op->r.f->body.first)
2584                                 syntax_error(EMSG_UNDEF_FUNC);
2585
2586                         vbeg = v = nvalloc(op->r.f->nargs + 1);
2587                         while (op1) {
2588                                 var *arg = evaluate(nextarg(&op1), v1);
2589                                 copyvar(v, arg);
2590                                 v->type |= VF_CHILD;
2591                                 v->x.parent = arg;
2592                                 if (++v - vbeg >= op->r.f->nargs)
2593                                         break;
2594                         }
2595
2596                         v = fnargs;
2597                         fnargs = vbeg;
2598                         sv_progname = g_progname;
2599
2600                         res = evaluate(op->r.f->body.first, res);
2601
2602                         g_progname = sv_progname;
2603                         nvfree(fnargs);
2604                         fnargs = v;
2605
2606                         break;
2607                 }
2608
2609                 case XC( OC_GETLINE ):
2610                 case XC( OC_PGETLINE ): {
2611                         rstream *rsm;
2612                         int i;
2613
2614                         if (op1) {
2615                                 rsm = newfile(L.s);
2616                                 if (!rsm->F) {
2617                                         if ((opinfo & OPCLSMASK) == OC_PGETLINE) {
2618                                                 rsm->F = popen(L.s, "r");
2619                                                 rsm->is_pipe = TRUE;
2620                                         } else {
2621                                                 rsm->F = fopen_for_read(L.s);  /* not xfopen! */
2622                                         }
2623                                 }
2624                         } else {
2625                                 if (!iF)
2626                                         iF = next_input_file();
2627                                 rsm = iF;
2628                         }
2629
2630                         if (!rsm->F) {
2631                                 setvar_i(intvar[ERRNO], errno);
2632                                 setvar_i(res, -1);
2633                                 break;
2634                         }
2635
2636                         if (!op->r.n)
2637                                 R.v = intvar[F0];
2638
2639                         i = awk_getline(rsm, R.v);
2640                         if (i > 0 && !op1) {
2641                                 incvar(intvar[FNR]);
2642                                 incvar(intvar[NR]);
2643                         }
2644                         setvar_i(res, i);
2645                         break;
2646                 }
2647
2648                 /* simple builtins */
2649                 case XC( OC_FBLTIN ): {
2650                         double R_d = R_d; /* for compiler */
2651
2652                         switch (opn) {
2653                         case F_in:
2654                                 R_d = (int)L_d;
2655                                 break;
2656
2657                         case F_rn:
2658                                 R_d = (double)rand() / (double)RAND_MAX;
2659                                 break;
2660
2661                         case F_co:
2662                                 if (ENABLE_FEATURE_AWK_LIBM) {
2663                                         R_d = cos(L_d);
2664                                         break;
2665                                 }
2666
2667                         case F_ex:
2668                                 if (ENABLE_FEATURE_AWK_LIBM) {
2669                                         R_d = exp(L_d);
2670                                         break;
2671                                 }
2672
2673                         case F_lg:
2674                                 if (ENABLE_FEATURE_AWK_LIBM) {
2675                                         R_d = log(L_d);
2676                                         break;
2677                                 }
2678
2679                         case F_si:
2680                                 if (ENABLE_FEATURE_AWK_LIBM) {
2681                                         R_d = sin(L_d);
2682                                         break;
2683                                 }
2684
2685                         case F_sq:
2686                                 if (ENABLE_FEATURE_AWK_LIBM) {
2687                                         R_d = sqrt(L_d);
2688                                         break;
2689                                 }
2690
2691                                 syntax_error(EMSG_NO_MATH);
2692                                 break;
2693
2694                         case F_sr:
2695                                 R_d = (double)seed;
2696                                 seed = op1 ? (unsigned)L_d : (unsigned)time(NULL);
2697                                 srand(seed);
2698                                 break;
2699
2700                         case F_ti:
2701                                 R_d = time(NULL);
2702                                 break;
2703
2704                         case F_le:
2705                                 if (!op1)
2706                                         L.s = getvar_s(intvar[F0]);
2707                                 R_d = strlen(L.s);
2708                                 break;
2709
2710                         case F_sy:
2711                                 fflush_all();
2712                                 R_d = (ENABLE_FEATURE_ALLOW_EXEC && L.s && *L.s)
2713                                                 ? (system(L.s) >> 8) : 0;
2714                                 break;
2715
2716                         case F_ff:
2717                                 if (!op1) {
2718                                         fflush(stdout);
2719                                 } else if (L.s && *L.s) {
2720                                         rstream *rsm = newfile(L.s);
2721                                         fflush(rsm->F);
2722                                 } else {
2723                                         fflush_all();
2724                                 }
2725                                 break;
2726
2727                         case F_cl: {
2728                                 rstream *rsm;
2729                                 int err = 0;
2730                                 rsm = (rstream *)hash_search(fdhash, L.s);
2731                                 debug_printf_eval("OC_FBLTIN F_cl rsm:%p\n", rsm);
2732                                 if (rsm) {
2733                                         debug_printf_eval("OC_FBLTIN F_cl "
2734                                                 "rsm->is_pipe:%d, ->F:%p\n",
2735                                                 rsm->is_pipe, rsm->F);
2736                                         /* Can be NULL if open failed. Example:
2737                                          * getline line <"doesnt_exist";
2738                                          * close("doesnt_exist"); <--- here rsm->F is NULL
2739                                          */
2740                                         if (rsm->F)
2741                                                 err = rsm->is_pipe ? pclose(rsm->F) : fclose(rsm->F);
2742                                         free(rsm->buffer);
2743                                         hash_remove(fdhash, L.s);
2744                                 }
2745                                 if (err)
2746                                         setvar_i(intvar[ERRNO], errno);
2747                                 R_d = (double)err;
2748                                 break;
2749                         }
2750                         } /* switch */
2751                         setvar_i(res, R_d);
2752                         break;
2753                 }
2754
2755                 case XC( OC_BUILTIN ):
2756                         res = exec_builtin(op, res);
2757                         break;
2758
2759                 case XC( OC_SPRINTF ):
2760                         setvar_p(res, awk_printf(op1));
2761                         break;
2762
2763                 case XC( OC_UNARY ): {
2764                         double Ld, R_d;
2765
2766                         Ld = R_d = getvar_i(R.v);
2767                         switch (opn) {
2768                         case 'P':
2769                                 Ld = ++R_d;
2770                                 goto r_op_change;
2771                         case 'p':
2772                                 R_d++;
2773                                 goto r_op_change;
2774                         case 'M':
2775                                 Ld = --R_d;
2776                                 goto r_op_change;
2777                         case 'm':
2778                                 R_d--;
2779  r_op_change:
2780                                 setvar_i(R.v, R_d);
2781                                 break;
2782                         case '!':
2783                                 Ld = !istrue(R.v);
2784                                 break;
2785                         case '-':
2786                                 Ld = -R_d;
2787                                 break;
2788                         }
2789                         setvar_i(res, Ld);
2790                         break;
2791                 }
2792
2793                 case XC( OC_FIELD ): {
2794                         int i = (int)getvar_i(R.v);
2795                         if (i == 0) {
2796                                 res = intvar[F0];
2797                         } else {
2798                                 split_f0();
2799                                 if (i > nfields)
2800                                         fsrealloc(i);
2801                                 res = &Fields[i - 1];
2802                         }
2803                         break;
2804                 }
2805
2806                 /* concatenation (" ") and index joining (",") */
2807                 case XC( OC_CONCAT ):
2808                 case XC( OC_COMMA ): {
2809                         const char *sep = "";
2810                         if ((opinfo & OPCLSMASK) == OC_COMMA)
2811                                 sep = getvar_s(intvar[SUBSEP]);
2812                         setvar_p(res, xasprintf("%s%s%s", L.s, sep, R.s));
2813                         break;
2814                 }
2815
2816                 case XC( OC_LAND ):
2817                         setvar_i(res, istrue(L.v) ? ptest(op->r.n) : 0);
2818                         break;
2819
2820                 case XC( OC_LOR ):
2821                         setvar_i(res, istrue(L.v) ? 1 : ptest(op->r.n));
2822                         break;
2823
2824                 case XC( OC_BINARY ):
2825                 case XC( OC_REPLACE ): {
2826                         double R_d = getvar_i(R.v);
2827                         debug_printf_eval("BINARY/REPLACE: R_d:%f opn:%c\n", R_d, opn);
2828                         switch (opn) {
2829                         case '+':
2830                                 L_d += R_d;
2831                                 break;
2832                         case '-':
2833                                 L_d -= R_d;
2834                                 break;
2835                         case '*':
2836                                 L_d *= R_d;
2837                                 break;
2838                         case '/':
2839                                 if (R_d == 0)
2840                                         syntax_error(EMSG_DIV_BY_ZERO);
2841                                 L_d /= R_d;
2842                                 break;
2843                         case '&':
2844                                 if (ENABLE_FEATURE_AWK_LIBM)
2845                                         L_d = pow(L_d, R_d);
2846                                 else
2847                                         syntax_error(EMSG_NO_MATH);
2848                                 break;
2849                         case '%':
2850                                 if (R_d == 0)
2851                                         syntax_error(EMSG_DIV_BY_ZERO);
2852                                 L_d -= (int)(L_d / R_d) * R_d;
2853                                 break;
2854                         }
2855                         debug_printf_eval("BINARY/REPLACE result:%f\n", L_d);
2856                         res = setvar_i(((opinfo & OPCLSMASK) == OC_BINARY) ? res : L.v, L_d);
2857                         break;
2858                 }
2859
2860                 case XC( OC_COMPARE ): {
2861                         int i = i; /* for compiler */
2862                         double Ld;
2863
2864                         if (is_numeric(L.v) && is_numeric(R.v)) {
2865                                 Ld = getvar_i(L.v) - getvar_i(R.v);
2866                         } else {
2867                                 const char *l = getvar_s(L.v);
2868                                 const char *r = getvar_s(R.v);
2869                                 Ld = icase ? strcasecmp(l, r) : strcmp(l, r);
2870                         }
2871                         switch (opn & 0xfe) {
2872                         case 0:
2873                                 i = (Ld > 0);
2874                                 break;
2875                         case 2:
2876                                 i = (Ld >= 0);
2877                                 break;
2878                         case 4:
2879                                 i = (Ld == 0);
2880                                 break;
2881                         }
2882                         setvar_i(res, (i == 0) ^ (opn & 1));
2883                         break;
2884                 }
2885
2886                 default:
2887                         syntax_error(EMSG_POSSIBLE_ERROR);
2888                 }
2889                 if ((opinfo & OPCLSMASK) <= SHIFT_TIL_THIS)
2890                         op = op->a.n;
2891                 if ((opinfo & OPCLSMASK) >= RECUR_FROM_THIS)
2892                         break;
2893                 if (nextrec)
2894                         break;
2895         } /* while (op) */
2896
2897         nvfree(v1);
2898         debug_printf_eval("returning from %s(): %p\n", __func__, res);
2899         return res;
2900 #undef fnargs
2901 #undef seed
2902 #undef sreg
2903 }
2904
2905
2906 /* -------- main & co. -------- */
2907
2908 static int awk_exit(int r)
2909 {
2910         var tv;
2911         unsigned i;
2912         hash_item *hi;
2913
2914         zero_out_var(&tv);
2915
2916         if (!exiting) {
2917                 exiting = TRUE;
2918                 nextrec = FALSE;
2919                 evaluate(endseq.first, &tv);
2920         }
2921
2922         /* waiting for children */
2923         for (i = 0; i < fdhash->csize; i++) {
2924                 hi = fdhash->items[i];
2925                 while (hi) {
2926                         if (hi->data.rs.F && hi->data.rs.is_pipe)
2927                                 pclose(hi->data.rs.F);
2928                         hi = hi->next;
2929                 }
2930         }
2931
2932         exit(r);
2933 }
2934
2935 /* if expr looks like "var=value", perform assignment and return 1,
2936  * otherwise return 0 */
2937 static int is_assignment(const char *expr)
2938 {
2939         char *exprc, *val, *s, *s1;
2940
2941         if (!isalnum_(*expr) || (val = strchr(expr, '=')) == NULL) {
2942                 return FALSE;
2943         }
2944
2945         exprc = xstrdup(expr);
2946         val = exprc + (val - expr);
2947         *val++ = '\0';
2948
2949         s = s1 = val;
2950         while ((*s1 = nextchar(&s)) != '\0')
2951                 s1++;
2952
2953         setvar_u(newvar(exprc), val);
2954         free(exprc);
2955         return TRUE;
2956 }
2957
2958 /* switch to next input file */
2959 static rstream *next_input_file(void)
2960 {
2961 #define rsm          (G.next_input_file__rsm)
2962 #define files_happen (G.next_input_file__files_happen)
2963
2964         FILE *F = NULL;
2965         const char *fname, *ind;
2966
2967         if (rsm.F)
2968                 fclose(rsm.F);
2969         rsm.F = NULL;
2970         rsm.pos = rsm.adv = 0;
2971
2972         do {
2973                 if (getvar_i(intvar[ARGIND])+1 >= getvar_i(intvar[ARGC])) {
2974                         if (files_happen)
2975                                 return NULL;
2976                         fname = "-";
2977                         F = stdin;
2978                 } else {
2979                         ind = getvar_s(incvar(intvar[ARGIND]));
2980                         fname = getvar_s(findvar(iamarray(intvar[ARGV]), ind));
2981                         if (fname && *fname && !is_assignment(fname))
2982                                 F = xfopen_stdin(fname);
2983                 }
2984         } while (!F);
2985
2986         files_happen = TRUE;
2987         setvar_s(intvar[FILENAME], fname);
2988         rsm.F = F;
2989         return &rsm;
2990 #undef rsm
2991 #undef files_happen
2992 }
2993
2994 int awk_main(int argc, char **argv) MAIN_EXTERNALLY_VISIBLE;
2995 int awk_main(int argc, char **argv)
2996 {
2997         unsigned opt;
2998         char *opt_F, *opt_W;
2999         llist_t *list_v = NULL;
3000         llist_t *list_f = NULL;
3001         int i, j;
3002         var *v;
3003         var tv;
3004         char **envp;
3005         char *vnames = (char *)vNames; /* cheat */
3006         char *vvalues = (char *)vValues;
3007
3008         INIT_G();
3009
3010         /* Undo busybox.c, or else strtod may eat ','! This breaks parsing:
3011          * $1,$2 == '$1,' '$2', NOT '$1' ',' '$2' */
3012         if (ENABLE_LOCALE_SUPPORT)
3013                 setlocale(LC_NUMERIC, "C");
3014
3015         zero_out_var(&tv);
3016
3017         /* allocate global buffer */
3018         g_buf = xmalloc(MAXVARFMT + 1);
3019
3020         vhash = hash_init();
3021         ahash = hash_init();
3022         fdhash = hash_init();
3023         fnhash = hash_init();
3024
3025         /* initialize variables */
3026         for (i = 0; *vnames; i++) {
3027                 intvar[i] = v = newvar(nextword(&vnames));
3028                 if (*vvalues != '\377')
3029                         setvar_s(v, nextword(&vvalues));
3030                 else
3031                         setvar_i(v, 0);
3032
3033                 if (*vnames == '*') {
3034                         v->type |= VF_SPECIAL;
3035                         vnames++;
3036                 }
3037         }
3038
3039         handle_special(intvar[FS]);
3040         handle_special(intvar[RS]);
3041
3042         newfile("/dev/stdin")->F = stdin;
3043         newfile("/dev/stdout")->F = stdout;
3044         newfile("/dev/stderr")->F = stderr;
3045
3046         /* Huh, people report that sometimes environ is NULL. Oh well. */
3047         if (environ) for (envp = environ; *envp; envp++) {
3048                 /* environ is writable, thus we don't strdup it needlessly */
3049                 char *s = *envp;
3050                 char *s1 = strchr(s, '=');
3051                 if (s1) {
3052                         *s1 = '\0';
3053                         /* Both findvar and setvar_u take const char*
3054                          * as 2nd arg -> environment is not trashed */
3055                         setvar_u(findvar(iamarray(intvar[ENVIRON]), s), s1 + 1);
3056                         *s1 = '=';
3057                 }
3058         }
3059         opt_complementary = "v::f::"; /* -v and -f can occur multiple times */
3060         opt = getopt32(argv, "F:v:f:W:", &opt_F, &list_v, &list_f, &opt_W);
3061         argv += optind;
3062         argc -= optind;
3063         if (opt & 0x1)
3064                 setvar_s(intvar[FS], opt_F); // -F
3065         while (list_v) { /* -v */
3066                 if (!is_assignment(llist_pop(&list_v)))
3067                         bb_show_usage();
3068         }
3069         if (list_f) { /* -f */
3070                 do {
3071                         char *s = NULL;
3072                         FILE *from_file;
3073
3074                         g_progname = llist_pop(&list_f);
3075                         from_file = xfopen_stdin(g_progname);
3076                         /* one byte is reserved for some trick in next_token */
3077                         for (i = j = 1; j > 0; i += j) {
3078                                 s = xrealloc(s, i + 4096);
3079                                 j = fread(s + i, 1, 4094, from_file);
3080                         }
3081                         s[i] = '\0';
3082                         fclose(from_file);
3083                         parse_program(s + 1);
3084                         free(s);
3085                 } while (list_f);
3086                 argc++;
3087         } else { // no -f: take program from 1st parameter
3088                 if (!argc)
3089                         bb_show_usage();
3090                 g_progname = "cmd. line";
3091                 parse_program(*argv++);
3092         }
3093         if (opt & 0x8) // -W
3094                 bb_error_msg("warning: unrecognized option '-W %s' ignored", opt_W);
3095
3096         /* fill in ARGV array */
3097         setvar_i(intvar[ARGC], argc);
3098         setari_u(intvar[ARGV], 0, "awk");
3099         i = 0;
3100         while (*argv)
3101                 setari_u(intvar[ARGV], ++i, *argv++);
3102
3103         evaluate(beginseq.first, &tv);
3104         if (!mainseq.first && !endseq.first)
3105                 awk_exit(EXIT_SUCCESS);
3106
3107         /* input file could already be opened in BEGIN block */
3108         if (!iF)
3109                 iF = next_input_file();
3110
3111         /* passing through input files */
3112         while (iF) {
3113                 nextfile = FALSE;
3114                 setvar_i(intvar[FNR], 0);
3115
3116                 while ((i = awk_getline(iF, intvar[F0])) > 0) {
3117                         nextrec = FALSE;
3118                         incvar(intvar[NR]);
3119                         incvar(intvar[FNR]);
3120                         evaluate(mainseq.first, &tv);
3121
3122                         if (nextfile)
3123                                 break;
3124                 }
3125
3126                 if (i < 0)
3127                         syntax_error(strerror(errno));
3128
3129                 iF = next_input_file();
3130         }
3131
3132         awk_exit(EXIT_SUCCESS);
3133         /*return 0;*/
3134 }