awk: code shrink
[platform/upstream/busybox.git] / editors / awk.c
1 /* vi: set sw=4 ts=4: */
2 /*
3  * awk implementation for busybox
4  *
5  * Copyright (C) 2002 by Dmitry Zakharov <dmit@crp.bank.gov.ua>
6  *
7  * Licensed under GPLv2 or later, see file LICENSE in this source tree.
8  */
9
10 #include "libbb.h"
11 #include "xregex.h"
12 #include <math.h>
13
14 /* This is a NOEXEC applet. Be very careful! */
15
16
17 /* If you comment out one of these below, it will be #defined later
18  * to perform debug printfs to stderr: */
19 #define debug_printf_walker(...)  do {} while (0)
20 #define debug_printf_eval(...)  do {} while (0)
21
22 #ifndef debug_printf_walker
23 # define debug_printf_walker(...) (fprintf(stderr, __VA_ARGS__))
24 #endif
25 #ifndef debug_printf_eval
26 # define debug_printf_eval(...) (fprintf(stderr, __VA_ARGS__))
27 #endif
28
29
30
31 #define MAXVARFMT       240
32 #define MINNVBLOCK      64
33
34 /* variable flags */
35 #define VF_NUMBER       0x0001  /* 1 = primary type is number */
36 #define VF_ARRAY        0x0002  /* 1 = it's an array */
37
38 #define VF_CACHED       0x0100  /* 1 = num/str value has cached str/num eq */
39 #define VF_USER         0x0200  /* 1 = user input (may be numeric string) */
40 #define VF_SPECIAL      0x0400  /* 1 = requires extra handling when changed */
41 #define VF_WALK         0x0800  /* 1 = variable has alloc'd x.walker list */
42 #define VF_FSTR         0x1000  /* 1 = var::string points to fstring buffer */
43 #define VF_CHILD        0x2000  /* 1 = function arg; x.parent points to source */
44 #define VF_DIRTY        0x4000  /* 1 = variable was set explicitly */
45
46 /* these flags are static, don't change them when value is changed */
47 #define VF_DONTTOUCH    (VF_ARRAY | VF_SPECIAL | VF_WALK | VF_CHILD | VF_DIRTY)
48
49 typedef struct walker_list {
50         char *end;
51         char *cur;
52         struct walker_list *prev;
53         char wbuf[1];
54 } walker_list;
55
56 /* Variable */
/* A single awk value: a flag word plus numeric and string representations.
 * Which representation is authoritative is decided by the VF_* bits in
 * 'type' (VF_NUMBER / VF_CACHED), and which union member of 'x' is live
 * is decided by VF_ARRAY / VF_CHILD / VF_WALK. */
57 typedef struct var_s {
58         unsigned type;            /* flags */
59         double number;            /* numeric value, or cached conversion of 'string' */
60         char *string;             /* string value; NULL reads as "". Freed by clrvar() unless VF_FSTR is set */
61         union {
62                 int aidx;               /* func arg idx (for compilation stage) */
63                 struct xhash_s *array;  /* array ptr */
64                 struct var_s *parent;   /* for func args, ptr to actual parameter */
65                 walker_list *walker;    /* list of array elements (for..in) */
66         } x;
67 } var;
68
69 /* Node chain (pattern-action chain, BEGIN, END, function bodies) */
70 typedef struct chain_s {
71         struct node_s *first;
72         struct node_s *last;
73         const char *programname;
74 } chain;
75
76 /* Function */
77 typedef struct func_s {
78         unsigned nargs;
79         struct chain_s body;
80 } func;
81
82 /* I/O stream */
83 typedef struct rstream_s {
84         FILE *F;
85         char *buffer;
86         int adv;
87         int size;
88         int pos;
89         smallint is_pipe;
90 } rstream;
91
/* One entry of an xhash bucket chain: payload, chain link, then the key.
 * 'data' must stay the first member: hash_find() stores the payload pointer
 * returned by hash_search() in a hash_item pointer and relies on both
 * addresses being equal. */
92 typedef struct hash_item_s {
93         union {
94                 struct var_s v;         /* variable/array hash */
95                 struct rstream_s rs;    /* redirect streams hash */
96                 struct func_s f;        /* functions hash */
97         } data;
98         struct hash_item_s *next;       /* next in chain */
99         char name[1];                   /* really it's longer */
                                           /* (hash_find() allocates sizeof(item) + strlen(name) + 1) */
100 } hash_item;
101
/* Chained hash table keyed by NUL-terminated names.
 * Starts with FIRST_PRIME buckets and is regrown by hash_rebuild()
 * through the sizes listed in PRIMES[]. */
102 typedef struct xhash_s {
103         unsigned nel;           /* num of elements */
104         unsigned csize;         /* current hash size */
105         unsigned nprime;        /* next hash size in PRIMES[] */
106         unsigned glen;          /* summary length of item names */
                                   /* (each name counted as strlen + 1, see hash_find/hash_remove) */
107         struct hash_item_s **items;     /* csize bucket heads, each a singly linked chain */
108 } xhash;
109
110 /* Tree node */
111 typedef struct node_s {
112         uint32_t info;
113         unsigned lineno;
114         union {
115                 struct node_s *n;
116                 var *v;
117                 int aidx;
118                 char *new_progname;
119                 regex_t *re;
120         } l;
121         union {
122                 struct node_s *n;
123                 regex_t *ire;
124                 func *f;
125         } r;
126         union {
127                 struct node_s *n;
128         } a;
129 } node;
130
131 /* Block of temporary variables */
/* Blocks form a doubly linked list; nvalloc() carves consecutive vars out of
 * the current block (g_cb) and appends a new block when none has room. */
132 typedef struct nvblock_s {
133         int size;                       /* capacity of nv[], in vars */
134         var *pos;                       /* first unused slot inside nv[] */
135         struct nvblock_s *prev;
136         struct nvblock_s *next;
137         var nv[];                       /* storage, allocated together with the header */
138 } nvblock;
139
140 typedef struct tsplitter_s {
141         node n;
142         regex_t re[2];
143 } tsplitter;
144
145 /* simple token classes */
146 /* Order and hex values are very important!!!  See next_token() */
147 #define TC_SEQSTART      1                              /* ( */
148 #define TC_SEQTERM      (1 << 1)                /* ) */
149 #define TC_REGEXP       (1 << 2)                /* /.../ */
150 #define TC_OUTRDR       (1 << 3)                /* | > >> */
151 #define TC_UOPPOST      (1 << 4)                /* unary postfix operator */
152 #define TC_UOPPRE1      (1 << 5)                /* unary prefix operator */
153 #define TC_BINOPX       (1 << 6)                /* two-opnd operator */
154 #define TC_IN           (1 << 7)
155 #define TC_COMMA        (1 << 8)
156 #define TC_PIPE         (1 << 9)                /* input redirection pipe */
157 #define TC_UOPPRE2      (1 << 10)               /* unary prefix operator */
158 #define TC_ARRTERM      (1 << 11)               /* ] */
159 #define TC_GRPSTART     (1 << 12)               /* { */
160 #define TC_GRPTERM      (1 << 13)               /* } */
161 #define TC_SEMICOL      (1 << 14)
162 #define TC_NEWLINE      (1 << 15)
163 #define TC_STATX        (1 << 16)               /* ctl statement (for, next...) */
164 #define TC_WHILE        (1 << 17)
165 #define TC_ELSE         (1 << 18)
166 #define TC_BUILTIN      (1 << 19)
167 #define TC_GETLINE      (1 << 20)
168 #define TC_FUNCDECL     (1 << 21)               /* `function' `func' */
169 #define TC_BEGIN        (1 << 22)
170 #define TC_END          (1 << 23)
171 #define TC_EOF          (1 << 24)
172 #define TC_VARIABLE     (1 << 25)
173 #define TC_ARRAY        (1 << 26)
174 #define TC_FUNCTION     (1 << 27)
175 #define TC_STRING       (1 << 28)
176 #define TC_NUMBER       (1 << 29)
177
178 #define TC_UOPPRE  (TC_UOPPRE1 | TC_UOPPRE2)
179
180 /* combined token classes */
181 #define TC_BINOP   (TC_BINOPX | TC_COMMA | TC_PIPE | TC_IN)
182 #define TC_UNARYOP (TC_UOPPRE | TC_UOPPOST)
183 #define TC_OPERAND (TC_VARIABLE | TC_ARRAY | TC_FUNCTION \
184                    | TC_BUILTIN | TC_GETLINE | TC_SEQSTART | TC_STRING | TC_NUMBER)
185
186 #define TC_STATEMNT (TC_STATX | TC_WHILE)
187 #define TC_OPTERM  (TC_SEMICOL | TC_NEWLINE)
188
189 /* word tokens, cannot mean something else if not expected */
190 #define TC_WORD    (TC_IN | TC_STATEMNT | TC_ELSE | TC_BUILTIN \
191                    | TC_GETLINE | TC_FUNCDECL | TC_BEGIN | TC_END)
192
193 /* discard newlines after these */
194 #define TC_NOTERM  (TC_COMMA | TC_GRPSTART | TC_GRPTERM \
195                    | TC_BINOP | TC_OPTERM)
196
197 /* what can expression begin with */
198 #define TC_OPSEQ   (TC_OPERAND | TC_UOPPRE | TC_REGEXP)
199 /* what can group begin with */
200 #define TC_GRPSEQ  (TC_OPSEQ | TC_OPTERM | TC_STATEMNT | TC_GRPSTART)
201
202 /* if previous token class is CONCAT1 and next is CONCAT2, concatenation */
203 /* operator is inserted between them */
204 #define TC_CONCAT1 (TC_VARIABLE | TC_ARRTERM | TC_SEQTERM \
205                    | TC_STRING | TC_NUMBER | TC_UOPPOST)
206 #define TC_CONCAT2 (TC_OPERAND | TC_UOPPRE)
207
208 #define OF_RES1    0x010000
209 #define OF_RES2    0x020000
210 #define OF_STR1    0x040000
211 #define OF_STR2    0x080000
212 #define OF_NUM1    0x100000
213 #define OF_CHECKED 0x200000
214
215 /* combined operator flags */
216 #define xx      0
217 #define xV      OF_RES2
218 #define xS      (OF_RES2 | OF_STR2)
219 #define Vx      OF_RES1
220 #define VV      (OF_RES1 | OF_RES2)
221 #define Nx      (OF_RES1 | OF_NUM1)
222 #define NV      (OF_RES1 | OF_NUM1 | OF_RES2)
223 #define Sx      (OF_RES1 | OF_STR1)
224 #define SV      (OF_RES1 | OF_STR1 | OF_RES2)
225 #define SS      (OF_RES1 | OF_STR1 | OF_RES2 | OF_STR2)
226
227 #define OPCLSMASK 0xFF00
228 #define OPNMASK   0x007F
229
230 /* operator priority is a highest byte (even: r->l, odd: l->r grouping)
231  * For builtins it has different meaning: n n s3 s2 s1 v3 v2 v1,
232  * n - min. number of args, vN - resolve Nth arg to var, sN - resolve to string
233  */
/* Place a priority (or builtin argument descriptor) in the top byte of an
 * info word. The cast to uint32_t happens before the shift: descriptors such
 * as P(0x83) set bit 31, and shifting a signed int into its sign bit is
 * undefined behavior. The argument is parenthesized so that a compound
 * expression is shifted as a whole. */
#define P(x)      ((uint32_t)(x) << 24)
235 #define PRIMASK   0x7F000000
236 #define PRIMASK2  0x7E000000
237
238 /* Operation classes */
239
240 #define SHIFT_TIL_THIS  0x0600
241 #define RECUR_FROM_THIS 0x1000
242
243 enum {
244         OC_DELETE = 0x0100,     OC_EXEC = 0x0200,       OC_NEWSOURCE = 0x0300,
245         OC_PRINT = 0x0400,      OC_PRINTF = 0x0500,     OC_WALKINIT = 0x0600,
246
247         OC_BR = 0x0700,         OC_BREAK = 0x0800,      OC_CONTINUE = 0x0900,
248         OC_EXIT = 0x0a00,       OC_NEXT = 0x0b00,       OC_NEXTFILE = 0x0c00,
249         OC_TEST = 0x0d00,       OC_WALKNEXT = 0x0e00,
250
251         OC_BINARY = 0x1000,     OC_BUILTIN = 0x1100,    OC_COLON = 0x1200,
252         OC_COMMA = 0x1300,      OC_COMPARE = 0x1400,    OC_CONCAT = 0x1500,
253         OC_FBLTIN = 0x1600,     OC_FIELD = 0x1700,      OC_FNARG = 0x1800,
254         OC_FUNC = 0x1900,       OC_GETLINE = 0x1a00,    OC_IN = 0x1b00,
255         OC_LAND = 0x1c00,       OC_LOR = 0x1d00,        OC_MATCH = 0x1e00,
256         OC_MOVE = 0x1f00,       OC_PGETLINE = 0x2000,   OC_REGEXP = 0x2100,
257         OC_REPLACE = 0x2200,    OC_RETURN = 0x2300,     OC_SPRINTF = 0x2400,
258         OC_TERNARY = 0x2500,    OC_UNARY = 0x2600,      OC_VAR = 0x2700,
259         OC_DONE = 0x2800,
260
261         ST_IF = 0x3000,         ST_DO = 0x3100,         ST_FOR = 0x3200,
262         ST_WHILE = 0x3300
263 };
264
265 /* simple builtins */
266 enum {
267         F_in,   F_rn,   F_co,   F_ex,   F_lg,   F_si,   F_sq,   F_sr,
268         F_ti,   F_le,   F_sy,   F_ff,   F_cl
269 };
270
271 /* builtins */
272 enum {
273         B_a2,   B_ix,   B_ma,   B_sp,   B_ss,   B_ti,   B_mt,   B_lo,   B_up,
274         B_ge,   B_gs,   B_su,
275         B_an,   B_co,   B_ls,   B_or,   B_rs,   B_xo,
276 };
277
278 /* tokens and their corresponding info values */
279
280 #define NTC     "\377"  /* switch to next token class (tc<<1) */
281 #define NTCC    '\377'
282
283 #define OC_B    OC_BUILTIN
284
285 static const char tokenlist[] ALIGN1 =
286         "\1("       NTC
287         "\1)"       NTC
288         "\1/"       NTC                                 /* REGEXP */
289         "\2>>"      "\1>"       "\1|"       NTC         /* OUTRDR */
290         "\2++"      "\2--"      NTC                     /* UOPPOST */
291         "\2++"      "\2--"      "\1$"       NTC         /* UOPPRE1 */
292         "\2=="      "\1="       "\2+="      "\2-="      /* BINOPX */
293         "\2*="      "\2/="      "\2%="      "\2^="
294         "\1+"       "\1-"       "\3**="     "\2**"
295         "\1/"       "\1%"       "\1^"       "\1*"
296         "\2!="      "\2>="      "\2<="      "\1>"
297         "\1<"       "\2!~"      "\1~"       "\2&&"
298         "\2||"      "\1?"       "\1:"       NTC
299         "\2in"      NTC
300         "\1,"       NTC
301         "\1|"       NTC
302         "\1+"       "\1-"       "\1!"       NTC         /* UOPPRE2 */
303         "\1]"       NTC
304         "\1{"       NTC
305         "\1}"       NTC
306         "\1;"       NTC
307         "\1\n"      NTC
308         "\2if"      "\2do"      "\3for"     "\5break"   /* STATX */
309         "\10continue"           "\6delete"  "\5print"
310         "\6printf"  "\4next"    "\10nextfile"
311         "\6return"  "\4exit"    NTC
312         "\5while"   NTC
313         "\4else"    NTC
314
315         "\3and"     "\5compl"   "\6lshift"  "\2or"
316         "\6rshift"  "\3xor"
317         "\5close"   "\6system"  "\6fflush"  "\5atan2"   /* BUILTIN */
318         "\3cos"     "\3exp"     "\3int"     "\3log"
319         "\4rand"    "\3sin"     "\4sqrt"    "\5srand"
320         "\6gensub"  "\4gsub"    "\5index"   "\6length"
321         "\5match"   "\5split"   "\7sprintf" "\3sub"
322         "\6substr"  "\7systime" "\10strftime" "\6mktime"
323         "\7tolower" "\7toupper" NTC
324         "\7getline" NTC
325         "\4func"    "\10function"   NTC
326         "\5BEGIN"   NTC
327         "\3END"     "\0"
328         ;
329
330 static const uint32_t tokeninfo[] = {
331         0,
332         0,
333         OC_REGEXP,
334         xS|'a',     xS|'w',     xS|'|',
335         OC_UNARY|xV|P(9)|'p',       OC_UNARY|xV|P(9)|'m',
336         OC_UNARY|xV|P(9)|'P',       OC_UNARY|xV|P(9)|'M',
337             OC_FIELD|xV|P(5),
338         OC_COMPARE|VV|P(39)|5,      OC_MOVE|VV|P(74),
339             OC_REPLACE|NV|P(74)|'+',    OC_REPLACE|NV|P(74)|'-',
340         OC_REPLACE|NV|P(74)|'*',    OC_REPLACE|NV|P(74)|'/',
341             OC_REPLACE|NV|P(74)|'%',    OC_REPLACE|NV|P(74)|'&',
342         OC_BINARY|NV|P(29)|'+',     OC_BINARY|NV|P(29)|'-',
343             OC_REPLACE|NV|P(74)|'&',    OC_BINARY|NV|P(15)|'&',
344         OC_BINARY|NV|P(25)|'/',     OC_BINARY|NV|P(25)|'%',
345             OC_BINARY|NV|P(15)|'&',     OC_BINARY|NV|P(25)|'*',
346         OC_COMPARE|VV|P(39)|4,      OC_COMPARE|VV|P(39)|3,
347             OC_COMPARE|VV|P(39)|0,      OC_COMPARE|VV|P(39)|1,
348         OC_COMPARE|VV|P(39)|2,      OC_MATCH|Sx|P(45)|'!',
349             OC_MATCH|Sx|P(45)|'~',      OC_LAND|Vx|P(55),
350         OC_LOR|Vx|P(59),            OC_TERNARY|Vx|P(64)|'?',
351             OC_COLON|xx|P(67)|':',
352         OC_IN|SV|P(49),
353         OC_COMMA|SS|P(80),
354         OC_PGETLINE|SV|P(37),
355         OC_UNARY|xV|P(19)|'+',      OC_UNARY|xV|P(19)|'-',
356             OC_UNARY|xV|P(19)|'!',
357         0,
358         0,
359         0,
360         0,
361         0,
362         ST_IF,          ST_DO,          ST_FOR,         OC_BREAK,
363         OC_CONTINUE,                    OC_DELETE|Vx,   OC_PRINT,
364         OC_PRINTF,      OC_NEXT,        OC_NEXTFILE,
365         OC_RETURN|Vx,   OC_EXIT|Nx,
366         ST_WHILE,
367         0,
368
369         OC_B|B_an|P(0x83), OC_B|B_co|P(0x41), OC_B|B_ls|P(0x83), OC_B|B_or|P(0x83),
370         OC_B|B_rs|P(0x83), OC_B|B_xo|P(0x83),
371         OC_FBLTIN|Sx|F_cl, OC_FBLTIN|Sx|F_sy, OC_FBLTIN|Sx|F_ff, OC_B|B_a2|P(0x83),
372         OC_FBLTIN|Nx|F_co, OC_FBLTIN|Nx|F_ex, OC_FBLTIN|Nx|F_in, OC_FBLTIN|Nx|F_lg,
373         OC_FBLTIN|F_rn,    OC_FBLTIN|Nx|F_si, OC_FBLTIN|Nx|F_sq, OC_FBLTIN|Nx|F_sr,
374         OC_B|B_ge|P(0xd6), OC_B|B_gs|P(0xb6), OC_B|B_ix|P(0x9b), OC_FBLTIN|Sx|F_le,
375         OC_B|B_ma|P(0x89), OC_B|B_sp|P(0x8b), OC_SPRINTF,        OC_B|B_su|P(0xb6),
376         OC_B|B_ss|P(0x8f), OC_FBLTIN|F_ti,    OC_B|B_ti|P(0x0b), OC_B|B_mt|P(0x0b),
377         OC_B|B_lo|P(0x49), OC_B|B_up|P(0x49),
378         OC_GETLINE|SV|P(0),
379         0,      0,
380         0,
381         0
382 };
383
384 /* internal variable names and their initial values       */
385 /* asterisk marks SPECIAL vars; $ is just no-named Field0 */
386 enum {
387         CONVFMT,    OFMT,       FS,         OFS,
388         ORS,        RS,         RT,         FILENAME,
389         SUBSEP,     F0,         ARGIND,     ARGC,
390         ARGV,       ERRNO,      FNR,        NR,
391         NF,         IGNORECASE, ENVIRON,    NUM_INTERNAL_VARS
392 };
393
394 static const char vNames[] ALIGN1 =
395         "CONVFMT\0" "OFMT\0"    "FS\0*"     "OFS\0"
396         "ORS\0"     "RS\0*"     "RT\0"      "FILENAME\0"
397         "SUBSEP\0"  "$\0*"      "ARGIND\0"  "ARGC\0"
398         "ARGV\0"    "ERRNO\0"   "FNR\0"     "NR\0"
399         "NF\0*"     "IGNORECASE\0*" "ENVIRON\0" "\0";
400
401 static const char vValues[] ALIGN1 =
402         "%.6g\0"    "%.6g\0"    " \0"       " \0"
403         "\n\0"      "\n\0"      "\0"        "\0"
404         "\034\0"    "\0"        "\377";
405
406 /* hash size may grow to these values */
407 #define FIRST_PRIME 61
408 static const uint16_t PRIMES[] ALIGN2 = { 251, 1021, 4093, 16381, 65521 };
409
410
411 /* Globals. Split in two parts so that first one is addressed
412  * with (mostly short) negative offsets.
413  * NB: it's unsafe to put members of type "double"
414  * into globals2 (gcc may fail to align them).
415  */
416 struct globals {
417         double t_double;
418         chain beginseq, mainseq, endseq;
419         chain *seq;
420         node *break_ptr, *continue_ptr;
421         rstream *iF;
422         xhash *vhash, *ahash, *fdhash, *fnhash;
423         const char *g_progname;
424         int g_lineno;
425         int nfields;
426         int maxfields; /* used in fsrealloc() only */
427         var *Fields;
428         nvblock *g_cb;
429         char *g_pos;
430         char *g_buf;
431         smallint icase;
432         smallint exiting;
433         smallint nextrec;
434         smallint nextfile;
435         smallint is_f0_split;
436 };
437 struct globals2 {
438         uint32_t t_info; /* often used */
439         uint32_t t_tclass;
440         char *t_string;
441         int t_lineno;
442         int t_rollback;
443
444         var *intvar[NUM_INTERNAL_VARS]; /* often used */
445
446         /* former statics from various functions */
447         char *split_f0__fstrings;
448
449         uint32_t next_token__save_tclass;
450         uint32_t next_token__save_info;
451         uint32_t next_token__ltclass;
452         smallint next_token__concat_inserted;
453
454         smallint next_input_file__files_happen;
455         rstream next_input_file__rsm;
456
457         var *evaluate__fnargs;
458         unsigned evaluate__seed;
459         regex_t evaluate__sreg;
460
461         var ptest__v;
462
463         tsplitter exec_builtin__tspl;
464
465         /* biggest and least used members go last */
466         tsplitter fsplitter, rsplitter;
467 };
468 #define G1 (ptr_to_globals[-1])
469 #define G (*(struct globals2 *)ptr_to_globals)
470 /* For debug. nm --size-sort awk.o | grep -vi ' [tr] ' */
471 /*char G1size[sizeof(G1)]; - 0x74 */
472 /*char Gsize[sizeof(G)]; - 0x1c4 */
473 /* Trying to keep most of members accessible with short offsets: */
474 /*char Gofs_seed[offsetof(struct globals2, evaluate__seed)]; - 0x90 */
475 #define t_double     (G1.t_double    )
476 #define beginseq     (G1.beginseq    )
477 #define mainseq      (G1.mainseq     )
478 #define endseq       (G1.endseq      )
479 #define seq          (G1.seq         )
480 #define break_ptr    (G1.break_ptr   )
481 #define continue_ptr (G1.continue_ptr)
482 #define iF           (G1.iF          )
483 #define vhash        (G1.vhash       )
484 #define ahash        (G1.ahash       )
485 #define fdhash       (G1.fdhash      )
486 #define fnhash       (G1.fnhash      )
487 #define g_progname   (G1.g_progname  )
488 #define g_lineno     (G1.g_lineno    )
489 #define nfields      (G1.nfields     )
490 #define maxfields    (G1.maxfields   )
491 #define Fields       (G1.Fields      )
492 #define g_cb         (G1.g_cb        )
493 #define g_pos        (G1.g_pos       )
494 #define g_buf        (G1.g_buf       )
495 #define icase        (G1.icase       )
496 #define exiting      (G1.exiting     )
497 #define nextrec      (G1.nextrec     )
498 #define nextfile     (G1.nextfile    )
499 #define is_f0_split  (G1.is_f0_split )
500 #define t_info       (G.t_info      )
501 #define t_tclass     (G.t_tclass    )
502 #define t_string     (G.t_string    )
503 #define t_lineno     (G.t_lineno    )
504 #define t_rollback   (G.t_rollback  )
505 #define intvar       (G.intvar      )
506 #define fsplitter    (G.fsplitter   )
507 #define rsplitter    (G.rsplitter   )
508 #define INIT_G() do { \
509         SET_PTR_TO_GLOBALS((char*)xzalloc(sizeof(G1)+sizeof(G)) + sizeof(G1)); \
510         G.next_token__ltclass = TC_OPTERM; \
511         G.evaluate__seed = 1; \
512 } while (0)
513
514
515 /* function prototypes */
516 static void handle_special(var *);
517 static node *parse_expr(uint32_t);
518 static void chain_group(void);
519 static var *evaluate(node *, var *);
520 static rstream *next_input_file(void);
521 static int fmt_num(char *, int, const char *, double, int);
522 static int awk_exit(int) NORETURN;
523
524 /* ---- error handling ---- */
525
526 static const char EMSG_INTERNAL_ERROR[] ALIGN1 = "Internal error";
527 static const char EMSG_UNEXP_EOS[] ALIGN1 = "Unexpected end of string";
528 static const char EMSG_UNEXP_TOKEN[] ALIGN1 = "Unexpected token";
529 static const char EMSG_DIV_BY_ZERO[] ALIGN1 = "Division by zero";
530 static const char EMSG_INV_FMT[] ALIGN1 = "Invalid format specifier";
531 static const char EMSG_TOO_FEW_ARGS[] ALIGN1 = "Too few arguments for builtin";
532 static const char EMSG_NOT_ARRAY[] ALIGN1 = "Not an array";
533 static const char EMSG_POSSIBLE_ERROR[] ALIGN1 = "Possible syntax error";
534 static const char EMSG_UNDEF_FUNC[] ALIGN1 = "Call to undefined function";
535 #if !ENABLE_FEATURE_AWK_LIBM
536 static const char EMSG_NO_MATH[] ALIGN1 = "Math support is not compiled in";
537 #endif
538
539 static void zero_out_var(var *vp)
540 {
541         memset(vp, 0, sizeof(*vp));
542 }
543
544 static void syntax_error(const char *message) NORETURN;
545 static void syntax_error(const char *message)
546 {
547         bb_error_msg_and_die("%s:%i: %s", g_progname, g_lineno, message);
548 }
549
550 /* ---- hash stuff ---- */
551
/* Hash a NUL-terminated name: h = h * 63 + byte for every byte, with
 * unsigned wrap-around. The caller reduces the result modulo the table size. */
static unsigned hashidx(const char *name)
{
	unsigned h = 0;
	const char *p;

	for (p = name; *p != '\0'; p++)
		h = h * 63u + (unsigned)*p;
	return h;
}
560
561 /* create new hash */
562 static xhash *hash_init(void)
563 {
564         xhash *newhash;
565
566         newhash = xzalloc(sizeof(*newhash));
567         newhash->csize = FIRST_PRIME;
568         newhash->items = xzalloc(FIRST_PRIME * sizeof(newhash->items[0]));
569
570         return newhash;
571 }
572
573 /* find item in hash, return ptr to data, NULL if not found */
574 static void *hash_search(xhash *hash, const char *name)
575 {
576         hash_item *hi;
577
578         hi = hash->items[hashidx(name) % hash->csize];
579         while (hi) {
580                 if (strcmp(hi->name, name) == 0)
581                         return &hi->data;
582                 hi = hi->next;
583         }
584         return NULL;
585 }
586
587 /* grow hash if it becomes too big */
588 static void hash_rebuild(xhash *hash)
589 {
590         unsigned newsize, i, idx;
591         hash_item **newitems, *hi, *thi;
592
593         if (hash->nprime == ARRAY_SIZE(PRIMES))
594                 return;
595
596         newsize = PRIMES[hash->nprime++];
597         newitems = xzalloc(newsize * sizeof(newitems[0]));
598
599         for (i = 0; i < hash->csize; i++) {
600                 hi = hash->items[i];
601                 while (hi) {
602                         thi = hi;
603                         hi = thi->next;
604                         idx = hashidx(thi->name) % newsize;
605                         thi->next = newitems[idx];
606                         newitems[idx] = thi;
607                 }
608         }
609
610         free(hash->items);
611         hash->csize = newsize;
612         hash->items = newitems;
613 }
614
615 /* find item in hash, add it if necessary. Return ptr to data */
616 static void *hash_find(xhash *hash, const char *name)
617 {
618         hash_item *hi;
619         unsigned idx;
620         int l;
621
622         hi = hash_search(hash, name);
623         if (!hi) {
624                 if (++hash->nel / hash->csize > 10)
625                         hash_rebuild(hash);
626
627                 l = strlen(name) + 1;
628                 hi = xzalloc(sizeof(*hi) + l);
629                 strcpy(hi->name, name);
630
631                 idx = hashidx(name) % hash->csize;
632                 hi->next = hash->items[idx];
633                 hash->items[idx] = hi;
634                 hash->glen += l;
635         }
636         return &hi->data;
637 }
638
/* Typed find-or-create wrappers around hash_find():
 * findvar() works on an explicit table; newvar/newfile/newfunc use the
 * global vhash, fdhash and fnhash tables respectively. A newly created
 * entry is zero-filled. */
639 #define findvar(hash, name) ((var*)    hash_find((hash), (name)))
640 #define newvar(name)        ((var*)    hash_find(vhash, (name)))
641 #define newfile(name)       ((rstream*)hash_find(fdhash, (name)))
642 #define newfunc(name)       ((func*)   hash_find(fnhash, (name)))
643
644 static void hash_remove(xhash *hash, const char *name)
645 {
646         hash_item *hi, **phi;
647
648         phi = &hash->items[hashidx(name) % hash->csize];
649         while (*phi) {
650                 hi = *phi;
651                 if (strcmp(hi->name, name) == 0) {
652                         hash->glen -= (strlen(name) + 1);
653                         hash->nel--;
654                         *phi = hi->next;
655                         free(hi);
656                         break;
657                 }
658                 phi = &hi->next;
659         }
660 }
661
662 /* ------ some useful functions ------ */
663
664 static char *skip_spaces(char *p)
665 {
666         while (1) {
667                 if (*p == '\\' && p[1] == '\n') {
668                         p++;
669                         t_lineno++;
670                 } else if (*p != ' ' && *p != '\t') {
671                         break;
672                 }
673                 p++;
674         }
675         return p;
676 }
677
/* Return the word *s currently points at, and move *s to the byte
 * following that word's terminating NUL. */
static char *nextword(char **s)
{
	char *word = *s;
	char *scan = word;

	while (*scan != '\0')
		scan++;
	*s = scan + 1;
	return word;
}
686
/* Fetch one character from *s, decoding a backslash escape if present,
 * and advance *s past what was consumed.
 * When bb_process_escape_sequence() hands back a backslash without
 * consuming anything, the character after the backslash is returned as is. */
static char nextchar(char **s)
{
	char ch = **s;
	char *after;

	++*s;
	if (ch != '\\')
		return ch;

	after = *s;
	ch = bb_process_escape_sequence((const char**)s);
	if (ch == '\\' && *s == after) {
		ch = **s;
		++*s;
	}
	return ch;
}
699
700 static ALWAYS_INLINE int isalnum_(int c)
701 {
702         return (isalnum(c) || c == '_');
703 }
704
/* Parse a number starting at *pp and leave *pp just past what was consumed.
 * With ENABLE_DESKTOP, integers with a leading 0 are first tried as hex
 * (0x...) or octal (0...) via strtoull; everything else goes to strtod. */
static double my_strtod(char **pp)
{
	char *start = *pp;
#if ENABLE_DESKTOP
	if (start[0] == '0') {
		/* Might be hex or octal integer: 0x123abc or 07777 */
		char marker = (start[1] | 0x20);
		int is_hex = (marker == 'x');

		if (is_hex || isdigit(start[1])) {
			unsigned long long ull = strtoull(start, pp, 0);

			if (is_hex)
				return ull;
			marker = **pp;
			if (!isdigit(marker) && marker != '.')
				return ull;
			/* Otherwise it may be a floating number, e.g.
			 * 009.123 (*pp points to '9')
			 * 000.123 (*pp points to '.')
			 * so reparse the whole thing with strtod below.
			 */
		}
	}
#endif
	return strtod(start, pp);
}
729
730 /* -------- working with variables (set/get/copy/etc) -------- */
731
732 static xhash *iamarray(var *v)
733 {
734         var *a = v;
735
736         while (a->type & VF_CHILD)
737                 a = a->x.parent;
738
739         if (!(a->type & VF_ARRAY)) {
740                 a->type |= VF_ARRAY;
741                 a->x.array = hash_init();
742         }
743         return a->x.array;
744 }
745
746 static void clear_array(xhash *array)
747 {
748         unsigned i;
749         hash_item *hi, *thi;
750
751         for (i = 0; i < array->csize; i++) {
752                 hi = array->items[i];
753                 while (hi) {
754                         thi = hi;
755                         hi = hi->next;
756                         free(thi->data.v.string);
757                         free(thi);
758                 }
759                 array->items[i] = NULL;
760         }
761         array->glen = array->nel = 0;
762 }
763
764 /* clear a variable */
765 static var *clrvar(var *v)
766 {
767         if (!(v->type & VF_FSTR))
768                 free(v->string);
769
770         v->type &= VF_DONTTOUCH;
771         v->type |= VF_DIRTY;
772         v->string = NULL;
773         return v;
774 }
775
776 /* assign string value to variable */
777 static var *setvar_p(var *v, char *value)
778 {
779         clrvar(v);
780         v->string = value;
781         handle_special(v);
782         return v;
783 }
784
785 /* same as setvar_p but make a copy of string */
786 static var *setvar_s(var *v, const char *value)
787 {
788         return setvar_p(v, (value && *value) ? xstrdup(value) : NULL);
789 }
790
791 /* same as setvar_s but sets USER flag */
792 static var *setvar_u(var *v, const char *value)
793 {
794         v = setvar_s(v, value);
795         v->type |= VF_USER;
796         return v;
797 }
798
799 /* set array element to user string */
800 static void setari_u(var *a, int idx, const char *s)
801 {
802         var *v;
803
804         v = findvar(iamarray(a), itoa(idx));
805         setvar_u(v, s);
806 }
807
808 /* assign numeric value to variable */
809 static var *setvar_i(var *v, double value)
810 {
811         clrvar(v);
812         v->type |= VF_NUMBER;
813         v->number = value;
814         handle_special(v);
815         return v;
816 }
817
818 static const char *getvar_s(var *v)
819 {
820         /* if v is numeric and has no cached string, convert it to string */
821         if ((v->type & (VF_NUMBER | VF_CACHED)) == VF_NUMBER) {
822                 fmt_num(g_buf, MAXVARFMT, getvar_s(intvar[CONVFMT]), v->number, TRUE);
823                 v->string = xstrdup(g_buf);
824                 v->type |= VF_CACHED;
825         }
826         return (v->string == NULL) ? "" : v->string;
827 }
828
829 static double getvar_i(var *v)
830 {
831         char *s;
832
833         if ((v->type & (VF_NUMBER | VF_CACHED)) == 0) {
834                 v->number = 0;
835                 s = v->string;
836                 if (s && *s) {
837                         debug_printf_eval("getvar_i: '%s'->", s);
838                         v->number = my_strtod(&s);
839                         debug_printf_eval("%f (s:'%s')\n", v->number, s);
840                         if (v->type & VF_USER) {
841                                 s = skip_spaces(s);
842                                 if (*s != '\0')
843                                         v->type &= ~VF_USER;
844                         }
845                 } else {
846                         debug_printf_eval("getvar_i: '%s'->zero\n", s);
847                         v->type &= ~VF_USER;
848                 }
849                 v->type |= VF_CACHED;
850         }
851         debug_printf_eval("getvar_i: %f\n", v->number);
852         return v->number;
853 }
854
855 /* Used for operands of bitwise ops */
856 static unsigned long getvar_i_int(var *v)
857 {
858         double d = getvar_i(v);
859
860         /* Casting doubles to longs is undefined for values outside
861          * of target type range. Try to widen it as much as possible */
862         if (d >= 0)
863                 return (unsigned long)d;
864         /* Why? Think about d == -4294967295.0 (assuming 32bit longs) */
865         return - (long) (unsigned long) (-d);
866 }
867
868 static var *copyvar(var *dest, const var *src)
869 {
870         if (dest != src) {
871                 clrvar(dest);
872                 dest->type |= (src->type & ~(VF_DONTTOUCH | VF_FSTR));
873                 debug_printf_eval("copyvar: number:%f string:'%s'\n", src->number, src->string);
874                 dest->number = src->number;
875                 if (src->string)
876                         dest->string = xstrdup(src->string);
877         }
878         handle_special(dest);
879         return dest;
880 }
881
882 static var *incvar(var *v)
883 {
884         return setvar_i(v, getvar_i(v) + 1.0);
885 }
886
887 /* return true if v is number or numeric string */
888 static int is_numeric(var *v)
889 {
890         getvar_i(v);
891         return ((v->type ^ VF_DIRTY) & (VF_NUMBER | VF_USER | VF_DIRTY));
892 }
893
894 /* return 1 when value of v corresponds to true, 0 otherwise */
895 static int istrue(var *v)
896 {
897         if (is_numeric(v))
898                 return (v->number != 0);
899         return (v->string && v->string[0]);
900 }
901
902 /* temporary variables allocator. Last allocated should be first freed */
903 static var *nvalloc(int n)
904 {
905         nvblock *pb = NULL;
906         var *v, *r;
907         int size;
908
909         while (g_cb) {
910                 pb = g_cb;
911                 if ((g_cb->pos - g_cb->nv) + n <= g_cb->size)
912                         break;
913                 g_cb = g_cb->next;
914         }
915
916         if (!g_cb) {
917                 size = (n <= MINNVBLOCK) ? MINNVBLOCK : n;
918                 g_cb = xzalloc(sizeof(nvblock) + size * sizeof(var));
919                 g_cb->size = size;
920                 g_cb->pos = g_cb->nv;
921                 g_cb->prev = pb;
922                 /*g_cb->next = NULL; - xzalloc did it */
923                 if (pb)
924                         pb->next = g_cb;
925         }
926
927         v = r = g_cb->pos;
928         g_cb->pos += n;
929
930         while (v < g_cb->pos) {
931                 v->type = 0;
932                 v->string = NULL;
933                 v++;
934         }
935
936         return r;
937 }
938
939 static void nvfree(var *v)
940 {
941         var *p;
942
943         if (v < g_cb->nv || v >= g_cb->pos)
944                 syntax_error(EMSG_INTERNAL_ERROR);
945
946         for (p = v; p < g_cb->pos; p++) {
947                 if ((p->type & (VF_ARRAY | VF_CHILD)) == VF_ARRAY) {
948                         clear_array(iamarray(p));
949                         free(p->x.array->items);
950                         free(p->x.array);
951                 }
952                 if (p->type & VF_WALK) {
953                         walker_list *n;
954                         walker_list *w = p->x.walker;
955                         debug_printf_walker("nvfree: freeing walker @%p\n", &p->x.walker);
956                         p->x.walker = NULL;
957                         while (w) {
958                                 n = w->prev;
959                                 debug_printf_walker(" free(%p)\n", w);
960                                 free(w);
961                                 w = n;
962                         }
963                 }
964                 clrvar(p);
965         }
966
967         g_cb->pos = v;
968         while (g_cb->prev && g_cb->pos == g_cb->nv) {
969                 g_cb = g_cb->prev;
970         }
971 }
972
973 /* ------- awk program text parsing ------- */
974
975 /* Parse the next token found at global g_pos and publish it through the
976  * globals t_tclass, t_info, t_string and t_double.
977  * If the token's class is not in 'expected', give up via syntax_error().
 *
 * The token comes from one of three places, tried in this order:
 *  - the previous token once more, when rollback_token() set t_rollback;
 *  - the token that was held back while an inserted concatenation
 *    operator was being returned (concat_inserted);
 *  - the program text at g_pos.
 *
 * String literals, regexps and names are terminated in place, so the
 * program text buffer is modified while it is scanned.
 *
 * Returns the class of the token.
 */
978 static uint32_t next_token(uint32_t expected)
979 {
/* State that is carried over from one call to the next is kept in G */
980 #define concat_inserted (G.next_token__concat_inserted)
981 #define save_tclass     (G.next_token__save_tclass)
982 #define save_info       (G.next_token__save_info)
983 /* Initialized to TC_OPTERM: */
984 #define ltclass         (G.next_token__ltclass)
985
        /* p: read position in the program text; s: write position used
         * when a token is copied back over the text */
986         char *p, *s;
        /* tl/ti walk tokenlist and tokeninfo side by side; tc is the
         * class of the token being recognized */
987         const char *tl;
988         uint32_t tc;
989         const uint32_t *ti;
990
991         if (t_rollback) {
                /* t_tclass and t_info still describe the previous token:
                 * nothing to scan, it is simply handed out again */
992                 t_rollback = FALSE;
993
994         } else if (concat_inserted) {
                /* the previous call returned the inserted concatenation
                 * operator; now deliver the token that was saved then */
995                 concat_inserted = FALSE;
996                 t_tclass = save_tclass;
997                 t_info = save_info;
998
999         } else {
1000                 p = g_pos;
1001  readnext:
1002                 p = skip_spaces(p);
1003                 g_lineno = t_lineno;
                /* a '#' comment extends up to, but not including, the newline */
1004                 if (*p == '#')
1005                         while (*p != '\n' && *p != '\0')
1006                                 p++;
1007
                /* only the line counter is bumped here; p is not advanced */
1008                 if (*p == '\n')
1009                         t_lineno++;
1010
1011                 if (*p == '\0') {
1012                         tc = TC_EOF;
1013
1014                 } else if (*p == '\"') {
1015                         /* it's a string */
                        /* The literal is rewritten over itself: each
                         * nextchar() call yields one character and moves
                         * pp forward. An unterminated literal (NUL or
                         * newline before the closing quote) is an error. */
1016                         t_string = s = ++p;
1017                         while (*p != '\"') {
1018                                 char *pp = p;
1019                                 if (*p == '\0' || *p == '\n')
1020                                         syntax_error(EMSG_UNEXP_EOS);
1021                                 *s++ = nextchar(&pp);
1022                                 p = pp;
1023                         }
1024                         p++;
1025                         *s = '\0';
1026                         tc = TC_STRING;
1027
1028                 } else if ((expected & TC_REGEXP) && *p == '/') {
1029                         /* it's regexp */
                        /* '/' starts a regexp only where the caller allows
                         * TC_REGEXP. The body is copied over itself up to
                         * the closing '/'. */
1030                         t_string = s = ++p;
1031                         while (*p != '/') {
1032                                 if (*p == '\0' || *p == '\n')
1033                                         syntax_error(EMSG_UNEXP_EOS);
1034                                 *s = *p++;
1035                                 if (*s++ == '\\') {
                                        /* The stored backslash is replaced by the
                                         * result of bb_process_escape_sequence().
                                         * If the escaped char is itself a backslash,
                                         * a second backslash is stored as well
                                         * (presumably so the regex compiler still
                                         * sees an escaped backslash - confirm).
                                         * If no sequence was consumed (pp did not
                                         * move), the next char is copied verbatim. */
1036                                         char *pp = p;
1037                                         s[-1] = bb_process_escape_sequence((const char **)&pp);
1038                                         if (*p == '\\')
1039                                                 *s++ = '\\';
1040                                         if (pp == p)
1041                                                 *s++ = *p++;
1042                                         else
1043                                                 p = pp;
1044                                 }
1045                         }
1046                         p++;
1047                         *s = '\0';
1048                         tc = TC_REGEXP;
1049
1050                 } else if (*p == '.' || isdigit(*p)) {
1051                         /* it's a number */
                        /* my_strtod() leaves pp behind the number; a '.'
                         * directly after it is rejected */
1052                         char *pp = p;
1053                         t_double = my_strtod(&pp);
1054                         p = pp;
1055                         if (*p == '.')
1056                                 syntax_error(EMSG_UNEXP_TOKEN);
1057                         tc = TC_NUMBER;
1058
1059                 } else {
1060                         /* search for something known */
                        /* tokenlist is a sequence of length-prefixed
                         * strings; a length byte equal to NTCC carries no
                         * string and switches to the next token class
                         * (tc is shifted left by one bit). tokeninfo holds
                         * one entry per string. */
1061                         tl = tokenlist;
1062                         tc = 0x00000001;
1063                         ti = tokeninfo;
1064                         while (*tl) {
1065                                 int l = (unsigned char) *tl++;
1066                                 if (l == (unsigned char) NTCC) {
1067                                         tc <<= 1;
1068                                         continue;
1069                                 }
1070                                 /* if token class is expected,
1071                                  * token matches,
1072                                  * and it's not a longer word,
1073                                  */
1074                                 if ((tc & (expected | TC_WORD | TC_NEWLINE))
1075                                  && strncmp(p, tl, l) == 0
1076                                  && !((tc & TC_WORD) && isalnum_(p[l]))
1077                                 ) {
1078                                         /* then this is what we are looking for */
1079                                         t_info = *ti;
1080                                         p += l;
1081                                         goto token_found;
1082                                 }
1083                                 ti++;
1084                                 tl += l;
1085                         }
1086                         /* not a known token */
1087
1088                         /* is it a name? (var/array/function) */
1089                         if (!isalnum_(*p))
1090                                 syntax_error(EMSG_UNEXP_TOKEN); /* no */
1091                         /* yes */
                        /* The name is moved one byte to the left, over the
                         * character preceding it, so that the terminating
                         * NUL lands on the old position of the name's last
                         * character and the character following the name
                         * stays intact. */
1092                         t_string = --p;
1093                         while (isalnum_(*++p)) {
1094                                 p[-1] = *p;
1095                         }
1096                         p[-1] = '\0';
1097                         tc = TC_VARIABLE;
1098                         /* also consume whitespace between functionname and bracket */
1099                         if (!(expected & TC_VARIABLE) || (expected & TC_ARRAY))
1100                                 p = skip_spaces(p);
                        /* name( is a function; the '(' itself is left
                         * unconsumed. name[ is an array element and the
                         * '[' is consumed. */
1101                         if (*p == '(') {
1102                                 tc = TC_FUNCTION;
1103                         } else {
1104                                 if (*p == '[') {
1105                                         p++;
1106                                         tc = TC_ARRAY;
1107                                 }
1108                         }
1109  token_found: ;
1110                 }
1111                 g_pos = p;
1112
1113                 /* skipping newlines in some cases */
                /* i.e. when the class of the previously returned token is
                 * in TC_NOTERM */
1114                 if ((ltclass & TC_NOTERM) && (tc & TC_NEWLINE))
1115                         goto readnext;
1116
1117                 /* insert concatenation operator when needed */
                /* The real token is parked in save_tclass/save_info and
                 * returned by the next call; this call reports a binary
                 * OC_CONCAT operator instead. */
1118                 if ((ltclass & TC_CONCAT1) && (tc & TC_CONCAT2) && (expected & TC_BINOP)) {
1119                         concat_inserted = TRUE;
1120                         save_tclass = tc;
1121                         save_info = t_info;
1122                         tc = TC_BINOP;
1123                         t_info = OC_CONCAT | SS | P(35);
1124                 }
1125
1126                 t_tclass = tc;
1127         }
        /* remember the class of the token being returned for the next call */
1128         ltclass = t_tclass;
1129
1130         /* Are we ready for this? */
1131         if (!(ltclass & expected))
1132                 syntax_error((ltclass & (TC_NEWLINE | TC_EOF)) ?
1133                                 EMSG_UNEXP_EOS : EMSG_UNEXP_TOKEN);
1134
1135         return ltclass;
1136 #undef concat_inserted
1137 #undef save_tclass
1138 #undef save_info
1139 #undef ltclass
1140 }
1141
/* Push the current token back: the next call to next_token() will not
 * scan any input but report the same token (t_tclass, t_info) again,
 * checked against that call's 'expected' mask. */
1142 static void rollback_token(void)
1143 {
1144         t_rollback = TRUE;
1145 }
1146
1147 static node *new_node(uint32_t info)
1148 {
1149         node *n;
1150
1151         n = xzalloc(sizeof(node));
1152         n->info = info;
1153         n->lineno = g_lineno;
1154         return n;
1155 }
1156
1157 static void mk_re_node(const char *s, node *n, regex_t *re)
1158 {
1159         n->info = OC_REGEXP;
1160         n->l.re = re;
1161         n->r.ire = re + 1;
1162         xregcomp(re, s, REG_EXTENDED);
1163         xregcomp(re + 1, s, REG_EXTENDED | REG_ICASE);
1164 }
1165
1166 static node *condition(void)
1167 {
1168         next_token(TC_SEQSTART);
1169         return parse_expr(TC_SEQTERM);
1170 }
1171
1172 /* Parse an expression that ends at a token whose class is in iexp and
1173  * return a pointer to the built subtree (NULL if nothing was parsed).
 * The terminator is eaten by parse_expr.
 *
 * The tree is grown below a dummy root node kept on the stack (sn).
 * While parsing, the a.n member of every node links it to the node it
 * hangs under; the operator branch follows these links toward the root
 * to find the place where a new operator has to be spliced in.
 */
1174 static node *parse_expr(uint32_t iexp)
1175 {
        /* sn: dummy root; cn: node added most recently; vn: scratch;
         * glptr: a getline node still waiting for a possible '<' */
1176         node sn;
1177         node *cn = &sn;
1178         node *vn, *glptr;
        /* tc: class of the current token; xtc: classes accepted next */
1179         uint32_t tc, xtc;
1180         var *v;
1181
        /* NOTE(review): PRIMASK as the root's info presumably guarantees
         * that the priority climb below stops at the root - confirm */
1182         sn.info = PRIMASK;
1183         sn.r.n = glptr = NULL;
1184         xtc = TC_OPERAND | TC_UOPPRE | TC_REGEXP | iexp;
1185
1186         while (!((tc = next_token(xtc)) & iexp)) {
1187
1188                 if (glptr && (t_info == (OC_COMPARE | VV | P(39) | 2))) {
1189                         /* input redirection (<) attached to glptr node */
1190                         cn = glptr->l.n = new_node(OC_CONCAT | SS | P(37));
1191                         cn->a.n = glptr;
1192                         xtc = TC_OPERAND | TC_UOPPRE;
1193                         glptr = NULL;
1194
1195                 } else if (tc & (TC_BINOP | TC_UOPPOST)) {
1196                         /* for binary and postfix-unary operators, jump back over
1197                          * previous operators with higher priority */
1198                         vn = cn;
                        /* the second condition also climbs over a node with
                         * exactly the same info when the operator is OC_COLON */
1199                         while (((t_info & PRIMASK) > (vn->a.n->info & PRIMASK2))
1200                             || ((t_info == vn->info) && ((t_info & OPCLSMASK) == OC_COLON))
1201                         ) {
1202                                 vn = vn->a.n;
1203                         }
                        /* the info stored in an OC_TERNARY node gets P(6)
                         * added to the value used for the climb above */
1204                         if ((t_info & OPCLSMASK) == OC_TERNARY)
1205                                 t_info += P(6);
                        /* the new node takes vn's place as right child of
                         * vn's parent ... */
1206                         cn = vn->a.n->r.n = new_node(t_info);
1207                         cn->a.n = vn->a.n;
1208                         if (tc & TC_BINOP) {
                                /* ... and vn becomes its left operand */
1209                                 cn->l.n = vn;
1210                                 xtc = TC_OPERAND | TC_UOPPRE | TC_REGEXP;
1211                                 if ((t_info & OPCLSMASK) == OC_PGETLINE) {
1212                                         /* it's a pipe */
1213                                         next_token(TC_GETLINE);
1214                                         /* give maximum priority to this pipe */
1215                                         cn->info &= ~PRIMASK;
1216                                         xtc = TC_OPERAND | TC_UOPPRE | TC_BINOP | iexp;
1217                                 }
1218                         } else {
                                /* postfix operator: vn is its only operand,
                                 * stored on the right */
1219                                 cn->r.n = vn;
1220                                 xtc = TC_OPERAND | TC_UOPPRE | TC_BINOP | iexp;
1221                         }
1222                         vn->a.n = cn;
1223
1224                 } else {
1225                         /* for operands and prefix-unary operators, attach them
1226                          * to last node */
1227                         vn = cn;
1228                         cn = vn->r.n = new_node(t_info);
1229                         cn->a.n = vn;
1230                         xtc = TC_OPERAND | TC_UOPPRE | TC_REGEXP;
1231                         if (tc & (TC_OPERAND | TC_REGEXP)) {
1232                                 xtc = TC_UOPPRE | TC_UOPPOST | TC_BINOP | TC_OPERAND | iexp;
1233                                 /* one should be very careful with switch on tclass -
1234                                  * only simple tclasses should be used! */
1235                                 switch (tc) {
1236                                 case TC_VARIABLE:
1237                                 case TC_ARRAY:
                                        /* a name present in ahash is referenced
                                         * by its index (OC_FNARG); any other name
                                         * refers to the var returned by newvar() */
1238                                         cn->info = OC_VAR;
1239                                         v = hash_search(ahash, t_string);
1240                                         if (v != NULL) {
1241                                                 cn->info = OC_FNARG;
1242                                                 cn->l.aidx = v->x.aidx;
1243                                         } else {
1244                                                 cn->l.v = newvar(t_string);
1245                                         }
                                        /* array element: the subscript, up to
                                         * TC_ARRTERM, goes to the right */
1246                                         if (tc & TC_ARRAY) {
1247                                                 cn->info |= xS;
1248                                                 cn->r.n = parse_expr(TC_ARRTERM);
1249                                         }
1250                                         break;
1251
1252                                 case TC_NUMBER:
1253                                 case TC_STRING:
                                        /* a constant lives in its own var */
1254                                         cn->info = OC_VAR;
1255                                         v = cn->l.v = xzalloc(sizeof(var));
1256                                         if (tc & TC_NUMBER)
1257                                                 setvar_i(v, t_double);
1258                                         else
1259                                                 setvar_s(v, t_string);
1260                                         break;
1261
1262                                 case TC_REGEXP:
                                        /* room for two regex_t: mk_re_node()
                                         * compiles into re[0] and re[1] */
1263                                         mk_re_node(t_string, cn, xzalloc(sizeof(regex_t)*2));
1264                                         break;
1265
1266                                 case TC_FUNCTION:
                                        /* call: callee on the right, the
                                         * parenthesized arguments on the left */
1267                                         cn->info = OC_FUNC;
1268                                         cn->r.f = newfunc(t_string);
1269                                         cn->l.n = condition();
1270                                         break;
1271
1272                                 case TC_SEQSTART:
                                        /* parenthesized subexpression: its subtree
                                         * replaces the node created above */
1273                                         cn = vn->r.n = parse_expr(TC_SEQTERM);
1274                                         cn->a.n = vn;
1275                                         break;
1276
1277                                 case TC_GETLINE:
                                        /* remember the node so that a following
                                         * '<' is taken as input redirection */
1278                                         glptr = cn;
1279                                         xtc = TC_OPERAND | TC_UOPPRE | TC_BINOP | iexp;
1280                                         break;
1281
1282                                 case TC_BUILTIN:
                                        /* parenthesized arguments on the left */
1283                                         cn->l.n = condition();
1284                                         break;
1285                                 }
1286                         }
1287                 }
1288         }
1289         return sn.r.n;
1290 }
1291
1292 /* add node to chain. Return ptr to alloc'd node */
1293 static node *chain_node(uint32_t info)
1294 {
1295         node *n;
1296
1297         if (!seq->first)
1298                 seq->first = seq->last = new_node(0);
1299
1300         if (seq->programname != g_progname) {
1301                 seq->programname = g_progname;
1302                 n = chain_node(OC_NEWSOURCE);
1303                 n->l.new_progname = xstrdup(g_progname);
1304         }
1305
1306         n = seq->last;
1307         n->info = info;
1308         seq->last = n->a.n = new_node(OC_DONE);
1309
1310         return n;
1311 }
1312
1313 static void chain_expr(uint32_t info)
1314 {
1315         node *n;
1316
1317         n = chain_node(info);
1318         n->l.n = parse_expr(TC_OPTERM | TC_GRPTERM);
1319         if (t_tclass & TC_GRPTERM)
1320                 rollback_token();
1321 }
1322
1323 static node *chain_loop(node *nn)
1324 {
1325         node *n, *n2, *save_brk, *save_cont;
1326
1327         save_brk = break_ptr;
1328         save_cont = continue_ptr;
1329
1330         n = chain_node(OC_BR | Vx);
1331         continue_ptr = new_node(OC_EXEC);
1332         break_ptr = new_node(OC_EXEC);
1333         chain_group();
1334         n2 = chain_node(OC_EXEC | Vx);
1335         n2->l.n = nn;
1336         n2->a.n = n;
1337         continue_ptr->a.n = n2;
1338         break_ptr->a.n = n->r.n = seq->last;
1339
1340         continue_ptr = save_cont;
1341         break_ptr = save_brk;
1342
1343         return n;
1344 }
1345
1346 /* Parse one statement, or a brace-delimited group of statements, and
 * attach the resulting nodes to the current chain (seq).
 *
 * Leading newlines are skipped. A TC_GRPSTART opens a group whose
 * members are parsed recursively until TC_GRPTERM; an expression is
 * chained through chain_expr(); everything else is a statement keyword
 * dispatched on the operation class in t_info.
 */
1347 static void chain_group(void)
1348 {
1349         uint32_t c;
1350         node *n, *n2, *n3;
1351
1352         do {
1353                 c = next_token(TC_GRPSEQ);
1354         } while (c & TC_NEWLINE);
1355
1356         if (c & TC_GRPSTART) {
                /* group: every non-newline token starts a member, which is
                 * pushed back and parsed by a recursive call */
1357                 while (next_token(TC_GRPSEQ | TC_GRPTERM) != TC_GRPTERM) {
1358                         if (t_tclass & TC_NEWLINE)
1359                                 continue;
1360                         rollback_token();
1361                         chain_group();
1362                 }
1363         } else if (c & (TC_OPSEQ | TC_OPTERM)) {
                /* expression statement (possibly empty) */
1364                 rollback_token();
1365                 chain_expr(OC_EXEC | Vx);
1366         } else {                                                /* TC_STATEMNT */
1367                 switch (t_info & OPCLSMASK) {
1368                 case ST_IF:
                        /* n: branch node holding the condition. n2: node
                         * closing the 'then' part. n->r.n is set to the
                         * node following the 'then' part; with an 'else',
                         * n2 is linked to the node following the 'else'
                         * part, otherwise the token read ahead is pushed
                         * back. */
1369                         n = chain_node(OC_BR | Vx);
1370                         n->l.n = condition();
1371                         chain_group();
1372                         n2 = chain_node(OC_EXEC);
1373                         n->r.n = seq->last;
1374                         if (next_token(TC_GRPSEQ | TC_GRPTERM | TC_ELSE) == TC_ELSE) {
1375                                 chain_group();
1376                                 n2->a.n = seq->last;
1377                         } else {
1378                                 rollback_token();
1379                         }
1380                         break;
1381
1382                 case ST_WHILE:
                        /* the condition comes first in the source, so it is
                         * parsed before the loop is chained */
1383                         n2 = condition();
1384                         n = chain_loop(NULL);
1385                         n->l.n = n2;
1386                         break;
1387
1388                 case ST_DO:
                        /* n2 is linked to the node following the loop's
                         * branch node, i.e. it skips the branch node
                         * (presumably so the body runs once before the
                         * condition is tested). The condition itself
                         * follows the body, after 'while'. */
1389                         n2 = chain_node(OC_EXEC);
1390                         n = chain_loop(NULL);
1391                         n2->a.n = n->a.n;
1392                         next_token(TC_WHILE);
1393                         n->l.n = condition();
1394                         break;
1395
1396                 case ST_FOR:
                        /* the terminator of the first expression tells the
                         * two forms apart: ')' means for-in, ';' means
                         * the three-part form */
1397                         next_token(TC_SEQSTART);
1398                         n2 = parse_expr(TC_SEMICOL | TC_SEQTERM);
1399                         if (t_tclass & TC_SEQTERM) {    /* for-in */
1400                                 if ((n2->info & OPCLSMASK) != OC_IN)
1401                                         syntax_error(EMSG_UNEXP_TOKEN);
                                /* OC_WALKINIT gets both operands of the 'in'
                                 * expression; the loop's branch node becomes
                                 * OC_WALKNEXT on its left operand */
1402                                 n = chain_node(OC_WALKINIT | VV);
1403                                 n->l.n = n2->l.n;
1404                                 n->r.n = n2->r.n;
1405                                 n = chain_loop(NULL);
1406                                 n->info = OC_WALKNEXT | Vx;
1407                                 n->l.n = n2->l.n;
1408                         } else {                        /* for (;;) */
                                /* n2 was the init expression: chain it, then
                                 * reuse n2 for the condition; n3 is the third
                                 * expression, evaluated by the loop's closing
                                 * node. Without a condition the branch node
                                 * is turned into a plain OC_EXEC. */
1409                                 n = chain_node(OC_EXEC | Vx);
1410                                 n->l.n = n2;
1411                                 n2 = parse_expr(TC_SEMICOL);
1412                                 n3 = parse_expr(TC_SEQTERM);
1413                                 n = chain_loop(n3);
1414                                 n->l.n = n2;
1415                                 if (!n2)
1416                                         n->info = OC_EXEC;
1417                         }
1418                         break;
1419
1420                 case OC_PRINT:
1421                 case OC_PRINTF:
                        /* left: the expression to print; with an output
                         * redirection its info bits are merged into the
                         * node and the target expression goes to the
                         * right. A closing TC_GRPTERM is pushed back. */
1422                         n = chain_node(t_info);
1423                         n->l.n = parse_expr(TC_OPTERM | TC_OUTRDR | TC_GRPTERM);
1424                         if (t_tclass & TC_OUTRDR) {
1425                                 n->info |= t_info;
1426                                 n->r.n = parse_expr(TC_OPTERM | TC_GRPTERM);
1427                         }
1428                         if (t_tclass & TC_GRPTERM)
1429                                 rollback_token();
1430                         break;
1431
1432                 case OC_BREAK:
                        /* NOTE(review): break_ptr is only set up by
                         * chain_loop(); nothing here checks that the
                         * statement is inside a loop - confirm what a
                         * stray 'break' (or 'continue' below) should do */
1433                         n = chain_node(OC_EXEC);
1434                         n->a.n = break_ptr;
1435                         break;
1436
1437                 case OC_CONTINUE:
1438                         n = chain_node(OC_EXEC);
1439                         n->a.n = continue_ptr;
1440                         break;
1441
1442                 /* delete, next, nextfile, return, exit */
1443                 default:
1444                         chain_expr(t_info);
1445                 }
1446         }
1447 }
1448
/* Parse the awk program text starting at p (the text is modified by
 * the tokenizer) and build the node chains: BEGIN actions go to
 * beginseq, END actions to endseq, function bodies to the function's
 * own chain and everything else to mainseq. Parsing stops at TC_EOF. */
1449 static void parse_program(char *p)
1450 {
1451         uint32_t tclass;
1452         node *cn;
1453         func *f;
1454         var *v;
1455
1456         g_pos = p;
1457         t_lineno = 1;
1458         while ((tclass = next_token(TC_EOF | TC_OPSEQ | TC_GRPSTART |
1459                         TC_OPTERM | TC_BEGIN | TC_END | TC_FUNCDECL)) != TC_EOF) {
1460
                /* terminators between top-level items are skipped */
1461                 if (tclass & TC_OPTERM)
1462                         continue;
1463
1464                 seq = &mainseq;
1465                 if (tclass & TC_BEGIN) {
1466                         seq = &beginseq;
1467                         chain_group();
1468
1469                 } else if (tclass & TC_END) {
1470                         seq = &endseq;
1471                         chain_group();
1472
1473                 } else if (tclass & TC_FUNCDECL) {
                        /* next_token() reports name( as TC_FUNCTION without
                         * consuming the '(' - g_pos++ steps over it */
1474                         next_token(TC_FUNCTION);
1475                         g_pos++;
1476                         f = newfunc(t_string);
1477                         f->body.first = NULL;
1478                         f->nargs = 0;
                        /* each argument name is entered into ahash together
                         * with its position in the argument list */
1479                         while (next_token(TC_VARIABLE | TC_SEQTERM) & TC_VARIABLE) {
1480                                 v = findvar(ahash, t_string);
1481                                 v->x.aidx = f->nargs++;
1482
1483                                 if (next_token(TC_COMMA | TC_SEQTERM) & TC_SEQTERM)
1484                                         break;
1485                         }
1486                         seq = &f->body;
1487                         chain_group();
                        /* the argument names are only needed while the
                         * body is being parsed */
1488                         clear_array(ahash);
1489
1490                 } else if (tclass & TC_OPSEQ) {
                        /* pattern rule: an OC_TEST node holds the pattern
                         * expression; it is followed by the action group
                         * or, when no group follows, by an OC_PRINT node.
                         * cn->r.n is set to the node following the
                         * action. */
1491                         rollback_token();
1492                         cn = chain_node(OC_TEST);
1493                         cn->l.n = parse_expr(TC_OPTERM | TC_EOF | TC_GRPSTART);
1494                         if (t_tclass & TC_GRPSTART) {
1495                                 rollback_token();
1496                                 chain_group();
1497                         } else {
1498                                 chain_node(OC_PRINT);
1499                         }
1500                         cn->r.n = mainseq.last;
1501
1502                 } else /* if (tclass & TC_GRPSTART) */ {
                        /* action without a pattern */
1503                         rollback_token();
1504                         chain_group();
1505                 }
1506         }
1507 }
1508
1509
1510 /* -------- program execution part -------- */
1511
1512 static node *mk_splitter(const char *s, tsplitter *spl)
1513 {
1514         regex_t *re, *ire;
1515         node *n;
1516
1517         re = &spl->re[0];
1518         ire = &spl->re[1];
1519         n = &spl->n;
1520         if ((n->info & OPCLSMASK) == OC_REGEXP) {
1521                 regfree(re);
1522                 regfree(ire); // TODO: nuke ire, use re+1?
1523         }
1524         if (s[0] && s[1]) { /* strlen(s) > 1 */
1525                 mk_re_node(s, n, re);
1526         } else {
1527                 n->info = (uint32_t) s[0];
1528         }
1529
1530         return n;
1531 }
1532
1533 /* use node as a regular expression. Supplied with node ptr and regex_t
1534  * storage space. Return ptr to regex (if result points to preg, it should
1535  * be later regfree'd manually
1536  */
1537 static regex_t *as_regex(node *op, regex_t *preg)
1538 {
1539         int cflags;
1540         var *v;
1541         const char *s;
1542
1543         if ((op->info & OPCLSMASK) == OC_REGEXP) {
1544                 return icase ? op->r.ire : op->l.re;
1545         }
1546         v = nvalloc(1);
1547         s = getvar_s(evaluate(op, v));
1548
1549         cflags = icase ? REG_EXTENDED | REG_ICASE : REG_EXTENDED;
1550         /* Testcase where REG_EXTENDED fails (unpaired '{'):
1551          * echo Hi | awk 'gsub("@(samp|code|file)\{","");'
1552          * gawk 3.1.5 eats this. We revert to ~REG_EXTENDED
1553          * (maybe gsub is not supposed to use REG_EXTENDED?).
1554          */
1555         if (regcomp(preg, s, cflags)) {
1556                 cflags &= ~REG_EXTENDED;
1557                 xregcomp(preg, s, cflags);
1558         }
1559         nvfree(v);
1560         return preg;
1561 }
1562
/*
 * Gradually increasing buffer: make sure b can hold more than n bytes.
 *
 * The buffer is (re)allocated when it does not exist yet or when n has
 * reached *size. Since that includes n == *size, there is always at
 * least one byte beyond n. The new size, n + n/2 + 80, is stored in
 * *size. Returns the possibly moved buffer.
 */
static char* qrealloc(char *b, int n, int *size)
{
        if (b != NULL && n < *size)
                return b;

        *size = n + (n>>1) + 80;
        return xrealloc(b, *size);
}
1575
1576 /* resize field storage space */
1577 static void fsrealloc(int size)
1578 {
1579         int i;
1580
1581         if (size >= maxfields) {
1582                 i = maxfields;
1583                 maxfields = size + 16;
1584                 Fields = xrealloc(Fields, maxfields * sizeof(Fields[0]));
1585                 for (; i < maxfields; i++) {
1586                         Fields[i].type = VF_SPECIAL;
1587                         Fields[i].string = NULL;
1588                 }
1589         }
1590         /* if size < nfields, clear extra field variables */
1591         for (i = size; i < nfields; i++) {
1592                 clrvar(Fields + i);
1593         }
1594         nfields = size;
1595 }
1596
/* Split string s into fields as directed by splitter node spl.
 *
 * The fields are stored one after another, each NUL-terminated, in a
 * newly allocated buffer whose address is returned through *slist.
 * Returns the number of fields.
 *
 * Depending on spl->info one of four modes is used:
 *  - regex split, when the node is an OC_REGEXP node;
 *  - null split (separator character 0): every char is a field;
 *  - single-character split, for any separator character but ' ';
 *  - space split: fields are separated by runs of whitespace.
 */
1597 static int awk_split(const char *s, node *spl, char **slist)
1598 {
1599         int l, n;
        /* c[0], c[1]: the separator character (twice, so that both cases
         * fit when icase is set); c[2]: '\n' when RS is empty, else NUL;
         * c[3]: terminating NUL. Used as the char set for strpbrk(), and
         * from c+2 on as the set for strcspn(). */
1600         char c[4];
        /* s1: write position in the output buffer */
1601         char *s1;
1602         regmatch_t pmatch[2]; // TODO: why [2]? [1] is enough...
1603
1604         /* in worst case, each char would be a separate field */
1605         *slist = s1 = xzalloc(strlen(s) * 2 + 3);
1606         strcpy(s1, s);
1607
1608         c[0] = c[1] = (char)spl->info;
1609         c[2] = c[3] = '\0';
1610         if (*getvar_s(intvar[RS]) == '\0')
1611                 c[2] = '\n';
1612
1613         n = 0;
1614         if ((spl->info & OPCLSMASK) == OC_REGEXP) {  /* regex split */
1615                 if (!*s)
1616                         return n; /* "": zero fields */
1617                 n++; /* at least one field will be there */
1618                 do {
1619                         l = strcspn(s, c+2); /* len till next NUL or \n */
                        /* a match counts only if it starts no later than
                         * the end of the current line */
1620                         if (regexec(icase ? spl->r.ire : spl->l.re, s, 1, pmatch, 0) == 0
1621                          && pmatch[0].rm_so <= l
1622                         ) {
1623                                 l = pmatch[0].rm_so;
                                /* empty match at the very start: take one
                                 * character so that the loop advances */
1624                                 if (pmatch[0].rm_eo == 0) {
1625                                         l++;
1626                                         pmatch[0].rm_eo++;
1627                                 }
1628                                 n++; /* we saw yet another delimiter */
1629                         } else {
                                /* no separator: the field runs to the end of
                                 * the line; a newline there is stepped over */
1630                                 pmatch[0].rm_eo = l;
1631                                 if (s[l])
1632                                         pmatch[0].rm_eo++;
1633                         }
                        /* here l is the field length and rm_eo the number of
                         * input chars consumed (field plus separator) */
1634                         memcpy(s1, s, l);
1635                         /* make sure we remove *all* of the separator chars */
1636                         do {
1637                                 s1[l] = '\0';
1638                         } while (++l < pmatch[0].rm_eo);
                        /* presumably moves s1 past the field just stored -
                         * see nextword() */
1639                         nextword(&s1);
1640                         s += pmatch[0].rm_eo;
1641                 } while (*s);
1642                 return n;
1643         }
1644         if (c[0] == '\0') {  /* null split */
                /* every character becomes a field of its own */
1645                 while (*s) {
1646                         *s1++ = *s++;
1647                         *s1++ = '\0';
1648                         n++;
1649                 }
1650                 return n;
1651         }
1652         if (c[0] != ' ') {  /* single-character split */
                /* works in place on the copy of s made above: every
                 * character of the set c is overwritten with NUL */
1653                 if (icase) {
1654                         c[0] = toupper(c[0]);
1655                         c[1] = tolower(c[1]);
1656                 }
1657                 if (*s1)
1658                         n++;
1659                 while ((s1 = strpbrk(s1, c)) != NULL) {
1660                         *s1++ = '\0';
1661                         n++;
1662                 }
1663                 return n;
1664         }
1665         /* space split */
        /* leading, trailing and repeated whitespace produce no fields */
1666         while (*s) {
1667                 s = skip_whitespace(s);
1668                 if (!*s)
1669                         break;
1670                 n++;
1671                 while (*s && !isspace(*s))
1672                         *s1++ = *s++;
1673                 *s1++ = '\0';
1674         }
1675         return n;
1676 }
1677
1678 static void split_f0(void)
1679 {
1680 /* static char *fstrings; */
1681 #define fstrings (G.split_f0__fstrings)
1682
1683         int i, n;
1684         char *s;
1685
1686         if (is_f0_split)
1687                 return;
1688
1689         is_f0_split = TRUE;
1690         free(fstrings);
1691         fsrealloc(0);
1692         n = awk_split(getvar_s(intvar[F0]), &fsplitter.n, &fstrings);
1693         fsrealloc(n);
1694         s = fstrings;
1695         for (i = 0; i < n; i++) {
1696                 Fields[i].string = nextword(&s);
1697                 Fields[i].type |= (VF_FSTR | VF_USER | VF_DIRTY);
1698         }
1699
1700         /* set NF manually to avoid side effects */
1701         clrvar(intvar[NF]);
1702         intvar[NF]->type = VF_NUMBER | VF_SPECIAL;
1703         intvar[NF]->number = nfields;
1704 #undef fstrings
1705 }
1706
1707 /* perform additional actions when some internal variables changed */
1708 static void handle_special(var *v)
1709 {
1710         int n;
1711         char *b;
1712         const char *sep, *s;
1713         int sl, l, len, i, bsize;
1714
1715         if (!(v->type & VF_SPECIAL))
1716                 return;
1717
1718         if (v == intvar[NF]) {
1719                 n = (int)getvar_i(v);
1720                 fsrealloc(n);
1721
1722                 /* recalculate $0 */
1723                 sep = getvar_s(intvar[OFS]);
1724                 sl = strlen(sep);
1725                 b = NULL;
1726                 len = 0;
1727                 for (i = 0; i < n; i++) {
1728                         s = getvar_s(&Fields[i]);
1729                         l = strlen(s);
1730                         if (b) {
1731                                 memcpy(b+len, sep, sl);
1732                                 len += sl;
1733                         }
1734                         b = qrealloc(b, len+l+sl, &bsize);
1735                         memcpy(b+len, s, l);
1736                         len += l;
1737                 }
1738                 if (b)
1739                         b[len] = '\0';
1740                 setvar_p(intvar[F0], b);
1741                 is_f0_split = TRUE;
1742
1743         } else if (v == intvar[F0]) {
1744                 is_f0_split = FALSE;
1745
1746         } else if (v == intvar[FS]) {
1747                 mk_splitter(getvar_s(v), &fsplitter);
1748
1749         } else if (v == intvar[RS]) {
1750                 mk_splitter(getvar_s(v), &rsplitter);
1751
1752         } else if (v == intvar[IGNORECASE]) {
1753                 icase = istrue(v);
1754
1755         } else {                                /* $n */
1756                 n = getvar_i(intvar[NF]);
1757                 setvar_i(intvar[NF], n > v-Fields ? n : v-Fields+1);
1758                 /* right here v is invalid. Just to note... */
1759         }
1760 }
1761
1762 /* step through func/builtin/etc arguments */
1763 static node *nextarg(node **pn)
1764 {
1765         node *n;
1766
1767         n = *pn;
1768         if (n && (n->info & OPCLSMASK) == OC_COMMA) {
1769                 *pn = n->r.n;
1770                 n = n->l.n;
1771         } else {
1772                 *pn = NULL;
1773         }
1774         return n;
1775 }
1776
1777 static void hashwalk_init(var *v, xhash *array)
1778 {
1779         hash_item *hi;
1780         unsigned i;
1781         walker_list *w;
1782         walker_list *prev_walker;
1783
1784         if (v->type & VF_WALK) {
1785                 prev_walker = v->x.walker;
1786         } else {
1787                 v->type |= VF_WALK;
1788                 prev_walker = NULL;
1789         }
1790         debug_printf_walker("hashwalk_init: prev_walker:%p\n", prev_walker);
1791
1792         w = v->x.walker = xzalloc(sizeof(*w) + array->glen + 1); /* why + 1? */
1793         debug_printf_walker(" walker@%p=%p\n", &v->x.walker, w);
1794         w->cur = w->end = w->wbuf;
1795         w->prev = prev_walker;
1796         for (i = 0; i < array->csize; i++) {
1797                 hi = array->items[i];
1798                 while (hi) {
1799                         strcpy(w->end, hi->name);
1800                         nextword(&w->end);
1801                         hi = hi->next;
1802                 }
1803         }
1804 }
1805
1806 static int hashwalk_next(var *v)
1807 {
1808         walker_list *w = v->x.walker;
1809
1810         if (w->cur >= w->end) {
1811                 walker_list *prev_walker = w->prev;
1812
1813                 debug_printf_walker("end of iteration, free(walker@%p:%p), prev_walker:%p\n", &v->x.walker, w, prev_walker);
1814                 free(w);
1815                 v->x.walker = prev_walker;
1816                 return FALSE;
1817         }
1818
1819         setvar_s(v, nextword(&w->cur));
1820         return TRUE;
1821 }
1822
1823 /* evaluate node, return 1 when result is true, 0 otherwise */
1824 static int ptest(node *pattern)
1825 {
1826         /* ptest__v is "static": to save stack space? */
1827         return istrue(evaluate(pattern, &G.ptest__v));
1828 }
1829
1830 /* read next record from stream rsm into a variable v */
1831 static int awk_getline(rstream *rsm, var *v)
1832 {
1833         char *b;
1834         regmatch_t pmatch[2];
1835         int size, a, p, pp = 0;
1836         int fd, so, eo, r, rp;
1837         char c, *m, *s;
1838
1839         /* we're using our own buffer since we need access to accumulating
1840          * characters
1841          */
1842         fd = fileno(rsm->F);
1843         m = rsm->buffer;
1844         a = rsm->adv;
1845         p = rsm->pos;
1846         size = rsm->size;
1847         c = (char) rsplitter.n.info;
1848         rp = 0;
1849
1850         if (!m)
1851                 m = qrealloc(m, 256, &size);
1852
1853         do {
1854                 b = m + a;
1855                 so = eo = p;
1856                 r = 1;
1857                 if (p > 0) {
1858                         if ((rsplitter.n.info & OPCLSMASK) == OC_REGEXP) {
1859                                 if (regexec(icase ? rsplitter.n.r.ire : rsplitter.n.l.re,
1860                                                         b, 1, pmatch, 0) == 0) {
1861                                         so = pmatch[0].rm_so;
1862                                         eo = pmatch[0].rm_eo;
1863                                         if (b[eo] != '\0')
1864                                                 break;
1865                                 }
1866                         } else if (c != '\0') {
1867                                 s = strchr(b+pp, c);
1868                                 if (!s)
1869                                         s = memchr(b+pp, '\0', p - pp);
1870                                 if (s) {
1871                                         so = eo = s-b;
1872                                         eo++;
1873                                         break;
1874                                 }
1875                         } else {
1876                                 while (b[rp] == '\n')
1877                                         rp++;
1878                                 s = strstr(b+rp, "\n\n");
1879                                 if (s) {
1880                                         so = eo = s-b;
1881                                         while (b[eo] == '\n')
1882                                                 eo++;
1883                                         if (b[eo] != '\0')
1884                                                 break;
1885                                 }
1886                         }
1887                 }
1888
1889                 if (a > 0) {
1890                         memmove(m, m+a, p+1);
1891                         b = m;
1892                         a = 0;
1893                 }
1894
1895                 m = qrealloc(m, a+p+128, &size);
1896                 b = m + a;
1897                 pp = p;
1898                 p += safe_read(fd, b+p, size-p-1);
1899                 if (p < pp) {
1900                         p = 0;
1901                         r = 0;
1902                         setvar_i(intvar[ERRNO], errno);
1903                 }
1904                 b[p] = '\0';
1905
1906         } while (p > pp);
1907
1908         if (p == 0) {
1909                 r--;
1910         } else {
1911                 c = b[so]; b[so] = '\0';
1912                 setvar_s(v, b+rp);
1913                 v->type |= VF_USER;
1914                 b[so] = c;
1915                 c = b[eo]; b[eo] = '\0';
1916                 setvar_s(intvar[RT], b+so);
1917                 b[eo] = c;
1918         }
1919
1920         rsm->buffer = m;
1921         rsm->adv = a + eo;
1922         rsm->pos = p - eo;
1923         rsm->size = size;
1924
1925         return r;
1926 }
1927
1928 static int fmt_num(char *b, int size, const char *format, double n, int int_as_int)
1929 {
1930         int r = 0;
1931         char c;
1932         const char *s = format;
1933
1934         if (int_as_int && n == (int)n) {
1935                 r = snprintf(b, size, "%d", (int)n);
1936         } else {
1937                 do { c = *s; } while (c && *++s);
1938                 if (strchr("diouxX", c)) {
1939                         r = snprintf(b, size, format, (int)n);
1940                 } else if (strchr("eEfgG", c)) {
1941                         r = snprintf(b, size, format, n);
1942                 } else {
1943                         syntax_error(EMSG_INV_FMT);
1944                 }
1945         }
1946         return r;
1947 }
1948
1949 /* formatted output into an allocated buffer, return ptr to buffer */
1950 static char *awk_printf(node *n)
1951 {
1952         char *b = NULL;
1953         char *fmt, *s, *f;
1954         const char *s1;
1955         int i, j, incr, bsize;
1956         char c, c1;
1957         var *v, *arg;
1958
1959         v = nvalloc(1);
1960         fmt = f = xstrdup(getvar_s(evaluate(nextarg(&n), v)));
1961
1962         i = 0;
1963         while (*f) {
1964                 s = f;
1965                 while (*f && (*f != '%' || *++f == '%'))
1966                         f++;
1967                 while (*f && !isalpha(*f)) {
1968                         if (*f == '*')
1969                                 syntax_error("%*x formats are not supported");
1970                         f++;
1971                 }
1972
1973                 incr = (f - s) + MAXVARFMT;
1974                 b = qrealloc(b, incr + i, &bsize);
1975                 c = *f;
1976                 if (c != '\0')
1977                         f++;
1978                 c1 = *f;
1979                 *f = '\0';
1980                 arg = evaluate(nextarg(&n), v);
1981
1982                 j = i;
1983                 if (c == 'c' || !c) {
1984                         i += sprintf(b+i, s, is_numeric(arg) ?
1985                                         (char)getvar_i(arg) : *getvar_s(arg));
1986                 } else if (c == 's') {
1987                         s1 = getvar_s(arg);
1988                         b = qrealloc(b, incr+i+strlen(s1), &bsize);
1989                         i += sprintf(b+i, s, s1);
1990                 } else {
1991                         i += fmt_num(b+i, incr, s, getvar_i(arg), FALSE);
1992                 }
1993                 *f = c1;
1994
1995                 /* if there was an error while sprintf, return value is negative */
1996                 if (i < j)
1997                         i = j;
1998         }
1999
2000         free(fmt);
2001         nvfree(v);
2002         b = xrealloc(b, i + 1);
2003         b[i] = '\0';
2004         return b;
2005 }
2006
2007 /* Common substitution routine.
2008  * Replace (nm)'th substring of (src) that matches (rn) with (repl),
2009  * store result into (dest), return number of substitutions.
2010  * If nm = 0, replace all matches.
2011  * If src or dst is NULL, use $0.
2012  * If subexp != 0, enable subexpression matching (\1-\9).
2013  */
2014 static int awk_sub(node *rn, const char *repl, int nm, var *src, var *dest, int subexp)
2015 {
2016         char *resbuf;
2017         const char *sp;
2018         int match_no, residx, replen, resbufsize;
2019         int regexec_flags;
2020         regmatch_t pmatch[10];
2021         regex_t sreg, *regex;
2022
2023         resbuf = NULL;
2024         residx = 0;
2025         match_no = 0;
2026         regexec_flags = 0;
2027         regex = as_regex(rn, &sreg);
2028         sp = getvar_s(src ? src : intvar[F0]);
2029         replen = strlen(repl);
2030         while (regexec(regex, sp, 10, pmatch, regexec_flags) == 0) {
2031                 int so = pmatch[0].rm_so;
2032                 int eo = pmatch[0].rm_eo;
2033
2034                 //bb_error_msg("match %u: [%u,%u] '%s'%p", match_no+1, so, eo, sp,sp);
2035                 resbuf = qrealloc(resbuf, residx + eo + replen, &resbufsize);
2036                 memcpy(resbuf + residx, sp, eo);
2037                 residx += eo;
2038                 if (++match_no >= nm) {
2039                         const char *s;
2040                         int nbs;
2041
2042                         /* replace */
2043                         residx -= (eo - so);
2044                         nbs = 0;
2045                         for (s = repl; *s; s++) {
2046                                 char c = resbuf[residx++] = *s;
2047                                 if (c == '\\') {
2048                                         nbs++;
2049                                         continue;
2050                                 }
2051                                 if (c == '&' || (subexp && c >= '0' && c <= '9')) {
2052                                         int j;
2053                                         residx -= ((nbs + 3) >> 1);
2054                                         j = 0;
2055                                         if (c != '&') {
2056                                                 j = c - '0';
2057                                                 nbs++;
2058                                         }
2059                                         if (nbs % 2) {
2060                                                 resbuf[residx++] = c;
2061                                         } else {
2062                                                 int n = pmatch[j].rm_eo - pmatch[j].rm_so;
2063                                                 resbuf = qrealloc(resbuf, residx + replen + n, &resbufsize);
2064                                                 memcpy(resbuf + residx, sp + pmatch[j].rm_so, n);
2065                                                 residx += n;
2066                                         }
2067                                 }
2068                                 nbs = 0;
2069                         }
2070                 }
2071
2072                 regexec_flags = REG_NOTBOL;
2073                 sp += eo;
2074                 if (match_no == nm)
2075                         break;
2076                 if (eo == so) {
2077                         /* Empty match (e.g. "b*" will match anywhere).
2078                          * Advance by one char. */
2079 //BUG (bug 1333):
2080 //gsub(/\<b*/,"") on "abc" will reach this point, advance to "bc"
2081 //... and will erroneously match "b" even though it is NOT at the word start.
2082 //we need REG_NOTBOW but it does not exist...
2083 //TODO: if EXTRA_COMPAT=y, use GNU matching and re_search,
2084 //it should be able to do it correctly.
2085                         /* Subtle: this is safe only because
2086                          * qrealloc allocated at least one extra byte */
2087                         resbuf[residx] = *sp;
2088                         if (*sp == '\0')
2089                                 goto ret;
2090                         sp++;
2091                         residx++;
2092                 }
2093         }
2094
2095         resbuf = qrealloc(resbuf, residx + strlen(sp), &resbufsize);
2096         strcpy(resbuf + residx, sp);
2097  ret:
2098         //bb_error_msg("end sp:'%s'%p", sp,sp);
2099         setvar_p(dest ? dest : intvar[F0], resbuf);
2100         if (regex == &sreg)
2101                 regfree(regex);
2102         return match_no;
2103 }
2104
2105 static NOINLINE int do_mktime(const char *ds)
2106 {
2107         struct tm then;
2108         int count;
2109
2110         /*memset(&then, 0, sizeof(then)); - not needed */
2111         then.tm_isdst = -1; /* default is unknown */
2112
2113         /* manpage of mktime says these fields are ints,
2114          * so we can sscanf stuff directly into them */
2115         count = sscanf(ds, "%u %u %u %u %u %u %d",
2116                 &then.tm_year, &then.tm_mon, &then.tm_mday,
2117                 &then.tm_hour, &then.tm_min, &then.tm_sec,
2118                 &then.tm_isdst);
2119
2120         if (count < 6
2121          || (unsigned)then.tm_mon < 1
2122          || (unsigned)then.tm_year < 1900
2123         ) {
2124                 return -1;
2125         }
2126
2127         then.tm_mon -= 1;
2128         then.tm_year -= 1900;
2129
2130         return mktime(&then);
2131 }
2132
2133 static NOINLINE var *exec_builtin(node *op, var *res)
2134 {
2135 #define tspl (G.exec_builtin__tspl)
2136
2137         var *tv;
2138         node *an[4];
2139         var *av[4];
2140         const char *as[4];
2141         regmatch_t pmatch[2];
2142         regex_t sreg, *re;
2143         node *spl;
2144         uint32_t isr, info;
2145         int nargs;
2146         time_t tt;
2147         int i, l, ll, n;
2148
2149         tv = nvalloc(4);
2150         isr = info = op->info;
2151         op = op->l.n;
2152
2153         av[2] = av[3] = NULL;
2154         for (i = 0; i < 4 && op; i++) {
2155                 an[i] = nextarg(&op);
2156                 if (isr & 0x09000000)
2157                         av[i] = evaluate(an[i], &tv[i]);
2158                 if (isr & 0x08000000)
2159                         as[i] = getvar_s(av[i]);
2160                 isr >>= 1;
2161         }
2162
2163         nargs = i;
2164         if ((uint32_t)nargs < (info >> 30))
2165                 syntax_error(EMSG_TOO_FEW_ARGS);
2166
2167         info &= OPNMASK;
2168         switch (info) {
2169
2170         case B_a2:
2171 #if ENABLE_FEATURE_AWK_LIBM
2172                 setvar_i(res, atan2(getvar_i(av[0]), getvar_i(av[1])));
2173 #else
2174                 syntax_error(EMSG_NO_MATH);
2175 #endif
2176                 break;
2177
2178         case B_sp: {
2179                 char *s, *s1;
2180
2181                 if (nargs > 2) {
2182                         spl = (an[2]->info & OPCLSMASK) == OC_REGEXP ?
2183                                 an[2] : mk_splitter(getvar_s(evaluate(an[2], &tv[2])), &tspl);
2184                 } else {
2185                         spl = &fsplitter.n;
2186                 }
2187
2188                 n = awk_split(as[0], spl, &s);
2189                 s1 = s;
2190                 clear_array(iamarray(av[1]));
2191                 for (i = 1; i <= n; i++)
2192                         setari_u(av[1], i, nextword(&s));
2193                 free(s1);
2194                 setvar_i(res, n);
2195                 break;
2196         }
2197
2198         case B_ss: {
2199                 char *s;
2200
2201                 l = strlen(as[0]);
2202                 i = getvar_i(av[1]) - 1;
2203                 if (i > l)
2204                         i = l;
2205                 if (i < 0)
2206                         i = 0;
2207                 n = (nargs > 2) ? getvar_i(av[2]) : l-i;
2208                 if (n < 0)
2209                         n = 0;
2210                 s = xstrndup(as[0]+i, n);
2211                 setvar_p(res, s);
2212                 break;
2213         }
2214
2215         /* Bitwise ops must assume that operands are unsigned. GNU Awk 3.1.5:
2216          * awk '{ print or(-1,1) }' gives "4.29497e+09", not "-2.xxxe+09" */
2217         case B_an:
2218                 setvar_i(res, getvar_i_int(av[0]) & getvar_i_int(av[1]));
2219                 break;
2220
2221         case B_co:
2222                 setvar_i(res, ~getvar_i_int(av[0]));
2223                 break;
2224
2225         case B_ls:
2226                 setvar_i(res, getvar_i_int(av[0]) << getvar_i_int(av[1]));
2227                 break;
2228
2229         case B_or:
2230                 setvar_i(res, getvar_i_int(av[0]) | getvar_i_int(av[1]));
2231                 break;
2232
2233         case B_rs:
2234                 setvar_i(res, getvar_i_int(av[0]) >> getvar_i_int(av[1]));
2235                 break;
2236
2237         case B_xo:
2238                 setvar_i(res, getvar_i_int(av[0]) ^ getvar_i_int(av[1]));
2239                 break;
2240
2241         case B_lo:
2242         case B_up: {
2243                 char *s, *s1;
2244                 s1 = s = xstrdup(as[0]);
2245                 while (*s1) {
2246                         //*s1 = (info == B_up) ? toupper(*s1) : tolower(*s1);
2247                         if ((unsigned char)((*s1 | 0x20) - 'a') <= ('z' - 'a'))
2248                                 *s1 = (info == B_up) ? (*s1 & 0xdf) : (*s1 | 0x20);
2249                         s1++;
2250                 }
2251                 setvar_p(res, s);
2252                 break;
2253         }
2254
2255         case B_ix:
2256                 n = 0;
2257                 ll = strlen(as[1]);
2258                 l = strlen(as[0]) - ll;
2259                 if (ll > 0 && l >= 0) {
2260                         if (!icase) {
2261                                 char *s = strstr(as[0], as[1]);
2262                                 if (s)
2263                                         n = (s - as[0]) + 1;
2264                         } else {
2265                                 /* this piece of code is terribly slow and
2266                                  * really should be rewritten
2267                                  */
2268                                 for (i = 0; i <= l; i++) {
2269                                         if (strncasecmp(as[0]+i, as[1], ll) == 0) {
2270                                                 n = i+1;
2271                                                 break;
2272                                         }
2273                                 }
2274                         }
2275                 }
2276                 setvar_i(res, n);
2277                 break;
2278
2279         case B_ti:
2280                 if (nargs > 1)
2281                         tt = getvar_i(av[1]);
2282                 else
2283                         time(&tt);
2284                 //s = (nargs > 0) ? as[0] : "%a %b %d %H:%M:%S %Z %Y";
2285                 i = strftime(g_buf, MAXVARFMT,
2286                         ((nargs > 0) ? as[0] : "%a %b %d %H:%M:%S %Z %Y"),
2287                         localtime(&tt));
2288                 g_buf[i] = '\0';
2289                 setvar_s(res, g_buf);
2290                 break;
2291
2292         case B_mt:
2293                 setvar_i(res, do_mktime(as[0]));
2294                 break;
2295
2296         case B_ma:
2297                 re = as_regex(an[1], &sreg);
2298                 n = regexec(re, as[0], 1, pmatch, 0);
2299                 if (n == 0) {
2300                         pmatch[0].rm_so++;
2301                         pmatch[0].rm_eo++;
2302                 } else {
2303                         pmatch[0].rm_so = 0;
2304                         pmatch[0].rm_eo = -1;
2305                 }
2306                 setvar_i(newvar("RSTART"), pmatch[0].rm_so);
2307                 setvar_i(newvar("RLENGTH"), pmatch[0].rm_eo - pmatch[0].rm_so);
2308                 setvar_i(res, pmatch[0].rm_so);
2309                 if (re == &sreg)
2310                         regfree(re);
2311                 break;
2312
2313         case B_ge:
2314                 awk_sub(an[0], as[1], getvar_i(av[2]), av[3], res, TRUE);
2315                 break;
2316
2317         case B_gs:
2318                 setvar_i(res, awk_sub(an[0], as[1], 0, av[2], av[2], FALSE));
2319                 break;
2320
2321         case B_su:
2322                 setvar_i(res, awk_sub(an[0], as[1], 1, av[2], av[2], FALSE));
2323                 break;
2324         }
2325
2326         nvfree(tv);
2327         return res;
2328 #undef tspl
2329 }
2330
2331 /*
2332  * Evaluate node - the heart of the program. Supplied with subtree
2333  * and place where to store result. returns ptr to result.
2334  */
2335 #define XC(n) ((n) >> 8)
2336
2337 static var *evaluate(node *op, var *res)
2338 {
2339 /* This procedure is recursive so we should count every byte */
2340 #define fnargs (G.evaluate__fnargs)
2341 /* seed is initialized to 1 */
2342 #define seed   (G.evaluate__seed)
2343 #define sreg   (G.evaluate__sreg)
2344
2345         var *v1;
2346
2347         if (!op)
2348                 return setvar_s(res, NULL);
2349
2350         v1 = nvalloc(2);
2351
2352         while (op) {
2353                 struct {
2354                         var *v;
2355                         const char *s;
2356                 } L = L; /* for compiler */
2357                 struct {
2358                         var *v;
2359                         const char *s;
2360                 } R = R;
2361                 double L_d = L_d;
2362                 uint32_t opinfo;
2363                 int opn;
2364                 node *op1;
2365
2366                 opinfo = op->info;
2367                 opn = (opinfo & OPNMASK);
2368                 g_lineno = op->lineno;
2369                 op1 = op->l.n;
2370                 debug_printf_eval("opinfo:%08x opn:%08x XC:%x\n", opinfo, opn, XC(opinfo & OPCLSMASK));
2371
2372                 /* execute inevitable things */
2373                 if (opinfo & OF_RES1)
2374                         L.v = evaluate(op1, v1);
2375                 if (opinfo & OF_RES2)
2376                         R.v = evaluate(op->r.n, v1+1);
2377                 if (opinfo & OF_STR1) {
2378                         L.s = getvar_s(L.v);
2379                         debug_printf_eval("L.s:'%s'\n", L.s);
2380                 }
2381                 if (opinfo & OF_STR2) {
2382                         R.s = getvar_s(R.v);
2383                         debug_printf_eval("R.s:'%s'\n", R.s);
2384                 }
2385                 if (opinfo & OF_NUM1) {
2386                         L_d = getvar_i(L.v);
2387                         debug_printf_eval("L_d:%f\n", L_d);
2388                 }
2389
2390                 switch (XC(opinfo & OPCLSMASK)) {
2391
2392                 /* -- iterative node type -- */
2393
2394                 /* test pattern */
2395                 case XC( OC_TEST ):
2396                         if ((op1->info & OPCLSMASK) == OC_COMMA) {
2397                                 /* it's range pattern */
2398                                 if ((opinfo & OF_CHECKED) || ptest(op1->l.n)) {
2399                                         op->info |= OF_CHECKED;
2400                                         if (ptest(op1->r.n))
2401                                                 op->info &= ~OF_CHECKED;
2402                                         op = op->a.n;
2403                                 } else {
2404                                         op = op->r.n;
2405                                 }
2406                         } else {
2407                                 op = ptest(op1) ? op->a.n : op->r.n;
2408                         }
2409                         break;
2410
2411                 /* just evaluate an expression, also used as unconditional jump */
2412                 case XC( OC_EXEC ):
2413                         break;
2414
2415                 /* branch, used in if-else and various loops */
2416                 case XC( OC_BR ):
2417                         op = istrue(L.v) ? op->a.n : op->r.n;
2418                         break;
2419
2420                 /* initialize for-in loop */
2421                 case XC( OC_WALKINIT ):
2422                         hashwalk_init(L.v, iamarray(R.v));
2423                         break;
2424
2425                 /* get next array item */
2426                 case XC( OC_WALKNEXT ):
2427                         op = hashwalk_next(L.v) ? op->a.n : op->r.n;
2428                         break;
2429
2430                 case XC( OC_PRINT ):
2431                 case XC( OC_PRINTF ): {
2432                         FILE *F = stdout;
2433
2434                         if (op->r.n) {
2435                                 rstream *rsm = newfile(R.s);
2436                                 if (!rsm->F) {
2437                                         if (opn == '|') {
2438                                                 rsm->F = popen(R.s, "w");
2439                                                 if (rsm->F == NULL)
2440                                                         bb_perror_msg_and_die("popen");
2441                                                 rsm->is_pipe = 1;
2442                                         } else {
2443                                                 rsm->F = xfopen(R.s, opn=='w' ? "w" : "a");
2444                                         }
2445                                 }
2446                                 F = rsm->F;
2447                         }
2448
2449                         if ((opinfo & OPCLSMASK) == OC_PRINT) {
2450                                 if (!op1) {
2451                                         fputs(getvar_s(intvar[F0]), F);
2452                                 } else {
2453                                         while (op1) {
2454                                                 var *v = evaluate(nextarg(&op1), v1);
2455                                                 if (v->type & VF_NUMBER) {
2456                                                         fmt_num(g_buf, MAXVARFMT, getvar_s(intvar[OFMT]),
2457                                                                         getvar_i(v), TRUE);
2458                                                         fputs(g_buf, F);
2459                                                 } else {
2460                                                         fputs(getvar_s(v), F);
2461                                                 }
2462
2463                                                 if (op1)
2464                                                         fputs(getvar_s(intvar[OFS]), F);
2465                                         }
2466                                 }
2467                                 fputs(getvar_s(intvar[ORS]), F);
2468
2469                         } else {        /* OC_PRINTF */
2470                                 char *s = awk_printf(op1);
2471                                 fputs(s, F);
2472                                 free(s);
2473                         }
2474                         fflush(F);
2475                         break;
2476                 }
2477
2478                 case XC( OC_DELETE ): {
2479                         uint32_t info = op1->info & OPCLSMASK;
2480                         var *v;
2481
2482                         if (info == OC_VAR) {
2483                                 v = op1->l.v;
2484                         } else if (info == OC_FNARG) {
2485                                 v = &fnargs[op1->l.aidx];
2486                         } else {
2487                                 syntax_error(EMSG_NOT_ARRAY);
2488                         }
2489
2490                         if (op1->r.n) {
2491                                 const char *s;
2492                                 clrvar(L.v);
2493                                 s = getvar_s(evaluate(op1->r.n, v1));
2494                                 hash_remove(iamarray(v), s);
2495                         } else {
2496                                 clear_array(iamarray(v));
2497                         }
2498                         break;
2499                 }
2500
2501                 case XC( OC_NEWSOURCE ):
2502                         g_progname = op->l.new_progname;
2503                         break;
2504
2505                 case XC( OC_RETURN ):
2506                         copyvar(res, L.v);
2507                         break;
2508
2509                 case XC( OC_NEXTFILE ):
2510                         nextfile = TRUE;
2511                 case XC( OC_NEXT ):
2512                         nextrec = TRUE;
2513                 case XC( OC_DONE ):
2514                         clrvar(res);
2515                         break;
2516
2517                 case XC( OC_EXIT ):
2518                         awk_exit(L_d);
2519
2520                 /* -- recursive node type -- */
2521
2522                 case XC( OC_VAR ):
2523                         L.v = op->l.v;
2524                         if (L.v == intvar[NF])
2525                                 split_f0();
2526                         goto v_cont;
2527
2528                 case XC( OC_FNARG ):
2529                         L.v = &fnargs[op->l.aidx];
2530  v_cont:
2531                         res = op->r.n ? findvar(iamarray(L.v), R.s) : L.v;
2532                         break;
2533
2534                 case XC( OC_IN ):
2535                         setvar_i(res, hash_search(iamarray(R.v), L.s) ? 1 : 0);
2536                         break;
2537
2538                 case XC( OC_REGEXP ):
2539                         op1 = op;
2540                         L.s = getvar_s(intvar[F0]);
2541                         goto re_cont;
2542
2543                 case XC( OC_MATCH ):
2544                         op1 = op->r.n;
2545  re_cont:
2546                         {
2547                                 regex_t *re = as_regex(op1, &sreg);
2548                                 int i = regexec(re, L.s, 0, NULL, 0);
2549                                 if (re == &sreg)
2550                                         regfree(re);
2551                                 setvar_i(res, (i == 0) ^ (opn == '!'));
2552                         }
2553                         break;
2554
2555                 case XC( OC_MOVE ):
2556                         debug_printf_eval("MOVE\n");
2557                         /* if source is a temporary string, just relink it to dest */
2558 //Disabled: if R.v is numeric but happens to have cached R.v->string,
2559 //then L.v ends up being a string, which is wrong
2560 //                      if (R.v == v1+1 && R.v->string) {
2561 //                              res = setvar_p(L.v, R.v->string);
2562 //                              R.v->string = NULL;
2563 //                      } else {
2564                                 res = copyvar(L.v, R.v);
2565 //                      }
2566                         break;
2567
2568                 case XC( OC_TERNARY ):
2569                         if ((op->r.n->info & OPCLSMASK) != OC_COLON)
2570                                 syntax_error(EMSG_POSSIBLE_ERROR);
2571                         res = evaluate(istrue(L.v) ? op->r.n->l.n : op->r.n->r.n, res);
2572                         break;
2573
2574                 case XC( OC_FUNC ): {
2575                         var *vbeg, *v;
2576                         const char *sv_progname;
2577
2578                         if (!op->r.f->body.first)
2579                                 syntax_error(EMSG_UNDEF_FUNC);
2580
2581                         vbeg = v = nvalloc(op->r.f->nargs + 1);
2582                         while (op1) {
2583                                 var *arg = evaluate(nextarg(&op1), v1);
2584                                 copyvar(v, arg);
2585                                 v->type |= VF_CHILD;
2586                                 v->x.parent = arg;
2587                                 if (++v - vbeg >= op->r.f->nargs)
2588                                         break;
2589                         }
2590
2591                         v = fnargs;
2592                         fnargs = vbeg;
2593                         sv_progname = g_progname;
2594
2595                         res = evaluate(op->r.f->body.first, res);
2596
2597                         g_progname = sv_progname;
2598                         nvfree(fnargs);
2599                         fnargs = v;
2600
2601                         break;
2602                 }
2603
2604                 case XC( OC_GETLINE ):
2605                 case XC( OC_PGETLINE ): {
2606                         rstream *rsm;
2607                         int i;
2608
2609                         if (op1) {
2610                                 rsm = newfile(L.s);
2611                                 if (!rsm->F) {
2612                                         if ((opinfo & OPCLSMASK) == OC_PGETLINE) {
2613                                                 rsm->F = popen(L.s, "r");
2614                                                 rsm->is_pipe = TRUE;
2615                                         } else {
2616                                                 rsm->F = fopen_for_read(L.s);           /* not xfopen! */
2617                                         }
2618                                 }
2619                         } else {
2620                                 if (!iF)
2621                                         iF = next_input_file();
2622                                 rsm = iF;
2623                         }
2624
2625                         if (!rsm->F) {
2626                                 setvar_i(intvar[ERRNO], errno);
2627                                 setvar_i(res, -1);
2628                                 break;
2629                         }
2630
2631                         if (!op->r.n)
2632                                 R.v = intvar[F0];
2633
2634                         i = awk_getline(rsm, R.v);
2635                         if (i > 0 && !op1) {
2636                                 incvar(intvar[FNR]);
2637                                 incvar(intvar[NR]);
2638                         }
2639                         setvar_i(res, i);
2640                         break;
2641                 }
2642
2643                 /* simple builtins */
2644                 case XC( OC_FBLTIN ): {
2645                         int i;
2646                         rstream *rsm;
2647                         double R_d = R_d; /* for compiler */
2648
2649                         switch (opn) {
2650                         case F_in:
2651                                 R_d = (int)L_d;
2652                                 break;
2653
2654                         case F_rn:
2655                                 R_d = (double)rand() / (double)RAND_MAX;
2656                                 break;
2657 #if ENABLE_FEATURE_AWK_LIBM
2658                         case F_co:
2659                                 R_d = cos(L_d);
2660                                 break;
2661
2662                         case F_ex:
2663                                 R_d = exp(L_d);
2664                                 break;
2665
2666                         case F_lg:
2667                                 R_d = log(L_d);
2668                                 break;
2669
2670                         case F_si:
2671                                 R_d = sin(L_d);
2672                                 break;
2673
2674                         case F_sq:
2675                                 R_d = sqrt(L_d);
2676                                 break;
2677 #else
2678                         case F_co:
2679                         case F_ex:
2680                         case F_lg:
2681                         case F_si:
2682                         case F_sq:
2683                                 syntax_error(EMSG_NO_MATH);
2684                                 break;
2685 #endif
2686                         case F_sr:
2687                                 R_d = (double)seed;
2688                                 seed = op1 ? (unsigned)L_d : (unsigned)time(NULL);
2689                                 srand(seed);
2690                                 break;
2691
2692                         case F_ti:
2693                                 R_d = time(NULL);
2694                                 break;
2695
2696                         case F_le:
2697                                 if (!op1)
2698                                         L.s = getvar_s(intvar[F0]);
2699                                 R_d = strlen(L.s);
2700                                 break;
2701
2702                         case F_sy:
2703                                 fflush_all();
2704                                 R_d = (ENABLE_FEATURE_ALLOW_EXEC && L.s && *L.s)
2705                                                 ? (system(L.s) >> 8) : 0;
2706                                 break;
2707
2708                         case F_ff:
2709                                 if (!op1) {
2710                                         fflush(stdout);
2711                                 } else if (L.s && *L.s) {
2712                                         rsm = newfile(L.s);
2713                                         fflush(rsm->F);
2714                                 } else {
2715                                         fflush_all();
2716                                 }
2717                                 break;
2718
2719                         case F_cl:
2720                                 i = 0;
2721                                 rsm = (rstream *)hash_search(fdhash, L.s);
2722                                 if (rsm) {
2723                                         i = rsm->is_pipe ? pclose(rsm->F) : fclose(rsm->F);
2724                                         free(rsm->buffer);
2725                                         hash_remove(fdhash, L.s);
2726                                 }
2727                                 if (i != 0)
2728                                         setvar_i(intvar[ERRNO], errno);
2729                                 R_d = (double)i;
2730                                 break;
2731                         }
2732                         setvar_i(res, R_d);
2733                         break;
2734                 }
2735
2736                 case XC( OC_BUILTIN ):
2737                         res = exec_builtin(op, res);
2738                         break;
2739
2740                 case XC( OC_SPRINTF ):
2741                         setvar_p(res, awk_printf(op1));
2742                         break;
2743
2744                 case XC( OC_UNARY ): {
2745                         double Ld, R_d;
2746
2747                         Ld = R_d = getvar_i(R.v);
2748                         switch (opn) {
2749                         case 'P':
2750                                 Ld = ++R_d;
2751                                 goto r_op_change;
2752                         case 'p':
2753                                 R_d++;
2754                                 goto r_op_change;
2755                         case 'M':
2756                                 Ld = --R_d;
2757                                 goto r_op_change;
2758                         case 'm':
2759                                 R_d--;
2760  r_op_change:
2761                                 setvar_i(R.v, R_d);
2762                                 break;
2763                         case '!':
2764                                 Ld = !istrue(R.v);
2765                                 break;
2766                         case '-':
2767                                 Ld = -R_d;
2768                                 break;
2769                         }
2770                         setvar_i(res, Ld);
2771                         break;
2772                 }
2773
2774                 case XC( OC_FIELD ): {
2775                         int i = (int)getvar_i(R.v);
2776                         if (i == 0) {
2777                                 res = intvar[F0];
2778                         } else {
2779                                 split_f0();
2780                                 if (i > nfields)
2781                                         fsrealloc(i);
2782                                 res = &Fields[i - 1];
2783                         }
2784                         break;
2785                 }
2786
2787                 /* concatenation (" ") and index joining (",") */
2788                 case XC( OC_CONCAT ):
2789                 case XC( OC_COMMA ): {
2790                         const char *sep = "";
2791                         if ((opinfo & OPCLSMASK) == OC_COMMA)
2792                                 sep = getvar_s(intvar[SUBSEP]);
2793                         setvar_p(res, xasprintf("%s%s%s", L.s, sep, R.s));
2794                         break;
2795                 }
2796
2797                 case XC( OC_LAND ):
2798                         setvar_i(res, istrue(L.v) ? ptest(op->r.n) : 0);
2799                         break;
2800
2801                 case XC( OC_LOR ):
2802                         setvar_i(res, istrue(L.v) ? 1 : ptest(op->r.n));
2803                         break;
2804
2805                 case XC( OC_BINARY ):
2806                 case XC( OC_REPLACE ): {
2807                         double R_d = getvar_i(R.v);
2808                         debug_printf_eval("BINARY/REPLACE: R_d:%f opn:%c\n", R_d, opn);
2809                         switch (opn) {
2810                         case '+':
2811                                 L_d += R_d;
2812                                 break;
2813                         case '-':
2814                                 L_d -= R_d;
2815                                 break;
2816                         case '*':
2817                                 L_d *= R_d;
2818                                 break;
2819                         case '/':
2820                                 if (R_d == 0)
2821                                         syntax_error(EMSG_DIV_BY_ZERO);
2822                                 L_d /= R_d;
2823                                 break;
2824                         case '&':
2825 #if ENABLE_FEATURE_AWK_LIBM
2826                                 L_d = pow(L_d, R_d);
2827 #else
2828                                 syntax_error(EMSG_NO_MATH);
2829 #endif
2830                                 break;
2831                         case '%':
2832                                 if (R_d == 0)
2833                                         syntax_error(EMSG_DIV_BY_ZERO);
2834                                 L_d -= (int)(L_d / R_d) * R_d;
2835                                 break;
2836                         }
2837                         debug_printf_eval("BINARY/REPLACE result:%f\n", L_d);
2838                         res = setvar_i(((opinfo & OPCLSMASK) == OC_BINARY) ? res : L.v, L_d);
2839                         break;
2840                 }
2841
2842                 case XC( OC_COMPARE ): {
2843                         int i = i; /* for compiler */
2844                         double Ld;
2845
2846                         if (is_numeric(L.v) && is_numeric(R.v)) {
2847                                 Ld = getvar_i(L.v) - getvar_i(R.v);
2848                         } else {
2849                                 const char *l = getvar_s(L.v);
2850                                 const char *r = getvar_s(R.v);
2851                                 Ld = icase ? strcasecmp(l, r) : strcmp(l, r);
2852                         }
2853                         switch (opn & 0xfe) {
2854                         case 0:
2855                                 i = (Ld > 0);
2856                                 break;
2857                         case 2:
2858                                 i = (Ld >= 0);
2859                                 break;
2860                         case 4:
2861                                 i = (Ld == 0);
2862                                 break;
2863                         }
2864                         setvar_i(res, (i == 0) ^ (opn & 1));
2865                         break;
2866                 }
2867
2868                 default:
2869                         syntax_error(EMSG_POSSIBLE_ERROR);
2870                 }
2871                 if ((opinfo & OPCLSMASK) <= SHIFT_TIL_THIS)
2872                         op = op->a.n;
2873                 if ((opinfo & OPCLSMASK) >= RECUR_FROM_THIS)
2874                         break;
2875                 if (nextrec)
2876                         break;
2877         } /* while (op) */
2878
2879         nvfree(v1);
2880         return res;
2881 #undef fnargs
2882 #undef seed
2883 #undef sreg
2884 }
2885
2886
2887 /* -------- main & co. -------- */
2888
2889 static int awk_exit(int r)
2890 {
2891         var tv;
2892         unsigned i;
2893         hash_item *hi;
2894
2895         zero_out_var(&tv);
2896
2897         if (!exiting) {
2898                 exiting = TRUE;
2899                 nextrec = FALSE;
2900                 evaluate(endseq.first, &tv);
2901         }
2902
2903         /* waiting for children */
2904         for (i = 0; i < fdhash->csize; i++) {
2905                 hi = fdhash->items[i];
2906                 while (hi) {
2907                         if (hi->data.rs.F && hi->data.rs.is_pipe)
2908                                 pclose(hi->data.rs.F);
2909                         hi = hi->next;
2910                 }
2911         }
2912
2913         exit(r);
2914 }
2915
2916 /* if expr looks like "var=value", perform assignment and return 1,
2917  * otherwise return 0 */
2918 static int is_assignment(const char *expr)
2919 {
2920         char *exprc, *s, *s0, *s1;
2921
2922         exprc = xstrdup(expr);
2923         if (!isalnum_(*exprc) || (s = strchr(exprc, '=')) == NULL) {
2924                 free(exprc);
2925                 return FALSE;
2926         }
2927
2928         *s++ = '\0';
2929         s0 = s1 = s;
2930         while (*s)
2931                 *s1++ = nextchar(&s);
2932
2933         *s1 = '\0';
2934         setvar_u(newvar(exprc), s0);
2935         free(exprc);
2936         return TRUE;
2937 }
2938
2939 /* switch to next input file */
2940 static rstream *next_input_file(void)
2941 {
2942 #define rsm          (G.next_input_file__rsm)
2943 #define files_happen (G.next_input_file__files_happen)
2944
2945         FILE *F = NULL;
2946         const char *fname, *ind;
2947
2948         if (rsm.F)
2949                 fclose(rsm.F);
2950         rsm.F = NULL;
2951         rsm.pos = rsm.adv = 0;
2952
2953         do {
2954                 if (getvar_i(intvar[ARGIND])+1 >= getvar_i(intvar[ARGC])) {
2955                         if (files_happen)
2956                                 return NULL;
2957                         fname = "-";
2958                         F = stdin;
2959                 } else {
2960                         ind = getvar_s(incvar(intvar[ARGIND]));
2961                         fname = getvar_s(findvar(iamarray(intvar[ARGV]), ind));
2962                         if (fname && *fname && !is_assignment(fname))
2963                                 F = xfopen_stdin(fname);
2964                 }
2965         } while (!F);
2966
2967         files_happen = TRUE;
2968         setvar_s(intvar[FILENAME], fname);
2969         rsm.F = F;
2970         return &rsm;
2971 #undef rsm
2972 #undef files_happen
2973 }
2974
2975 int awk_main(int argc, char **argv) MAIN_EXTERNALLY_VISIBLE;
2976 int awk_main(int argc, char **argv)
2977 {
2978         unsigned opt;
2979         char *opt_F, *opt_W;
2980         llist_t *list_v = NULL;
2981         llist_t *list_f = NULL;
2982         int i, j;
2983         var *v;
2984         var tv;
2985         char **envp;
2986         char *vnames = (char *)vNames; /* cheat */
2987         char *vvalues = (char *)vValues;
2988
2989         INIT_G();
2990
2991         /* Undo busybox.c, or else strtod may eat ','! This breaks parsing:
2992          * $1,$2 == '$1,' '$2', NOT '$1' ',' '$2' */
2993         if (ENABLE_LOCALE_SUPPORT)
2994                 setlocale(LC_NUMERIC, "C");
2995
2996         zero_out_var(&tv);
2997
2998         /* allocate global buffer */
2999         g_buf = xmalloc(MAXVARFMT + 1);
3000
3001         vhash = hash_init();
3002         ahash = hash_init();
3003         fdhash = hash_init();
3004         fnhash = hash_init();
3005
3006         /* initialize variables */
3007         for (i = 0; *vnames; i++) {
3008                 intvar[i] = v = newvar(nextword(&vnames));
3009                 if (*vvalues != '\377')
3010                         setvar_s(v, nextword(&vvalues));
3011                 else
3012                         setvar_i(v, 0);
3013
3014                 if (*vnames == '*') {
3015                         v->type |= VF_SPECIAL;
3016                         vnames++;
3017                 }
3018         }
3019
3020         handle_special(intvar[FS]);
3021         handle_special(intvar[RS]);
3022
3023         newfile("/dev/stdin")->F = stdin;
3024         newfile("/dev/stdout")->F = stdout;
3025         newfile("/dev/stderr")->F = stderr;
3026
3027         /* Huh, people report that sometimes environ is NULL. Oh well. */
3028         if (environ) for (envp = environ; *envp; envp++) {
3029                 /* environ is writable, thus we don't strdup it needlessly */
3030                 char *s = *envp;
3031                 char *s1 = strchr(s, '=');
3032                 if (s1) {
3033                         *s1 = '\0';
3034                         /* Both findvar and setvar_u take const char*
3035                          * as 2nd arg -> environment is not trashed */
3036                         setvar_u(findvar(iamarray(intvar[ENVIRON]), s), s1 + 1);
3037                         *s1 = '=';
3038                 }
3039         }
3040         opt_complementary = "v::f::"; /* -v and -f can occur multiple times */
3041         opt = getopt32(argv, "F:v:f:W:", &opt_F, &list_v, &list_f, &opt_W);
3042         argv += optind;
3043         argc -= optind;
3044         if (opt & 0x1)
3045                 setvar_s(intvar[FS], opt_F); // -F
3046         while (list_v) { /* -v */
3047                 if (!is_assignment(llist_pop(&list_v)))
3048                         bb_show_usage();
3049         }
3050         if (list_f) { /* -f */
3051                 do {
3052                         char *s = NULL;
3053                         FILE *from_file;
3054
3055                         g_progname = llist_pop(&list_f);
3056                         from_file = xfopen_stdin(g_progname);
3057                         /* one byte is reserved for some trick in next_token */
3058                         for (i = j = 1; j > 0; i += j) {
3059                                 s = xrealloc(s, i + 4096);
3060                                 j = fread(s + i, 1, 4094, from_file);
3061                         }
3062                         s[i] = '\0';
3063                         fclose(from_file);
3064                         parse_program(s + 1);
3065                         free(s);
3066                 } while (list_f);
3067                 argc++;
3068         } else { // no -f: take program from 1st parameter
3069                 if (!argc)
3070                         bb_show_usage();
3071                 g_progname = "cmd. line";
3072                 parse_program(*argv++);
3073         }
3074         if (opt & 0x8) // -W
3075                 bb_error_msg("warning: unrecognized option '-W %s' ignored", opt_W);
3076
3077         /* fill in ARGV array */
3078         setvar_i(intvar[ARGC], argc);
3079         setari_u(intvar[ARGV], 0, "awk");
3080         i = 0;
3081         while (*argv)
3082                 setari_u(intvar[ARGV], ++i, *argv++);
3083
3084         evaluate(beginseq.first, &tv);
3085         if (!mainseq.first && !endseq.first)
3086                 awk_exit(EXIT_SUCCESS);
3087
3088         /* input file could already be opened in BEGIN block */
3089         if (!iF)
3090                 iF = next_input_file();
3091
3092         /* passing through input files */
3093         while (iF) {
3094                 nextfile = FALSE;
3095                 setvar_i(intvar[FNR], 0);
3096
3097                 while ((i = awk_getline(iF, intvar[F0])) > 0) {
3098                         nextrec = FALSE;
3099                         incvar(intvar[NR]);
3100                         incvar(intvar[FNR]);
3101                         evaluate(mainseq.first, &tv);
3102
3103                         if (nextfile)
3104                                 break;
3105                 }
3106
3107                 if (i < 0)
3108                         syntax_error(strerror(errno));
3109
3110                 iF = next_input_file();
3111         }
3112
3113         awk_exit(EXIT_SUCCESS);
3114         /*return 0;*/
3115 }