libbb: nonblock_safe_read->nonblock_immune_read, remove unused param of xmalloc_reads
[platform/upstream/busybox.git] / editors / awk.c
1 /* vi: set sw=4 ts=4: */
2 /*
3  * awk implementation for busybox
4  *
5  * Copyright (C) 2002 by Dmitry Zakharov <dmit@crp.bank.gov.ua>
6  *
7  * Licensed under GPLv2 or later, see file LICENSE in this source tree.
8  */
9
10 //usage:#define awk_trivial_usage
11 //usage:       "[OPTIONS] [AWK_PROGRAM] [FILE]..."
12 //usage:#define awk_full_usage "\n\n"
13 //usage:       "Options:"
14 //usage:     "\n        -v VAR=VAL      Set variable"
15 //usage:     "\n        -F SEP          Use SEP as field separator"
16 //usage:     "\n        -f FILE         Read program from FILE"
17
18 #include "libbb.h"
19 #include "xregex.h"
20 #include <math.h>
21
22 /* This is a NOEXEC applet. Be very careful! */
23
24
25 /* If you comment out one of these below, it will be #defined later
26  * to perform debug printfs to stderr: */
27 #define debug_printf_walker(...)  do {} while (0)
28 #define debug_printf_eval(...)  do {} while (0)
29
30 #ifndef debug_printf_walker
31 # define debug_printf_walker(...) (fprintf(stderr, __VA_ARGS__))
32 #endif
33 #ifndef debug_printf_eval
34 # define debug_printf_eval(...) (fprintf(stderr, __VA_ARGS__))
35 #endif
36
37
38
39 #define MAXVARFMT       240
40 #define MINNVBLOCK      64
41
42 /* variable flags */
43 #define VF_NUMBER       0x0001  /* 1 = primary type is number */
44 #define VF_ARRAY        0x0002  /* 1 = it's an array */
45
46 #define VF_CACHED       0x0100  /* 1 = num/str value has cached str/num eq */
47 #define VF_USER         0x0200  /* 1 = user input (may be numeric string) */
48 #define VF_SPECIAL      0x0400  /* 1 = requires extra handling when changed */
49 #define VF_WALK         0x0800  /* 1 = variable has alloc'd x.walker list */
50 #define VF_FSTR         0x1000  /* 1 = var::string points to fstring buffer */
51 #define VF_CHILD        0x2000  /* 1 = function arg; x.parent points to source */
52 #define VF_DIRTY        0x4000  /* 1 = variable was set explicitly */
53
54 /* these flags are static, don't change them when value is changed */
55 #define VF_DONTTOUCH    (VF_ARRAY | VF_SPECIAL | VF_WALK | VF_CHILD | VF_DIRTY)
56
/* Node of the list that var.x.walker points to (the list of array elements
 * used by for..in); nodes are chained through 'prev'.
 * NOTE(review): cur/end presumably bracket the not-yet-visited part of wbuf,
 * and wbuf is presumably over-allocated past its declared single byte (like
 * hash_item.name) - confirm against the walker code, not visible here. */
57 typedef struct walker_list {
58         char *end;
59         char *cur;
60         struct walker_list *prev;
61         char wbuf[1];
62 } walker_list;
63
64 /* Variable: one awk value (number and/or string) plus its VF_* flags */
65 typedef struct var_s {
66         unsigned type;            /* flags (VF_*) */
67         double number;            /* numeric value; primary when VF_NUMBER is set */
68         char *string;             /* string value; getvar_s() reads NULL as "" */
69         union {
70                 int aidx;               /* func arg idx (for compilation stage) */
71                 struct xhash_s *array;  /* array ptr */
72                 struct var_s *parent;   /* for func args, ptr to actual parameter */
73                 walker_list *walker;    /* list of array elements (for..in) */
74         } x;
75 } var;
76
77 /* Node chain (pattern-action chain, BEGIN, END, function bodies) */
78 typedef struct chain_s {
79         struct node_s *first;     /* head of the chain */
80         struct node_s *last;      /* tail of the chain */
81         const char *programname;  /* NOTE(review): presumably the source name used in diagnostics - confirm */
82 } chain;
83
84 /* Function: entry stored in the functions hash (see hash_item.data.f) */
85 typedef struct func_s {
86         unsigned nargs;           /* NOTE(review): presumably the number of declared arguments - confirm */
87         struct chain_s body;      /* node chain holding the function body */
88 } func;
89
90 /* I/O stream: entry stored in the redirect streams hash (see hash_item.data.rs) */
91 typedef struct rstream_s {
92         FILE *F;                  /* underlying stdio stream */
93         char *buffer;             /* NOTE(review): presumably the record read buffer; adv/size/pos
                                   * look like its bookkeeping - confirm against the reader code */
94         int adv;
95         int size;
96         int pos;
97         smallint is_pipe;         /* NOTE(review): presumably nonzero when F came from a pipe - confirm */
98 } rstream;
99
/* One entry of an xhash bucket chain.
 * 'data' must remain the FIRST member: hash_search() returns &hi->data and
 * hash_find() stores that pointer back into a hash_item pointer, which only
 * works while both addresses coincide. */
100 typedef struct hash_item_s {
101         union {
102                 struct var_s v;         /* variable/array hash */
103                 struct rstream_s rs;    /* redirect streams hash */
104                 struct func_s f;        /* functions hash */
105         } data;
106         struct hash_item_s *next;       /* next in chain */
107         char name[1];                   /* really it's longer: hash_find() allocates strlen(name)+1 extra bytes */
108 } hash_item;
109
/* Chained hash table keyed by NUL-terminated name.
 * hash_init() starts it with FIRST_PRIME buckets; hash_rebuild() grows it
 * through the sizes listed in PRIMES[]. */
110 typedef struct xhash_s {
111         unsigned nel;           /* num of elements */
112         unsigned csize;         /* current hash size (number of buckets in items[]) */
113         unsigned nprime;        /* next hash size in PRIMES[] (index) */
114         unsigned glen;          /* summary length of item names: sum of strlen(name)+1 */
115         struct hash_item_s **items;     /* bucket heads; NULL for an empty bucket */
116 } xhash;
117
118 /* Tree node of the parsed program.
 * NOTE(review): 'info' presumably carries a tokeninfo[]-style word (operation
 * class, operand flags, priority) and l/r/a are the left, right and auxiliary
 * operands whose active union member depends on the operation - confirm
 * against the parser/evaluator, which are outside this view. */
119 typedef struct node_s {
120         uint32_t info;
121         unsigned lineno;
122         union {
123                 struct node_s *n;
124                 var *v;
125                 int aidx;
126                 char *new_progname;
127                 regex_t *re;
128         } l;
129         union {
130                 struct node_s *n;
131                 regex_t *ire;
132                 func *f;
133         } r;
134         union {
135                 struct node_s *n;
136         } a;
137 } node;
138
139 /* Block of temporary variables, handed out by nvalloc().
 * Blocks form a doubly linked list; g_cb points at the block in use. */
140 typedef struct nvblock_s {
141         int size;                 /* capacity of nv[], counted in vars */
142         var *pos;                 /* first unused var inside nv[] */
143         struct nvblock_s *prev;   /* block allocated before this one */
144         struct nvblock_s *next;   /* block allocated after this one, or NULL */
145         var nv[];                 /* storage: 'size' vars follow the header */
146 } nvblock;
147
/* Splitter: a node bundled with storage for two compiled regexes.
 * Instances visible in this file: fsplitter, rsplitter and
 * exec_builtin__tspl in struct globals2.
 * NOTE(review): the two regex_t slots presumably back node.l.re and
 * node.r.ire - confirm against the splitter setup code. */
148 typedef struct tsplitter_s {
149         node n;
150         regex_t re[2];
151 } tsplitter;
152
153 /* simple token classes */
154 /* Order and hex values are very important!!!  See next_token() */
155 #define TC_SEQSTART      1                              /* ( */
156 #define TC_SEQTERM      (1 << 1)                /* ) */
157 #define TC_REGEXP       (1 << 2)                /* /.../ */
158 #define TC_OUTRDR       (1 << 3)                /* | > >> */
159 #define TC_UOPPOST      (1 << 4)                /* unary postfix operator */
160 #define TC_UOPPRE1      (1 << 5)                /* unary prefix operator */
161 #define TC_BINOPX       (1 << 6)                /* two-opnd operator */
162 #define TC_IN           (1 << 7)
163 #define TC_COMMA        (1 << 8)
164 #define TC_PIPE         (1 << 9)                /* input redirection pipe */
165 #define TC_UOPPRE2      (1 << 10)               /* unary prefix operator */
166 #define TC_ARRTERM      (1 << 11)               /* ] */
167 #define TC_GRPSTART     (1 << 12)               /* { */
168 #define TC_GRPTERM      (1 << 13)               /* } */
169 #define TC_SEMICOL      (1 << 14)
170 #define TC_NEWLINE      (1 << 15)
171 #define TC_STATX        (1 << 16)               /* ctl statement (for, next...) */
172 #define TC_WHILE        (1 << 17)
173 #define TC_ELSE         (1 << 18)
174 #define TC_BUILTIN      (1 << 19)
175 #define TC_GETLINE      (1 << 20)
176 #define TC_FUNCDECL     (1 << 21)               /* `function' `func' */
177 #define TC_BEGIN        (1 << 22)
178 #define TC_END          (1 << 23)
179 #define TC_EOF          (1 << 24)
180 #define TC_VARIABLE     (1 << 25)
181 #define TC_ARRAY        (1 << 26)
182 #define TC_FUNCTION     (1 << 27)
183 #define TC_STRING       (1 << 28)
184 #define TC_NUMBER       (1 << 29)
185
186 #define TC_UOPPRE  (TC_UOPPRE1 | TC_UOPPRE2)
187
188 /* combined token classes */
189 #define TC_BINOP   (TC_BINOPX | TC_COMMA | TC_PIPE | TC_IN)
190 #define TC_UNARYOP (TC_UOPPRE | TC_UOPPOST)
191 #define TC_OPERAND (TC_VARIABLE | TC_ARRAY | TC_FUNCTION \
192                    | TC_BUILTIN | TC_GETLINE | TC_SEQSTART | TC_STRING | TC_NUMBER)
193
194 #define TC_STATEMNT (TC_STATX | TC_WHILE)
195 #define TC_OPTERM  (TC_SEMICOL | TC_NEWLINE)
196
197 /* word tokens, cannot mean something else if not expected */
198 #define TC_WORD    (TC_IN | TC_STATEMNT | TC_ELSE | TC_BUILTIN \
199                    | TC_GETLINE | TC_FUNCDECL | TC_BEGIN | TC_END)
200
201 /* discard newlines after these */
202 #define TC_NOTERM  (TC_COMMA | TC_GRPSTART | TC_GRPTERM \
203                    | TC_BINOP | TC_OPTERM)
204
205 /* what can expression begin with */
206 #define TC_OPSEQ   (TC_OPERAND | TC_UOPPRE | TC_REGEXP)
207 /* what can group begin with */
208 #define TC_GRPSEQ  (TC_OPSEQ | TC_OPTERM | TC_STATEMNT | TC_GRPSTART)
209
210 /* if previous token class is CONCAT1 and next is CONCAT2, concatenation */
211 /* operator is inserted between them */
212 #define TC_CONCAT1 (TC_VARIABLE | TC_ARRTERM | TC_SEQTERM \
213                    | TC_STRING | TC_NUMBER | TC_UOPPOST)
214 #define TC_CONCAT2 (TC_OPERAND | TC_UOPPRE)
215
216 #define OF_RES1    0x010000
217 #define OF_RES2    0x020000
218 #define OF_STR1    0x040000
219 #define OF_STR2    0x080000
220 #define OF_NUM1    0x100000
221 #define OF_CHECKED 0x200000
222
223 /* combined operator flags */
224 #define xx      0
225 #define xV      OF_RES2
226 #define xS      (OF_RES2 | OF_STR2)
227 #define Vx      OF_RES1
228 #define VV      (OF_RES1 | OF_RES2)
229 #define Nx      (OF_RES1 | OF_NUM1)
230 #define NV      (OF_RES1 | OF_NUM1 | OF_RES2)
231 #define Sx      (OF_RES1 | OF_STR1)
232 #define SV      (OF_RES1 | OF_STR1 | OF_RES2)
233 #define SS      (OF_RES1 | OF_STR1 | OF_RES2 | OF_STR2)
234
235 #define OPCLSMASK 0xFF00
236 #define OPNMASK   0x007F
237
238 /* operator priority is stored in the highest byte (even: r->l, odd: l->r grouping)
239  * For builtins it has different meaning: n n s3 s2 s1 v3 v2 v1,
240  * n - min. number of args, vN - resolve Nth arg to var, sN - resolve to string
241  */
242 #define P(x)      (x << 24)
243 #define PRIMASK   0x7F000000
244 #define PRIMASK2  0x7E000000
245
246 /* Operation classes */
247
248 #define SHIFT_TIL_THIS  0x0600
249 #define RECUR_FROM_THIS 0x1000
250
251 enum {
252         OC_DELETE = 0x0100,     OC_EXEC = 0x0200,       OC_NEWSOURCE = 0x0300,
253         OC_PRINT = 0x0400,      OC_PRINTF = 0x0500,     OC_WALKINIT = 0x0600,
254
255         OC_BR = 0x0700,         OC_BREAK = 0x0800,      OC_CONTINUE = 0x0900,
256         OC_EXIT = 0x0a00,       OC_NEXT = 0x0b00,       OC_NEXTFILE = 0x0c00,
257         OC_TEST = 0x0d00,       OC_WALKNEXT = 0x0e00,
258
259         OC_BINARY = 0x1000,     OC_BUILTIN = 0x1100,    OC_COLON = 0x1200,
260         OC_COMMA = 0x1300,      OC_COMPARE = 0x1400,    OC_CONCAT = 0x1500,
261         OC_FBLTIN = 0x1600,     OC_FIELD = 0x1700,      OC_FNARG = 0x1800,
262         OC_FUNC = 0x1900,       OC_GETLINE = 0x1a00,    OC_IN = 0x1b00,
263         OC_LAND = 0x1c00,       OC_LOR = 0x1d00,        OC_MATCH = 0x1e00,
264         OC_MOVE = 0x1f00,       OC_PGETLINE = 0x2000,   OC_REGEXP = 0x2100,
265         OC_REPLACE = 0x2200,    OC_RETURN = 0x2300,     OC_SPRINTF = 0x2400,
266         OC_TERNARY = 0x2500,    OC_UNARY = 0x2600,      OC_VAR = 0x2700,
267         OC_DONE = 0x2800,
268
269         ST_IF = 0x3000,         ST_DO = 0x3100,         ST_FOR = 0x3200,
270         ST_WHILE = 0x3300
271 };
272
273 /* simple builtins */
274 enum {
275         F_in,   F_rn,   F_co,   F_ex,   F_lg,   F_si,   F_sq,   F_sr,
276         F_ti,   F_le,   F_sy,   F_ff,   F_cl
277 };
278
279 /* builtins */
280 enum {
281         B_a2,   B_ix,   B_ma,   B_sp,   B_ss,   B_ti,   B_mt,   B_lo,   B_up,
282         B_ge,   B_gs,   B_su,
283         B_an,   B_co,   B_ls,   B_or,   B_rs,   B_xo,
284 };
285
286 /* tokens and their corresponding info values */
287
288 #define NTC     "\377"  /* switch to next token class (tc<<1) */
289 #define NTCC    '\377'
290
291 #define OC_B  OC_BUILTIN
292
293 static const char tokenlist[] ALIGN1 =
294         "\1("         NTC
295         "\1)"         NTC
296         "\1/"         NTC                                   /* REGEXP */
297         "\2>>"        "\1>"         "\1|"       NTC         /* OUTRDR */
298         "\2++"        "\2--"        NTC                     /* UOPPOST */
299         "\2++"        "\2--"        "\1$"       NTC         /* UOPPRE1 */
300         "\2=="        "\1="         "\2+="      "\2-="      /* BINOPX */
301         "\2*="        "\2/="        "\2%="      "\2^="
302         "\1+"         "\1-"         "\3**="     "\2**"
303         "\1/"         "\1%"         "\1^"       "\1*"
304         "\2!="        "\2>="        "\2<="      "\1>"
305         "\1<"         "\2!~"        "\1~"       "\2&&"
306         "\2||"        "\1?"         "\1:"       NTC
307         "\2in"        NTC
308         "\1,"         NTC
309         "\1|"         NTC
310         "\1+"         "\1-"         "\1!"       NTC         /* UOPPRE2 */
311         "\1]"         NTC
312         "\1{"         NTC
313         "\1}"         NTC
314         "\1;"         NTC
315         "\1\n"        NTC
316         "\2if"        "\2do"        "\3for"     "\5break"   /* STATX */
317         "\10continue" "\6delete"    "\5print"
318         "\6printf"    "\4next"      "\10nextfile"
319         "\6return"    "\4exit"      NTC
320         "\5while"     NTC
321         "\4else"      NTC
322
323         "\3and"       "\5compl"     "\6lshift"  "\2or"
324         "\6rshift"    "\3xor"
325         "\5close"     "\6system"    "\6fflush"  "\5atan2"   /* BUILTIN */
326         "\3cos"       "\3exp"       "\3int"     "\3log"
327         "\4rand"      "\3sin"       "\4sqrt"    "\5srand"
328         "\6gensub"    "\4gsub"      "\5index"   "\6length"
329         "\5match"     "\5split"     "\7sprintf" "\3sub"
330         "\6substr"    "\7systime"   "\10strftime" "\6mktime"
331         "\7tolower"   "\7toupper"   NTC
332         "\7getline"   NTC
333         "\4func"      "\10function" NTC
334         "\5BEGIN"     NTC
335         "\3END"
336         /* compiler adds trailing "\0" */
337         ;
338
339 static const uint32_t tokeninfo[] = {
340         0,
341         0,
342         OC_REGEXP,
343         xS|'a',                  xS|'w',                  xS|'|',
344         OC_UNARY|xV|P(9)|'p',    OC_UNARY|xV|P(9)|'m',
345         OC_UNARY|xV|P(9)|'P',    OC_UNARY|xV|P(9)|'M',    OC_FIELD|xV|P(5),
346         OC_COMPARE|VV|P(39)|5,   OC_MOVE|VV|P(74),        OC_REPLACE|NV|P(74)|'+', OC_REPLACE|NV|P(74)|'-',
347         OC_REPLACE|NV|P(74)|'*', OC_REPLACE|NV|P(74)|'/', OC_REPLACE|NV|P(74)|'%', OC_REPLACE|NV|P(74)|'&',
348         OC_BINARY|NV|P(29)|'+',  OC_BINARY|NV|P(29)|'-',  OC_REPLACE|NV|P(74)|'&', OC_BINARY|NV|P(15)|'&',
349         OC_BINARY|NV|P(25)|'/',  OC_BINARY|NV|P(25)|'%',  OC_BINARY|NV|P(15)|'&',  OC_BINARY|NV|P(25)|'*',
350         OC_COMPARE|VV|P(39)|4,   OC_COMPARE|VV|P(39)|3,   OC_COMPARE|VV|P(39)|0,   OC_COMPARE|VV|P(39)|1,
351         OC_COMPARE|VV|P(39)|2,   OC_MATCH|Sx|P(45)|'!',   OC_MATCH|Sx|P(45)|'~',   OC_LAND|Vx|P(55),
352         OC_LOR|Vx|P(59),         OC_TERNARY|Vx|P(64)|'?', OC_COLON|xx|P(67)|':',
353         OC_IN|SV|P(49), /* in */
354         OC_COMMA|SS|P(80),
355         OC_PGETLINE|SV|P(37),
356         OC_UNARY|xV|P(19)|'+',   OC_UNARY|xV|P(19)|'-',   OC_UNARY|xV|P(19)|'!',
357         0, /* ] */
358         0,
359         0,
360         0,
361         0, /* \n */
362         ST_IF,        ST_DO,        ST_FOR,      OC_BREAK,
363         OC_CONTINUE,  OC_DELETE|Vx, OC_PRINT,
364         OC_PRINTF,    OC_NEXT,      OC_NEXTFILE,
365         OC_RETURN|Vx, OC_EXIT|Nx,
366         ST_WHILE,
367         0, /* else */
368
369         OC_B|B_an|P(0x83), OC_B|B_co|P(0x41), OC_B|B_ls|P(0x83), OC_B|B_or|P(0x83),
370         OC_B|B_rs|P(0x83), OC_B|B_xo|P(0x83),
371         OC_FBLTIN|Sx|F_cl, OC_FBLTIN|Sx|F_sy, OC_FBLTIN|Sx|F_ff, OC_B|B_a2|P(0x83),
372         OC_FBLTIN|Nx|F_co, OC_FBLTIN|Nx|F_ex, OC_FBLTIN|Nx|F_in, OC_FBLTIN|Nx|F_lg,
373         OC_FBLTIN|F_rn,    OC_FBLTIN|Nx|F_si, OC_FBLTIN|Nx|F_sq, OC_FBLTIN|Nx|F_sr,
374         OC_B|B_ge|P(0xd6), OC_B|B_gs|P(0xb6), OC_B|B_ix|P(0x9b), OC_FBLTIN|Sx|F_le,
375         OC_B|B_ma|P(0x89), OC_B|B_sp|P(0x8b), OC_SPRINTF,        OC_B|B_su|P(0xb6),
376         OC_B|B_ss|P(0x8f), OC_FBLTIN|F_ti,    OC_B|B_ti|P(0x0b), OC_B|B_mt|P(0x0b),
377         OC_B|B_lo|P(0x49), OC_B|B_up|P(0x49),
378         OC_GETLINE|SV|P(0),
379         0,                 0,
380         0,
381         0 /* END */
382 };
383
384 /* internal variable names and their initial values       */
385 /* asterisk marks SPECIAL vars; $ is just no-named Field0 */
386 enum {
387         CONVFMT,    OFMT,       FS,         OFS,
388         ORS,        RS,         RT,         FILENAME,
389         SUBSEP,     F0,         ARGIND,     ARGC,
390         ARGV,       ERRNO,      FNR,        NR,
391         NF,         IGNORECASE, ENVIRON,    NUM_INTERNAL_VARS
392 };
393
394 static const char vNames[] ALIGN1 =
395         "CONVFMT\0" "OFMT\0"    "FS\0*"     "OFS\0"
396         "ORS\0"     "RS\0*"     "RT\0"      "FILENAME\0"
397         "SUBSEP\0"  "$\0*"      "ARGIND\0"  "ARGC\0"
398         "ARGV\0"    "ERRNO\0"   "FNR\0"     "NR\0"
399         "NF\0*"     "IGNORECASE\0*" "ENVIRON\0" "\0";
400
401 static const char vValues[] ALIGN1 =
402         "%.6g\0"    "%.6g\0"    " \0"       " \0"
403         "\n\0"      "\n\0"      "\0"        "\0"
404         "\034\0"    "\0"        "\377";
405
406 /* hash size may grow to these values */
407 #define FIRST_PRIME 61
408 static const uint16_t PRIMES[] ALIGN2 = { 251, 1021, 4093, 16381, 65521 };
409
410
411 /* Globals. Split in two parts so that first one is addressed
412  * with (mostly short) negative offsets.
413  * NB: it's unsafe to put members of type "double"
414  * into globals2 (gcc may fail to align them).
415  */
416 struct globals {
417         double t_double;
418         chain beginseq, mainseq, endseq;
419         chain *seq;
420         node *break_ptr, *continue_ptr;
421         rstream *iF;
422         xhash *vhash, *ahash, *fdhash, *fnhash;
423         const char *g_progname;
424         int g_lineno;
425         int nfields;
426         int maxfields; /* used in fsrealloc() only */
427         var *Fields;
428         nvblock *g_cb;
429         char *g_pos;
430         char *g_buf;
431         smallint icase;
432         smallint exiting;
433         smallint nextrec;
434         smallint nextfile;
435         smallint is_f0_split;
436 };
437 struct globals2 {
438         uint32_t t_info; /* often used */
439         uint32_t t_tclass;
440         char *t_string;
441         int t_lineno;
442         int t_rollback;
443
444         var *intvar[NUM_INTERNAL_VARS]; /* often used */
445
446         /* former statics from various functions */
447         char *split_f0__fstrings;
448
449         uint32_t next_token__save_tclass;
450         uint32_t next_token__save_info;
451         uint32_t next_token__ltclass;
452         smallint next_token__concat_inserted;
453
454         smallint next_input_file__files_happen;
455         rstream next_input_file__rsm;
456
457         var *evaluate__fnargs;
458         unsigned evaluate__seed;
459         regex_t evaluate__sreg;
460
461         var ptest__v;
462
463         tsplitter exec_builtin__tspl;
464
465         /* biggest and least used members go last */
466         tsplitter fsplitter, rsplitter;
467 };
468 #define G1 (ptr_to_globals[-1])
469 #define G (*(struct globals2 *)ptr_to_globals)
470 /* For debug. nm --size-sort awk.o | grep -vi ' [tr] ' */
471 /*char G1size[sizeof(G1)]; - 0x74 */
472 /*char Gsize[sizeof(G)]; - 0x1c4 */
473 /* Trying to keep most of members accessible with short offsets: */
474 /*char Gofs_seed[offsetof(struct globals2, evaluate__seed)]; - 0x90 */
475 #define t_double     (G1.t_double    )
476 #define beginseq     (G1.beginseq    )
477 #define mainseq      (G1.mainseq     )
478 #define endseq       (G1.endseq      )
479 #define seq          (G1.seq         )
480 #define break_ptr    (G1.break_ptr   )
481 #define continue_ptr (G1.continue_ptr)
482 #define iF           (G1.iF          )
483 #define vhash        (G1.vhash       )
484 #define ahash        (G1.ahash       )
485 #define fdhash       (G1.fdhash      )
486 #define fnhash       (G1.fnhash      )
487 #define g_progname   (G1.g_progname  )
488 #define g_lineno     (G1.g_lineno    )
489 #define nfields      (G1.nfields     )
490 #define maxfields    (G1.maxfields   )
491 #define Fields       (G1.Fields      )
492 #define g_cb         (G1.g_cb        )
493 #define g_pos        (G1.g_pos       )
494 #define g_buf        (G1.g_buf       )
495 #define icase        (G1.icase       )
496 #define exiting      (G1.exiting     )
497 #define nextrec      (G1.nextrec     )
498 #define nextfile     (G1.nextfile    )
499 #define is_f0_split  (G1.is_f0_split )
500 #define t_info       (G.t_info      )
501 #define t_tclass     (G.t_tclass    )
502 #define t_string     (G.t_string    )
503 #define t_lineno     (G.t_lineno    )
504 #define t_rollback   (G.t_rollback  )
505 #define intvar       (G.intvar      )
506 #define fsplitter    (G.fsplitter   )
507 #define rsplitter    (G.rsplitter   )
508 #define INIT_G() do { \
509         SET_PTR_TO_GLOBALS((char*)xzalloc(sizeof(G1)+sizeof(G)) + sizeof(G1)); \
510         G.next_token__ltclass = TC_OPTERM; \
511         G.evaluate__seed = 1; \
512 } while (0)
513
514
515 /* function prototypes */
516 static void handle_special(var *);
517 static node *parse_expr(uint32_t);
518 static void chain_group(void);
519 static var *evaluate(node *, var *);
520 static rstream *next_input_file(void);
521 static int fmt_num(char *, int, const char *, double, int);
522 static int awk_exit(int) NORETURN;
523
524 /* ---- error handling ---- */
525
526 static const char EMSG_INTERNAL_ERROR[] ALIGN1 = "Internal error";
527 static const char EMSG_UNEXP_EOS[] ALIGN1 = "Unexpected end of string";
528 static const char EMSG_UNEXP_TOKEN[] ALIGN1 = "Unexpected token";
529 static const char EMSG_DIV_BY_ZERO[] ALIGN1 = "Division by zero";
530 static const char EMSG_INV_FMT[] ALIGN1 = "Invalid format specifier";
531 static const char EMSG_TOO_FEW_ARGS[] ALIGN1 = "Too few arguments for builtin";
532 static const char EMSG_NOT_ARRAY[] ALIGN1 = "Not an array";
533 static const char EMSG_POSSIBLE_ERROR[] ALIGN1 = "Possible syntax error";
534 static const char EMSG_UNDEF_FUNC[] ALIGN1 = "Call to undefined function";
535 static const char EMSG_NO_MATH[] ALIGN1 = "Math support is not compiled in";
536
537 static void zero_out_var(var *vp)
538 {
539         memset(vp, 0, sizeof(*vp));
540 }
541
542 static void syntax_error(const char *message) NORETURN;
543 static void syntax_error(const char *message)
544 {
545         bb_error_msg_and_die("%s:%i: %s", g_progname, g_lineno, message);
546 }
547
548 /* ---- hash stuff ---- */
549
/* Hash a NUL-terminated name.
 * Every character folds in as h = h * 63 + ch, in unsigned arithmetic
 * (wraps silently on overflow). The empty string hashes to 0.
 * Callers reduce the result modulo the table size themselves. */
static unsigned hashidx(const char *name)
{
	unsigned h;

	for (h = 0; *name != '\0'; name++)
		h = h * 63 + *name;
	return h;
}
558
559 /* create new hash */
560 static xhash *hash_init(void)
561 {
562         xhash *newhash;
563
564         newhash = xzalloc(sizeof(*newhash));
565         newhash->csize = FIRST_PRIME;
566         newhash->items = xzalloc(FIRST_PRIME * sizeof(newhash->items[0]));
567
568         return newhash;
569 }
570
571 /* find item in hash, return ptr to data, NULL if not found */
572 static void *hash_search(xhash *hash, const char *name)
573 {
574         hash_item *hi;
575
576         hi = hash->items[hashidx(name) % hash->csize];
577         while (hi) {
578                 if (strcmp(hi->name, name) == 0)
579                         return &hi->data;
580                 hi = hi->next;
581         }
582         return NULL;
583 }
584
585 /* grow hash if it becomes too big */
586 static void hash_rebuild(xhash *hash)
587 {
588         unsigned newsize, i, idx;
589         hash_item **newitems, *hi, *thi;
590
591         if (hash->nprime == ARRAY_SIZE(PRIMES))
592                 return;
593
594         newsize = PRIMES[hash->nprime++];
595         newitems = xzalloc(newsize * sizeof(newitems[0]));
596
597         for (i = 0; i < hash->csize; i++) {
598                 hi = hash->items[i];
599                 while (hi) {
600                         thi = hi;
601                         hi = thi->next;
602                         idx = hashidx(thi->name) % newsize;
603                         thi->next = newitems[idx];
604                         newitems[idx] = thi;
605                 }
606         }
607
608         free(hash->items);
609         hash->csize = newsize;
610         hash->items = newitems;
611 }
612
613 /* find item in hash, add it if necessary. Return ptr to data */
614 static void *hash_find(xhash *hash, const char *name)
615 {
616         hash_item *hi;
617         unsigned idx;
618         int l;
619
620         hi = hash_search(hash, name);
621         if (!hi) {
622                 if (++hash->nel / hash->csize > 10)
623                         hash_rebuild(hash);
624
625                 l = strlen(name) + 1;
626                 hi = xzalloc(sizeof(*hi) + l);
627                 strcpy(hi->name, name);
628
629                 idx = hashidx(name) % hash->csize;
630                 hi->next = hash->items[idx];
631                 hash->items[idx] = hi;
632                 hash->glen += l;
633         }
634         return &hi->data;
635 }
636
637 #define findvar(hash, name) ((var*)    hash_find((hash), (name)))
638 #define newvar(name)        ((var*)    hash_find(vhash, (name)))
639 #define newfile(name)       ((rstream*)hash_find(fdhash, (name)))
640 #define newfunc(name)       ((func*)   hash_find(fnhash, (name)))
641
642 static void hash_remove(xhash *hash, const char *name)
643 {
644         hash_item *hi, **phi;
645
646         phi = &hash->items[hashidx(name) % hash->csize];
647         while (*phi) {
648                 hi = *phi;
649                 if (strcmp(hi->name, name) == 0) {
650                         hash->glen -= (strlen(name) + 1);
651                         hash->nel--;
652                         *phi = hi->next;
653                         free(hi);
654                         break;
655                 }
656                 phi = &hi->next;
657         }
658 }
659
660 /* ------ some useful functions ------ */
661
662 static char *skip_spaces(char *p)
663 {
664         while (1) {
665                 if (*p == '\\' && p[1] == '\n') {
666                         p++;
667                         t_lineno++;
668                 } else if (*p != ' ' && *p != '\t') {
669                         break;
670                 }
671                 p++;
672         }
673         return p;
674 }
675
/* Fetch the next word from a sequence of NUL-terminated words.
 * Returns the old *s and moves *s just past that word's terminating NUL. */
static char *nextword(char **s)
{
	char *word = *s;

	*s = word + strlen(word) + 1;
	return word;
}
684
/* Read one (possibly backslash-escaped) character from *s and advance *s.
 * A backslash is passed to bb_process_escape_sequence(). When that call
 * returns a backslash without moving *s, the escape is treated as
 * unrecognized ("\z"): the character after the backslash is returned
 * as is, and *s steps over it unless it is the terminating NUL. */
static char nextchar(char **s)
{
	char *after;
	char ch;

	ch = **s;
	after = ++*s;
	if (ch != '\\')
		return ch;

	ch = bb_process_escape_sequence((const char**)s);
	if (ch == '\\' && *s == after) {
		/* unrecognized \z: hand back z itself */
		ch = **s;
		if (ch != '\0')
			++*s;
	}
	return ch;
}
700
701 static ALWAYS_INLINE int isalnum_(int c)
702 {
703         return (isalnum(c) || c == '_');
704 }
705
706 static double my_strtod(char **pp)
707 {
708         char *cp = *pp;
709         if (ENABLE_DESKTOP && cp[0] == '0') {
710                 /* Might be hex or octal integer: 0x123abc or 07777 */
711                 char c = (cp[1] | 0x20);
712                 if (c == 'x' || isdigit(cp[1])) {
713                         unsigned long long ull = strtoull(cp, pp, 0);
714                         if (c == 'x')
715                                 return ull;
716                         c = **pp;
717                         if (!isdigit(c) && c != '.')
718                                 return ull;
719                         /* else: it may be a floating number. Examples:
720                          * 009.123 (*pp points to '9')
721                          * 000.123 (*pp points to '.')
722                          * fall through to strtod.
723                          */
724                 }
725         }
726         return strtod(cp, pp);
727 }
728
729 /* -------- working with variables (set/get/copy/etc) -------- */
730
731 static xhash *iamarray(var *v)
732 {
733         var *a = v;
734
735         while (a->type & VF_CHILD)
736                 a = a->x.parent;
737
738         if (!(a->type & VF_ARRAY)) {
739                 a->type |= VF_ARRAY;
740                 a->x.array = hash_init();
741         }
742         return a->x.array;
743 }
744
745 static void clear_array(xhash *array)
746 {
747         unsigned i;
748         hash_item *hi, *thi;
749
750         for (i = 0; i < array->csize; i++) {
751                 hi = array->items[i];
752                 while (hi) {
753                         thi = hi;
754                         hi = hi->next;
755                         free(thi->data.v.string);
756                         free(thi);
757                 }
758                 array->items[i] = NULL;
759         }
760         array->glen = array->nel = 0;
761 }
762
763 /* clear a variable */
764 static var *clrvar(var *v)
765 {
766         if (!(v->type & VF_FSTR))
767                 free(v->string);
768
769         v->type &= VF_DONTTOUCH;
770         v->type |= VF_DIRTY;
771         v->string = NULL;
772         return v;
773 }
774
775 /* assign string value to variable */
776 static var *setvar_p(var *v, char *value)
777 {
778         clrvar(v);
779         v->string = value;
780         handle_special(v);
781         return v;
782 }
783
784 /* same as setvar_p but make a copy of string */
785 static var *setvar_s(var *v, const char *value)
786 {
787         return setvar_p(v, (value && *value) ? xstrdup(value) : NULL);
788 }
789
790 /* same as setvar_s but sets USER flag */
791 static var *setvar_u(var *v, const char *value)
792 {
793         v = setvar_s(v, value);
794         v->type |= VF_USER;
795         return v;
796 }
797
798 /* set array element to user string */
799 static void setari_u(var *a, int idx, const char *s)
800 {
801         var *v;
802
803         v = findvar(iamarray(a), itoa(idx));
804         setvar_u(v, s);
805 }
806
807 /* assign numeric value to variable */
808 static var *setvar_i(var *v, double value)
809 {
810         clrvar(v);
811         v->type |= VF_NUMBER;
812         v->number = value;
813         handle_special(v);
814         return v;
815 }
816
817 static const char *getvar_s(var *v)
818 {
819         /* if v is numeric and has no cached string, convert it to string */
820         if ((v->type & (VF_NUMBER | VF_CACHED)) == VF_NUMBER) {
821                 fmt_num(g_buf, MAXVARFMT, getvar_s(intvar[CONVFMT]), v->number, TRUE);
822                 v->string = xstrdup(g_buf);
823                 v->type |= VF_CACHED;
824         }
825         return (v->string == NULL) ? "" : v->string;
826 }
827
828 static double getvar_i(var *v)
829 {
830         char *s;
831
832         if ((v->type & (VF_NUMBER | VF_CACHED)) == 0) {
833                 v->number = 0;
834                 s = v->string;
835                 if (s && *s) {
836                         debug_printf_eval("getvar_i: '%s'->", s);
837                         v->number = my_strtod(&s);
838                         debug_printf_eval("%f (s:'%s')\n", v->number, s);
839                         if (v->type & VF_USER) {
840                                 s = skip_spaces(s);
841                                 if (*s != '\0')
842                                         v->type &= ~VF_USER;
843                         }
844                 } else {
845                         debug_printf_eval("getvar_i: '%s'->zero\n", s);
846                         v->type &= ~VF_USER;
847                 }
848                 v->type |= VF_CACHED;
849         }
850         debug_printf_eval("getvar_i: %f\n", v->number);
851         return v->number;
852 }
853
854 /* Used for operands of bitwise ops */
855 static unsigned long getvar_i_int(var *v)
856 {
857         double d = getvar_i(v);
858
859         /* Casting doubles to longs is undefined for values outside
860          * of target type range. Try to widen it as much as possible */
861         if (d >= 0)
862                 return (unsigned long)d;
863         /* Why? Think about d == -4294967295.0 (assuming 32bit longs) */
864         return - (long) (unsigned long) (-d);
865 }
866
867 static var *copyvar(var *dest, const var *src)
868 {
869         if (dest != src) {
870                 clrvar(dest);
871                 dest->type |= (src->type & ~(VF_DONTTOUCH | VF_FSTR));
872                 debug_printf_eval("copyvar: number:%f string:'%s'\n", src->number, src->string);
873                 dest->number = src->number;
874                 if (src->string)
875                         dest->string = xstrdup(src->string);
876         }
877         handle_special(dest);
878         return dest;
879 }
880
881 static var *incvar(var *v)
882 {
883         return setvar_i(v, getvar_i(v) + 1.0);
884 }
885
886 /* return true if v is number or numeric string */
887 static int is_numeric(var *v)
888 {
889         getvar_i(v);
890         return ((v->type ^ VF_DIRTY) & (VF_NUMBER | VF_USER | VF_DIRTY));
891 }
892
893 /* return 1 when value of v corresponds to true, 0 otherwise */
894 static int istrue(var *v)
895 {
896         if (is_numeric(v))
897                 return (v->number != 0);
898         return (v->string && v->string[0]);
899 }
900
901 /* temporary variables allocator. Last allocated should be first freed */
902 static var *nvalloc(int n)
903 {
904         nvblock *pb = NULL;
905         var *v, *r;
906         int size;
907
908         while (g_cb) {
909                 pb = g_cb;
910                 if ((g_cb->pos - g_cb->nv) + n <= g_cb->size)
911                         break;
912                 g_cb = g_cb->next;
913         }
914
915         if (!g_cb) {
916                 size = (n <= MINNVBLOCK) ? MINNVBLOCK : n;
917                 g_cb = xzalloc(sizeof(nvblock) + size * sizeof(var));
918                 g_cb->size = size;
919                 g_cb->pos = g_cb->nv;
920                 g_cb->prev = pb;
921                 /*g_cb->next = NULL; - xzalloc did it */
922                 if (pb)
923                         pb->next = g_cb;
924         }
925
926         v = r = g_cb->pos;
927         g_cb->pos += n;
928
929         while (v < g_cb->pos) {
930                 v->type = 0;
931                 v->string = NULL;
932                 v++;
933         }
934
935         return r;
936 }
937
938 static void nvfree(var *v)
939 {
940         var *p;
941
942         if (v < g_cb->nv || v >= g_cb->pos)
943                 syntax_error(EMSG_INTERNAL_ERROR);
944
945         for (p = v; p < g_cb->pos; p++) {
946                 if ((p->type & (VF_ARRAY | VF_CHILD)) == VF_ARRAY) {
947                         clear_array(iamarray(p));
948                         free(p->x.array->items);
949                         free(p->x.array);
950                 }
951                 if (p->type & VF_WALK) {
952                         walker_list *n;
953                         walker_list *w = p->x.walker;
954                         debug_printf_walker("nvfree: freeing walker @%p\n", &p->x.walker);
955                         p->x.walker = NULL;
956                         while (w) {
957                                 n = w->prev;
958                                 debug_printf_walker(" free(%p)\n", w);
959                                 free(w);
960                                 w = n;
961                         }
962                 }
963                 clrvar(p);
964         }
965
966         g_cb->pos = v;
967         while (g_cb->prev && g_cb->pos == g_cb->nv) {
968                 g_cb = g_cb->prev;
969         }
970 }
971
972 /* ------- awk program text parsing ------- */
973
974 /* Parse next token pointed by global pos, place results into global ttt.
975  * If token isn't expected, give away. Return token class
976  */
977 static uint32_t next_token(uint32_t expected)
978 {
979 #define concat_inserted (G.next_token__concat_inserted)
980 #define save_tclass     (G.next_token__save_tclass)
981 #define save_info       (G.next_token__save_info)
982 /* Initialized to TC_OPTERM: */
983 #define ltclass         (G.next_token__ltclass)
984
985         char *p, *s;
986         const char *tl;
987         uint32_t tc;
988         const uint32_t *ti;
989
990         if (t_rollback) {
991                 t_rollback = FALSE;
992
993         } else if (concat_inserted) {
994                 concat_inserted = FALSE;
995                 t_tclass = save_tclass;
996                 t_info = save_info;
997
998         } else {
999                 p = g_pos;
1000  readnext:
1001                 p = skip_spaces(p);
1002                 g_lineno = t_lineno;
1003                 if (*p == '#')
1004                         while (*p != '\n' && *p != '\0')
1005                                 p++;
1006
1007                 if (*p == '\n')
1008                         t_lineno++;
1009
1010                 if (*p == '\0') {
1011                         tc = TC_EOF;
1012
1013                 } else if (*p == '\"') {
1014                         /* it's a string */
1015                         t_string = s = ++p;
1016                         while (*p != '\"') {
1017                                 char *pp;
1018                                 if (*p == '\0' || *p == '\n')
1019                                         syntax_error(EMSG_UNEXP_EOS);
1020                                 pp = p;
1021                                 *s++ = nextchar(&pp);
1022                                 p = pp;
1023                         }
1024                         p++;
1025                         *s = '\0';
1026                         tc = TC_STRING;
1027
1028                 } else if ((expected & TC_REGEXP) && *p == '/') {
1029                         /* it's regexp */
1030                         t_string = s = ++p;
1031                         while (*p != '/') {
1032                                 if (*p == '\0' || *p == '\n')
1033                                         syntax_error(EMSG_UNEXP_EOS);
1034                                 *s = *p++;
1035                                 if (*s++ == '\\') {
1036                                         char *pp = p;
1037                                         s[-1] = bb_process_escape_sequence((const char **)&pp);
1038                                         if (*p == '\\')
1039                                                 *s++ = '\\';
1040                                         if (pp == p)
1041                                                 *s++ = *p++;
1042                                         else
1043                                                 p = pp;
1044                                 }
1045                         }
1046                         p++;
1047                         *s = '\0';
1048                         tc = TC_REGEXP;
1049
1050                 } else if (*p == '.' || isdigit(*p)) {
1051                         /* it's a number */
1052                         char *pp = p;
1053                         t_double = my_strtod(&pp);
1054                         p = pp;
1055                         if (*p == '.')
1056                                 syntax_error(EMSG_UNEXP_TOKEN);
1057                         tc = TC_NUMBER;
1058
1059                 } else {
1060                         /* search for something known */
1061                         tl = tokenlist;
1062                         tc = 0x00000001;
1063                         ti = tokeninfo;
1064                         while (*tl) {
1065                                 int l = (unsigned char) *tl++;
1066                                 if (l == (unsigned char) NTCC) {
1067                                         tc <<= 1;
1068                                         continue;
1069                                 }
1070                                 /* if token class is expected,
1071                                  * token matches,
1072                                  * and it's not a longer word,
1073                                  */
1074                                 if ((tc & (expected | TC_WORD | TC_NEWLINE))
1075                                  && strncmp(p, tl, l) == 0
1076                                  && !((tc & TC_WORD) && isalnum_(p[l]))
1077                                 ) {
1078                                         /* then this is what we are looking for */
1079                                         t_info = *ti;
1080                                         p += l;
1081                                         goto token_found;
1082                                 }
1083                                 ti++;
1084                                 tl += l;
1085                         }
1086                         /* not a known token */
1087
1088                         /* is it a name? (var/array/function) */
1089                         if (!isalnum_(*p))
1090                                 syntax_error(EMSG_UNEXP_TOKEN); /* no */
1091                         /* yes */
1092                         t_string = --p;
1093                         while (isalnum_(*++p)) {
1094                                 p[-1] = *p;
1095                         }
1096                         p[-1] = '\0';
1097                         tc = TC_VARIABLE;
1098                         /* also consume whitespace between functionname and bracket */
1099                         if (!(expected & TC_VARIABLE) || (expected & TC_ARRAY))
1100                                 p = skip_spaces(p);
1101                         if (*p == '(') {
1102                                 tc = TC_FUNCTION;
1103                         } else {
1104                                 if (*p == '[') {
1105                                         p++;
1106                                         tc = TC_ARRAY;
1107                                 }
1108                         }
1109  token_found: ;
1110                 }
1111                 g_pos = p;
1112
1113                 /* skipping newlines in some cases */
1114                 if ((ltclass & TC_NOTERM) && (tc & TC_NEWLINE))
1115                         goto readnext;
1116
1117                 /* insert concatenation operator when needed */
1118                 if ((ltclass & TC_CONCAT1) && (tc & TC_CONCAT2) && (expected & TC_BINOP)) {
1119                         concat_inserted = TRUE;
1120                         save_tclass = tc;
1121                         save_info = t_info;
1122                         tc = TC_BINOP;
1123                         t_info = OC_CONCAT | SS | P(35);
1124                 }
1125
1126                 t_tclass = tc;
1127         }
1128         ltclass = t_tclass;
1129
1130         /* Are we ready for this? */
1131         if (!(ltclass & expected))
1132                 syntax_error((ltclass & (TC_NEWLINE | TC_EOF)) ?
1133                                 EMSG_UNEXP_EOS : EMSG_UNEXP_TOKEN);
1134
1135         return ltclass;
1136 #undef concat_inserted
1137 #undef save_tclass
1138 #undef save_info
1139 #undef ltclass
1140 }
1141
1142 static void rollback_token(void)
1143 {
1144         t_rollback = TRUE;
1145 }
1146
1147 static node *new_node(uint32_t info)
1148 {
1149         node *n;
1150
1151         n = xzalloc(sizeof(node));
1152         n->info = info;
1153         n->lineno = g_lineno;
1154         return n;
1155 }
1156
1157 static void mk_re_node(const char *s, node *n, regex_t *re)
1158 {
1159         n->info = OC_REGEXP;
1160         n->l.re = re;
1161         n->r.ire = re + 1;
1162         xregcomp(re, s, REG_EXTENDED);
1163         xregcomp(re + 1, s, REG_EXTENDED | REG_ICASE);
1164 }
1165
1166 static node *condition(void)
1167 {
1168         next_token(TC_SEQSTART);
1169         return parse_expr(TC_SEQTERM);
1170 }
1171
1172 /* parse expression terminated by given argument, return ptr
1173  * to built subtree. Terminator is eaten by parse_expr */
1174 static node *parse_expr(uint32_t iexp)
1175 {
1176         node sn;
1177         node *cn = &sn;
1178         node *vn, *glptr;
1179         uint32_t tc, xtc;
1180         var *v;
1181
1182         sn.info = PRIMASK;
1183         sn.r.n = glptr = NULL;
1184         xtc = TC_OPERAND | TC_UOPPRE | TC_REGEXP | iexp;
1185
1186         while (!((tc = next_token(xtc)) & iexp)) {
1187
1188                 if (glptr && (t_info == (OC_COMPARE | VV | P(39) | 2))) {
1189                         /* input redirection (<) attached to glptr node */
1190                         cn = glptr->l.n = new_node(OC_CONCAT | SS | P(37));
1191                         cn->a.n = glptr;
1192                         xtc = TC_OPERAND | TC_UOPPRE;
1193                         glptr = NULL;
1194
1195                 } else if (tc & (TC_BINOP | TC_UOPPOST)) {
1196                         /* for binary and postfix-unary operators, jump back over
1197                          * previous operators with higher priority */
1198                         vn = cn;
1199                         while (((t_info & PRIMASK) > (vn->a.n->info & PRIMASK2))
1200                             || ((t_info == vn->info) && ((t_info & OPCLSMASK) == OC_COLON))
1201                         ) {
1202                                 vn = vn->a.n;
1203                         }
1204                         if ((t_info & OPCLSMASK) == OC_TERNARY)
1205                                 t_info += P(6);
1206                         cn = vn->a.n->r.n = new_node(t_info);
1207                         cn->a.n = vn->a.n;
1208                         if (tc & TC_BINOP) {
1209                                 cn->l.n = vn;
1210                                 xtc = TC_OPERAND | TC_UOPPRE | TC_REGEXP;
1211                                 if ((t_info & OPCLSMASK) == OC_PGETLINE) {
1212                                         /* it's a pipe */
1213                                         next_token(TC_GETLINE);
1214                                         /* give maximum priority to this pipe */
1215                                         cn->info &= ~PRIMASK;
1216                                         xtc = TC_OPERAND | TC_UOPPRE | TC_BINOP | iexp;
1217                                 }
1218                         } else {
1219                                 cn->r.n = vn;
1220                                 xtc = TC_OPERAND | TC_UOPPRE | TC_BINOP | iexp;
1221                         }
1222                         vn->a.n = cn;
1223
1224                 } else {
1225                         /* for operands and prefix-unary operators, attach them
1226                          * to last node */
1227                         vn = cn;
1228                         cn = vn->r.n = new_node(t_info);
1229                         cn->a.n = vn;
1230                         xtc = TC_OPERAND | TC_UOPPRE | TC_REGEXP;
1231                         if (tc & (TC_OPERAND | TC_REGEXP)) {
1232                                 xtc = TC_UOPPRE | TC_UOPPOST | TC_BINOP | TC_OPERAND | iexp;
1233                                 /* one should be very careful with switch on tclass -
1234                                  * only simple tclasses should be used! */
1235                                 switch (tc) {
1236                                 case TC_VARIABLE:
1237                                 case TC_ARRAY:
1238                                         cn->info = OC_VAR;
1239                                         v = hash_search(ahash, t_string);
1240                                         if (v != NULL) {
1241                                                 cn->info = OC_FNARG;
1242                                                 cn->l.aidx = v->x.aidx;
1243                                         } else {
1244                                                 cn->l.v = newvar(t_string);
1245                                         }
1246                                         if (tc & TC_ARRAY) {
1247                                                 cn->info |= xS;
1248                                                 cn->r.n = parse_expr(TC_ARRTERM);
1249                                         }
1250                                         break;
1251
1252                                 case TC_NUMBER:
1253                                 case TC_STRING:
1254                                         cn->info = OC_VAR;
1255                                         v = cn->l.v = xzalloc(sizeof(var));
1256                                         if (tc & TC_NUMBER)
1257                                                 setvar_i(v, t_double);
1258                                         else
1259                                                 setvar_s(v, t_string);
1260                                         break;
1261
1262                                 case TC_REGEXP:
1263                                         mk_re_node(t_string, cn, xzalloc(sizeof(regex_t)*2));
1264                                         break;
1265
1266                                 case TC_FUNCTION:
1267                                         cn->info = OC_FUNC;
1268                                         cn->r.f = newfunc(t_string);
1269                                         cn->l.n = condition();
1270                                         break;
1271
1272                                 case TC_SEQSTART:
1273                                         cn = vn->r.n = parse_expr(TC_SEQTERM);
1274                                         cn->a.n = vn;
1275                                         break;
1276
1277                                 case TC_GETLINE:
1278                                         glptr = cn;
1279                                         xtc = TC_OPERAND | TC_UOPPRE | TC_BINOP | iexp;
1280                                         break;
1281
1282                                 case TC_BUILTIN:
1283                                         cn->l.n = condition();
1284                                         break;
1285                                 }
1286                         }
1287                 }
1288         }
1289         return sn.r.n;
1290 }
1291
1292 /* add node to chain. Return ptr to alloc'd node */
1293 static node *chain_node(uint32_t info)
1294 {
1295         node *n;
1296
1297         if (!seq->first)
1298                 seq->first = seq->last = new_node(0);
1299
1300         if (seq->programname != g_progname) {
1301                 seq->programname = g_progname;
1302                 n = chain_node(OC_NEWSOURCE);
1303                 n->l.new_progname = xstrdup(g_progname);
1304         }
1305
1306         n = seq->last;
1307         n->info = info;
1308         seq->last = n->a.n = new_node(OC_DONE);
1309
1310         return n;
1311 }
1312
1313 static void chain_expr(uint32_t info)
1314 {
1315         node *n;
1316
1317         n = chain_node(info);
1318         n->l.n = parse_expr(TC_OPTERM | TC_GRPTERM);
1319         if (t_tclass & TC_GRPTERM)
1320                 rollback_token();
1321 }
1322
1323 static node *chain_loop(node *nn)
1324 {
1325         node *n, *n2, *save_brk, *save_cont;
1326
1327         save_brk = break_ptr;
1328         save_cont = continue_ptr;
1329
1330         n = chain_node(OC_BR | Vx);
1331         continue_ptr = new_node(OC_EXEC);
1332         break_ptr = new_node(OC_EXEC);
1333         chain_group();
1334         n2 = chain_node(OC_EXEC | Vx);
1335         n2->l.n = nn;
1336         n2->a.n = n;
1337         continue_ptr->a.n = n2;
1338         break_ptr->a.n = n->r.n = seq->last;
1339
1340         continue_ptr = save_cont;
1341         break_ptr = save_brk;
1342
1343         return n;
1344 }
1345
1346 /* parse group and attach it to chain */
1347 static void chain_group(void)
1348 {
1349         uint32_t c;
1350         node *n, *n2, *n3;
1351
1352         do {
1353                 c = next_token(TC_GRPSEQ);
1354         } while (c & TC_NEWLINE);
1355
1356         if (c & TC_GRPSTART) {
1357                 while (next_token(TC_GRPSEQ | TC_GRPTERM) != TC_GRPTERM) {
1358                         if (t_tclass & TC_NEWLINE)
1359                                 continue;
1360                         rollback_token();
1361                         chain_group();
1362                 }
1363         } else if (c & (TC_OPSEQ | TC_OPTERM)) {
1364                 rollback_token();
1365                 chain_expr(OC_EXEC | Vx);
1366         } else {                                                /* TC_STATEMNT */
1367                 switch (t_info & OPCLSMASK) {
1368                 case ST_IF:
1369                         n = chain_node(OC_BR | Vx);
1370                         n->l.n = condition();
1371                         chain_group();
1372                         n2 = chain_node(OC_EXEC);
1373                         n->r.n = seq->last;
1374                         if (next_token(TC_GRPSEQ | TC_GRPTERM | TC_ELSE) == TC_ELSE) {
1375                                 chain_group();
1376                                 n2->a.n = seq->last;
1377                         } else {
1378                                 rollback_token();
1379                         }
1380                         break;
1381
1382                 case ST_WHILE:
1383                         n2 = condition();
1384                         n = chain_loop(NULL);
1385                         n->l.n = n2;
1386                         break;
1387
1388                 case ST_DO:
1389                         n2 = chain_node(OC_EXEC);
1390                         n = chain_loop(NULL);
1391                         n2->a.n = n->a.n;
1392                         next_token(TC_WHILE);
1393                         n->l.n = condition();
1394                         break;
1395
1396                 case ST_FOR:
1397                         next_token(TC_SEQSTART);
1398                         n2 = parse_expr(TC_SEMICOL | TC_SEQTERM);
1399                         if (t_tclass & TC_SEQTERM) {    /* for-in */
1400                                 if ((n2->info & OPCLSMASK) != OC_IN)
1401                                         syntax_error(EMSG_UNEXP_TOKEN);
1402                                 n = chain_node(OC_WALKINIT | VV);
1403                                 n->l.n = n2->l.n;
1404                                 n->r.n = n2->r.n;
1405                                 n = chain_loop(NULL);
1406                                 n->info = OC_WALKNEXT | Vx;
1407                                 n->l.n = n2->l.n;
1408                         } else {                        /* for (;;) */
1409                                 n = chain_node(OC_EXEC | Vx);
1410                                 n->l.n = n2;
1411                                 n2 = parse_expr(TC_SEMICOL);
1412                                 n3 = parse_expr(TC_SEQTERM);
1413                                 n = chain_loop(n3);
1414                                 n->l.n = n2;
1415                                 if (!n2)
1416                                         n->info = OC_EXEC;
1417                         }
1418                         break;
1419
1420                 case OC_PRINT:
1421                 case OC_PRINTF:
1422                         n = chain_node(t_info);
1423                         n->l.n = parse_expr(TC_OPTERM | TC_OUTRDR | TC_GRPTERM);
1424                         if (t_tclass & TC_OUTRDR) {
1425                                 n->info |= t_info;
1426                                 n->r.n = parse_expr(TC_OPTERM | TC_GRPTERM);
1427                         }
1428                         if (t_tclass & TC_GRPTERM)
1429                                 rollback_token();
1430                         break;
1431
1432                 case OC_BREAK:
1433                         n = chain_node(OC_EXEC);
1434                         n->a.n = break_ptr;
1435                         break;
1436
1437                 case OC_CONTINUE:
1438                         n = chain_node(OC_EXEC);
1439                         n->a.n = continue_ptr;
1440                         break;
1441
1442                 /* delete, next, nextfile, return, exit */
1443                 default:
1444                         chain_expr(t_info);
1445                 }
1446         }
1447 }
1448
1449 static void parse_program(char *p)
1450 {
1451         uint32_t tclass;
1452         node *cn;
1453         func *f;
1454         var *v;
1455
1456         g_pos = p;
1457         t_lineno = 1;
1458         while ((tclass = next_token(TC_EOF | TC_OPSEQ | TC_GRPSTART |
1459                         TC_OPTERM | TC_BEGIN | TC_END | TC_FUNCDECL)) != TC_EOF) {
1460
1461                 if (tclass & TC_OPTERM)
1462                         continue;
1463
1464                 seq = &mainseq;
1465                 if (tclass & TC_BEGIN) {
1466                         seq = &beginseq;
1467                         chain_group();
1468
1469                 } else if (tclass & TC_END) {
1470                         seq = &endseq;
1471                         chain_group();
1472
1473                 } else if (tclass & TC_FUNCDECL) {
1474                         next_token(TC_FUNCTION);
1475                         g_pos++;
1476                         f = newfunc(t_string);
1477                         f->body.first = NULL;
1478                         f->nargs = 0;
1479                         while (next_token(TC_VARIABLE | TC_SEQTERM) & TC_VARIABLE) {
1480                                 v = findvar(ahash, t_string);
1481                                 v->x.aidx = f->nargs++;
1482
1483                                 if (next_token(TC_COMMA | TC_SEQTERM) & TC_SEQTERM)
1484                                         break;
1485                         }
1486                         seq = &f->body;
1487                         chain_group();
1488                         clear_array(ahash);
1489
1490                 } else if (tclass & TC_OPSEQ) {
1491                         rollback_token();
1492                         cn = chain_node(OC_TEST);
1493                         cn->l.n = parse_expr(TC_OPTERM | TC_EOF | TC_GRPSTART);
1494                         if (t_tclass & TC_GRPSTART) {
1495                                 rollback_token();
1496                                 chain_group();
1497                         } else {
1498                                 chain_node(OC_PRINT);
1499                         }
1500                         cn->r.n = mainseq.last;
1501
1502                 } else /* if (tclass & TC_GRPSTART) */ {
1503                         rollback_token();
1504                         chain_group();
1505                 }
1506         }
1507 }
1508
1509
1510 /* -------- program execution part -------- */
1511
1512 static node *mk_splitter(const char *s, tsplitter *spl)
1513 {
1514         regex_t *re, *ire;
1515         node *n;
1516
1517         re = &spl->re[0];
1518         ire = &spl->re[1];
1519         n = &spl->n;
1520         if ((n->info & OPCLSMASK) == OC_REGEXP) {
1521                 regfree(re);
1522                 regfree(ire); // TODO: nuke ire, use re+1?
1523         }
1524         if (s[0] && s[1]) { /* strlen(s) > 1 */
1525                 mk_re_node(s, n, re);
1526         } else {
1527                 n->info = (uint32_t) s[0];
1528         }
1529
1530         return n;
1531 }
1532
1533 /* use node as a regular expression. Supplied with node ptr and regex_t
1534  * storage space. Return ptr to regex (if result points to preg, it should
1535  * be later regfree'd manually
1536  */
1537 static regex_t *as_regex(node *op, regex_t *preg)
1538 {
1539         int cflags;
1540         var *v;
1541         const char *s;
1542
1543         if ((op->info & OPCLSMASK) == OC_REGEXP) {
1544                 return icase ? op->r.ire : op->l.re;
1545         }
1546         v = nvalloc(1);
1547         s = getvar_s(evaluate(op, v));
1548
1549         cflags = icase ? REG_EXTENDED | REG_ICASE : REG_EXTENDED;
1550         /* Testcase where REG_EXTENDED fails (unpaired '{'):
1551          * echo Hi | awk 'gsub("@(samp|code|file)\{","");'
1552          * gawk 3.1.5 eats this. We revert to ~REG_EXTENDED
1553          * (maybe gsub is not supposed to use REG_EXTENDED?).
1554          */
1555         if (regcomp(preg, s, cflags)) {
1556                 cflags &= ~REG_EXTENDED;
1557                 xregcomp(preg, s, cflags);
1558         }
1559         nvfree(v);
1560         return preg;
1561 }
1562
/* Gradually increasing buffer: make sure b can hold more than n bytes.
 * b is reallocated when it is NULL or when n >= *size; the new size,
 * n + n/2 + 80, is stored in *size. Note that we reallocate even if
 * n == old size, and thus there is at least one extra allocated byte.
 * Returns the (possibly moved) buffer.
 */
static char* qrealloc(char *b, int n, int *size)
{
        if (b != NULL && n < *size)
                return b;       /* still large enough */

        *size = n + (n>>1) + 80;
        return xrealloc(b, *size);
}
1575
1576 /* resize field storage space */
1577 static void fsrealloc(int size)
1578 {
1579         int i;
1580
1581         if (size >= maxfields) {
1582                 i = maxfields;
1583                 maxfields = size + 16;
1584                 Fields = xrealloc(Fields, maxfields * sizeof(Fields[0]));
1585                 for (; i < maxfields; i++) {
1586                         Fields[i].type = VF_SPECIAL;
1587                         Fields[i].string = NULL;
1588                 }
1589         }
1590         /* if size < nfields, clear extra field variables */
1591         for (i = size; i < nfields; i++) {
1592                 clrvar(Fields + i);
1593         }
1594         nfields = size;
1595 }
1596
1597 static int awk_split(const char *s, node *spl, char **slist)
1598 {
1599         int l, n;
1600         char c[4];
1601         char *s1;
1602         regmatch_t pmatch[2]; // TODO: why [2]? [1] is enough...
1603
1604         /* in worst case, each char would be a separate field */
1605         *slist = s1 = xzalloc(strlen(s) * 2 + 3);
1606         strcpy(s1, s);
1607
1608         c[0] = c[1] = (char)spl->info;
1609         c[2] = c[3] = '\0';
1610         if (*getvar_s(intvar[RS]) == '\0')
1611                 c[2] = '\n';
1612
1613         n = 0;
1614         if ((spl->info & OPCLSMASK) == OC_REGEXP) {  /* regex split */
1615                 if (!*s)
1616                         return n; /* "": zero fields */
1617                 n++; /* at least one field will be there */
1618                 do {
1619                         l = strcspn(s, c+2); /* len till next NUL or \n */
1620                         if (regexec(icase ? spl->r.ire : spl->l.re, s, 1, pmatch, 0) == 0
1621                          && pmatch[0].rm_so <= l
1622                         ) {
1623                                 l = pmatch[0].rm_so;
1624                                 if (pmatch[0].rm_eo == 0) {
1625                                         l++;
1626                                         pmatch[0].rm_eo++;
1627                                 }
1628                                 n++; /* we saw yet another delimiter */
1629                         } else {
1630                                 pmatch[0].rm_eo = l;
1631                                 if (s[l])
1632                                         pmatch[0].rm_eo++;
1633                         }
1634                         memcpy(s1, s, l);
1635                         /* make sure we remove *all* of the separator chars */
1636                         do {
1637                                 s1[l] = '\0';
1638                         } while (++l < pmatch[0].rm_eo);
1639                         nextword(&s1);
1640                         s += pmatch[0].rm_eo;
1641                 } while (*s);
1642                 return n;
1643         }
1644         if (c[0] == '\0') {  /* null split */
1645                 while (*s) {
1646                         *s1++ = *s++;
1647                         *s1++ = '\0';
1648                         n++;
1649                 }
1650                 return n;
1651         }
1652         if (c[0] != ' ') {  /* single-character split */
1653                 if (icase) {
1654                         c[0] = toupper(c[0]);
1655                         c[1] = tolower(c[1]);
1656                 }
1657                 if (*s1)
1658                         n++;
1659                 while ((s1 = strpbrk(s1, c)) != NULL) {
1660                         *s1++ = '\0';
1661                         n++;
1662                 }
1663                 return n;
1664         }
1665         /* space split */
1666         while (*s) {
1667                 s = skip_whitespace(s);
1668                 if (!*s)
1669                         break;
1670                 n++;
1671                 while (*s && !isspace(*s))
1672                         *s1++ = *s++;
1673                 *s1++ = '\0';
1674         }
1675         return n;
1676 }
1677
1678 static void split_f0(void)
1679 {
1680 /* static char *fstrings; */
1681 #define fstrings (G.split_f0__fstrings)
1682
1683         int i, n;
1684         char *s;
1685
1686         if (is_f0_split)
1687                 return;
1688
1689         is_f0_split = TRUE;
1690         free(fstrings);
1691         fsrealloc(0);
1692         n = awk_split(getvar_s(intvar[F0]), &fsplitter.n, &fstrings);
1693         fsrealloc(n);
1694         s = fstrings;
1695         for (i = 0; i < n; i++) {
1696                 Fields[i].string = nextword(&s);
1697                 Fields[i].type |= (VF_FSTR | VF_USER | VF_DIRTY);
1698         }
1699
1700         /* set NF manually to avoid side effects */
1701         clrvar(intvar[NF]);
1702         intvar[NF]->type = VF_NUMBER | VF_SPECIAL;
1703         intvar[NF]->number = nfields;
1704 #undef fstrings
1705 }
1706
1707 /* perform additional actions when some internal variables changed */
1708 static void handle_special(var *v)
1709 {
1710         int n;
1711         char *b;
1712         const char *sep, *s;
1713         int sl, l, len, i, bsize;
1714
1715         if (!(v->type & VF_SPECIAL))
1716                 return;
1717
1718         if (v == intvar[NF]) {
1719                 n = (int)getvar_i(v);
1720                 fsrealloc(n);
1721
1722                 /* recalculate $0 */
1723                 sep = getvar_s(intvar[OFS]);
1724                 sl = strlen(sep);
1725                 b = NULL;
1726                 len = 0;
1727                 for (i = 0; i < n; i++) {
1728                         s = getvar_s(&Fields[i]);
1729                         l = strlen(s);
1730                         if (b) {
1731                                 memcpy(b+len, sep, sl);
1732                                 len += sl;
1733                         }
1734                         b = qrealloc(b, len+l+sl, &bsize);
1735                         memcpy(b+len, s, l);
1736                         len += l;
1737                 }
1738                 if (b)
1739                         b[len] = '\0';
1740                 setvar_p(intvar[F0], b);
1741                 is_f0_split = TRUE;
1742
1743         } else if (v == intvar[F0]) {
1744                 is_f0_split = FALSE;
1745
1746         } else if (v == intvar[FS]) {
1747                 mk_splitter(getvar_s(v), &fsplitter);
1748
1749         } else if (v == intvar[RS]) {
1750                 mk_splitter(getvar_s(v), &rsplitter);
1751
1752         } else if (v == intvar[IGNORECASE]) {
1753                 icase = istrue(v);
1754
1755         } else {                                /* $n */
1756                 n = getvar_i(intvar[NF]);
1757                 setvar_i(intvar[NF], n > v-Fields ? n : v-Fields+1);
1758                 /* right here v is invalid. Just to note... */
1759         }
1760 }
1761
1762 /* step through func/builtin/etc arguments */
1763 static node *nextarg(node **pn)
1764 {
1765         node *n;
1766
1767         n = *pn;
1768         if (n && (n->info & OPCLSMASK) == OC_COMMA) {
1769                 *pn = n->r.n;
1770                 n = n->l.n;
1771         } else {
1772                 *pn = NULL;
1773         }
1774         return n;
1775 }
1776
1777 static void hashwalk_init(var *v, xhash *array)
1778 {
1779         hash_item *hi;
1780         unsigned i;
1781         walker_list *w;
1782         walker_list *prev_walker;
1783
1784         if (v->type & VF_WALK) {
1785                 prev_walker = v->x.walker;
1786         } else {
1787                 v->type |= VF_WALK;
1788                 prev_walker = NULL;
1789         }
1790         debug_printf_walker("hashwalk_init: prev_walker:%p\n", prev_walker);
1791
1792         w = v->x.walker = xzalloc(sizeof(*w) + array->glen + 1); /* why + 1? */
1793         debug_printf_walker(" walker@%p=%p\n", &v->x.walker, w);
1794         w->cur = w->end = w->wbuf;
1795         w->prev = prev_walker;
1796         for (i = 0; i < array->csize; i++) {
1797                 hi = array->items[i];
1798                 while (hi) {
1799                         strcpy(w->end, hi->name);
1800                         nextword(&w->end);
1801                         hi = hi->next;
1802                 }
1803         }
1804 }
1805
1806 static int hashwalk_next(var *v)
1807 {
1808         walker_list *w = v->x.walker;
1809
1810         if (w->cur >= w->end) {
1811                 walker_list *prev_walker = w->prev;
1812
1813                 debug_printf_walker("end of iteration, free(walker@%p:%p), prev_walker:%p\n", &v->x.walker, w, prev_walker);
1814                 free(w);
1815                 v->x.walker = prev_walker;
1816                 return FALSE;
1817         }
1818
1819         setvar_s(v, nextword(&w->cur));
1820         return TRUE;
1821 }
1822
1823 /* evaluate node, return 1 when result is true, 0 otherwise */
1824 static int ptest(node *pattern)
1825 {
1826         /* ptest__v is "static": to save stack space? */
1827         return istrue(evaluate(pattern, &G.ptest__v));
1828 }
1829
1830 /* Read the next record from stream rsm into variable v.
      *
      * Input is accumulated in rsm->buffer; rsm->adv is the offset of the
      * first unconsumed byte and rsm->pos the number of buffered bytes that
      * follow it. The record separator is taken from rsplitter: a regexp,
      * a single character (the low byte of rsplitter.n.info), or - when
      * that byte is NUL - a run of two or more newlines.
      *
      * On success v receives the record text (flagged VF_USER) and RT
      * receives the separator text that ended it.
      *
      * Returns 1 if a record was stored, 0 if the input is exhausted and
      * nothing is buffered, -1 if reading failed (ERRNO is set from errno).
      */
1831 static int awk_getline(rstream *rsm, var *v)
1832 {
1833         char *b;
1834         regmatch_t pmatch[2];
1835         int size, a, p, pp = 0;
1836         int fd, so, eo, r, rp;
1837         char c, *m, *s;
1838
1839         debug_printf_eval("entered %s()\n", __func__);
1840
1841         /* we're using our own buffer since we need access to accumulating
1842          * characters
1843          */
1844         fd = fileno(rsm->F);
1845         m = rsm->buffer;
1846         a = rsm->adv;
1847         p = rsm->pos;
1848         size = rsm->size;
1849         c = (char) rsplitter.n.info;
             /* rp: number of leading bytes of the record to skip (only the
              * newline-run mode below ever advances it) */
1850         rp = 0;
1851
1852         if (!m)
1853                 m = qrealloc(m, 256, &size);
1854
             /* Each pass looks for a separator in the data buffered so far;
              * when none is accepted, more input is read and the search is
              * repeated. The loop ends once a read adds no new bytes. */
1855         do {
1856                 b = m + a;
                     /* default: the record is everything buffered, empty separator */
1857                 so = eo = p;
1858                 r = 1;
1859                 if (p > 0) {
1860                         if ((rsplitter.n.info & OPCLSMASK) == OC_REGEXP) {
1861                                 if (regexec(icase ? rsplitter.n.r.ire : rsplitter.n.l.re,
1862                                                         b, 1, pmatch, 0) == 0) {
1863                                         so = pmatch[0].rm_so;
1864                                         eo = pmatch[0].rm_eo;
                                             /* a match that ends exactly at the end of
                                              * the buffered data is not accepted yet:
                                              * more input is read first */
1865                                         if (b[eo] != '\0')
1866                                                 break;
1867                                 }
1868                         } else if (c != '\0') {
                                     /* only bytes added by the last read (from offset
                                      * pp) are searched; if c is not found before the
                                      * first NUL, a NUL byte inside the data also ends
                                      * the record */
1869                                 s = strchr(b+pp, c);
1870                                 if (!s)
1871                                         s = memchr(b+pp, '\0', p - pp);
1872                                 if (s) {
1873                                         so = eo = s-b;
1874                                         eo++;
1875                                         break;
1876                                 }
1877                         } else {
                                     /* skip newlines at the start of the data, then
                                      * look for two consecutive newlines; the separator
                                      * extends over the whole run of newlines */
1878                                 while (b[rp] == '\n')
1879                                         rp++;
1880                                 s = strstr(b+rp, "\n\n");
1881                                 if (s) {
1882                                         so = eo = s-b;
1883                                         while (b[eo] == '\n')
1884                                                 eo++;
1885                                         if (b[eo] != '\0')
1886                                                 break;
1887                                 }
1888                         }
1889                 }
1890
                     /* slide the unconsumed bytes (and their terminating NUL)
                      * to the start of the buffer before reading more */
1891                 if (a > 0) {
1892                         memmove(m, m+a, p+1);
1893                         b = m;
1894                         a = 0;
1895                 }
1896
1897                 m = qrealloc(m, a+p+128, &size);
1898                 b = m + a;
1899                 pp = p;
1900                 p += safe_read(fd, b+p, size-p-1);
                     /* p < pp means safe_read() returned a negative value:
                      * buffered data is dropped and the error is recorded */
1901                 if (p < pp) {
1902                         p = 0;
1903                         r = 0;
1904                         setvar_i(intvar[ERRNO], errno);
1905                 }
1906                 b[p] = '\0';
1907
1908         } while (p > pp);
1909
1910         if (p == 0) {
                     /* nothing to return: r becomes 0 (end of input) or -1 (error) */
1911                 r--;
1912         } else {
                     /* temporarily NUL-terminate the record, then the separator,
                      * restoring the overwritten byte each time */
1913                 c = b[so]; b[so] = '\0';
1914                 setvar_s(v, b+rp);
1915                 v->type |= VF_USER;
1916                 b[so] = c;
1917                 c = b[eo]; b[eo] = '\0';
1918                 setvar_s(intvar[RT], b+so);
1919                 b[eo] = c;
1920         }
1921
             /* remember the buffer and where the next record starts */
1922         rsm->buffer = m;
1923         rsm->adv = a + eo;
1924         rsm->pos = p - eo;
1925         rsm->size = size;
1926
1927         debug_printf_eval("returning from %s(): %d\n", __func__, r);
1928
1929         return r;
1930 }
1931
1932 static int fmt_num(char *b, int size, const char *format, double n, int int_as_int)
1933 {
1934         int r = 0;
1935         char c;
1936         const char *s = format;
1937
1938         if (int_as_int && n == (int)n) {
1939                 r = snprintf(b, size, "%d", (int)n);
1940         } else {
1941                 do { c = *s; } while (c && *++s);
1942                 if (strchr("diouxX", c)) {
1943                         r = snprintf(b, size, format, (int)n);
1944                 } else if (strchr("eEfgG", c)) {
1945                         r = snprintf(b, size, format, n);
1946                 } else {
1947                         syntax_error(EMSG_INV_FMT);
1948                 }
1949         }
1950         return r;
1951 }
1952
1953 /* formatted output into an allocated buffer, return ptr to buffer */
1954 static char *awk_printf(node *n)
1955 {
1956         char *b = NULL;
1957         char *fmt, *s, *f;
1958         const char *s1;
1959         int i, j, incr, bsize;
1960         char c, c1;
1961         var *v, *arg;
1962
1963         v = nvalloc(1);
1964         fmt = f = xstrdup(getvar_s(evaluate(nextarg(&n), v)));
1965
1966         i = 0;
1967         while (*f) {
1968                 s = f;
1969                 while (*f && (*f != '%' || *++f == '%'))
1970                         f++;
1971                 while (*f && !isalpha(*f)) {
1972                         if (*f == '*')
1973                                 syntax_error("%*x formats are not supported");
1974                         f++;
1975                 }
1976
1977                 incr = (f - s) + MAXVARFMT;
1978                 b = qrealloc(b, incr + i, &bsize);
1979                 c = *f;
1980                 if (c != '\0')
1981                         f++;
1982                 c1 = *f;
1983                 *f = '\0';
1984                 arg = evaluate(nextarg(&n), v);
1985
1986                 j = i;
1987                 if (c == 'c' || !c) {
1988                         i += sprintf(b+i, s, is_numeric(arg) ?
1989                                         (char)getvar_i(arg) : *getvar_s(arg));
1990                 } else if (c == 's') {
1991                         s1 = getvar_s(arg);
1992                         b = qrealloc(b, incr+i+strlen(s1), &bsize);
1993                         i += sprintf(b+i, s, s1);
1994                 } else {
1995                         i += fmt_num(b+i, incr, s, getvar_i(arg), FALSE);
1996                 }
1997                 *f = c1;
1998
1999                 /* if there was an error while sprintf, return value is negative */
2000                 if (i < j)
2001                         i = j;
2002         }
2003
2004         free(fmt);
2005         nvfree(v);
2006         b = xrealloc(b, i + 1);
2007         b[i] = '\0';
2008         return b;
2009 }
2010
2011 /* Common substitution routine.
2012  * Replace (nm)'th substring of (src) that matches (rn) with (repl),
2013  * store result into (dest), return the number of matches processed.
2014  * If nm = 0, replace all matches.
2015  * If src or dest is NULL, use $0.
2016  * If subexp != 0, enable subexpression matching (\1-\9).
2017  */
2018 static int awk_sub(node *rn, const char *repl, int nm, var *src, var *dest, int subexp)
2019 {
2020         char *resbuf;
2021         const char *sp;
2022         int match_no, residx, replen, resbufsize;
2023         int regexec_flags;
2024         regmatch_t pmatch[10];
2025         regex_t sreg, *regex;
2026
             /* resbuf/residx: output buffer and its current length;
              * sp: start of the not yet processed part of the source */
2027         resbuf = NULL;
2028         residx = 0;
2029         match_no = 0;
2030         regexec_flags = 0;
2031         regex = as_regex(rn, &sreg);
2032         sp = getvar_s(src ? src : intvar[F0]);
2033         replen = strlen(repl);
2034         while (regexec(regex, sp, 10, pmatch, regexec_flags) == 0) {
2035                 int so = pmatch[0].rm_so;
2036                 int eo = pmatch[0].rm_eo;
2037
2038                 //bb_error_msg("match %u: [%u,%u] '%s'%p", match_no+1, so, eo, sp,sp);
                     /* copy everything up to the end of the match verbatim;
                      * the match itself is taken back below if it is replaced */
2039                 resbuf = qrealloc(resbuf, residx + eo + replen, &resbufsize);
2040                 memcpy(resbuf + residx, sp, eo);
2041                 residx += eo;
2042                 if (++match_no >= nm) {
2043                         const char *s;
2044                         int nbs;
2045
2046                         /* replace */
2047                         residx -= (eo - so);
                             /* nbs: number of consecutive backslashes just copied */
2048                         nbs = 0;
2049                         for (s = repl; *s; s++) {
2050                                 char c = resbuf[residx++] = *s;
2051                                 if (c == '\\') {
2052                                         nbs++;
2053                                         continue;
2054                                 }
2055                                 if (c == '&' || (subexp && c >= '0' && c <= '9')) {
2056                                         int j;
                                             /* step back over this character and part
                                              * of the backslashes that preceded it */
2057                                         residx -= ((nbs + 3) >> 1);
2058                                         j = 0;
2059                                         if (c != '&') {
2060                                                 j = c - '0';
2061                                                 nbs++;
2062                                         }
                                             /* odd count: emit the character literally;
                                              * even count: insert the text of match j
                                              * (j == 0 is the whole match) */
2063                                         if (nbs % 2) {
2064                                                 resbuf[residx++] = c;
2065                                         } else {
2066                                                 int n = pmatch[j].rm_eo - pmatch[j].rm_so;
2067                                                 resbuf = qrealloc(resbuf, residx + replen + n, &resbufsize);
2068                                                 memcpy(resbuf + residx, sp + pmatch[j].rm_so, n);
2069                                                 residx += n;
2070                                         }
2071                                 }
2072                                 nbs = 0;
2073                         }
2074                 }
2075
                     /* later searches no longer start at the beginning of the line */
2076                 regexec_flags = REG_NOTBOL;
2077                 sp += eo;
                     /* the requested match has been replaced: stop searching */
2078                 if (match_no == nm)
2079                         break;
2080                 if (eo == so) {
2081                         /* Empty match (e.g. "b*" will match anywhere).
2082                          * Advance by one char. */
2083 //BUG (bug 1333):
2084 //gsub(/\<b*/,"") on "abc" will reach this point, advance to "bc"
2085 //... and will erroneously match "b" even though it is NOT at the word start.
2086 //we need REG_NOTBOW but it does not exist...
2087 //TODO: if EXTRA_COMPAT=y, use GNU matching and re_search,
2088 //it should be able to do it correctly.
2089                         /* Subtle: this is safe only because
2090                          * qrealloc allocated at least one extra byte */
2091                         resbuf[residx] = *sp;
                             /* the byte just stored was the terminating NUL:
                              * the result is complete */
2092                         if (*sp == '\0')
2093                                 goto ret;
2094                         sp++;
2095                         residx++;
2096                 }
2097         }
2098
             /* append whatever follows the last match */
2099         resbuf = qrealloc(resbuf, residx + strlen(sp), &resbufsize);
2100         strcpy(resbuf + residx, sp);
2101  ret:
2102         //bb_error_msg("end sp:'%s'%p", sp,sp);
2103         setvar_p(dest ? dest : intvar[F0], resbuf);
             /* free the regex only when it was stored in our local sreg */
2104         if (regex == &sreg)
2105                 regfree(regex);
2106         return match_no;
2107 }
2108
2109 static NOINLINE int do_mktime(const char *ds)
2110 {
2111         struct tm then;
2112         int count;
2113
2114         /*memset(&then, 0, sizeof(then)); - not needed */
2115         then.tm_isdst = -1; /* default is unknown */
2116
2117         /* manpage of mktime says these fields are ints,
2118          * so we can sscanf stuff directly into them */
2119         count = sscanf(ds, "%u %u %u %u %u %u %d",
2120                 &then.tm_year, &then.tm_mon, &then.tm_mday,
2121                 &then.tm_hour, &then.tm_min, &then.tm_sec,
2122                 &then.tm_isdst);
2123
2124         if (count < 6
2125          || (unsigned)then.tm_mon < 1
2126          || (unsigned)then.tm_year < 1900
2127         ) {
2128                 return -1;
2129         }
2130
2131         then.tm_mon -= 1;
2132         then.tm_year -= 1900;
2133
2134         return mktime(&then);
2135 }
2136
2137 static NOINLINE var *exec_builtin(node *op, var *res)
2138 {
             /* Execute a call of a built-in function.
              *
              * op:  node whose info word selects the built-in (info & OPNMASK)
              *      and encodes how its arguments are prepared; op->l.n is
              *      the argument list
              * res: variable that receives the result; it is also returned
              *
              * Up to four arguments are gathered into an[] (argument nodes),
              * av[] (evaluated values) and as[] (string values).
              */
2139 #define tspl (G.exec_builtin__tspl)
2140
2141         var *tv;
2142         node *an[4];
2143         var *av[4];
2144         const char *as[4];
2145         regmatch_t pmatch[2];
2146         regex_t sreg, *re;
2147         node *spl;
2148         uint32_t isr, info;
2149         int nargs;
2150         time_t tt;
2151         int i, l, ll, n;
2152
2153         tv = nvalloc(4);
2154         isr = info = op->info;
2155         op = op->l.n;
2156
2157         av[2] = av[3] = NULL;
             /* isr is shifted right once per argument, so for argument i the
              * tests below look at bits (0x09000000 << i) and (0x08000000 << i)
              * of the original info: either bit requests evaluation into av[i],
              * the higher one additionally requests the string value as[i] */
2158         for (i = 0; i < 4 && op; i++) {
2159                 an[i] = nextarg(&op);
2160                 if (isr & 0x09000000)
2161                         av[i] = evaluate(an[i], &tv[i]);
2162                 if (isr & 0x08000000)
2163                         as[i] = getvar_s(av[i]);
2164                 isr >>= 1;
2165         }
2166
2167         nargs = i;
             /* the top two bits of info hold the minimum argument count */
2168         if ((uint32_t)nargs < (info >> 30))
2169                 syntax_error(EMSG_TOO_FEW_ARGS);
2170
2171         info &= OPNMASK;
2172         switch (info) {
2173
             /* two-argument arctangent, available only with libm support */
2174         case B_a2:
2175                 if (ENABLE_FEATURE_AWK_LIBM)
2176                         setvar_i(res, atan2(getvar_i(av[0]), getvar_i(av[1])));
2177                 else
2178                         syntax_error(EMSG_NO_MATH);
2179                 break;
2180
             /* split as[0] into array av[1]; the separator is the third
              * argument (a regexp node, or a string turned into a splitter)
              * or the field splitter when absent; result: number of pieces */
2181         case B_sp: {
2182                 char *s, *s1;
2183
2184                 if (nargs > 2) {
2185                         spl = (an[2]->info & OPCLSMASK) == OC_REGEXP ?
2186                                 an[2] : mk_splitter(getvar_s(evaluate(an[2], &tv[2])), &tspl);
2187                 } else {
2188                         spl = &fsplitter.n;
2189                 }
2190
2191                 n = awk_split(as[0], spl, &s);
2192                 s1 = s;
2193                 clear_array(iamarray(av[1]));
2194                 for (i = 1; i <= n; i++)
2195                         setari_u(av[1], i, nextword(&s));
2196                 free(s1);
2197                 setvar_i(res, n);
2198                 break;
2199         }
2200
             /* substring of as[0]: av[1] is the 1-based start (clamped to
              * 0..length), av[2] the optional length (negative counts as 0);
              * without it the rest of the string is taken */
2201         case B_ss: {
2202                 char *s;
2203
2204                 l = strlen(as[0]);
2205                 i = getvar_i(av[1]) - 1;
2206                 if (i > l)
2207                         i = l;
2208                 if (i < 0)
2209                         i = 0;
2210                 n = (nargs > 2) ? getvar_i(av[2]) : l-i;
2211                 if (n < 0)
2212                         n = 0;
2213                 s = xstrndup(as[0]+i, n);
2214                 setvar_p(res, s);
2215                 break;
2216         }
2217
2218         /* Bitwise ops must assume that operands are unsigned. GNU Awk 3.1.5:
2219          * awk '{ print or(-1,1) }' gives "4.29497e+09", not "-2.xxxe+09" */
2220         case B_an:
2221                 setvar_i(res, getvar_i_int(av[0]) & getvar_i_int(av[1]));
2222                 break;
2223
2224         case B_co:
2225                 setvar_i(res, ~getvar_i_int(av[0]));
2226                 break;
2227
             /* NOTE(review): the shift count in B_ls/B_rs is used unchecked */
2228         case B_ls:
2229                 setvar_i(res, getvar_i_int(av[0]) << getvar_i_int(av[1]));
2230                 break;
2231
2232         case B_or:
2233                 setvar_i(res, getvar_i_int(av[0]) | getvar_i_int(av[1]));
2234                 break;
2235
2236         case B_rs:
2237                 setvar_i(res, getvar_i_int(av[0]) >> getvar_i_int(av[1]));
2238                 break;
2239
2240         case B_xo:
2241                 setvar_i(res, getvar_i_int(av[0]) ^ getvar_i_int(av[1]));
2242                 break;
2243
             /* case conversion of a copy of as[0]; only the ASCII letters
              * a-z / A-Z are touched (B_up clears bit 0x20, B_lo sets it) */
2244         case B_lo:
2245         case B_up: {
2246                 char *s, *s1;
2247                 s1 = s = xstrdup(as[0]);
2248                 while (*s1) {
2249                         //*s1 = (info == B_up) ? toupper(*s1) : tolower(*s1);
2250                         if ((unsigned char)((*s1 | 0x20) - 'a') <= ('z' - 'a'))
2251                                 *s1 = (info == B_up) ? (*s1 & 0xdf) : (*s1 | 0x20);
2252                         s1++;
2253                 }
2254                 setvar_p(res, s);
2255                 break;
2256         }
2257
             /* 1-based position of as[1] inside as[0]; 0 when it does not
              * occur, is empty, or is longer than as[0]; the comparison
              * ignores case when icase is set */
2258         case B_ix:
2259                 n = 0;
2260                 ll = strlen(as[1]);
2261                 l = strlen(as[0]) - ll;
2262                 if (ll > 0 && l >= 0) {
2263                         if (!icase) {
2264                                 char *s = strstr(as[0], as[1]);
2265                                 if (s)
2266                                         n = (s - as[0]) + 1;
2267                         } else {
2268                                 /* this piece of code is terribly slow and
2269                                  * really should be rewritten
2270                                  */
2271                                 for (i = 0; i <= l; i++) {
2272                                         if (strncasecmp(as[0]+i, as[1], ll) == 0) {
2273                                                 n = i+1;
2274                                                 break;
2275                                         }
2276                                 }
2277                         }
2278                 }
2279                 setvar_i(res, n);
2280                 break;
2281
             /* format a time with strftime(): av[1] is the timestamp (the
              * current time when absent), as[0] the format (a default one
              * when there are no arguments); output is limited to MAXVARFMT */
2282         case B_ti:
2283                 if (nargs > 1)
2284                         tt = getvar_i(av[1]);
2285                 else
2286                         time(&tt);
2287                 //s = (nargs > 0) ? as[0] : "%a %b %d %H:%M:%S %Z %Y";
2288                 i = strftime(g_buf, MAXVARFMT,
2289                         ((nargs > 0) ? as[0] : "%a %b %d %H:%M:%S %Z %Y"),
2290                         localtime(&tt));
2291                 g_buf[i] = '\0';
2292                 setvar_s(res, g_buf);
2293                 break;
2294
2295         case B_mt:
2296                 setvar_i(res, do_mktime(as[0]));
2297                 break;
2298
             /* match as[0] against regexp an[1]; sets RSTART and RLENGTH.
              * On a match the offsets become 1-based; otherwise RSTART is 0
              * and RLENGTH is -1. The result equals RSTART. */
2299         case B_ma:
2300                 re = as_regex(an[1], &sreg);
2301                 n = regexec(re, as[0], 1, pmatch, 0);
2302                 if (n == 0) {
2303                         pmatch[0].rm_so++;
2304                         pmatch[0].rm_eo++;
2305                 } else {
2306                         pmatch[0].rm_so = 0;
2307                         pmatch[0].rm_eo = -1;
2308                 }
2309                 setvar_i(newvar("RSTART"), pmatch[0].rm_so);
2310                 setvar_i(newvar("RLENGTH"), pmatch[0].rm_eo - pmatch[0].rm_so);
2311                 setvar_i(res, pmatch[0].rm_so);
2312                 if (re == &sreg)
2313                         regfree(re);
2314                 break;
2315
             /* the three substitution built-ins share awk_sub():
              * B_ge - match number taken from av[2], source av[3], subexpression
              *        references enabled, new string stored in res;
              * B_gs - all matches (nm = 0), av[2] modified in place;
              * B_su - first match only (nm = 1), av[2] modified in place;
              * for B_gs and B_su the result is awk_sub()'s return value */
2316         case B_ge:
2317                 awk_sub(an[0], as[1], getvar_i(av[2]), av[3], res, TRUE);
2318                 break;
2319
2320         case B_gs:
2321                 setvar_i(res, awk_sub(an[0], as[1], 0, av[2], av[2], FALSE));
2322                 break;
2323
2324         case B_su:
2325                 setvar_i(res, awk_sub(an[0], as[1], 1, av[2], av[2], FALSE));
2326                 break;
2327         }
2328
2329         nvfree(tv);
2330         return res;
2331 #undef tspl
2332 }
2333
2334 /*
2335  * Evaluate node - the heart of the program. Supplied with subtree
2336  * and place where to store result. returns ptr to result.
2337  */
2338 #define XC(n) ((n) >> 8)
2339
2340 static var *evaluate(node *op, var *res)
2341 {
2342 /* This procedure is recursive so we should count every byte */
2343 #define fnargs (G.evaluate__fnargs)
2344 /* seed is initialized to 1 */
2345 #define seed   (G.evaluate__seed)
2346 #define sreg   (G.evaluate__sreg)
2347
2348         var *v1;
2349
2350         if (!op)
2351                 return setvar_s(res, NULL);
2352
2353         debug_printf_eval("entered %s()\n", __func__);
2354
2355         v1 = nvalloc(2);
2356
2357         while (op) {
2358                 struct {
2359                         var *v;
2360                         const char *s;
2361                 } L = L; /* for compiler */
2362                 struct {
2363                         var *v;
2364                         const char *s;
2365                 } R = R;
2366                 double L_d = L_d;
2367                 uint32_t opinfo;
2368                 int opn;
2369                 node *op1;
2370
2371                 opinfo = op->info;
2372                 opn = (opinfo & OPNMASK);
2373                 g_lineno = op->lineno;
2374                 op1 = op->l.n;
2375                 debug_printf_eval("opinfo:%08x opn:%08x\n", opinfo, opn);
2376
2377                 /* execute inevitable things */
2378                 if (opinfo & OF_RES1)
2379                         L.v = evaluate(op1, v1);
2380                 if (opinfo & OF_RES2)
2381                         R.v = evaluate(op->r.n, v1+1);
2382                 if (opinfo & OF_STR1) {
2383                         L.s = getvar_s(L.v);
2384                         debug_printf_eval("L.s:'%s'\n", L.s);
2385                 }
2386                 if (opinfo & OF_STR2) {
2387                         R.s = getvar_s(R.v);
2388                         debug_printf_eval("R.s:'%s'\n", R.s);
2389                 }
2390                 if (opinfo & OF_NUM1) {
2391                         L_d = getvar_i(L.v);
2392                         debug_printf_eval("L_d:%f\n", L_d);
2393                 }
2394
2395                 debug_printf_eval("switch(0x%x)\n", XC(opinfo & OPCLSMASK));
2396                 switch (XC(opinfo & OPCLSMASK)) {
2397
2398                 /* -- iterative node type -- */
2399
2400                 /* test pattern */
2401                 case XC( OC_TEST ):
2402                         if ((op1->info & OPCLSMASK) == OC_COMMA) {
2403                                 /* it's range pattern */
2404                                 if ((opinfo & OF_CHECKED) || ptest(op1->l.n)) {
2405                                         op->info |= OF_CHECKED;
2406                                         if (ptest(op1->r.n))
2407                                                 op->info &= ~OF_CHECKED;
2408                                         op = op->a.n;
2409                                 } else {
2410                                         op = op->r.n;
2411                                 }
2412                         } else {
2413                                 op = ptest(op1) ? op->a.n : op->r.n;
2414                         }
2415                         break;
2416
2417                 /* just evaluate an expression, also used as unconditional jump */
2418                 case XC( OC_EXEC ):
2419                         break;
2420
2421                 /* branch, used in if-else and various loops */
2422                 case XC( OC_BR ):
2423                         op = istrue(L.v) ? op->a.n : op->r.n;
2424                         break;
2425
2426                 /* initialize for-in loop */
2427                 case XC( OC_WALKINIT ):
2428                         hashwalk_init(L.v, iamarray(R.v));
2429                         break;
2430
2431                 /* get next array item */
2432                 case XC( OC_WALKNEXT ):
2433                         op = hashwalk_next(L.v) ? op->a.n : op->r.n;
2434                         break;
2435
2436                 case XC( OC_PRINT ):
2437                 case XC( OC_PRINTF ): {
2438                         FILE *F = stdout;
2439
2440                         if (op->r.n) {
2441                                 rstream *rsm = newfile(R.s);
2442                                 if (!rsm->F) {
2443                                         if (opn == '|') {
2444                                                 rsm->F = popen(R.s, "w");
2445                                                 if (rsm->F == NULL)
2446                                                         bb_perror_msg_and_die("popen");
2447                                                 rsm->is_pipe = 1;
2448                                         } else {
2449                                                 rsm->F = xfopen(R.s, opn=='w' ? "w" : "a");
2450                                         }
2451                                 }
2452                                 F = rsm->F;
2453                         }
2454
2455                         if ((opinfo & OPCLSMASK) == OC_PRINT) {
2456                                 if (!op1) {
2457                                         fputs(getvar_s(intvar[F0]), F);
2458                                 } else {
2459                                         while (op1) {
2460                                                 var *v = evaluate(nextarg(&op1), v1);
2461                                                 if (v->type & VF_NUMBER) {
2462                                                         fmt_num(g_buf, MAXVARFMT, getvar_s(intvar[OFMT]),
2463                                                                         getvar_i(v), TRUE);
2464                                                         fputs(g_buf, F);
2465                                                 } else {
2466                                                         fputs(getvar_s(v), F);
2467                                                 }
2468
2469                                                 if (op1)
2470                                                         fputs(getvar_s(intvar[OFS]), F);
2471                                         }
2472                                 }
2473                                 fputs(getvar_s(intvar[ORS]), F);
2474
2475                         } else {        /* OC_PRINTF */
2476                                 char *s = awk_printf(op1);
2477                                 fputs(s, F);
2478                                 free(s);
2479                         }
2480                         fflush(F);
2481                         break;
2482                 }
2483
2484                 case XC( OC_DELETE ): {
2485                         uint32_t info = op1->info & OPCLSMASK;
2486                         var *v;
2487
2488                         if (info == OC_VAR) {
2489                                 v = op1->l.v;
2490                         } else if (info == OC_FNARG) {
2491                                 v = &fnargs[op1->l.aidx];
2492                         } else {
2493                                 syntax_error(EMSG_NOT_ARRAY);
2494                         }
2495
2496                         if (op1->r.n) {
2497                                 const char *s;
2498                                 clrvar(L.v);
2499                                 s = getvar_s(evaluate(op1->r.n, v1));
2500                                 hash_remove(iamarray(v), s);
2501                         } else {
2502                                 clear_array(iamarray(v));
2503                         }
2504                         break;
2505                 }
2506
2507                 case XC( OC_NEWSOURCE ):
2508                         g_progname = op->l.new_progname;
2509                         break;
2510
2511                 case XC( OC_RETURN ):
2512                         copyvar(res, L.v);
2513                         break;
2514
2515                 case XC( OC_NEXTFILE ):
2516                         nextfile = TRUE;
2517                 case XC( OC_NEXT ):
2518                         nextrec = TRUE;
2519                 case XC( OC_DONE ):
2520                         clrvar(res);
2521                         break;
2522
2523                 case XC( OC_EXIT ):
2524                         awk_exit(L_d);
2525
2526                 /* -- recursive node type -- */
2527
2528                 case XC( OC_VAR ):
2529                         L.v = op->l.v;
2530                         if (L.v == intvar[NF])
2531                                 split_f0();
2532                         goto v_cont;
2533
2534                 case XC( OC_FNARG ):
2535                         L.v = &fnargs[op->l.aidx];
2536  v_cont:
2537                         res = op->r.n ? findvar(iamarray(L.v), R.s) : L.v;
2538                         break;
2539
2540                 case XC( OC_IN ):
2541                         setvar_i(res, hash_search(iamarray(R.v), L.s) ? 1 : 0);
2542                         break;
2543
2544                 case XC( OC_REGEXP ):
2545                         op1 = op;
2546                         L.s = getvar_s(intvar[F0]);
2547                         goto re_cont;
2548
2549                 case XC( OC_MATCH ):
2550                         op1 = op->r.n;
2551  re_cont:
2552                         {
2553                                 regex_t *re = as_regex(op1, &sreg);
2554                                 int i = regexec(re, L.s, 0, NULL, 0);
2555                                 if (re == &sreg)
2556                                         regfree(re);
2557                                 setvar_i(res, (i == 0) ^ (opn == '!'));
2558                         }
2559                         break;
2560
2561                 case XC( OC_MOVE ):
2562                         debug_printf_eval("MOVE\n");
2563                         /* if source is a temporary string, just relink it to dest */
2564 //Disabled: if R.v is numeric but happens to have cached R.v->string,
2565 //then L.v ends up being a string, which is wrong
2566 //                      if (R.v == v1+1 && R.v->string) {
2567 //                              res = setvar_p(L.v, R.v->string);
2568 //                              R.v->string = NULL;
2569 //                      } else {
2570                                 res = copyvar(L.v, R.v);
2571 //                      }
2572                         break;
2573
2574                 case XC( OC_TERNARY ):
2575                         if ((op->r.n->info & OPCLSMASK) != OC_COLON)
2576                                 syntax_error(EMSG_POSSIBLE_ERROR);
2577                         res = evaluate(istrue(L.v) ? op->r.n->l.n : op->r.n->r.n, res);
2578                         break;
2579
2580                 case XC( OC_FUNC ): {
2581                         var *vbeg, *v;
2582                         const char *sv_progname;
2583
2584                         if (!op->r.f->body.first)
2585                                 syntax_error(EMSG_UNDEF_FUNC);
2586
2587                         vbeg = v = nvalloc(op->r.f->nargs + 1);
2588                         while (op1) {
2589                                 var *arg = evaluate(nextarg(&op1), v1);
2590                                 copyvar(v, arg);
2591                                 v->type |= VF_CHILD;
2592                                 v->x.parent = arg;
2593                                 if (++v - vbeg >= op->r.f->nargs)
2594                                         break;
2595                         }
2596
2597                         v = fnargs;
2598                         fnargs = vbeg;
2599                         sv_progname = g_progname;
2600
2601                         res = evaluate(op->r.f->body.first, res);
2602
2603                         g_progname = sv_progname;
2604                         nvfree(fnargs);
2605                         fnargs = v;
2606
2607                         break;
2608                 }
2609
2610                 case XC( OC_GETLINE ):
2611                 case XC( OC_PGETLINE ): {
2612                         rstream *rsm;
2613                         int i;
2614
2615                         if (op1) {
2616                                 rsm = newfile(L.s);
2617                                 if (!rsm->F) {
2618                                         if ((opinfo & OPCLSMASK) == OC_PGETLINE) {
2619                                                 rsm->F = popen(L.s, "r");
2620                                                 rsm->is_pipe = TRUE;
2621                                         } else {
2622                                                 rsm->F = fopen_for_read(L.s);  /* not xfopen! */
2623                                         }
2624                                 }
2625                         } else {
2626                                 if (!iF)
2627                                         iF = next_input_file();
2628                                 rsm = iF;
2629                         }
2630
2631                         if (!rsm->F) {
2632                                 setvar_i(intvar[ERRNO], errno);
2633                                 setvar_i(res, -1);
2634                                 break;
2635                         }
2636
2637                         if (!op->r.n)
2638                                 R.v = intvar[F0];
2639
2640                         i = awk_getline(rsm, R.v);
2641                         if (i > 0 && !op1) {
2642                                 incvar(intvar[FNR]);
2643                                 incvar(intvar[NR]);
2644                         }
2645                         setvar_i(res, i);
2646                         break;
2647                 }
2648
2649                 /* simple builtins */
2650                 case XC( OC_FBLTIN ): {
2651                         double R_d = R_d; /* for compiler */
2652
2653                         switch (opn) {
2654                         case F_in:
2655                                 R_d = (int)L_d;
2656                                 break;
2657
2658                         case F_rn:
2659                                 R_d = (double)rand() / (double)RAND_MAX;
2660                                 break;
2661
2662                         case F_co:
2663                                 if (ENABLE_FEATURE_AWK_LIBM) {
2664                                         R_d = cos(L_d);
2665                                         break;
2666                                 }
2667
2668                         case F_ex:
2669                                 if (ENABLE_FEATURE_AWK_LIBM) {
2670                                         R_d = exp(L_d);
2671                                         break;
2672                                 }
2673
2674                         case F_lg:
2675                                 if (ENABLE_FEATURE_AWK_LIBM) {
2676                                         R_d = log(L_d);
2677                                         break;
2678                                 }
2679
2680                         case F_si:
2681                                 if (ENABLE_FEATURE_AWK_LIBM) {
2682                                         R_d = sin(L_d);
2683                                         break;
2684                                 }
2685
2686                         case F_sq:
2687                                 if (ENABLE_FEATURE_AWK_LIBM) {
2688                                         R_d = sqrt(L_d);
2689                                         break;
2690                                 }
2691
2692                                 syntax_error(EMSG_NO_MATH);
2693                                 break;
2694
2695                         case F_sr:
2696                                 R_d = (double)seed;
2697                                 seed = op1 ? (unsigned)L_d : (unsigned)time(NULL);
2698                                 srand(seed);
2699                                 break;
2700
2701                         case F_ti:
2702                                 R_d = time(NULL);
2703                                 break;
2704
2705                         case F_le:
2706                                 if (!op1)
2707                                         L.s = getvar_s(intvar[F0]);
2708                                 R_d = strlen(L.s);
2709                                 break;
2710
2711                         case F_sy:
2712                                 fflush_all();
2713                                 R_d = (ENABLE_FEATURE_ALLOW_EXEC && L.s && *L.s)
2714                                                 ? (system(L.s) >> 8) : 0;
2715                                 break;
2716
2717                         case F_ff:
2718                                 if (!op1) {
2719                                         fflush(stdout);
2720                                 } else if (L.s && *L.s) {
2721                                         rstream *rsm = newfile(L.s);
2722                                         fflush(rsm->F);
2723                                 } else {
2724                                         fflush_all();
2725                                 }
2726                                 break;
2727
2728                         case F_cl: {
2729                                 rstream *rsm;
2730                                 int err = 0;
2731                                 rsm = (rstream *)hash_search(fdhash, L.s);
2732                                 debug_printf_eval("OC_FBLTIN F_cl rsm:%p\n", rsm);
2733                                 if (rsm) {
2734                                         debug_printf_eval("OC_FBLTIN F_cl "
2735                                                 "rsm->is_pipe:%d, ->F:%p\n",
2736                                                 rsm->is_pipe, rsm->F);
2737                                         /* Can be NULL if open failed. Example:
2738                                          * getline line <"doesnt_exist";
2739                                          * close("doesnt_exist"); <--- here rsm->F is NULL
2740                                          */
2741                                         if (rsm->F)
2742                                                 err = rsm->is_pipe ? pclose(rsm->F) : fclose(rsm->F);
2743                                         free(rsm->buffer);
2744                                         hash_remove(fdhash, L.s);
2745                                 }
2746                                 if (err)
2747                                         setvar_i(intvar[ERRNO], errno);
2748                                 R_d = (double)err;
2749                                 break;
2750                         }
2751                         } /* switch */
2752                         setvar_i(res, R_d);
2753                         break;
2754                 }
2755
2756                 case XC( OC_BUILTIN ):
2757                         res = exec_builtin(op, res);
2758                         break;
2759
2760                 case XC( OC_SPRINTF ):
2761                         setvar_p(res, awk_printf(op1));
2762                         break;
2763
2764                 case XC( OC_UNARY ): {
2765                         double Ld, R_d;
2766
2767                         Ld = R_d = getvar_i(R.v);
2768                         switch (opn) {
2769                         case 'P':
2770                                 Ld = ++R_d;
2771                                 goto r_op_change;
2772                         case 'p':
2773                                 R_d++;
2774                                 goto r_op_change;
2775                         case 'M':
2776                                 Ld = --R_d;
2777                                 goto r_op_change;
2778                         case 'm':
2779                                 R_d--;
2780  r_op_change:
2781                                 setvar_i(R.v, R_d);
2782                                 break;
2783                         case '!':
2784                                 Ld = !istrue(R.v);
2785                                 break;
2786                         case '-':
2787                                 Ld = -R_d;
2788                                 break;
2789                         }
2790                         setvar_i(res, Ld);
2791                         break;
2792                 }
2793
2794                 case XC( OC_FIELD ): {
2795                         int i = (int)getvar_i(R.v);
2796                         if (i == 0) {
2797                                 res = intvar[F0];
2798                         } else {
2799                                 split_f0();
2800                                 if (i > nfields)
2801                                         fsrealloc(i);
2802                                 res = &Fields[i - 1];
2803                         }
2804                         break;
2805                 }
2806
2807                 /* concatenation (" ") and index joining (",") */
2808                 case XC( OC_CONCAT ):
2809                 case XC( OC_COMMA ): {
2810                         const char *sep = "";
2811                         if ((opinfo & OPCLSMASK) == OC_COMMA)
2812                                 sep = getvar_s(intvar[SUBSEP]);
2813                         setvar_p(res, xasprintf("%s%s%s", L.s, sep, R.s));
2814                         break;
2815                 }
2816
2817                 case XC( OC_LAND ):
2818                         setvar_i(res, istrue(L.v) ? ptest(op->r.n) : 0);
2819                         break;
2820
2821                 case XC( OC_LOR ):
2822                         setvar_i(res, istrue(L.v) ? 1 : ptest(op->r.n));
2823                         break;
2824
2825                 case XC( OC_BINARY ):
2826                 case XC( OC_REPLACE ): {
2827                         double R_d = getvar_i(R.v);
2828                         debug_printf_eval("BINARY/REPLACE: R_d:%f opn:%c\n", R_d, opn);
2829                         switch (opn) {
2830                         case '+':
2831                                 L_d += R_d;
2832                                 break;
2833                         case '-':
2834                                 L_d -= R_d;
2835                                 break;
2836                         case '*':
2837                                 L_d *= R_d;
2838                                 break;
2839                         case '/':
2840                                 if (R_d == 0)
2841                                         syntax_error(EMSG_DIV_BY_ZERO);
2842                                 L_d /= R_d;
2843                                 break;
2844                         case '&':
2845                                 if (ENABLE_FEATURE_AWK_LIBM)
2846                                         L_d = pow(L_d, R_d);
2847                                 else
2848                                         syntax_error(EMSG_NO_MATH);
2849                                 break;
2850                         case '%':
2851                                 if (R_d == 0)
2852                                         syntax_error(EMSG_DIV_BY_ZERO);
2853                                 L_d -= (int)(L_d / R_d) * R_d;
2854                                 break;
2855                         }
2856                         debug_printf_eval("BINARY/REPLACE result:%f\n", L_d);
2857                         res = setvar_i(((opinfo & OPCLSMASK) == OC_BINARY) ? res : L.v, L_d);
2858                         break;
2859                 }
2860
2861                 case XC( OC_COMPARE ): {
2862                         int i = i; /* for compiler */
2863                         double Ld;
2864
2865                         if (is_numeric(L.v) && is_numeric(R.v)) {
2866                                 Ld = getvar_i(L.v) - getvar_i(R.v);
2867                         } else {
2868                                 const char *l = getvar_s(L.v);
2869                                 const char *r = getvar_s(R.v);
2870                                 Ld = icase ? strcasecmp(l, r) : strcmp(l, r);
2871                         }
2872                         switch (opn & 0xfe) {
2873                         case 0:
2874                                 i = (Ld > 0);
2875                                 break;
2876                         case 2:
2877                                 i = (Ld >= 0);
2878                                 break;
2879                         case 4:
2880                                 i = (Ld == 0);
2881                                 break;
2882                         }
2883                         setvar_i(res, (i == 0) ^ (opn & 1));
2884                         break;
2885                 }
2886
2887                 default:
2888                         syntax_error(EMSG_POSSIBLE_ERROR);
2889                 }
2890                 if ((opinfo & OPCLSMASK) <= SHIFT_TIL_THIS)
2891                         op = op->a.n;
2892                 if ((opinfo & OPCLSMASK) >= RECUR_FROM_THIS)
2893                         break;
2894                 if (nextrec)
2895                         break;
2896         } /* while (op) */
2897
2898         nvfree(v1);
2899         debug_printf_eval("returning from %s(): %p\n", __func__, res);
2900         return res;
2901 #undef fnargs
2902 #undef seed
2903 #undef sreg
2904 }
2905
2906
2907 /* -------- main & co. -------- */
2908
2909 static int awk_exit(int r)
2910 {
2911         var tv;
2912         unsigned i;
2913         hash_item *hi;
2914
2915         zero_out_var(&tv);
2916
2917         if (!exiting) {
2918                 exiting = TRUE;
2919                 nextrec = FALSE;
2920                 evaluate(endseq.first, &tv);
2921         }
2922
2923         /* waiting for children */
2924         for (i = 0; i < fdhash->csize; i++) {
2925                 hi = fdhash->items[i];
2926                 while (hi) {
2927                         if (hi->data.rs.F && hi->data.rs.is_pipe)
2928                                 pclose(hi->data.rs.F);
2929                         hi = hi->next;
2930                 }
2931         }
2932
2933         exit(r);
2934 }
2935
2936 /* if expr looks like "var=value", perform assignment and return 1,
2937  * otherwise return 0 */
2938 static int is_assignment(const char *expr)
2939 {
2940         char *exprc, *val, *s, *s1;
2941
2942         if (!isalnum_(*expr) || (val = strchr(expr, '=')) == NULL) {
2943                 return FALSE;
2944         }
2945
2946         exprc = xstrdup(expr);
2947         val = exprc + (val - expr);
2948         *val++ = '\0';
2949
2950         s = s1 = val;
2951         while ((*s1 = nextchar(&s)) != '\0')
2952                 s1++;
2953
2954         setvar_u(newvar(exprc), val);
2955         free(exprc);
2956         return TRUE;
2957 }
2958
2959 /* switch to next input file */
2960 static rstream *next_input_file(void)
2961 {
2962 #define rsm          (G.next_input_file__rsm)
2963 #define files_happen (G.next_input_file__files_happen)
2964
2965         FILE *F = NULL;
2966         const char *fname, *ind;
2967
2968         if (rsm.F)
2969                 fclose(rsm.F);
2970         rsm.F = NULL;
2971         rsm.pos = rsm.adv = 0;
2972
2973         do {
2974                 if (getvar_i(intvar[ARGIND])+1 >= getvar_i(intvar[ARGC])) {
2975                         if (files_happen)
2976                                 return NULL;
2977                         fname = "-";
2978                         F = stdin;
2979                 } else {
2980                         ind = getvar_s(incvar(intvar[ARGIND]));
2981                         fname = getvar_s(findvar(iamarray(intvar[ARGV]), ind));
2982                         if (fname && *fname && !is_assignment(fname))
2983                                 F = xfopen_stdin(fname);
2984                 }
2985         } while (!F);
2986
2987         files_happen = TRUE;
2988         setvar_s(intvar[FILENAME], fname);
2989         rsm.F = F;
2990         return &rsm;
2991 #undef rsm
2992 #undef files_happen
2993 }
2994
2995 int awk_main(int argc, char **argv) MAIN_EXTERNALLY_VISIBLE;
2996 int awk_main(int argc, char **argv)
2997 {
2998         unsigned opt;
2999         char *opt_F, *opt_W;
3000         llist_t *list_v = NULL;
3001         llist_t *list_f = NULL;
3002         int i, j;
3003         var *v;
3004         var tv;
3005         char **envp;
3006         char *vnames = (char *)vNames; /* cheat */
3007         char *vvalues = (char *)vValues;
3008
3009         INIT_G();
3010
3011         /* Undo busybox.c, or else strtod may eat ','! This breaks parsing:
3012          * $1,$2 == '$1,' '$2', NOT '$1' ',' '$2' */
3013         if (ENABLE_LOCALE_SUPPORT)
3014                 setlocale(LC_NUMERIC, "C");
3015
3016         zero_out_var(&tv);
3017
3018         /* allocate global buffer */
3019         g_buf = xmalloc(MAXVARFMT + 1);
3020
3021         vhash = hash_init();
3022         ahash = hash_init();
3023         fdhash = hash_init();
3024         fnhash = hash_init();
3025
3026         /* initialize variables */
3027         for (i = 0; *vnames; i++) {
3028                 intvar[i] = v = newvar(nextword(&vnames));
3029                 if (*vvalues != '\377')
3030                         setvar_s(v, nextword(&vvalues));
3031                 else
3032                         setvar_i(v, 0);
3033
3034                 if (*vnames == '*') {
3035                         v->type |= VF_SPECIAL;
3036                         vnames++;
3037                 }
3038         }
3039
3040         handle_special(intvar[FS]);
3041         handle_special(intvar[RS]);
3042
3043         newfile("/dev/stdin")->F = stdin;
3044         newfile("/dev/stdout")->F = stdout;
3045         newfile("/dev/stderr")->F = stderr;
3046
3047         /* Huh, people report that sometimes environ is NULL. Oh well. */
3048         if (environ) for (envp = environ; *envp; envp++) {
3049                 /* environ is writable, thus we don't strdup it needlessly */
3050                 char *s = *envp;
3051                 char *s1 = strchr(s, '=');
3052                 if (s1) {
3053                         *s1 = '\0';
3054                         /* Both findvar and setvar_u take const char*
3055                          * as 2nd arg -> environment is not trashed */
3056                         setvar_u(findvar(iamarray(intvar[ENVIRON]), s), s1 + 1);
3057                         *s1 = '=';
3058                 }
3059         }
3060         opt_complementary = "v::f::"; /* -v and -f can occur multiple times */
3061         opt = getopt32(argv, "F:v:f:W:", &opt_F, &list_v, &list_f, &opt_W);
3062         argv += optind;
3063         argc -= optind;
3064         if (opt & 0x1)
3065                 setvar_s(intvar[FS], opt_F); // -F
3066         while (list_v) { /* -v */
3067                 if (!is_assignment(llist_pop(&list_v)))
3068                         bb_show_usage();
3069         }
3070         if (list_f) { /* -f */
3071                 do {
3072                         char *s = NULL;
3073                         FILE *from_file;
3074
3075                         g_progname = llist_pop(&list_f);
3076                         from_file = xfopen_stdin(g_progname);
3077                         /* one byte is reserved for some trick in next_token */
3078                         for (i = j = 1; j > 0; i += j) {
3079                                 s = xrealloc(s, i + 4096);
3080                                 j = fread(s + i, 1, 4094, from_file);
3081                         }
3082                         s[i] = '\0';
3083                         fclose(from_file);
3084                         parse_program(s + 1);
3085                         free(s);
3086                 } while (list_f);
3087                 argc++;
3088         } else { // no -f: take program from 1st parameter
3089                 if (!argc)
3090                         bb_show_usage();
3091                 g_progname = "cmd. line";
3092                 parse_program(*argv++);
3093         }
3094         if (opt & 0x8) // -W
3095                 bb_error_msg("warning: unrecognized option '-W %s' ignored", opt_W);
3096
3097         /* fill in ARGV array */
3098         setvar_i(intvar[ARGC], argc);
3099         setari_u(intvar[ARGV], 0, "awk");
3100         i = 0;
3101         while (*argv)
3102                 setari_u(intvar[ARGV], ++i, *argv++);
3103
3104         evaluate(beginseq.first, &tv);
3105         if (!mainseq.first && !endseq.first)
3106                 awk_exit(EXIT_SUCCESS);
3107
3108         /* input file could already be opened in BEGIN block */
3109         if (!iF)
3110                 iF = next_input_file();
3111
3112         /* passing through input files */
3113         while (iF) {
3114                 nextfile = FALSE;
3115                 setvar_i(intvar[FNR], 0);
3116
3117                 while ((i = awk_getline(iF, intvar[F0])) > 0) {
3118                         nextrec = FALSE;
3119                         incvar(intvar[NR]);
3120                         incvar(intvar[FNR]);
3121                         evaluate(mainseq.first, &tv);
3122
3123                         if (nextfile)
3124                                 break;
3125                 }
3126
3127                 if (i < 0)
3128                         syntax_error(strerror(errno));
3129
3130                 iF = next_input_file();
3131         }
3132
3133         awk_exit(EXIT_SUCCESS);
3134         /*return 0;*/
3135 }