Bump to version 1.22.1
[platform/upstream/busybox.git] / editors / awk.c
1 /* vi: set sw=4 ts=4: */
2 /*
3  * awk implementation for busybox
4  *
5  * Copyright (C) 2002 by Dmitry Zakharov <dmit@crp.bank.gov.ua>
6  *
7  * Licensed under GPLv2 or later, see file LICENSE in this source tree.
8  */
9
10 //config:config AWK
11 //config:       bool "awk"
12 //config:       default y
13 //config:       help
14 //config:         Awk is used as a pattern scanning and processing language. This is
15 //config:         the BusyBox implementation of that programming language.
16 //config:
17 //config:config FEATURE_AWK_LIBM
18 //config:       bool "Enable math functions (requires libm)"
19 //config:       default y
20 //config:       depends on AWK
21 //config:       help
22 //config:         Enable math functions of the Awk programming language.
23 //config:         NOTE: This will require libm to be present for linking.
24 //config:
25 //config:config FEATURE_AWK_GNU_EXTENSIONS
26 //config:       bool "Enable a few GNU extensions"
27 //config:       default y
28 //config:       depends on AWK
29 //config:       help
30 //config:         Enable a few features from gawk:
31 //config:         * command line option -e AWK_PROGRAM
32 //config:         * simultaneous use of -f and -e on the command line.
33 //config:           This enables the use of awk library files.
34 //config:           Ex: awk -f mylib.awk -e '{print myfunction($1);}' ...
35
36 //applet:IF_AWK(APPLET_NOEXEC(awk, awk, BB_DIR_USR_BIN, BB_SUID_DROP, awk))
37
38 //kbuild:lib-$(CONFIG_AWK) += awk.o
39
40 //usage:#define awk_trivial_usage
41 //usage:       "[OPTIONS] [AWK_PROGRAM] [FILE]..."
42 //usage:#define awk_full_usage "\n\n"
43 //usage:       "        -v VAR=VAL      Set variable"
44 //usage:     "\n        -F SEP          Use SEP as field separator"
45 //usage:     "\n        -f FILE         Read program from FILE"
46 //usage:        IF_FEATURE_AWK_GNU_EXTENSIONS(
47 //usage:     "\n        -e AWK_PROGRAM"
48 //usage:        )
49
50 #include "libbb.h"
51 #include "xregex.h"
52 #include <math.h>
53
54 /* This is a NOEXEC applet. Be very careful! */
55
56
57 /* If you comment out one of these below, it will be #defined later
58  * to perform debug printfs to stderr: */
/* As written, all three are defined here as no-op statements, so none of
 * the #ifndef fallbacks further down takes effect. */
59 #define debug_printf_walker(...)  do {} while (0)
60 #define debug_printf_eval(...)  do {} while (0)
61 #define debug_printf_parse(...)  do {} while (0)
62
/* Fallbacks: used only for a macro whose no-op definition above was removed */
63 #ifndef debug_printf_walker
64 # define debug_printf_walker(...) (fprintf(stderr, __VA_ARGS__))
65 #endif
66 #ifndef debug_printf_eval
67 # define debug_printf_eval(...) (fprintf(stderr, __VA_ARGS__))
68 #endif
69 #ifndef debug_printf_parse
70 # define debug_printf_parse(...) (fprintf(stderr, __VA_ARGS__))
71 #endif
72
73
/* Option-letter string. The "e:" piece is supplied through
 * IF_FEATURE_AWK_GNU_EXTENSIONS(); NOTE(review): presumably that macro
 * expands to nothing when the feature is disabled - confirm in libbb. */
74 #define OPTSTR_AWK \
75         "F:v:f:" \
76         IF_FEATURE_AWK_GNU_EXTENSIONS("e:") \
77         "W:"
78 #define OPTCOMPLSTR_AWK \
79         "v::f::" \
80         IF_FEATURE_AWK_GNU_EXTENSIONS("e::")
/* OPTBIT_x is the bit position of option -x, OPT_x the corresponding mask */
81 enum {
82         OPTBIT_F,       /* define field separator */
83         OPTBIT_v,       /* define variable */
84         OPTBIT_f,       /* pull in awk program from file */
85         IF_FEATURE_AWK_GNU_EXTENSIONS(OPTBIT_e,) /* -e AWK_PROGRAM */
86         OPTBIT_W,       /* -W ignored */
87         OPT_F = 1 << OPTBIT_F,
88         OPT_v = 1 << OPTBIT_v,
89         OPT_f = 1 << OPTBIT_f,
/* the trailing "+ 0" leaves a valid initializer (0) if the IF_ macro yields nothing */
90         OPT_e = IF_FEATURE_AWK_GNU_EXTENSIONS((1 << OPTBIT_e)) + 0,
91         OPT_W = 1 << OPTBIT_W
92 };
93
/* MAXVARFMT is the size limit getvar_s() passes to fmt_num() when it
 * formats a number into g_buf.
 * NOTE(review): MINNVBLOCK is not used in this part of the file; by its
 * name it relates to nvblock sizing - confirm at its use site. */
94 #define MAXVARFMT       240
95 #define MINNVBLOCK      64
96
97 /* variable flags */
/* Stored in var.type; the low byte describes the kind of value, the
 * higher bits are state flags. */
98 #define VF_NUMBER       0x0001  /* 1 = primary type is number */
99 #define VF_ARRAY        0x0002  /* 1 = it's an array */
100
101 #define VF_CACHED       0x0100  /* 1 = num/str value has cached str/num eq */
102 #define VF_USER         0x0200  /* 1 = user input (may be numeric string) */
103 #define VF_SPECIAL      0x0400  /* 1 = requires extra handling when changed */
104 #define VF_WALK         0x0800  /* 1 = variable has alloc'd x.walker list */
105 #define VF_FSTR         0x1000  /* 1 = var::string points to fstring buffer */
106 #define VF_CHILD        0x2000  /* 1 = function arg; x.parent points to source */
107 #define VF_DIRTY        0x4000  /* 1 = variable was set explicitly */
108
109 /* these flags are static, don't change them when value is changed */
/* clrvar() masks var.type with this set, so only these bits survive a reset */
110 #define VF_DONTTOUCH    (VF_ARRAY | VF_SPECIAL | VF_WALK | VF_CHILD | VF_DIRTY)
111
/* Element of the list referenced by var.x.walker (flagged by VF_WALK).
 * NOTE(review): judging by the field names, cur/end delimit data kept in
 * wbuf and prev links to the previously pushed element - confirm against
 * the for..in implementation, which is outside this part of the file. */
112 typedef struct walker_list {
113         char *end;
114         char *cur;
115         struct walker_list *prev;
116         char wbuf[1];
117 } walker_list;
118
119 /* Variable */
/* A value carries a numeric and a string representation; VF_NUMBER says
 * which is primary and VF_CACHED whether the other one has been computed
 * (see getvar_s() / getvar_i()). The union member in use is selected by
 * the VF_ARRAY / VF_CHILD / VF_WALK flags in type. */
120 typedef struct var_s {
121         unsigned type;            /* flags */
122         double number;
123         char *string;
124         union {
125                 int aidx;               /* func arg idx (for compilation stage) */
126                 struct xhash_s *array;  /* array ptr */
127                 struct var_s *parent;   /* for func args, ptr to actual parameter */
128                 walker_list *walker;    /* list of array elements (for..in) */
129         } x;
130 } var;
131
132 /* Node chain (pattern-action chain, BEGIN, END, function bodies) */
/* Used for beginseq/mainseq/endseq in struct globals and for func.body.
 * NOTE(review): first/last look like head and tail of the node list -
 * confirm in the chain-building code. */
133 typedef struct chain_s {
134         struct node_s *first;
135         struct node_s *last;
136         const char *programname;
137 } chain;
138
139 /* Function */
/* Entry stored in the functions hash (see newfunc()): argument count plus
 * the node chain forming the function body. */
140 typedef struct func_s {
141         unsigned nargs;
142         struct chain_s body;
143 } func;
144
145 /* I/O stream */
/* Entry stored in the redirect-streams hash (see newfile()).
 * NOTE(review): buffer/adv/size/pos appear to describe a read buffer and
 * the position within it; is_pipe presumably selects pclose vs fclose -
 * confirm in the stream read/close code. */
146 typedef struct rstream_s {
147         FILE *F;
148         char *buffer;
149         int adv;
150         int size;
151         int pos;
152         smallint is_pipe;
153 } rstream;
154
/* One entry of an xhash bucket chain.
 * "data" must remain the FIRST member: hash_search() returns &hi->data and
 * hash_find() stores that pointer back into a hash_item pointer, which is
 * only correct while both addresses coincide.
 * "name" holds the NUL-terminated key; hash_find() allocates
 * sizeof(hash_item) + strlen(name) + 1 bytes to make room for it. */
155 typedef struct hash_item_s {
156         union {
157                 struct var_s v;         /* variable/array hash */
158                 struct rstream_s rs;    /* redirect streams hash */
159                 struct func_s f;        /* functions hash */
160         } data;
161         struct hash_item_s *next;       /* next in chain */
162         char name[1];                   /* really it's longer */
163 } hash_item;
164
/* Chained hash table keyed by string (see hash_init/hash_find/hash_remove) */
165 typedef struct xhash_s {
166         unsigned nel;           /* num of elements */
167         unsigned csize;         /* current hash size */
168         unsigned nprime;        /* next hash size in PRIMES[] */
169         unsigned glen;          /* total length of item names, counting each NUL */
170         struct hash_item_s **items;
171 } xhash;
172
173 /* Tree node */
/* l, r and a are three operand slots; each union lists the kinds of
 * payload a slot can carry.
 * NOTE(review): info presumably holds a word built like the tokeninfo[]
 * entries (class, operand flags, priority) - confirm in the parser. */
174 typedef struct node_s {
175         uint32_t info;
176         unsigned lineno;
177         union {
178                 struct node_s *n;
179                 var *v;
180                 int aidx;
181                 char *new_progname;
182                 regex_t *re;
183         } l;
184         union {
185                 struct node_s *n;
186                 regex_t *ire;
187                 func *f;
188         } r;
189         union {
190                 struct node_s *n;
191         } a;
192 } node;
193
194 /* Block of temporary variables */
/* Blocks are doubly linked (prev/next); nv[] is a flexible array of vars
 * allocated together with the header.
 * NOTE(review): size is presumably the capacity of nv[] and pos the next
 * free slot - confirm in the allocator, which is not in this part. */
195 typedef struct nvblock_s {
196         int size;
197         var *pos;
198         struct nvblock_s *prev;
199         struct nvblock_s *next;
200         var nv[];
201 } nvblock;
202
/* A node bundled with storage for two compiled regexes; used for
 * fsplitter, rsplitter and exec_builtin__tspl in struct globals2.
 * NOTE(review): the two regex_t slots presumably pair with node.l.re and
 * node.r.ire (case-sensitive / ignore-case) - confirm where it is filled. */
203 typedef struct tsplitter_s {
204         node n;
205         regex_t re[2];
206 } tsplitter;
207
208 /* simple token classes */
209 /* Order and hex values are very important!!!  See next_token() */
/* Every simple class is one distinct bit, so classes can be OR-ed into the
 * combined masks further down. tokenlist[] enumerates its groups in this
 * same bit order, one NTC separator per step to the next bit. */
210 #define TC_SEQSTART     1                       /* ( */
211 #define TC_SEQTERM      (1 << 1)                /* ) */
212 #define TC_REGEXP       (1 << 2)                /* /.../ */
213 #define TC_OUTRDR       (1 << 3)                /* | > >> */
214 #define TC_UOPPOST      (1 << 4)                /* unary postfix operator */
215 #define TC_UOPPRE1      (1 << 5)                /* unary prefix operator */
216 #define TC_BINOPX       (1 << 6)                /* two-opnd operator */
217 #define TC_IN           (1 << 7)
218 #define TC_COMMA        (1 << 8)
219 #define TC_PIPE         (1 << 9)                /* input redirection pipe */
220 #define TC_UOPPRE2      (1 << 10)               /* unary prefix operator */
221 #define TC_ARRTERM      (1 << 11)               /* ] */
222 #define TC_GRPSTART     (1 << 12)               /* { */
223 #define TC_GRPTERM      (1 << 13)               /* } */
224 #define TC_SEMICOL      (1 << 14)
225 #define TC_NEWLINE      (1 << 15)
226 #define TC_STATX        (1 << 16)               /* ctl statement (for, next...) */
227 #define TC_WHILE        (1 << 17)
228 #define TC_ELSE         (1 << 18)
229 #define TC_BUILTIN      (1 << 19)
230 #define TC_GETLINE      (1 << 20)
231 #define TC_FUNCDECL     (1 << 21)               /* `function' `func' */
232 #define TC_BEGIN        (1 << 22)
233 #define TC_END          (1 << 23)
/* the classes from here on have no group in tokenlist[] */
234 #define TC_EOF          (1 << 24)
235 #define TC_VARIABLE     (1 << 25)
236 #define TC_ARRAY        (1 << 26)
237 #define TC_FUNCTION     (1 << 27)
238 #define TC_STRING       (1 << 28)
239 #define TC_NUMBER       (1 << 29)
240
241 #define TC_UOPPRE  (TC_UOPPRE1 | TC_UOPPRE2)
242
243 /* combined token classes */
244 #define TC_BINOP   (TC_BINOPX | TC_COMMA | TC_PIPE | TC_IN)
245 //#define       TC_UNARYOP (TC_UOPPRE | TC_UOPPOST)
246 #define TC_OPERAND (TC_VARIABLE | TC_ARRAY | TC_FUNCTION \
247                    | TC_BUILTIN | TC_GETLINE | TC_SEQSTART | TC_STRING | TC_NUMBER)
248
249 #define TC_STATEMNT (TC_STATX | TC_WHILE)
250 #define TC_OPTERM  (TC_SEMICOL | TC_NEWLINE)
251
252 /* word tokens, cannot mean something else if not expected */
253 #define TC_WORD    (TC_IN | TC_STATEMNT | TC_ELSE | TC_BUILTIN \
254                    | TC_GETLINE | TC_FUNCDECL | TC_BEGIN | TC_END)
255
256 /* discard newlines after these */
257 #define TC_NOTERM  (TC_COMMA | TC_GRPSTART | TC_GRPTERM \
258                    | TC_BINOP | TC_OPTERM)
259
260 /* what can expression begin with */
261 #define TC_OPSEQ   (TC_OPERAND | TC_UOPPRE | TC_REGEXP)
262 /* what can group begin with */
263 #define TC_GRPSEQ  (TC_OPSEQ | TC_OPTERM | TC_STATEMNT | TC_GRPSTART)
264
265 /* if previous token class is CONCAT1 and next is CONCAT2, concatenation */
266 /* operator is inserted between them */
267 #define TC_CONCAT1 (TC_VARIABLE | TC_ARRTERM | TC_SEQTERM \
268                    | TC_STRING | TC_NUMBER | TC_UOPPOST)
269 #define TC_CONCAT2 (TC_OPERAND | TC_UOPPRE)
270
/* Operand-handling flags OR-ed into the info words of tokeninfo[].
 * NOTE(review): by their names, RESn = "resolve operand n", STRn / NUM1 =
 * "take it as string / number" - confirm in evaluate(). */
271 #define OF_RES1    0x010000
272 #define OF_RES2    0x020000
273 #define OF_STR1    0x040000
274 #define OF_STR2    0x080000
275 #define OF_NUM1    0x100000
276 #define OF_CHECKED 0x200000
277
278 /* combined operator flags */
/* Two-letter names: the first letter covers operand 1, the second operand 2.
 * x = no flag, V = RES only, N = RES|NUM, S = RES|STR. */
279 #define xx      0
280 #define xV      OF_RES2
281 #define xS      (OF_RES2 | OF_STR2)
282 #define Vx      OF_RES1
283 #define VV      (OF_RES1 | OF_RES2)
284 #define Nx      (OF_RES1 | OF_NUM1)
285 #define NV      (OF_RES1 | OF_NUM1 | OF_RES2)
286 #define Sx      (OF_RES1 | OF_STR1)
287 #define SV      (OF_RES1 | OF_STR1 | OF_RES2)
288 #define SS      (OF_RES1 | OF_STR1 | OF_RES2 | OF_STR2)
289
/* OPCLSMASK covers the byte that holds the OC_xxx / ST_xxx class values
 * (all of them multiples of 0x100); OPNMASK covers the low 7 bits. */
290 #define OPCLSMASK 0xFF00
291 #define OPNMASK   0x007F
292
/* Operator priority is stored in the highest byte of the info word
 * (even value: right-to-left grouping, odd value: left-to-right grouping).
 * For builtins the byte has a different meaning: n n s3 s2 s1 v3 v2 v1,
 * n - min. number of args, vN - resolve Nth arg to var, sN - resolve to string
 *
 * P(x) places x into that byte. The argument is parenthesized so that an
 * expression such as P(a | b) shifts the whole value, and it is converted
 * to uint32_t first so that values >= 0x80 (used by the builtins) are not
 * shifted into the sign bit of a signed int, which is undefined behaviour.
 */
#undef P
#undef PRIMASK
#undef PRIMASK2
#define P(x)      ((uint32_t)(x) << 24)
#define PRIMASK   0x7F000000
#define PRIMASK2  0x7E000000
303
304 /* Operation classes */
305
/* NOTE(review): SHIFT_TIL_THIS has the value of OC_WALKINIT and
 * RECUR_FROM_THIS the value of OC_BINARY, so both look like boundaries
 * inside the OC_ numbering below; their users are not in this part of
 * the file - confirm there. */
306 #define SHIFT_TIL_THIS  0x0600
307 #define RECUR_FROM_THIS 0x1000
308
/* Class values live in bits 8..15 of an info word (see OPCLSMASK) */
309 enum {
310         OC_DELETE = 0x0100,     OC_EXEC = 0x0200,       OC_NEWSOURCE = 0x0300,
311         OC_PRINT = 0x0400,      OC_PRINTF = 0x0500,     OC_WALKINIT = 0x0600,
312
313         OC_BR = 0x0700,         OC_BREAK = 0x0800,      OC_CONTINUE = 0x0900,
314         OC_EXIT = 0x0a00,       OC_NEXT = 0x0b00,       OC_NEXTFILE = 0x0c00,
315         OC_TEST = 0x0d00,       OC_WALKNEXT = 0x0e00,
316
317         OC_BINARY = 0x1000,     OC_BUILTIN = 0x1100,    OC_COLON = 0x1200,
318         OC_COMMA = 0x1300,      OC_COMPARE = 0x1400,    OC_CONCAT = 0x1500,
319         OC_FBLTIN = 0x1600,     OC_FIELD = 0x1700,      OC_FNARG = 0x1800,
320         OC_FUNC = 0x1900,       OC_GETLINE = 0x1a00,    OC_IN = 0x1b00,
321         OC_LAND = 0x1c00,       OC_LOR = 0x1d00,        OC_MATCH = 0x1e00,
322         OC_MOVE = 0x1f00,       OC_PGETLINE = 0x2000,   OC_REGEXP = 0x2100,
323         OC_REPLACE = 0x2200,    OC_RETURN = 0x2300,     OC_SPRINTF = 0x2400,
324         OC_TERNARY = 0x2500,    OC_UNARY = 0x2600,      OC_VAR = 0x2700,
325         OC_DONE = 0x2800,
326
/* statement keywords: the info values given to if/do/for/while in tokeninfo[] */
327         ST_IF = 0x3000,         ST_DO = 0x3100,         ST_FOR = 0x3200,
328         ST_WHILE = 0x3300
329 };
330
331 /* simple builtins */
/* Combined with OC_FBLTIN in tokeninfo[]. Matching tokeninfo[] against
 * tokenlist[] gives: in=int rn=rand co=cos ex=exp lg=log si=sin sq=sqrt
 * sr=srand ti=systime le=length sy=system ff=fflush cl=close */
332 enum {
333         F_in,   F_rn,   F_co,   F_ex,   F_lg,   F_si,   F_sq,   F_sr,
334         F_ti,   F_le,   F_sy,   F_ff,   F_cl
335 };
336
337 /* builtins */
/* Combined with OC_BUILTIN (OC_B) in tokeninfo[]: a2=atan2 ix=index
 * ma=match sp=split ss=substr ti=strftime mt=mktime lo=tolower up=toupper
 * ge=gensub gs=gsub su=sub an=and co=compl ls=lshift or=or rs=rshift xo=xor */
338 enum {
339         B_a2,   B_ix,   B_ma,   B_sp,   B_ss,   B_ti,   B_mt,   B_lo,   B_up,
340         B_ge,   B_gs,   B_su,
341         B_an,   B_co,   B_ls,   B_or,   B_rs,   B_xo,
342 };
343
344 /* tokens and their corresponding info values */
345
346 #define NTC     "\377"  /* switch to next token class (tc<<1) */
347 #define NTCC    '\377'
348
349 #define OC_B  OC_BUILTIN
350
/* Layout: each token is stored as one length byte (written as an octal
 * escape, e.g. "\10" = 8 for "continue") followed by the token text.
 * Groups are separated by NTC and appear in the bit order of the TC_xxx
 * classes, from TC_SEQSTART up to TC_END. The i-th token here corresponds
 * to the i-th entry of tokeninfo[], so both tables must be edited together. */
351 static const char tokenlist[] ALIGN1 =
352         "\1("         NTC
353         "\1)"         NTC
354         "\1/"         NTC                                   /* REGEXP */
355         "\2>>"        "\1>"         "\1|"       NTC         /* OUTRDR */
356         "\2++"        "\2--"        NTC                     /* UOPPOST */
357         "\2++"        "\2--"        "\1$"       NTC         /* UOPPRE1 */
358         "\2=="        "\1="         "\2+="      "\2-="      /* BINOPX */
359         "\2*="        "\2/="        "\2%="      "\2^="
360         "\1+"         "\1-"         "\3**="     "\2**"
361         "\1/"         "\1%"         "\1^"       "\1*"
362         "\2!="        "\2>="        "\2<="      "\1>"
363         "\1<"         "\2!~"        "\1~"       "\2&&"
364         "\2||"        "\1?"         "\1:"       NTC
365         "\2in"        NTC
366         "\1,"         NTC
367         "\1|"         NTC
368         "\1+"         "\1-"         "\1!"       NTC         /* UOPPRE2 */
369         "\1]"         NTC
370         "\1{"         NTC
371         "\1}"         NTC
372         "\1;"         NTC
373         "\1\n"        NTC
374         "\2if"        "\2do"        "\3for"     "\5break"   /* STATX */
375         "\10continue" "\6delete"    "\5print"
376         "\6printf"    "\4next"      "\10nextfile"
377         "\6return"    "\4exit"      NTC
378         "\5while"     NTC
379         "\4else"      NTC
380
381         "\3and"       "\5compl"     "\6lshift"  "\2or"
382         "\6rshift"    "\3xor"
383         "\5close"     "\6system"    "\6fflush"  "\5atan2"   /* BUILTIN */
384         "\3cos"       "\3exp"       "\3int"     "\3log"
385         "\4rand"      "\3sin"       "\4sqrt"    "\5srand"
386         "\6gensub"    "\4gsub"      "\5index"   "\6length"
387         "\5match"     "\5split"     "\7sprintf" "\3sub"
388         "\6substr"    "\7systime"   "\10strftime" "\6mktime"
389         "\7tolower"   "\7toupper"   NTC
390         "\7getline"   NTC
391         "\4func"      "\10function" NTC
392         "\5BEGIN"     NTC
393         "\3END"
394         /* compiler adds trailing "\0" */
395         ;
396
/* Info word for every token of tokenlist[], in exactly the same order
 * (100 entries; NTC separators have no entry). A word is the OR of an
 * OC_xxx / ST_xxx class, operand flags (xV, NV, Sx, ...), a P(...) byte
 * and, for some operators, a character or small number in the low bits.
 * Tokens that need no info have 0. */
397 static const uint32_t tokeninfo[] = {
398         0,
399         0,
400         OC_REGEXP,
401         xS|'a',                  xS|'w',                  xS|'|',
402         OC_UNARY|xV|P(9)|'p',    OC_UNARY|xV|P(9)|'m',
403         OC_UNARY|xV|P(9)|'P',    OC_UNARY|xV|P(9)|'M',    OC_FIELD|xV|P(5),
404         OC_COMPARE|VV|P(39)|5,   OC_MOVE|VV|P(74),        OC_REPLACE|NV|P(74)|'+', OC_REPLACE|NV|P(74)|'-',
405         OC_REPLACE|NV|P(74)|'*', OC_REPLACE|NV|P(74)|'/', OC_REPLACE|NV|P(74)|'%', OC_REPLACE|NV|P(74)|'&',
406         OC_BINARY|NV|P(29)|'+',  OC_BINARY|NV|P(29)|'-',  OC_REPLACE|NV|P(74)|'&', OC_BINARY|NV|P(15)|'&',
407         OC_BINARY|NV|P(25)|'/',  OC_BINARY|NV|P(25)|'%',  OC_BINARY|NV|P(15)|'&',  OC_BINARY|NV|P(25)|'*',
408         OC_COMPARE|VV|P(39)|4,   OC_COMPARE|VV|P(39)|3,   OC_COMPARE|VV|P(39)|0,   OC_COMPARE|VV|P(39)|1,
409         OC_COMPARE|VV|P(39)|2,   OC_MATCH|Sx|P(45)|'!',   OC_MATCH|Sx|P(45)|'~',   OC_LAND|Vx|P(55),
410         OC_LOR|Vx|P(59),         OC_TERNARY|Vx|P(64)|'?', OC_COLON|xx|P(67)|':',
411         OC_IN|SV|P(49), /* in */
412         OC_COMMA|SS|P(80),
413         OC_PGETLINE|SV|P(37),
414         OC_UNARY|xV|P(19)|'+',   OC_UNARY|xV|P(19)|'-',   OC_UNARY|xV|P(19)|'!',
415         0, /* ] */
416         0,
417         0,
418         0,
419         0, /* \n */
420         ST_IF,        ST_DO,        ST_FOR,      OC_BREAK,
421         OC_CONTINUE,  OC_DELETE|Vx, OC_PRINT,
422         OC_PRINTF,    OC_NEXT,      OC_NEXTFILE,
423         OC_RETURN|Vx, OC_EXIT|Nx,
424         ST_WHILE,
425         0, /* else */
426
/* builtins: here the P(...) byte encodes argument handling, not priority */
427         OC_B|B_an|P(0x83), OC_B|B_co|P(0x41), OC_B|B_ls|P(0x83), OC_B|B_or|P(0x83),
428         OC_B|B_rs|P(0x83), OC_B|B_xo|P(0x83),
429         OC_FBLTIN|Sx|F_cl, OC_FBLTIN|Sx|F_sy, OC_FBLTIN|Sx|F_ff, OC_B|B_a2|P(0x83),
430         OC_FBLTIN|Nx|F_co, OC_FBLTIN|Nx|F_ex, OC_FBLTIN|Nx|F_in, OC_FBLTIN|Nx|F_lg,
431         OC_FBLTIN|F_rn,    OC_FBLTIN|Nx|F_si, OC_FBLTIN|Nx|F_sq, OC_FBLTIN|Nx|F_sr,
432         OC_B|B_ge|P(0xd6), OC_B|B_gs|P(0xb6), OC_B|B_ix|P(0x9b), OC_FBLTIN|Sx|F_le,
433         OC_B|B_ma|P(0x89), OC_B|B_sp|P(0x8b), OC_SPRINTF,        OC_B|B_su|P(0xb6),
434         OC_B|B_ss|P(0x8f), OC_FBLTIN|F_ti,    OC_B|B_ti|P(0x0b), OC_B|B_mt|P(0x0b),
435         OC_B|B_lo|P(0x49), OC_B|B_up|P(0x49),
436         OC_GETLINE|SV|P(0),
437         0,                 0,
438         0,
439         0 /* END */
440 };
441
442 /* internal variable names and their initial values       */
443 /* asterisk marks SPECIAL vars; $ is just no-named Field0 */
/* The enumerators index intvar[] and follow the order of the names in
 * vNames[]; F0 corresponds to the "$" entry. */
444 enum {
445         CONVFMT,    OFMT,       FS,         OFS,
446         ORS,        RS,         RT,         FILENAME,
447         SUBSEP,     F0,         ARGIND,     ARGC,
448         ARGV,       ERRNO,      FNR,        NR,
449         NF,         IGNORECASE, ENVIRON,    NUM_INTERNAL_VARS
450 };
451
/* NUL-separated names, ended by an empty string. The '*' marker is placed
 * AFTER the NUL of the name it applies to (FS, RS, $, NF, IGNORECASE). */
452 static const char vNames[] ALIGN1 =
453         "CONVFMT\0" "OFMT\0"    "FS\0*"     "OFS\0"
454         "ORS\0"     "RS\0*"     "RT\0"      "FILENAME\0"
455         "SUBSEP\0"  "$\0*"      "ARGIND\0"  "ARGC\0"
456         "ARGV\0"    "ERRNO\0"   "FNR\0"     "NR\0"
457         "NF\0*"     "IGNORECASE\0*" "ENVIRON\0" "\0";
458
/* NUL-separated initial string values for the first ten names (CONVFMT
 * through $), followed by a '\377' byte.
 * NOTE(review): '\377' presumably tells the initialization code that the
 * remaining variables get no string value - confirm in awk_main(). */
459 static const char vValues[] ALIGN1 =
460         "%.6g\0"    "%.6g\0"    " \0"       " \0"
461         "\n\0"      "\n\0"      "\0"        "\0"
462         "\034\0"    "\0"        "\377";
463
464 /* hash size may grow to these values */
/* hash_init() starts every table with FIRST_PRIME buckets; hash_rebuild()
 * then steps through PRIMES[] (indexed by xhash.nprime) and stops growing
 * once the last entry has been used. */
465 #define FIRST_PRIME 61
466 static const uint16_t PRIMES[] ALIGN2 = { 251, 1021, 4093, 16381, 65521 };
467
468
469 /* Globals. Split in two parts so that first one is addressed
470  * with (mostly short) negative offsets.
471  * NB: it's unsafe to put members of type "double"
472  * into globals2 (gcc may fail to align them).
473  */
474 struct globals {
475         double t_double;
476         chain beginseq, mainseq, endseq;
477         chain *seq;
478         node *break_ptr, *continue_ptr;
479         rstream *iF;
480         xhash *vhash, *ahash, *fdhash, *fnhash;
481         const char *g_progname;
482         int g_lineno;
483         int nfields;
484         int maxfields; /* used in fsrealloc() only */
485         var *Fields;
486         nvblock *g_cb;
487         char *g_pos;
488         char *g_buf;
489         smallint icase;
490         smallint exiting;
491         smallint nextrec;
492         smallint nextfile;
493         smallint is_f0_split;
494         smallint t_rollback;
495 };
496 struct globals2 {
497         uint32_t t_info; /* often used */
498         uint32_t t_tclass;
499         char *t_string;
500         int t_lineno;
501
502         var *intvar[NUM_INTERNAL_VARS]; /* often used */
503
504         /* former statics from various functions */
505         char *split_f0__fstrings;
506
507         uint32_t next_token__save_tclass;
508         uint32_t next_token__save_info;
509         uint32_t next_token__ltclass;
510         smallint next_token__concat_inserted;
511
512         smallint next_input_file__files_happen;
513         rstream next_input_file__rsm;
514
515         var *evaluate__fnargs;
516         unsigned evaluate__seed;
517         regex_t evaluate__sreg;
518
519         var ptest__v;
520
521         tsplitter exec_builtin__tspl;
522
523         /* biggest and least used members go last */
524         tsplitter fsplitter, rsplitter;
525 };
/* G1 is the struct globals located directly in front of the address held
 * in ptr_to_globals, G is the struct globals2 that starts at that address
 * (INIT_G() below lays them out this way).
 * NOTE(review): the [-1] indexing assumes ptr_to_globals is declared as
 * struct globals * - confirm in libbb. */
526 #define G1 (ptr_to_globals[-1])
527 #define G (*(struct globals2 *)ptr_to_globals)
528 /* For debug. nm --size-sort awk.o | grep -vi ' [tr] ' */
529 /*char G1size[sizeof(G1)]; - 0x74 */
530 /*char Gsize[sizeof(G)]; - 0x1c4 */
531 /* Trying to keep most of members accessible with short offsets: */
532 /*char Gofs_seed[offsetof(struct globals2, evaluate__seed)]; - 0x90 */
/* Shorthands that let the rest of the file use bare member names */
533 #define t_double     (G1.t_double    )
534 #define beginseq     (G1.beginseq    )
535 #define mainseq      (G1.mainseq     )
536 #define endseq       (G1.endseq      )
537 #define seq          (G1.seq         )
538 #define break_ptr    (G1.break_ptr   )
539 #define continue_ptr (G1.continue_ptr)
540 #define iF           (G1.iF          )
541 #define vhash        (G1.vhash       )
542 #define ahash        (G1.ahash       )
543 #define fdhash       (G1.fdhash      )
544 #define fnhash       (G1.fnhash      )
545 #define g_progname   (G1.g_progname  )
546 #define g_lineno     (G1.g_lineno    )
547 #define nfields      (G1.nfields     )
548 #define maxfields    (G1.maxfields   )
549 #define Fields       (G1.Fields      )
550 #define g_cb         (G1.g_cb        )
551 #define g_pos        (G1.g_pos       )
552 #define g_buf        (G1.g_buf       )
553 #define icase        (G1.icase       )
554 #define exiting      (G1.exiting     )
555 #define nextrec      (G1.nextrec     )
556 #define nextfile     (G1.nextfile    )
557 #define is_f0_split  (G1.is_f0_split )
558 #define t_rollback   (G1.t_rollback  )
559 #define t_info       (G.t_info      )
560 #define t_tclass     (G.t_tclass    )
561 #define t_string     (G.t_string    )
562 #define t_lineno     (G.t_lineno    )
563 #define intvar       (G.intvar      )
564 #define fsplitter    (G.fsplitter   )
565 #define rsplitter    (G.rsplitter   )
/* One zero-filled allocation holds both structs back to back; the globals
 * pointer is set sizeof(G1) bytes into it. Everything starts as zero
 * except next_token__ltclass (TC_OPTERM) and evaluate__seed (1). */
566 #define INIT_G() do { \
567         SET_PTR_TO_GLOBALS((char*)xzalloc(sizeof(G1)+sizeof(G)) + sizeof(G1)); \
568         G.next_token__ltclass = TC_OPTERM; \
569         G.evaluate__seed = 1; \
570 } while (0)
571
572
573 /* function prototypes */
/* Forward declarations for functions that are called before their
 * definitions appear (e.g. handle_special() from setvar_p(), fmt_num()
 * from getvar_s()). */
574 static void handle_special(var *);
575 static node *parse_expr(uint32_t);
576 static void chain_group(void);
577 static var *evaluate(node *, var *);
578 static rstream *next_input_file(void);
579 static int fmt_num(char *, int, const char *, double, int);
580 static int awk_exit(int) NORETURN;
581
582 /* ---- error handling ---- */
583
/* Message texts for error reporting.
 * NOTE(review): presumably passed to syntax_error(), which prefixes the
 * program name and line number - the call sites are outside this part. */
584 static const char EMSG_INTERNAL_ERROR[] ALIGN1 = "Internal error";
585 static const char EMSG_UNEXP_EOS[] ALIGN1 = "Unexpected end of string";
586 static const char EMSG_UNEXP_TOKEN[] ALIGN1 = "Unexpected token";
587 static const char EMSG_DIV_BY_ZERO[] ALIGN1 = "Division by zero";
588 static const char EMSG_INV_FMT[] ALIGN1 = "Invalid format specifier";
589 static const char EMSG_TOO_FEW_ARGS[] ALIGN1 = "Too few arguments for builtin";
590 static const char EMSG_NOT_ARRAY[] ALIGN1 = "Not an array";
591 static const char EMSG_POSSIBLE_ERROR[] ALIGN1 = "Possible syntax error";
592 static const char EMSG_UNDEF_FUNC[] ALIGN1 = "Call to undefined function";
593 static const char EMSG_NO_MATH[] ALIGN1 = "Math support is not compiled in";
594
595 static void zero_out_var(var *vp)
596 {
597         memset(vp, 0, sizeof(*vp));
598 }
599
600 static void syntax_error(const char *message) NORETURN;
601 static void syntax_error(const char *message)
602 {
603         bb_error_msg_and_die("%s:%i: %s", g_progname, g_lineno, message);
604 }
605
606 /* ---- hash stuff ---- */
607
/* Compute the raw hash of NAME: for every character the running value is
 * multiplied by 63 (written as (h << 6) - h) and the character is added.
 * Arithmetic is unsigned and wraps; callers reduce the result modulo the
 * table size. The empty string hashes to 0. */
static unsigned hashidx(const char *name)
{
        unsigned h = 0;
        const char *p;

        for (p = name; *p != '\0'; p++)
                h = (h << 6) - h + *p;
        return h;
}
616
617 /* create new hash */
618 static xhash *hash_init(void)
619 {
620         xhash *newhash;
621
622         newhash = xzalloc(sizeof(*newhash));
623         newhash->csize = FIRST_PRIME;
624         newhash->items = xzalloc(FIRST_PRIME * sizeof(newhash->items[0]));
625
626         return newhash;
627 }
628
629 /* find item in hash, return ptr to data, NULL if not found */
630 static void *hash_search(xhash *hash, const char *name)
631 {
632         hash_item *hi;
633
634         hi = hash->items[hashidx(name) % hash->csize];
635         while (hi) {
636                 if (strcmp(hi->name, name) == 0)
637                         return &hi->data;
638                 hi = hi->next;
639         }
640         return NULL;
641 }
642
643 /* grow hash if it becomes too big */
644 static void hash_rebuild(xhash *hash)
645 {
646         unsigned newsize, i, idx;
647         hash_item **newitems, *hi, *thi;
648
649         if (hash->nprime == ARRAY_SIZE(PRIMES))
650                 return;
651
652         newsize = PRIMES[hash->nprime++];
653         newitems = xzalloc(newsize * sizeof(newitems[0]));
654
655         for (i = 0; i < hash->csize; i++) {
656                 hi = hash->items[i];
657                 while (hi) {
658                         thi = hi;
659                         hi = thi->next;
660                         idx = hashidx(thi->name) % newsize;
661                         thi->next = newitems[idx];
662                         newitems[idx] = thi;
663                 }
664         }
665
666         free(hash->items);
667         hash->csize = newsize;
668         hash->items = newitems;
669 }
670
671 /* find item in hash, add it if necessary. Return ptr to data */
672 static void *hash_find(xhash *hash, const char *name)
673 {
674         hash_item *hi;
675         unsigned idx;
676         int l;
677
678         hi = hash_search(hash, name);
679         if (!hi) {
680                 if (++hash->nel / hash->csize > 10)
681                         hash_rebuild(hash);
682
683                 l = strlen(name) + 1;
684                 hi = xzalloc(sizeof(*hi) + l);
685                 strcpy(hi->name, name);
686
687                 idx = hashidx(name) % hash->csize;
688                 hi->next = hash->items[idx];
689                 hash->items[idx] = hi;
690                 hash->glen += l;
691         }
692         return &hi->data;
693 }
694
/* Typed wrappers around hash_find(): each returns the entry called NAME,
 * creating a zero-filled one when it does not exist yet. findvar() works on
 * the given hash; newvar/newfile/newfunc use the global vhash, fdhash and
 * fnhash tables respectively. */
695 #define findvar(hash, name) ((var*)    hash_find((hash), (name)))
696 #define newvar(name)        ((var*)    hash_find(vhash, (name)))
697 #define newfile(name)       ((rstream*)hash_find(fdhash, (name)))
698 #define newfunc(name)       ((func*)   hash_find(fnhash, (name)))
699
700 static void hash_remove(xhash *hash, const char *name)
701 {
702         hash_item *hi, **phi;
703
704         phi = &hash->items[hashidx(name) % hash->csize];
705         while (*phi) {
706                 hi = *phi;
707                 if (strcmp(hi->name, name) == 0) {
708                         hash->glen -= (strlen(name) + 1);
709                         hash->nel--;
710                         *phi = hi->next;
711                         free(hi);
712                         break;
713                 }
714                 phi = &hi->next;
715         }
716 }
717
718 /* ------ some useful functions ------ */
719
720 static char *skip_spaces(char *p)
721 {
722         while (1) {
723                 if (*p == '\\' && p[1] == '\n') {
724                         p++;
725                         t_lineno++;
726                 } else if (*p != ' ' && *p != '\t') {
727                         break;
728                 }
729                 p++;
730         }
731         return p;
732 }
733
/* Step through a block of NUL-separated words: hands back the word *s
 * currently points at and moves *s to the byte just after that word's
 * terminating NUL. */
static char *nextword(char **s)
{
        char *word = *s;

        *s = word + strlen(word) + 1;
        return word;
}
742
/* Fetch one (possibly escaped) character from *s and advance *s past it.
 * A backslash is handed to bb_process_escape_sequence(). If that call
 * still returns a backslash and did not move *s, the character following
 * the backslash is returned as-is, and *s steps over it unless it is the
 * terminating NUL.
 */
static char nextchar(char **s)
{
        char *after;
        char c = **s;

        (*s)++;
        if (c != '\\')
                return c;

        after = *s;
        c = bb_process_escape_sequence((const char**)s);
        /* Example awk statement:
         * s = "abc\"def"
         * we must treat \" as "
         */
        if (c == '\\' && *s == after) { /* escape was not recognized */
                c = *after; /* take the character after the backslash */
                if (c != '\0')
                        *s = after + 1; /* never step past the NUL */
        }
        return c;
}
762
/* Replace the escape sequences in s1 by the characters they stand for,
 * rewriting the string in its own buffer. Each output character comes from
 * nextchar(); the loop ends after the first NUL it returns has been stored.
 * TODO: merge with strcpy_and_process_escape_sequences()?
 */
static void unescape_string_in_place(char *s1)
{
        char *src = s1;
        char *dst = s1;
        char c;

        do {
                c = nextchar(&src);
                *dst++ = c;
        } while (c != '\0');
}
771
772 static ALWAYS_INLINE int isalnum_(int c)
773 {
774         return (isalnum(c) || c == '_');
775 }
776
777 static double my_strtod(char **pp)
778 {
779         char *cp = *pp;
780         if (ENABLE_DESKTOP && cp[0] == '0') {
781                 /* Might be hex or octal integer: 0x123abc or 07777 */
782                 char c = (cp[1] | 0x20);
783                 if (c == 'x' || isdigit(cp[1])) {
784                         unsigned long long ull = strtoull(cp, pp, 0);
785                         if (c == 'x')
786                                 return ull;
787                         c = **pp;
788                         if (!isdigit(c) && c != '.')
789                                 return ull;
790                         /* else: it may be a floating number. Examples:
791                          * 009.123 (*pp points to '9')
792                          * 000.123 (*pp points to '.')
793                          * fall through to strtod.
794                          */
795                 }
796         }
797         return strtod(cp, pp);
798 }
799
800 /* -------- working with variables (set/get/copy/etc) -------- */
801
802 static xhash *iamarray(var *v)
803 {
804         var *a = v;
805
806         while (a->type & VF_CHILD)
807                 a = a->x.parent;
808
809         if (!(a->type & VF_ARRAY)) {
810                 a->type |= VF_ARRAY;
811                 a->x.array = hash_init();
812         }
813         return a->x.array;
814 }
815
816 static void clear_array(xhash *array)
817 {
818         unsigned i;
819         hash_item *hi, *thi;
820
821         for (i = 0; i < array->csize; i++) {
822                 hi = array->items[i];
823                 while (hi) {
824                         thi = hi;
825                         hi = hi->next;
826                         free(thi->data.v.string);
827                         free(thi);
828                 }
829                 array->items[i] = NULL;
830         }
831         array->glen = array->nel = 0;
832 }
833
834 /* clear a variable */
835 static var *clrvar(var *v)
836 {
837         if (!(v->type & VF_FSTR))
838                 free(v->string);
839
840         v->type &= VF_DONTTOUCH;
841         v->type |= VF_DIRTY;
842         v->string = NULL;
843         return v;
844 }
845
846 /* assign string value to variable */
847 static var *setvar_p(var *v, char *value)
848 {
849         clrvar(v);
850         v->string = value;
851         handle_special(v);
852         return v;
853 }
854
855 /* same as setvar_p but make a copy of string */
856 static var *setvar_s(var *v, const char *value)
857 {
858         return setvar_p(v, (value && *value) ? xstrdup(value) : NULL);
859 }
860
861 /* same as setvar_s but sets USER flag */
862 static var *setvar_u(var *v, const char *value)
863 {
864         v = setvar_s(v, value);
865         v->type |= VF_USER;
866         return v;
867 }
868
869 /* set array element to user string */
870 static void setari_u(var *a, int idx, const char *s)
871 {
872         var *v;
873
874         v = findvar(iamarray(a), itoa(idx));
875         setvar_u(v, s);
876 }
877
878 /* assign numeric value to variable */
879 static var *setvar_i(var *v, double value)
880 {
881         clrvar(v);
882         v->type |= VF_NUMBER;
883         v->number = value;
884         handle_special(v);
885         return v;
886 }
887
888 static const char *getvar_s(var *v)
889 {
890         /* if v is numeric and has no cached string, convert it to string */
891         if ((v->type & (VF_NUMBER | VF_CACHED)) == VF_NUMBER) {
892                 fmt_num(g_buf, MAXVARFMT, getvar_s(intvar[CONVFMT]), v->number, TRUE);
893                 v->string = xstrdup(g_buf);
894                 v->type |= VF_CACHED;
895         }
896         return (v->string == NULL) ? "" : v->string;
897 }
898
899 static double getvar_i(var *v)
900 {
901         char *s;
902
903         if ((v->type & (VF_NUMBER | VF_CACHED)) == 0) {
904                 v->number = 0;
905                 s = v->string;
906                 if (s && *s) {
907                         debug_printf_eval("getvar_i: '%s'->", s);
908                         v->number = my_strtod(&s);
909                         debug_printf_eval("%f (s:'%s')\n", v->number, s);
910                         if (v->type & VF_USER) {
911                                 s = skip_spaces(s);
912                                 if (*s != '\0')
913                                         v->type &= ~VF_USER;
914                         }
915                 } else {
916                         debug_printf_eval("getvar_i: '%s'->zero\n", s);
917                         v->type &= ~VF_USER;
918                 }
919                 v->type |= VF_CACHED;
920         }
921         debug_printf_eval("getvar_i: %f\n", v->number);
922         return v->number;
923 }
924
925 /* Used for operands of bitwise ops */
926 static unsigned long getvar_i_int(var *v)
927 {
928         double d = getvar_i(v);
929
930         /* Casting doubles to longs is undefined for values outside
931          * of target type range. Try to widen it as much as possible */
932         if (d >= 0)
933                 return (unsigned long)d;
934         /* Why? Think about d == -4294967295.0 (assuming 32bit longs) */
935         return - (long) (unsigned long) (-d);
936 }
937
938 static var *copyvar(var *dest, const var *src)
939 {
940         if (dest != src) {
941                 clrvar(dest);
942                 dest->type |= (src->type & ~(VF_DONTTOUCH | VF_FSTR));
943                 debug_printf_eval("copyvar: number:%f string:'%s'\n", src->number, src->string);
944                 dest->number = src->number;
945                 if (src->string)
946                         dest->string = xstrdup(src->string);
947         }
948         handle_special(dest);
949         return dest;
950 }
951
952 static var *incvar(var *v)
953 {
954         return setvar_i(v, getvar_i(v) + 1.0);
955 }
956
957 /* return true if v is number or numeric string */
958 static int is_numeric(var *v)
959 {
960         getvar_i(v);
961         return ((v->type ^ VF_DIRTY) & (VF_NUMBER | VF_USER | VF_DIRTY));
962 }
963
964 /* return 1 when value of v corresponds to true, 0 otherwise */
965 static int istrue(var *v)
966 {
967         if (is_numeric(v))
968                 return (v->number != 0);
969         return (v->string && v->string[0]);
970 }
971
972 /* temporary variables allocator. Last allocated should be first freed */
973 static var *nvalloc(int n)
974 {
975         nvblock *pb = NULL;
976         var *v, *r;
977         int size;
978
979         while (g_cb) {
980                 pb = g_cb;
981                 if ((g_cb->pos - g_cb->nv) + n <= g_cb->size)
982                         break;
983                 g_cb = g_cb->next;
984         }
985
986         if (!g_cb) {
987                 size = (n <= MINNVBLOCK) ? MINNVBLOCK : n;
988                 g_cb = xzalloc(sizeof(nvblock) + size * sizeof(var));
989                 g_cb->size = size;
990                 g_cb->pos = g_cb->nv;
991                 g_cb->prev = pb;
992                 /*g_cb->next = NULL; - xzalloc did it */
993                 if (pb)
994                         pb->next = g_cb;
995         }
996
997         v = r = g_cb->pos;
998         g_cb->pos += n;
999
1000         while (v < g_cb->pos) {
1001                 v->type = 0;
1002                 v->string = NULL;
1003                 v++;
1004         }
1005
1006         return r;
1007 }
1008
1009 static void nvfree(var *v)
1010 {
1011         var *p;
1012
1013         if (v < g_cb->nv || v >= g_cb->pos)
1014                 syntax_error(EMSG_INTERNAL_ERROR);
1015
1016         for (p = v; p < g_cb->pos; p++) {
1017                 if ((p->type & (VF_ARRAY | VF_CHILD)) == VF_ARRAY) {
1018                         clear_array(iamarray(p));
1019                         free(p->x.array->items);
1020                         free(p->x.array);
1021                 }
1022                 if (p->type & VF_WALK) {
1023                         walker_list *n;
1024                         walker_list *w = p->x.walker;
1025                         debug_printf_walker("nvfree: freeing walker @%p\n", &p->x.walker);
1026                         p->x.walker = NULL;
1027                         while (w) {
1028                                 n = w->prev;
1029                                 debug_printf_walker(" free(%p)\n", w);
1030                                 free(w);
1031                                 w = n;
1032                         }
1033                 }
1034                 clrvar(p);
1035         }
1036
1037         g_cb->pos = v;
1038         while (g_cb->prev && g_cb->pos == g_cb->nv) {
1039                 g_cb = g_cb->prev;
1040         }
1041 }
1042
1043 /* ------- awk program text parsing ------- */
1044
1045 /* Parse next token pointed by global pos, place results into global ttt.
1046  * If token isn't expected, give away. Return token class
1047  */
1048 static uint32_t next_token(uint32_t expected)
1049 {
1050 #define concat_inserted (G.next_token__concat_inserted)
1051 #define save_tclass     (G.next_token__save_tclass)
1052 #define save_info       (G.next_token__save_info)
1053 /* Initialized to TC_OPTERM: */
1054 #define ltclass         (G.next_token__ltclass)
1055
1056         char *p, *s;
1057         const char *tl;
1058         uint32_t tc;
1059         const uint32_t *ti;
1060
1061         if (t_rollback) {
1062                 t_rollback = FALSE;
1063
1064         } else if (concat_inserted) {
1065                 concat_inserted = FALSE;
1066                 t_tclass = save_tclass;
1067                 t_info = save_info;
1068
1069         } else {
1070                 p = g_pos;
1071  readnext:
1072                 p = skip_spaces(p);
1073                 g_lineno = t_lineno;
1074                 if (*p == '#')
1075                         while (*p != '\n' && *p != '\0')
1076                                 p++;
1077
1078                 if (*p == '\n')
1079                         t_lineno++;
1080
1081                 if (*p == '\0') {
1082                         tc = TC_EOF;
1083                         debug_printf_parse("%s: token found: TC_EOF\n", __func__);
1084
1085                 } else if (*p == '\"') {
1086                         /* it's a string */
1087                         t_string = s = ++p;
1088                         while (*p != '\"') {
1089                                 char *pp;
1090                                 if (*p == '\0' || *p == '\n')
1091                                         syntax_error(EMSG_UNEXP_EOS);
1092                                 pp = p;
1093                                 *s++ = nextchar(&pp);
1094                                 p = pp;
1095                         }
1096                         p++;
1097                         *s = '\0';
1098                         tc = TC_STRING;
1099                         debug_printf_parse("%s: token found:'%s' TC_STRING\n", __func__, t_string);
1100
1101                 } else if ((expected & TC_REGEXP) && *p == '/') {
1102                         /* it's regexp */
1103                         t_string = s = ++p;
1104                         while (*p != '/') {
1105                                 if (*p == '\0' || *p == '\n')
1106                                         syntax_error(EMSG_UNEXP_EOS);
1107                                 *s = *p++;
1108                                 if (*s++ == '\\') {
1109                                         char *pp = p;
1110                                         s[-1] = bb_process_escape_sequence((const char **)&pp);
1111                                         if (*p == '\\')
1112                                                 *s++ = '\\';
1113                                         if (pp == p)
1114                                                 *s++ = *p++;
1115                                         else
1116                                                 p = pp;
1117                                 }
1118                         }
1119                         p++;
1120                         *s = '\0';
1121                         tc = TC_REGEXP;
1122                         debug_printf_parse("%s: token found:'%s' TC_REGEXP\n", __func__, t_string);
1123
1124                 } else if (*p == '.' || isdigit(*p)) {
1125                         /* it's a number */
1126                         char *pp = p;
1127                         t_double = my_strtod(&pp);
1128                         p = pp;
1129                         if (*p == '.')
1130                                 syntax_error(EMSG_UNEXP_TOKEN);
1131                         tc = TC_NUMBER;
1132                         debug_printf_parse("%s: token found:%f TC_NUMBER\n", __func__, t_double);
1133
1134                 } else {
1135                         /* search for something known */
1136                         tl = tokenlist;
1137                         tc = 0x00000001;
1138                         ti = tokeninfo;
1139                         while (*tl) {
1140                                 int l = (unsigned char) *tl++;
1141                                 if (l == (unsigned char) NTCC) {
1142                                         tc <<= 1;
1143                                         continue;
1144                                 }
1145                                 /* if token class is expected,
1146                                  * token matches,
1147                                  * and it's not a longer word,
1148                                  */
1149                                 if ((tc & (expected | TC_WORD | TC_NEWLINE))
1150                                  && strncmp(p, tl, l) == 0
1151                                  && !((tc & TC_WORD) && isalnum_(p[l]))
1152                                 ) {
1153                                         /* then this is what we are looking for */
1154                                         t_info = *ti;
1155                                         debug_printf_parse("%s: token found:'%.*s' t_info:%x\n", __func__, l, p, t_info);
1156                                         p += l;
1157                                         goto token_found;
1158                                 }
1159                                 ti++;
1160                                 tl += l;
1161                         }
1162                         /* not a known token */
1163
1164                         /* is it a name? (var/array/function) */
1165                         if (!isalnum_(*p))
1166                                 syntax_error(EMSG_UNEXP_TOKEN); /* no */
1167                         /* yes */
1168                         t_string = --p;
1169                         while (isalnum_(*++p)) {
1170                                 p[-1] = *p;
1171                         }
1172                         p[-1] = '\0';
1173                         tc = TC_VARIABLE;
1174                         /* also consume whitespace between functionname and bracket */
1175                         if (!(expected & TC_VARIABLE) || (expected & TC_ARRAY))
1176                                 p = skip_spaces(p);
1177                         if (*p == '(') {
1178                                 tc = TC_FUNCTION;
1179                                 debug_printf_parse("%s: token found:'%s' TC_FUNCTION\n", __func__, t_string);
1180                         } else {
1181                                 if (*p == '[') {
1182                                         p++;
1183                                         tc = TC_ARRAY;
1184                                         debug_printf_parse("%s: token found:'%s' TC_ARRAY\n", __func__, t_string);
1185                                 } else
1186                                         debug_printf_parse("%s: token found:'%s' TC_VARIABLE\n", __func__, t_string);
1187                         }
1188                 }
1189  token_found:
1190                 g_pos = p;
1191
1192                 /* skipping newlines in some cases */
1193                 if ((ltclass & TC_NOTERM) && (tc & TC_NEWLINE))
1194                         goto readnext;
1195
1196                 /* insert concatenation operator when needed */
1197                 if ((ltclass & TC_CONCAT1) && (tc & TC_CONCAT2) && (expected & TC_BINOP)) {
1198                         concat_inserted = TRUE;
1199                         save_tclass = tc;
1200                         save_info = t_info;
1201                         tc = TC_BINOP;
1202                         t_info = OC_CONCAT | SS | P(35);
1203                 }
1204
1205                 t_tclass = tc;
1206         }
1207         ltclass = t_tclass;
1208
1209         /* Are we ready for this? */
1210         if (!(ltclass & expected))
1211                 syntax_error((ltclass & (TC_NEWLINE | TC_EOF)) ?
1212                                 EMSG_UNEXP_EOS : EMSG_UNEXP_TOKEN);
1213
1214         return ltclass;
1215 #undef concat_inserted
1216 #undef save_tclass
1217 #undef save_info
1218 #undef ltclass
1219 }
1220
1221 static void rollback_token(void)
1222 {
1223         t_rollback = TRUE;
1224 }
1225
1226 static node *new_node(uint32_t info)
1227 {
1228         node *n;
1229
1230         n = xzalloc(sizeof(node));
1231         n->info = info;
1232         n->lineno = g_lineno;
1233         return n;
1234 }
1235
1236 static void mk_re_node(const char *s, node *n, regex_t *re)
1237 {
1238         n->info = OC_REGEXP;
1239         n->l.re = re;
1240         n->r.ire = re + 1;
1241         xregcomp(re, s, REG_EXTENDED);
1242         xregcomp(re + 1, s, REG_EXTENDED | REG_ICASE);
1243 }
1244
1245 static node *condition(void)
1246 {
1247         next_token(TC_SEQSTART);
1248         return parse_expr(TC_SEQTERM);
1249 }
1250
1251 /* parse expression terminated by given argument, return ptr
1252  * to built subtree. Terminator is eaten by parse_expr */
1253 static node *parse_expr(uint32_t iexp)
1254 {
1255         node sn;
1256         node *cn = &sn;
1257         node *vn, *glptr;
1258         uint32_t tc, xtc;
1259         var *v;
1260
1261         debug_printf_parse("%s(%x)\n", __func__, iexp);
1262
1263         sn.info = PRIMASK;
1264         sn.r.n = glptr = NULL;
1265         xtc = TC_OPERAND | TC_UOPPRE | TC_REGEXP | iexp;
1266
1267         while (!((tc = next_token(xtc)) & iexp)) {
1268
1269                 if (glptr && (t_info == (OC_COMPARE | VV | P(39) | 2))) {
1270                         /* input redirection (<) attached to glptr node */
1271                         debug_printf_parse("%s: input redir\n", __func__);
1272                         cn = glptr->l.n = new_node(OC_CONCAT | SS | P(37));
1273                         cn->a.n = glptr;
1274                         xtc = TC_OPERAND | TC_UOPPRE;
1275                         glptr = NULL;
1276
1277                 } else if (tc & (TC_BINOP | TC_UOPPOST)) {
1278                         debug_printf_parse("%s: TC_BINOP | TC_UOPPOST\n", __func__);
1279                         /* for binary and postfix-unary operators, jump back over
1280                          * previous operators with higher priority */
1281                         vn = cn;
1282                         while (((t_info & PRIMASK) > (vn->a.n->info & PRIMASK2))
1283                             || ((t_info == vn->info) && ((t_info & OPCLSMASK) == OC_COLON))
1284                         ) {
1285                                 vn = vn->a.n;
1286                         }
1287                         if ((t_info & OPCLSMASK) == OC_TERNARY)
1288                                 t_info += P(6);
1289                         cn = vn->a.n->r.n = new_node(t_info);
1290                         cn->a.n = vn->a.n;
1291                         if (tc & TC_BINOP) {
1292                                 cn->l.n = vn;
1293                                 xtc = TC_OPERAND | TC_UOPPRE | TC_REGEXP;
1294                                 if ((t_info & OPCLSMASK) == OC_PGETLINE) {
1295                                         /* it's a pipe */
1296                                         next_token(TC_GETLINE);
1297                                         /* give maximum priority to this pipe */
1298                                         cn->info &= ~PRIMASK;
1299                                         xtc = TC_OPERAND | TC_UOPPRE | TC_BINOP | iexp;
1300                                 }
1301                         } else {
1302                                 cn->r.n = vn;
1303                                 xtc = TC_OPERAND | TC_UOPPRE | TC_BINOP | iexp;
1304                         }
1305                         vn->a.n = cn;
1306
1307                 } else {
1308                         debug_printf_parse("%s: other\n", __func__);
1309                         /* for operands and prefix-unary operators, attach them
1310                          * to last node */
1311                         vn = cn;
1312                         cn = vn->r.n = new_node(t_info);
1313                         cn->a.n = vn;
1314                         xtc = TC_OPERAND | TC_UOPPRE | TC_REGEXP;
1315                         if (tc & (TC_OPERAND | TC_REGEXP)) {
1316                                 debug_printf_parse("%s: TC_OPERAND | TC_REGEXP\n", __func__);
1317                                 xtc = TC_UOPPRE | TC_UOPPOST | TC_BINOP | TC_OPERAND | iexp;
1318                                 /* one should be very careful with switch on tclass -
1319                                  * only simple tclasses should be used! */
1320                                 switch (tc) {
1321                                 case TC_VARIABLE:
1322                                 case TC_ARRAY:
1323                                         debug_printf_parse("%s: TC_VARIABLE | TC_ARRAY\n", __func__);
1324                                         cn->info = OC_VAR;
1325                                         v = hash_search(ahash, t_string);
1326                                         if (v != NULL) {
1327                                                 cn->info = OC_FNARG;
1328                                                 cn->l.aidx = v->x.aidx;
1329                                         } else {
1330                                                 cn->l.v = newvar(t_string);
1331                                         }
1332                                         if (tc & TC_ARRAY) {
1333                                                 cn->info |= xS;
1334                                                 cn->r.n = parse_expr(TC_ARRTERM);
1335                                         }
1336                                         break;
1337
1338                                 case TC_NUMBER:
1339                                 case TC_STRING:
1340                                         debug_printf_parse("%s: TC_NUMBER | TC_STRING\n", __func__);
1341                                         cn->info = OC_VAR;
1342                                         v = cn->l.v = xzalloc(sizeof(var));
1343                                         if (tc & TC_NUMBER)
1344                                                 setvar_i(v, t_double);
1345                                         else
1346                                                 setvar_s(v, t_string);
1347                                         break;
1348
1349                                 case TC_REGEXP:
1350                                         debug_printf_parse("%s: TC_REGEXP\n", __func__);
1351                                         mk_re_node(t_string, cn, xzalloc(sizeof(regex_t)*2));
1352                                         break;
1353
1354                                 case TC_FUNCTION:
1355                                         debug_printf_parse("%s: TC_FUNCTION\n", __func__);
1356                                         cn->info = OC_FUNC;
1357                                         cn->r.f = newfunc(t_string);
1358                                         cn->l.n = condition();
1359                                         break;
1360
1361                                 case TC_SEQSTART:
1362                                         debug_printf_parse("%s: TC_SEQSTART\n", __func__);
1363                                         cn = vn->r.n = parse_expr(TC_SEQTERM);
1364                                         if (!cn)
1365                                                 syntax_error("Empty sequence");
1366                                         cn->a.n = vn;
1367                                         break;
1368
1369                                 case TC_GETLINE:
1370                                         debug_printf_parse("%s: TC_GETLINE\n", __func__);
1371                                         glptr = cn;
1372                                         xtc = TC_OPERAND | TC_UOPPRE | TC_BINOP | iexp;
1373                                         break;
1374
1375                                 case TC_BUILTIN:
1376                                         debug_printf_parse("%s: TC_BUILTIN\n", __func__);
1377                                         cn->l.n = condition();
1378                                         break;
1379                                 }
1380                         }
1381                 }
1382         }
1383
1384         debug_printf_parse("%s() returns %p\n", __func__, sn.r.n);
1385         return sn.r.n;
1386 }
1387
1388 /* add node to chain. Return ptr to alloc'd node */
1389 static node *chain_node(uint32_t info)
1390 {
1391         node *n;
1392
1393         if (!seq->first)
1394                 seq->first = seq->last = new_node(0);
1395
1396         if (seq->programname != g_progname) {
1397                 seq->programname = g_progname;
1398                 n = chain_node(OC_NEWSOURCE);
1399                 n->l.new_progname = xstrdup(g_progname);
1400         }
1401
1402         n = seq->last;
1403         n->info = info;
1404         seq->last = n->a.n = new_node(OC_DONE);
1405
1406         return n;
1407 }
1408
1409 static void chain_expr(uint32_t info)
1410 {
1411         node *n;
1412
1413         n = chain_node(info);
1414         n->l.n = parse_expr(TC_OPTERM | TC_GRPTERM);
1415         if (t_tclass & TC_GRPTERM)
1416                 rollback_token();
1417 }
1418
1419 static node *chain_loop(node *nn)
1420 {
1421         node *n, *n2, *save_brk, *save_cont;
1422
1423         save_brk = break_ptr;
1424         save_cont = continue_ptr;
1425
1426         n = chain_node(OC_BR | Vx);
1427         continue_ptr = new_node(OC_EXEC);
1428         break_ptr = new_node(OC_EXEC);
1429         chain_group();
1430         n2 = chain_node(OC_EXEC | Vx);
1431         n2->l.n = nn;
1432         n2->a.n = n;
1433         continue_ptr->a.n = n2;
1434         break_ptr->a.n = n->r.n = seq->last;
1435
1436         continue_ptr = save_cont;
1437         break_ptr = save_brk;
1438
1439         return n;
1440 }
1441
1442 /* parse group and attach it to chain */
1443 static void chain_group(void)
1444 {
1445         uint32_t c;
1446         node *n, *n2, *n3;
1447
1448         do {
1449                 c = next_token(TC_GRPSEQ);
1450         } while (c & TC_NEWLINE);
1451
1452         if (c & TC_GRPSTART) {
1453                 debug_printf_parse("%s: TC_GRPSTART\n", __func__);
1454                 while (next_token(TC_GRPSEQ | TC_GRPTERM) != TC_GRPTERM) {
1455                         debug_printf_parse("%s: !TC_GRPTERM\n", __func__);
1456                         if (t_tclass & TC_NEWLINE)
1457                                 continue;
1458                         rollback_token();
1459                         chain_group();
1460                 }
1461                 debug_printf_parse("%s: TC_GRPTERM\n", __func__);
1462         } else if (c & (TC_OPSEQ | TC_OPTERM)) {
1463                 debug_printf_parse("%s: TC_OPSEQ | TC_OPTERM\n", __func__);
1464                 rollback_token();
1465                 chain_expr(OC_EXEC | Vx);
1466         } else {
1467                 /* TC_STATEMNT */
1468                 debug_printf_parse("%s: TC_STATEMNT(?)\n", __func__);
1469                 switch (t_info & OPCLSMASK) {
1470                 case ST_IF:
1471                         debug_printf_parse("%s: ST_IF\n", __func__);
1472                         n = chain_node(OC_BR | Vx);
1473                         n->l.n = condition();
1474                         chain_group();
1475                         n2 = chain_node(OC_EXEC);
1476                         n->r.n = seq->last;
1477                         if (next_token(TC_GRPSEQ | TC_GRPTERM | TC_ELSE) == TC_ELSE) {
1478                                 chain_group();
1479                                 n2->a.n = seq->last;
1480                         } else {
1481                                 rollback_token();
1482                         }
1483                         break;
1484
1485                 case ST_WHILE:
1486                         debug_printf_parse("%s: ST_WHILE\n", __func__);
1487                         n2 = condition();
1488                         n = chain_loop(NULL);
1489                         n->l.n = n2;
1490                         break;
1491
1492                 case ST_DO:
1493                         debug_printf_parse("%s: ST_DO\n", __func__);
1494                         n2 = chain_node(OC_EXEC);
1495                         n = chain_loop(NULL);
1496                         n2->a.n = n->a.n;
1497                         next_token(TC_WHILE);
1498                         n->l.n = condition();
1499                         break;
1500
1501                 case ST_FOR:
1502                         debug_printf_parse("%s: ST_FOR\n", __func__);
1503                         next_token(TC_SEQSTART);
1504                         n2 = parse_expr(TC_SEMICOL | TC_SEQTERM);
1505                         if (t_tclass & TC_SEQTERM) {    /* for-in */
1506                                 if ((n2->info & OPCLSMASK) != OC_IN)
1507                                         syntax_error(EMSG_UNEXP_TOKEN);
1508                                 n = chain_node(OC_WALKINIT | VV);
1509                                 n->l.n = n2->l.n;
1510                                 n->r.n = n2->r.n;
1511                                 n = chain_loop(NULL);
1512                                 n->info = OC_WALKNEXT | Vx;
1513                                 n->l.n = n2->l.n;
1514                         } else {                        /* for (;;) */
1515                                 n = chain_node(OC_EXEC | Vx);
1516                                 n->l.n = n2;
1517                                 n2 = parse_expr(TC_SEMICOL);
1518                                 n3 = parse_expr(TC_SEQTERM);
1519                                 n = chain_loop(n3);
1520                                 n->l.n = n2;
1521                                 if (!n2)
1522                                         n->info = OC_EXEC;
1523                         }
1524                         break;
1525
1526                 case OC_PRINT:
1527                 case OC_PRINTF:
1528                         debug_printf_parse("%s: OC_PRINT[F]\n", __func__);
1529                         n = chain_node(t_info);
1530                         n->l.n = parse_expr(TC_OPTERM | TC_OUTRDR | TC_GRPTERM);
1531                         if (t_tclass & TC_OUTRDR) {
1532                                 n->info |= t_info;
1533                                 n->r.n = parse_expr(TC_OPTERM | TC_GRPTERM);
1534                         }
1535                         if (t_tclass & TC_GRPTERM)
1536                                 rollback_token();
1537                         break;
1538
1539                 case OC_BREAK:
1540                         debug_printf_parse("%s: OC_BREAK\n", __func__);
1541                         n = chain_node(OC_EXEC);
1542                         n->a.n = break_ptr;
1543                         break;
1544
1545                 case OC_CONTINUE:
1546                         debug_printf_parse("%s: OC_CONTINUE\n", __func__);
1547                         n = chain_node(OC_EXEC);
1548                         n->a.n = continue_ptr;
1549                         break;
1550
1551                 /* delete, next, nextfile, return, exit */
1552                 default:
1553                         debug_printf_parse("%s: default\n", __func__);
1554                         chain_expr(t_info);
1555                 }
1556         }
1557 }
1558
1559 static void parse_program(char *p)
1560 {
1561         uint32_t tclass;
1562         node *cn;
1563         func *f;
1564         var *v;
1565
1566         g_pos = p;
1567         t_lineno = 1;
1568         while ((tclass = next_token(TC_EOF | TC_OPSEQ | TC_GRPSTART |
1569                         TC_OPTERM | TC_BEGIN | TC_END | TC_FUNCDECL)) != TC_EOF) {
1570
1571                 if (tclass & TC_OPTERM) {
1572                         debug_printf_parse("%s: TC_OPTERM\n", __func__);
1573                         continue;
1574                 }
1575
1576                 seq = &mainseq;
1577                 if (tclass & TC_BEGIN) {
1578                         debug_printf_parse("%s: TC_BEGIN\n", __func__);
1579                         seq = &beginseq;
1580                         chain_group();
1581
1582                 } else if (tclass & TC_END) {
1583                         debug_printf_parse("%s: TC_END\n", __func__);
1584                         seq = &endseq;
1585                         chain_group();
1586
1587                 } else if (tclass & TC_FUNCDECL) {
1588                         debug_printf_parse("%s: TC_FUNCDECL\n", __func__);
1589                         next_token(TC_FUNCTION);
1590                         g_pos++;
1591                         f = newfunc(t_string);
1592                         f->body.first = NULL;
1593                         f->nargs = 0;
1594                         while (next_token(TC_VARIABLE | TC_SEQTERM) & TC_VARIABLE) {
1595                                 v = findvar(ahash, t_string);
1596                                 v->x.aidx = f->nargs++;
1597
1598                                 if (next_token(TC_COMMA | TC_SEQTERM) & TC_SEQTERM)
1599                                         break;
1600                         }
1601                         seq = &f->body;
1602                         chain_group();
1603                         clear_array(ahash);
1604
1605                 } else if (tclass & TC_OPSEQ) {
1606                         debug_printf_parse("%s: TC_OPSEQ\n", __func__);
1607                         rollback_token();
1608                         cn = chain_node(OC_TEST);
1609                         cn->l.n = parse_expr(TC_OPTERM | TC_EOF | TC_GRPSTART);
1610                         if (t_tclass & TC_GRPSTART) {
1611                                 debug_printf_parse("%s: TC_GRPSTART\n", __func__);
1612                                 rollback_token();
1613                                 chain_group();
1614                         } else {
1615                                 debug_printf_parse("%s: !TC_GRPSTART\n", __func__);
1616                                 chain_node(OC_PRINT);
1617                         }
1618                         cn->r.n = mainseq.last;
1619
1620                 } else /* if (tclass & TC_GRPSTART) */ {
1621                         debug_printf_parse("%s: TC_GRPSTART(?)\n", __func__);
1622                         rollback_token();
1623                         chain_group();
1624                 }
1625         }
1626         debug_printf_parse("%s: TC_EOF\n", __func__);
1627 }
1628
1629
1630 /* -------- program execution part -------- */
1631
1632 static node *mk_splitter(const char *s, tsplitter *spl)
1633 {
1634         regex_t *re, *ire;
1635         node *n;
1636
1637         re = &spl->re[0];
1638         ire = &spl->re[1];
1639         n = &spl->n;
1640         if ((n->info & OPCLSMASK) == OC_REGEXP) {
1641                 regfree(re);
1642                 regfree(ire); // TODO: nuke ire, use re+1?
1643         }
1644         if (s[0] && s[1]) { /* strlen(s) > 1 */
1645                 mk_re_node(s, n, re);
1646         } else {
1647                 n->info = (uint32_t) s[0];
1648         }
1649
1650         return n;
1651 }
1652
1653 /* use node as a regular expression. Supplied with node ptr and regex_t
1654  * storage space. Return ptr to regex (if result points to preg, it should
1655  * be later regfree'd manually
1656  */
1657 static regex_t *as_regex(node *op, regex_t *preg)
1658 {
1659         int cflags;
1660         var *v;
1661         const char *s;
1662
1663         if ((op->info & OPCLSMASK) == OC_REGEXP) {
1664                 return icase ? op->r.ire : op->l.re;
1665         }
1666         v = nvalloc(1);
1667         s = getvar_s(evaluate(op, v));
1668
1669         cflags = icase ? REG_EXTENDED | REG_ICASE : REG_EXTENDED;
1670         /* Testcase where REG_EXTENDED fails (unpaired '{'):
1671          * echo Hi | awk 'gsub("@(samp|code|file)\{","");'
1672          * gawk 3.1.5 eats this. We revert to ~REG_EXTENDED
1673          * (maybe gsub is not supposed to use REG_EXTENDED?).
1674          */
1675         if (regcomp(preg, s, cflags)) {
1676                 cflags &= ~REG_EXTENDED;
1677                 xregcomp(preg, s, cflags);
1678         }
1679         nvfree(v);
1680         return preg;
1681 }
1682
/*
 * Gradually growing buffer.
 *
 * Returns B unchanged while it exists and N is strictly below *SIZE.
 * Otherwise *SIZE becomes n + n/2 + 80 and B is reallocated to that size.
 * Because reallocation also happens for n == *SIZE, the buffer always
 * holds at least one byte more than N.
 */
static char* qrealloc(char *b, int n, int *size)
{
	if (b && n < *size)
		return b;

	*size = n + (n>>1) + 80;
	return xrealloc(b, *size);
}
1695
1696 /* resize field storage space */
1697 static void fsrealloc(int size)
1698 {
1699         int i;
1700
1701         if (size >= maxfields) {
1702                 i = maxfields;
1703                 maxfields = size + 16;
1704                 Fields = xrealloc(Fields, maxfields * sizeof(Fields[0]));
1705                 for (; i < maxfields; i++) {
1706                         Fields[i].type = VF_SPECIAL;
1707                         Fields[i].string = NULL;
1708                 }
1709         }
1710         /* if size < nfields, clear extra field variables */
1711         for (i = size; i < nfields; i++) {
1712                 clrvar(Fields + i);
1713         }
1714         nfields = size;
1715 }
1716
1717 static int awk_split(const char *s, node *spl, char **slist)
1718 {
1719         int l, n;
1720         char c[4];
1721         char *s1;
1722         regmatch_t pmatch[2]; // TODO: why [2]? [1] is enough...
1723
1724         /* in worst case, each char would be a separate field */
1725         *slist = s1 = xzalloc(strlen(s) * 2 + 3);
1726         strcpy(s1, s);
1727
1728         c[0] = c[1] = (char)spl->info;
1729         c[2] = c[3] = '\0';
1730         if (*getvar_s(intvar[RS]) == '\0')
1731                 c[2] = '\n';
1732
1733         n = 0;
1734         if ((spl->info & OPCLSMASK) == OC_REGEXP) {  /* regex split */
1735                 if (!*s)
1736                         return n; /* "": zero fields */
1737                 n++; /* at least one field will be there */
1738                 do {
1739                         l = strcspn(s, c+2); /* len till next NUL or \n */
1740                         if (regexec(icase ? spl->r.ire : spl->l.re, s, 1, pmatch, 0) == 0
1741                          && pmatch[0].rm_so <= l
1742                         ) {
1743                                 l = pmatch[0].rm_so;
1744                                 if (pmatch[0].rm_eo == 0) {
1745                                         l++;
1746                                         pmatch[0].rm_eo++;
1747                                 }
1748                                 n++; /* we saw yet another delimiter */
1749                         } else {
1750                                 pmatch[0].rm_eo = l;
1751                                 if (s[l])
1752                                         pmatch[0].rm_eo++;
1753                         }
1754                         memcpy(s1, s, l);
1755                         /* make sure we remove *all* of the separator chars */
1756                         do {
1757                                 s1[l] = '\0';
1758                         } while (++l < pmatch[0].rm_eo);
1759                         nextword(&s1);
1760                         s += pmatch[0].rm_eo;
1761                 } while (*s);
1762                 return n;
1763         }
1764         if (c[0] == '\0') {  /* null split */
1765                 while (*s) {
1766                         *s1++ = *s++;
1767                         *s1++ = '\0';
1768                         n++;
1769                 }
1770                 return n;
1771         }
1772         if (c[0] != ' ') {  /* single-character split */
1773                 if (icase) {
1774                         c[0] = toupper(c[0]);
1775                         c[1] = tolower(c[1]);
1776                 }
1777                 if (*s1)
1778                         n++;
1779                 while ((s1 = strpbrk(s1, c)) != NULL) {
1780                         *s1++ = '\0';
1781                         n++;
1782                 }
1783                 return n;
1784         }
1785         /* space split */
1786         while (*s) {
1787                 s = skip_whitespace(s);
1788                 if (!*s)
1789                         break;
1790                 n++;
1791                 while (*s && !isspace(*s))
1792                         *s1++ = *s++;
1793                 *s1++ = '\0';
1794         }
1795         return n;
1796 }
1797
1798 static void split_f0(void)
1799 {
1800 /* static char *fstrings; */
1801 #define fstrings (G.split_f0__fstrings)
1802
1803         int i, n;
1804         char *s;
1805
1806         if (is_f0_split)
1807                 return;
1808
1809         is_f0_split = TRUE;
1810         free(fstrings);
1811         fsrealloc(0);
1812         n = awk_split(getvar_s(intvar[F0]), &fsplitter.n, &fstrings);
1813         fsrealloc(n);
1814         s = fstrings;
1815         for (i = 0; i < n; i++) {
1816                 Fields[i].string = nextword(&s);
1817                 Fields[i].type |= (VF_FSTR | VF_USER | VF_DIRTY);
1818         }
1819
1820         /* set NF manually to avoid side effects */
1821         clrvar(intvar[NF]);
1822         intvar[NF]->type = VF_NUMBER | VF_SPECIAL;
1823         intvar[NF]->number = nfields;
1824 #undef fstrings
1825 }
1826
1827 /* perform additional actions when some internal variables changed */
1828 static void handle_special(var *v)
1829 {
1830         int n;
1831         char *b;
1832         const char *sep, *s;
1833         int sl, l, len, i, bsize;
1834
1835         if (!(v->type & VF_SPECIAL))
1836                 return;
1837
1838         if (v == intvar[NF]) {
1839                 n = (int)getvar_i(v);
1840                 fsrealloc(n);
1841
1842                 /* recalculate $0 */
1843                 sep = getvar_s(intvar[OFS]);
1844                 sl = strlen(sep);
1845                 b = NULL;
1846                 len = 0;
1847                 for (i = 0; i < n; i++) {
1848                         s = getvar_s(&Fields[i]);
1849                         l = strlen(s);
1850                         if (b) {
1851                                 memcpy(b+len, sep, sl);
1852                                 len += sl;
1853                         }
1854                         b = qrealloc(b, len+l+sl, &bsize);
1855                         memcpy(b+len, s, l);
1856                         len += l;
1857                 }
1858                 if (b)
1859                         b[len] = '\0';
1860                 setvar_p(intvar[F0], b);
1861                 is_f0_split = TRUE;
1862
1863         } else if (v == intvar[F0]) {
1864                 is_f0_split = FALSE;
1865
1866         } else if (v == intvar[FS]) {
1867                 /*
1868                  * The POSIX-2008 standard says that changing FS should have no effect on the
1869                  * current input line, but only on the next one. The language is:
1870                  *
1871                  * > Before the first reference to a field in the record is evaluated, the record
1872                  * > shall be split into fields, according to the rules in Regular Expressions,
1873                  * > using the value of FS that was current at the time the record was read.
1874                  *
1875                  * So, split up current line before assignment to FS:
1876                  */
1877                 split_f0();
1878
1879                 mk_splitter(getvar_s(v), &fsplitter);
1880
1881         } else if (v == intvar[RS]) {
1882                 mk_splitter(getvar_s(v), &rsplitter);
1883
1884         } else if (v == intvar[IGNORECASE]) {
1885                 icase = istrue(v);
1886
1887         } else {                                /* $n */
1888                 n = getvar_i(intvar[NF]);
1889                 setvar_i(intvar[NF], n > v-Fields ? n : v-Fields+1);
1890                 /* right here v is invalid. Just to note... */
1891         }
1892 }
1893
1894 /* step through func/builtin/etc arguments */
1895 static node *nextarg(node **pn)
1896 {
1897         node *n;
1898
1899         n = *pn;
1900         if (n && (n->info & OPCLSMASK) == OC_COMMA) {
1901                 *pn = n->r.n;
1902                 n = n->l.n;
1903         } else {
1904                 *pn = NULL;
1905         }
1906         return n;
1907 }
1908
1909 static void hashwalk_init(var *v, xhash *array)
1910 {
1911         hash_item *hi;
1912         unsigned i;
1913         walker_list *w;
1914         walker_list *prev_walker;
1915
1916         if (v->type & VF_WALK) {
1917                 prev_walker = v->x.walker;
1918         } else {
1919                 v->type |= VF_WALK;
1920                 prev_walker = NULL;
1921         }
1922         debug_printf_walker("hashwalk_init: prev_walker:%p\n", prev_walker);
1923
1924         w = v->x.walker = xzalloc(sizeof(*w) + array->glen + 1); /* why + 1? */
1925         debug_printf_walker(" walker@%p=%p\n", &v->x.walker, w);
1926         w->cur = w->end = w->wbuf;
1927         w->prev = prev_walker;
1928         for (i = 0; i < array->csize; i++) {
1929                 hi = array->items[i];
1930                 while (hi) {
1931                         strcpy(w->end, hi->name);
1932                         nextword(&w->end);
1933                         hi = hi->next;
1934                 }
1935         }
1936 }
1937
1938 static int hashwalk_next(var *v)
1939 {
1940         walker_list *w = v->x.walker;
1941
1942         if (w->cur >= w->end) {
1943                 walker_list *prev_walker = w->prev;
1944
1945                 debug_printf_walker("end of iteration, free(walker@%p:%p), prev_walker:%p\n", &v->x.walker, w, prev_walker);
1946                 free(w);
1947                 v->x.walker = prev_walker;
1948                 return FALSE;
1949         }
1950
1951         setvar_s(v, nextword(&w->cur));
1952         return TRUE;
1953 }
1954
1955 /* evaluate node, return 1 when result is true, 0 otherwise */
1956 static int ptest(node *pattern)
1957 {
1958         /* ptest__v is "static": to save stack space? */
1959         return istrue(evaluate(pattern, &G.ptest__v));
1960 }
1961
1962 /* read next record from stream rsm into a variable v */
     /*
      * Returns 1 when a record was stored in v, 0 when the input is exhausted
      * and nothing is buffered, and -1 when safe_read() made the byte count go
      * backwards (read error; ERRNO is then set from errno and buffered data is
      * dropped). The text of the matched record terminator is stored in RT.
      *
      * Buffer state is carried between calls in *rsm:
      *   buffer - start of the allocation (m)
      *   adv    - offset of the first unconsumed byte (a)
      *   pos    - number of unconsumed bytes (p)
      *   size   - allocated size
      * The record separator is taken from rsplitter: a regexp node, a single
      * character, or '\0' meaning "records are separated by blank lines".
      */
1963 static int awk_getline(rstream *rsm, var *v)
1964 {
1965         char *b;
1966         regmatch_t pmatch[2];
1967         int size, a, p, pp = 0;
1968         int fd, so, eo, r, rp;
1969         char c, *m, *s;
1970
1971         debug_printf_eval("entered %s()\n", __func__);
1972
1973         /* we're using our own buffer since we need access to accumulating
1974          * characters
1975          */
1976         fd = fileno(rsm->F);
1977         m = rsm->buffer;
1978         a = rsm->adv;
1979         p = rsm->pos;
1980         size = rsm->size;
1981         c = (char) rsplitter.n.info;
             /* rp: number of leading newlines to skip (blank-line mode only) */
1982         rp = 0;
1983
             /* first call on this stream: allocate an initial buffer */
1984         if (!m)
1985                 m = qrealloc(m, 256, &size);
1986
1987         do {
                     /* b: unconsumed data; so/eo: start/end of the terminator,
                      * relative to b. Default: no terminator, record is all of b. */
1988                 b = m + a;
1989                 so = eo = p;
1990                 r = 1;
1991                 if (p > 0) {
1992                         if ((rsplitter.n.info & OPCLSMASK) == OC_REGEXP) {
1993                                 if (regexec(icase ? rsplitter.n.r.ire : rsplitter.n.l.re,
1994                                                         b, 1, pmatch, 0) == 0) {
1995                                         so = pmatch[0].rm_so;
1996                                         eo = pmatch[0].rm_eo;
                                             /* a match ending exactly at the end of the
                                              * buffered data is not accepted yet: read more
                                              * first (presumably because further input could
                                              * extend the match) */
1997                                         if (b[eo] != '\0')
1998                                                 break;
1999                                 }
2000                         } else if (c != '\0') {
                                     /* single-char separator: scan only from pp, the
                                      * length of data that was present before the last read */
2001                                 s = strchr(b+pp, c);
                                     /* strchr stops at the first NUL; if that NUL lies
                                      * inside the newly read bytes, it ends the record */
2002                                 if (!s)
2003                                         s = memchr(b+pp, '\0', p - pp);
2004                                 if (s) {
2005                                         so = eo = s-b;
2006                                         eo++;
2007                                         break;
2008                                 }
2009                         } else {
                                     /* separator char is NUL: skip leading newlines, then
                                      * a record ends at the first run of 2+ newlines */
2010                                 while (b[rp] == '\n')
2011                                         rp++;
2012                                 s = strstr(b+rp, "\n\n");
2013                                 if (s) {
2014                                         so = eo = s-b;
2015                                         while (b[eo] == '\n')
2016                                                 eo++;
                                             /* only accept once something follows the
                                              * newline run; otherwise read more */
2017                                         if (b[eo] != '\0')
2018                                                 break;
2019                                 }
2020                         }
2021                 }
2022
                     /* no complete record yet: move the unconsumed data (and its
                      * terminating NUL) to the front of the buffer; offsets relative
                      * to b (so, eo, rp, pp) stay valid */
2023                 if (a > 0) {
2024                         memmove(m, m+a, p+1);
2025                         b = m;
2026                         a = 0;
2027                 }
2028
                     /* make room for at least 128 more bytes and read them in */
2029                 m = qrealloc(m, a+p+128, &size);
2030                 b = m + a;
2031                 pp = p;
2032                 p += safe_read(fd, b+p, size-p-1);
                     /* p shrank: safe_read returned a negative value */
2033                 if (p < pp) {
2034                         p = 0;
2035                         r = 0;
2036                         setvar_i(intvar[ERRNO], errno);
2037                 }
2038                 b[p] = '\0';
2039
             /* keep going while the last read added data */
2040         } while (p > pp);
2041
2042         if (p == 0) {
                     /* nothing buffered: r becomes 0 (end of input) or -1 (error) */
2043                 r--;
2044         } else {
                     /* temporarily NUL-terminate inside the buffer to copy out
                      * the record [rp, so) and the terminator [so, eo) */
2045                 c = b[so]; b[so] = '\0';
2046                 setvar_s(v, b+rp);
2047                 v->type |= VF_USER;
2048                 b[so] = c;
2049                 c = b[eo]; b[eo] = '\0';
2050                 setvar_s(intvar[RT], b+so);
2051                 b[eo] = c;
2052         }
2053
             /* save state: everything up to eo has been consumed */
2054         rsm->buffer = m;
2055         rsm->adv = a + eo;
2056         rsm->pos = p - eo;
2057         rsm->size = size;
2058
2059         debug_printf_eval("returning from %s(): %d\n", __func__, r);
2060
2061         return r;
2062 }
2063
2064 static int fmt_num(char *b, int size, const char *format, double n, int int_as_int)
2065 {
2066         int r = 0;
2067         char c;
2068         const char *s = format;
2069
2070         if (int_as_int && n == (long long)n) {
2071                 r = snprintf(b, size, "%lld", (long long)n);
2072         } else {
2073                 do { c = *s; } while (c && *++s);
2074                 if (strchr("diouxX", c)) {
2075                         r = snprintf(b, size, format, (int)n);
2076                 } else if (strchr("eEfgG", c)) {
2077                         r = snprintf(b, size, format, n);
2078                 } else {
2079                         syntax_error(EMSG_INV_FMT);
2080                 }
2081         }
2082         return r;
2083 }
2084
2085 /* formatted output into an allocated buffer, return ptr to buffer */
2086 static char *awk_printf(node *n)
2087 {
2088         char *b = NULL;
2089         char *fmt, *s, *f;
2090         const char *s1;
2091         int i, j, incr, bsize;
2092         char c, c1;
2093         var *v, *arg;
2094
2095         v = nvalloc(1);
2096         fmt = f = xstrdup(getvar_s(evaluate(nextarg(&n), v)));
2097
2098         i = 0;
2099         while (*f) {
2100                 s = f;
2101                 while (*f && (*f != '%' || *++f == '%'))
2102                         f++;
2103                 while (*f && !isalpha(*f)) {
2104                         if (*f == '*')
2105                                 syntax_error("%*x formats are not supported");
2106                         f++;
2107                 }
2108
2109                 incr = (f - s) + MAXVARFMT;
2110                 b = qrealloc(b, incr + i, &bsize);
2111                 c = *f;
2112                 if (c != '\0')
2113                         f++;
2114                 c1 = *f;
2115                 *f = '\0';
2116                 arg = evaluate(nextarg(&n), v);
2117
2118                 j = i;
2119                 if (c == 'c' || !c) {
2120                         i += sprintf(b+i, s, is_numeric(arg) ?
2121                                         (char)getvar_i(arg) : *getvar_s(arg));
2122                 } else if (c == 's') {
2123                         s1 = getvar_s(arg);
2124                         b = qrealloc(b, incr+i+strlen(s1), &bsize);
2125                         i += sprintf(b+i, s, s1);
2126                 } else {
2127                         i += fmt_num(b+i, incr, s, getvar_i(arg), FALSE);
2128                 }
2129                 *f = c1;
2130
2131                 /* if there was an error while sprintf, return value is negative */
2132                 if (i < j)
2133                         i = j;
2134         }
2135
2136         free(fmt);
2137         nvfree(v);
2138         b = xrealloc(b, i + 1);
2139         b[i] = '\0';
2140         return b;
2141 }
2142
2143 /* Common substitution routine.
2144  * Replace (nm)'th substring of (src) that matches (rn) with (repl),
2145  * store result into (dest), return number of substitutions.
2146  * If nm = 0, replace all matches.
2147  * If src or dst is NULL, use $0.
2148  * If subexp != 0, enable subexpression matching (\1-\9).
2149  */
     /*
      * Replacement text syntax, as implemented below: an '&' preceded by an
      * even number of backslashes is replaced by the matched text; with an
      * odd number it stays a literal '&'. In both cases half of the
      * backslashes (rounded down) remain in the output. With subexp set, a
      * digit preceded by an odd number of backslashes is replaced by that
      * subexpression's text (digit 0: the whole match).
      *
      * NOTE(review): the return value is match_no, i.e. the number of
      * matches examined up to the point the loop stopped.
      */
2150 static int awk_sub(node *rn, const char *repl, int nm, var *src, var *dest, int subexp)
2151 {
2152         char *resbuf;
2153         const char *sp;
2154         int match_no, residx, replen, resbufsize;
2155         int regexec_flags;
2156         regmatch_t pmatch[10];
2157         regex_t sreg, *regex;
2158
2159         resbuf = NULL;
2160         residx = 0;
2161         match_no = 0;
2162         regexec_flags = 0;
             /* regex == &sreg afterwards means it was compiled here and
              * has to be freed before returning */
2163         regex = as_regex(rn, &sreg);
2164         sp = getvar_s(src ? src : intvar[F0]);
2165         replen = strlen(repl);
             /* sp: unprocessed rest of the source; residx: length of result so far */
2166         while (regexec(regex, sp, 10, pmatch, regexec_flags) == 0) {
2167                 int so = pmatch[0].rm_so;
2168                 int eo = pmatch[0].rm_eo;
2169
2170                 //bb_error_msg("match %u: [%u,%u] '%s'%p", match_no+1, so, eo, sp,sp);
                     /* copy everything up to the end of the match; the reserved
                      * replen bytes cover the literal copy of repl done below */
2171                 resbuf = qrealloc(resbuf, residx + eo + replen, &resbufsize);
2172                 memcpy(resbuf + residx, sp, eo);
2173                 residx += eo;
                     /* nm == 0: every match is replaced; otherwise the nm'th one */
2174                 if (++match_no >= nm) {
2175                         const char *s;
2176                         int nbs;
2177
2178                         /* replace */
                             /* back up over the matched text that was just copied */
2179                         residx -= (eo - so);
                             /* nbs: number of backslashes immediately preceding *s */
2180                         nbs = 0;
2181                         for (s = repl; *s; s++) {
2182                                 char c = resbuf[residx++] = *s;
2183                                 if (c == '\\') {
2184                                         nbs++;
2185                                         continue;
2186                                 }
2187                                 if (c == '&' || (subexp && c >= '0' && c <= '9')) {
2188                                         int j;
                                             /* drop c itself plus half of the
                                              * preceding backslashes, rounded up */
2189                                         residx -= ((nbs + 3) >> 1);
2190                                         j = 0;
2191                                         if (c != '&') {
2192                                                 j = c - '0';
                                                     /* flips the parity: a digit is
                                                      * substituted when escaped */
2193                                                 nbs++;
2194                                         }
2195                                         if (nbs % 2) {
                                                     /* keep c as a literal character */
2196                                                 resbuf[residx++] = c;
2197                                         } else {
                                                     /* insert text of match (j == 0)
                                                      * or of subexpression j */
2198                                                 int n = pmatch[j].rm_eo - pmatch[j].rm_so;
2199                                                 resbuf = qrealloc(resbuf, residx + replen + n, &resbufsize);
2200                                                 memcpy(resbuf + residx, sp + pmatch[j].rm_so, n);
2201                                                 residx += n;
2202                                         }
2203                                 }
2204                                 nbs = 0;
2205                         }
2206                 }
2207
                     /* later searches start inside the string, not at its beginning */
2208                 regexec_flags = REG_NOTBOL;
2209                 sp += eo;
2210                 if (match_no == nm)
2211                         break;
2212                 if (eo == so) {
2213                         /* Empty match (e.g. "b*" will match anywhere).
2214                          * Advance by one char. */
2215 //BUG (bug 1333):
2216 //gsub(/\<b*/,"") on "abc" will reach this point, advance to "bc"
2217 //... and will erroneously match "b" even though it is NOT at the word start.
2218 //we need REG_NOTBOW but it does not exist...
2219 //TODO: if EXTRA_COMPAT=y, use GNU matching and re_search,
2220 //it should be able to do it correctly.
2221                         /* Subtle: this is safe only because
2222                          * qrealloc allocated at least one extra byte */
2223                         resbuf[residx] = *sp;
                             /* end of source: the NUL just stored terminates the result */
2224                         if (*sp == '\0')
2225                                 goto ret;
2226                         sp++;
2227                         residx++;
2228                 }
2229         }
2230
             /* append whatever follows the last processed match */
2231         resbuf = qrealloc(resbuf, residx + strlen(sp), &resbufsize);
2232         strcpy(resbuf + residx, sp);
2233  ret:
2234         //bb_error_msg("end sp:'%s'%p", sp,sp);
2235         setvar_p(dest ? dest : intvar[F0], resbuf);
2236         if (regex == &sreg)
2237                 regfree(regex);
2238         return match_no;
2239 }
2240
2241 static NOINLINE int do_mktime(const char *ds)
2242 {
2243         struct tm then;
2244         int count;
2245
2246         /*memset(&then, 0, sizeof(then)); - not needed */
2247         then.tm_isdst = -1; /* default is unknown */
2248
2249         /* manpage of mktime says these fields are ints,
2250          * so we can sscanf stuff directly into them */
2251         count = sscanf(ds, "%u %u %u %u %u %u %d",
2252                 &then.tm_year, &then.tm_mon, &then.tm_mday,
2253                 &then.tm_hour, &then.tm_min, &then.tm_sec,
2254                 &then.tm_isdst);
2255
2256         if (count < 6
2257          || (unsigned)then.tm_mon < 1
2258          || (unsigned)then.tm_year < 1900
2259         ) {
2260                 return -1;
2261         }
2262
2263         then.tm_mon -= 1;
2264         then.tm_year -= 1900;
2265
2266         return mktime(&then);
2267 }
2268
     /*
      * Execute the builtin function described by node op and store its value
      * in res, which is also returned.
      *
      * Up to four arguments are taken from the list hanging off op->l.n.
      * For each of them an[i] is the argument node, av[i] its evaluated value
      * and as[i] that value as a string; av[] and as[] are only filled in when
      * the corresponding bits of op->info ask for it. The top two bits of
      * op->info hold the minimum number of arguments.
      */
2269 static NOINLINE var *exec_builtin(node *op, var *res)
2270 {
2271 #define tspl (G.exec_builtin__tspl)
2272
2273         var *tv;
2274         node *an[4];
2275         var *av[4];
2276         const char *as[4];
2277         regmatch_t pmatch[2];
2278         regex_t sreg, *re;
2279         node *spl;
2280         uint32_t isr, info;
2281         int nargs;
2282         time_t tt;
2283         int i, l, ll, n;
2284
             /* tv[]: temporaries that receive the evaluated arguments */
2285         tv = nvalloc(4);
2286         isr = info = op->info;
2287         op = op->l.n;
2288
2289         av[2] = av[3] = NULL;
             /* isr is shifted right once per argument, so the two masks below
              * test a different pair of flag bits for every argument position */
2290         for (i = 0; i < 4 && op; i++) {
2291                 an[i] = nextarg(&op);
2292                 if (isr & 0x09000000)
2293                         av[i] = evaluate(an[i], &tv[i]);
2294                 if (isr & 0x08000000)
2295                         as[i] = getvar_s(av[i]);
2296                 isr >>= 1;
2297         }
2298
2299         nargs = i;
2300         if ((uint32_t)nargs < (info >> 30))
2301                 syntax_error(EMSG_TOO_FEW_ARGS);
2302
2303         info &= OPNMASK;
2304         switch (info) {
2305
             /* atan2(av[0], av[1]); only when built with math support */
2306         case B_a2:
2307                 if (ENABLE_FEATURE_AWK_LIBM)
2308                         setvar_i(res, atan2(getvar_i(av[0]), getvar_i(av[1])));
2309                 else
2310                         syntax_error(EMSG_NO_MATH);
2311                 break;
2312
             /* split string as[0] into array av[1]; result is the field count */
2313         case B_sp: {
2314                 char *s, *s1;
2315
                     /* third argument: a regexp node is used as is, anything else
                      * is evaluated and turned into a splitter; default is FS */
2316                 if (nargs > 2) {
2317                         spl = (an[2]->info & OPCLSMASK) == OC_REGEXP ?
2318                                 an[2] : mk_splitter(getvar_s(evaluate(an[2], &tv[2])), &tspl);
2319                 } else {
2320                         spl = &fsplitter.n;
2321                 }
2322
2323                 n = awk_split(as[0], spl, &s);
                     /* s1 remembers the buffer start: nextword() advances s */
2324                 s1 = s;
2325                 clear_array(iamarray(av[1]));
2326                 for (i = 1; i <= n; i++)
2327                         setari_u(av[1], i, nextword(&s));
2328                 free(s1);
2329                 setvar_i(res, n);
2330                 break;
2331         }
2332
             /* substring of as[0] starting at 1-based position av[1], with
              * optional length av[2]; start is clamped to [0, l], length to >= 0 */
2333         case B_ss: {
2334                 char *s;
2335
2336                 l = strlen(as[0]);
2337                 i = getvar_i(av[1]) - 1;
2338                 if (i > l)
2339                         i = l;
2340                 if (i < 0)
2341                         i = 0;
2342                 n = (nargs > 2) ? getvar_i(av[2]) : l-i;
2343                 if (n < 0)
2344                         n = 0;
2345                 s = xstrndup(as[0]+i, n);
2346                 setvar_p(res, s);
2347                 break;
2348         }
2349
2350         /* Bitwise ops must assume that operands are unsigned. GNU Awk 3.1.5:
2351          * awk '{ print or(-1,1) }' gives "4.29497e+09", not "-2.xxxe+09" */
             /* bitwise AND */
2352         case B_an:
2353                 setvar_i(res, getvar_i_int(av[0]) & getvar_i_int(av[1]));
2354                 break;
2355
             /* bitwise complement */
2356         case B_co:
2357                 setvar_i(res, ~getvar_i_int(av[0]));
2358                 break;
2359
             /* left shift */
2360         case B_ls:
2361                 setvar_i(res, getvar_i_int(av[0]) << getvar_i_int(av[1]));
2362                 break;
2363
             /* bitwise OR */
2364         case B_or:
2365                 setvar_i(res, getvar_i_int(av[0]) | getvar_i_int(av[1]));
2366                 break;
2367
             /* right shift */
2368         case B_rs:
2369                 setvar_i(res, getvar_i_int(av[0]) >> getvar_i_int(av[1]));
2370                 break;
2371
             /* bitwise XOR */
2372         case B_xo:
2373                 setvar_i(res, getvar_i_int(av[0]) ^ getvar_i_int(av[1]));
2374                 break;
2375
             /* copy of as[0] with ASCII letters converted to lower case (B_lo)
              * or upper case (B_up); other bytes are left alone */
2376         case B_lo:
2377         case B_up: {
2378                 char *s, *s1;
2379                 s1 = s = xstrdup(as[0]);
2380                 while (*s1) {
2381                         //*s1 = (info == B_up) ? toupper(*s1) : tolower(*s1);
                             /* true only for 'A'..'Z' and 'a'..'z' */
2382                         if ((unsigned char)((*s1 | 0x20) - 'a') <= ('z' - 'a'))
2383                                 *s1 = (info == B_up) ? (*s1 & 0xdf) : (*s1 | 0x20);
2384                         s1++;
2385                 }
2386                 setvar_p(res, s);
2387                 break;
2388         }
2389
             /* 1-based position of as[1] within as[0], 0 if absent or if
              * as[1] is empty; case-insensitive when icase is set */
2390         case B_ix:
2391                 n = 0;
2392                 ll = strlen(as[1]);
2393                 l = strlen(as[0]) - ll;
2394                 if (ll > 0 && l >= 0) {
2395                         if (!icase) {
2396                                 char *s = strstr(as[0], as[1]);
2397                                 if (s)
2398                                         n = (s - as[0]) + 1;
2399                         } else {
2400                                 /* this piece of code is terribly slow and
2401                                  * really should be rewritten
2402                                  */
2403                                 for (i = 0; i <= l; i++) {
2404                                         if (strncasecmp(as[0]+i, as[1], ll) == 0) {
2405                                                 n = i+1;
2406                                                 break;
2407                                         }
2408                                 }
2409                         }
2410                 }
2411                 setvar_i(res, n);
2412                 break;
2413
             /* format a timestamp with strftime(): format as[0] (or a default),
              * time av[1] (or the current time), in local time */
2414         case B_ti:
2415                 if (nargs > 1)
2416                         tt = getvar_i(av[1]);
2417                 else
2418                         time(&tt);
2419                 //s = (nargs > 0) ? as[0] : "%a %b %d %H:%M:%S %Z %Y";
2420                 i = strftime(g_buf, MAXVARFMT,
2421                         ((nargs > 0) ? as[0] : "%a %b %d %H:%M:%S %Z %Y"),
2422                         localtime(&tt));
2423                 g_buf[i] = '\0';
2424                 setvar_s(res, g_buf);
2425                 break;
2426
             /* date specification string as[0] -> timestamp, see do_mktime() */
2427         case B_mt:
2428                 setvar_i(res, do_mktime(as[0]));
2429                 break;
2430
             /* match as[0] against regex an[1]; sets RSTART and RLENGTH.
              * Found: 1-based start and match length. Not found: 0 and -1. */
2431         case B_ma:
2432                 re = as_regex(an[1], &sreg);
2433                 n = regexec(re, as[0], 1, pmatch, 0);
2434                 if (n == 0) {
2435                         pmatch[0].rm_so++;
2436                         pmatch[0].rm_eo++;
2437                 } else {
2438                         pmatch[0].rm_so = 0;
2439                         pmatch[0].rm_eo = -1;
2440                 }
2441                 setvar_i(newvar("RSTART"), pmatch[0].rm_so);
2442                 setvar_i(newvar("RLENGTH"), pmatch[0].rm_eo - pmatch[0].rm_so);
2443                 setvar_i(res, pmatch[0].rm_so);
                     /* free the regex only if as_regex() compiled it into sreg */
2444                 if (re == &sreg)
2445                         regfree(re);
2446                 break;
2447
             /* substitution with subexpression support; the substituted string
              * itself is stored in res, source is av[3] (or $0 if NULL) */
2448         case B_ge:
2449                 awk_sub(an[0], as[1], getvar_i(av[2]), av[3], res, TRUE);
2450                 break;
2451
             /* replace all matches in av[2] (or $0) in place; result is awk_sub()'s count */
2452         case B_gs:
2453                 setvar_i(res, awk_sub(an[0], as[1], 0, av[2], av[2], FALSE));
2454                 break;
2455
             /* replace the first match in av[2] (or $0) in place; result is awk_sub()'s count */
2456         case B_su:
2457                 setvar_i(res, awk_sub(an[0], as[1], 1, av[2], av[2], FALSE));
2458                 break;
2459         }
2460
2461         nvfree(tv);
2462         return res;
2463 #undef tspl
2464 }
2465
2466 /*
2467  * Evaluate node - the heart of the program. Supplied with subtree
2468  * and place where to store result. returns ptr to result.
2469  */
2470 #define XC(n) ((n) >> 8)
2471
2472 static var *evaluate(node *op, var *res)
2473 {
2474 /* This procedure is recursive so we should count every byte */
2475 #define fnargs (G.evaluate__fnargs)
2476 /* seed is initialized to 1 */
2477 #define seed   (G.evaluate__seed)
2478 #define sreg   (G.evaluate__sreg)
2479
2480         var *v1;
2481
2482         if (!op)
2483                 return setvar_s(res, NULL);
2484
2485         debug_printf_eval("entered %s()\n", __func__);
2486
2487         v1 = nvalloc(2);
2488
2489         while (op) {
2490                 struct {
2491                         var *v;
2492                         const char *s;
2493                 } L = L; /* for compiler */
2494                 struct {
2495                         var *v;
2496                         const char *s;
2497                 } R = R;
2498                 double L_d = L_d;
2499                 uint32_t opinfo;
2500                 int opn;
2501                 node *op1;
2502
2503                 opinfo = op->info;
2504                 opn = (opinfo & OPNMASK);
2505                 g_lineno = op->lineno;
2506                 op1 = op->l.n;
2507                 debug_printf_eval("opinfo:%08x opn:%08x\n", opinfo, opn);
2508
2509                 /* execute inevitable things */
2510                 if (opinfo & OF_RES1)
2511                         L.v = evaluate(op1, v1);
2512                 if (opinfo & OF_RES2)
2513                         R.v = evaluate(op->r.n, v1+1);
2514                 if (opinfo & OF_STR1) {
2515                         L.s = getvar_s(L.v);
2516                         debug_printf_eval("L.s:'%s'\n", L.s);
2517                 }
2518                 if (opinfo & OF_STR2) {
2519                         R.s = getvar_s(R.v);
2520                         debug_printf_eval("R.s:'%s'\n", R.s);
2521                 }
2522                 if (opinfo & OF_NUM1) {
2523                         L_d = getvar_i(L.v);
2524                         debug_printf_eval("L_d:%f\n", L_d);
2525                 }
2526
2527                 debug_printf_eval("switch(0x%x)\n", XC(opinfo & OPCLSMASK));
2528                 switch (XC(opinfo & OPCLSMASK)) {
2529
2530                 /* -- iterative node type -- */
2531
2532                 /* test pattern */
2533                 case XC( OC_TEST ):
2534                         if ((op1->info & OPCLSMASK) == OC_COMMA) {
2535                                 /* it's range pattern */
2536                                 if ((opinfo & OF_CHECKED) || ptest(op1->l.n)) {
2537                                         op->info |= OF_CHECKED;
2538                                         if (ptest(op1->r.n))
2539                                                 op->info &= ~OF_CHECKED;
2540                                         op = op->a.n;
2541                                 } else {
2542                                         op = op->r.n;
2543                                 }
2544                         } else {
2545                                 op = ptest(op1) ? op->a.n : op->r.n;
2546                         }
2547                         break;
2548
2549                 /* just evaluate an expression, also used as unconditional jump */
2550                 case XC( OC_EXEC ):
2551                         break;
2552
2553                 /* branch, used in if-else and various loops */
2554                 case XC( OC_BR ):
2555                         op = istrue(L.v) ? op->a.n : op->r.n;
2556                         break;
2557
2558                 /* initialize for-in loop */
2559                 case XC( OC_WALKINIT ):
2560                         hashwalk_init(L.v, iamarray(R.v));
2561                         break;
2562
2563                 /* get next array item */
2564                 case XC( OC_WALKNEXT ):
2565                         op = hashwalk_next(L.v) ? op->a.n : op->r.n;
2566                         break;
2567
2568                 case XC( OC_PRINT ):
2569                 case XC( OC_PRINTF ): {
2570                         FILE *F = stdout;
2571
2572                         if (op->r.n) {
2573                                 rstream *rsm = newfile(R.s);
2574                                 if (!rsm->F) {
2575                                         if (opn == '|') {
2576                                                 rsm->F = popen(R.s, "w");
2577                                                 if (rsm->F == NULL)
2578                                                         bb_perror_msg_and_die("popen");
2579                                                 rsm->is_pipe = 1;
2580                                         } else {
2581                                                 rsm->F = xfopen(R.s, opn=='w' ? "w" : "a");
2582                                         }
2583                                 }
2584                                 F = rsm->F;
2585                         }
2586
2587                         if ((opinfo & OPCLSMASK) == OC_PRINT) {
2588                                 if (!op1) {
2589                                         fputs(getvar_s(intvar[F0]), F);
2590                                 } else {
2591                                         while (op1) {
2592                                                 var *v = evaluate(nextarg(&op1), v1);
2593                                                 if (v->type & VF_NUMBER) {
2594                                                         fmt_num(g_buf, MAXVARFMT, getvar_s(intvar[OFMT]),
2595                                                                         getvar_i(v), TRUE);
2596                                                         fputs(g_buf, F);
2597                                                 } else {
2598                                                         fputs(getvar_s(v), F);
2599                                                 }
2600
2601                                                 if (op1)
2602                                                         fputs(getvar_s(intvar[OFS]), F);
2603                                         }
2604                                 }
2605                                 fputs(getvar_s(intvar[ORS]), F);
2606
2607                         } else {        /* OC_PRINTF */
2608                                 char *s = awk_printf(op1);
2609                                 fputs(s, F);
2610                                 free(s);
2611                         }
2612                         fflush(F);
2613                         break;
2614                 }
2615
2616                 case XC( OC_DELETE ): {
2617                         uint32_t info = op1->info & OPCLSMASK;
2618                         var *v;
2619
2620                         if (info == OC_VAR) {
2621                                 v = op1->l.v;
2622                         } else if (info == OC_FNARG) {
2623                                 v = &fnargs[op1->l.aidx];
2624                         } else {
2625                                 syntax_error(EMSG_NOT_ARRAY);
2626                         }
2627
2628                         if (op1->r.n) {
2629                                 const char *s;
2630                                 clrvar(L.v);
2631                                 s = getvar_s(evaluate(op1->r.n, v1));
2632                                 hash_remove(iamarray(v), s);
2633                         } else {
2634                                 clear_array(iamarray(v));
2635                         }
2636                         break;
2637                 }
2638
2639                 case XC( OC_NEWSOURCE ):
2640                         g_progname = op->l.new_progname;
2641                         break;
2642
2643                 case XC( OC_RETURN ):
2644                         copyvar(res, L.v);
2645                         break;
2646
2647                 case XC( OC_NEXTFILE ):
2648                         nextfile = TRUE;
2649                 case XC( OC_NEXT ):
2650                         nextrec = TRUE;
2651                 case XC( OC_DONE ):
2652                         clrvar(res);
2653                         break;
2654
2655                 case XC( OC_EXIT ):
2656                         awk_exit(L_d);
2657
2658                 /* -- recursive node type -- */
2659
2660                 case XC( OC_VAR ):
2661                         L.v = op->l.v;
2662                         if (L.v == intvar[NF])
2663                                 split_f0();
2664                         goto v_cont;
2665
2666                 case XC( OC_FNARG ):
2667                         L.v = &fnargs[op->l.aidx];
2668  v_cont:
2669                         res = op->r.n ? findvar(iamarray(L.v), R.s) : L.v;
2670                         break;
2671
2672                 case XC( OC_IN ):
2673                         setvar_i(res, hash_search(iamarray(R.v), L.s) ? 1 : 0);
2674                         break;
2675
2676                 case XC( OC_REGEXP ):
2677                         op1 = op;
2678                         L.s = getvar_s(intvar[F0]);
2679                         goto re_cont;
2680
2681                 case XC( OC_MATCH ):
2682                         op1 = op->r.n;
2683  re_cont:
2684                         {
2685                                 regex_t *re = as_regex(op1, &sreg);
2686                                 int i = regexec(re, L.s, 0, NULL, 0);
2687                                 if (re == &sreg)
2688                                         regfree(re);
2689                                 setvar_i(res, (i == 0) ^ (opn == '!'));
2690                         }
2691                         break;
2692
2693                 case XC( OC_MOVE ):
2694                         debug_printf_eval("MOVE\n");
2695                         /* if source is a temporary string, jusk relink it to dest */
2696 //Disabled: if R.v is numeric but happens to have cached R.v->string,
2697 //then L.v ends up being a string, which is wrong
2698 //                      if (R.v == v1+1 && R.v->string) {
2699 //                              res = setvar_p(L.v, R.v->string);
2700 //                              R.v->string = NULL;
2701 //                      } else {
2702                                 res = copyvar(L.v, R.v);
2703 //                      }
2704                         break;
2705
2706                 case XC( OC_TERNARY ):
2707                         if ((op->r.n->info & OPCLSMASK) != OC_COLON)
2708                                 syntax_error(EMSG_POSSIBLE_ERROR);
2709                         res = evaluate(istrue(L.v) ? op->r.n->l.n : op->r.n->r.n, res);
2710                         break;
2711
2712                 case XC( OC_FUNC ): {
2713                         var *vbeg, *v;
2714                         const char *sv_progname;
2715
2716                         /* The body might be empty, still has to eval the args */
2717                         if (!op->r.n->info && !op->r.f->body.first)
2718                                 syntax_error(EMSG_UNDEF_FUNC);
2719
2720                         vbeg = v = nvalloc(op->r.f->nargs + 1);
2721                         while (op1) {
2722                                 var *arg = evaluate(nextarg(&op1), v1);
2723                                 copyvar(v, arg);
2724                                 v->type |= VF_CHILD;
2725                                 v->x.parent = arg;
2726                                 if (++v - vbeg >= op->r.f->nargs)
2727                                         break;
2728                         }
2729
2730                         v = fnargs;
2731                         fnargs = vbeg;
2732                         sv_progname = g_progname;
2733
2734                         res = evaluate(op->r.f->body.first, res);
2735
2736                         g_progname = sv_progname;
2737                         nvfree(fnargs);
2738                         fnargs = v;
2739
2740                         break;
2741                 }
2742
2743                 case XC( OC_GETLINE ):
2744                 case XC( OC_PGETLINE ): {
2745                         rstream *rsm;
2746                         int i;
2747
2748                         if (op1) {
2749                                 rsm = newfile(L.s);
2750                                 if (!rsm->F) {
2751                                         if ((opinfo & OPCLSMASK) == OC_PGETLINE) {
2752                                                 rsm->F = popen(L.s, "r");
2753                                                 rsm->is_pipe = TRUE;
2754                                         } else {
2755                                                 rsm->F = fopen_for_read(L.s);  /* not xfopen! */
2756                                         }
2757                                 }
2758                         } else {
2759                                 if (!iF)
2760                                         iF = next_input_file();
2761                                 rsm = iF;
2762                         }
2763
2764                         if (!rsm || !rsm->F) {
2765                                 setvar_i(intvar[ERRNO], errno);
2766                                 setvar_i(res, -1);
2767                                 break;
2768                         }
2769
2770                         if (!op->r.n)
2771                                 R.v = intvar[F0];
2772
2773                         i = awk_getline(rsm, R.v);
2774                         if (i > 0 && !op1) {
2775                                 incvar(intvar[FNR]);
2776                                 incvar(intvar[NR]);
2777                         }
2778                         setvar_i(res, i);
2779                         break;
2780                 }
2781
2782                 /* simple builtins */
2783                 case XC( OC_FBLTIN ): {
2784                         double R_d = R_d; /* for compiler */
2785
2786                         switch (opn) {
2787                         case F_in:
2788                                 R_d = (long long)L_d;
2789                                 break;
2790
2791                         case F_rn:
2792                                 R_d = (double)rand() / (double)RAND_MAX;
2793                                 break;
2794
2795                         case F_co:
2796                                 if (ENABLE_FEATURE_AWK_LIBM) {
2797                                         R_d = cos(L_d);
2798                                         break;
2799                                 }
2800
2801                         case F_ex:
2802                                 if (ENABLE_FEATURE_AWK_LIBM) {
2803                                         R_d = exp(L_d);
2804                                         break;
2805                                 }
2806
2807                         case F_lg:
2808                                 if (ENABLE_FEATURE_AWK_LIBM) {
2809                                         R_d = log(L_d);
2810                                         break;
2811                                 }
2812
2813                         case F_si:
2814                                 if (ENABLE_FEATURE_AWK_LIBM) {
2815                                         R_d = sin(L_d);
2816                                         break;
2817                                 }
2818
2819                         case F_sq:
2820                                 if (ENABLE_FEATURE_AWK_LIBM) {
2821                                         R_d = sqrt(L_d);
2822                                         break;
2823                                 }
2824
2825                                 syntax_error(EMSG_NO_MATH);
2826                                 break;
2827
2828                         case F_sr:
2829                                 R_d = (double)seed;
2830                                 seed = op1 ? (unsigned)L_d : (unsigned)time(NULL);
2831                                 srand(seed);
2832                                 break;
2833
2834                         case F_ti:
2835                                 R_d = time(NULL);
2836                                 break;
2837
2838                         case F_le:
2839                                 debug_printf_eval("length: L.s:'%s'\n", L.s);
2840                                 if (!op1) {
2841                                         L.s = getvar_s(intvar[F0]);
2842                                         debug_printf_eval("length: L.s='%s'\n", L.s);
2843                                 }
2844                                 else if (L.v->type & VF_ARRAY) {
2845                                         R_d = L.v->x.array->nel;
2846                                         debug_printf_eval("length: array_len:%d\n", L.v->x.array->nel);
2847                                         break;
2848                                 }
2849                                 R_d = strlen(L.s);
2850                                 break;
2851
2852                         case F_sy:
2853                                 fflush_all();
2854                                 R_d = (ENABLE_FEATURE_ALLOW_EXEC && L.s && *L.s)
2855                                                 ? (system(L.s) >> 8) : 0;
2856                                 break;
2857
2858                         case F_ff:
2859                                 if (!op1) {
2860                                         fflush(stdout);
2861                                 } else if (L.s && *L.s) {
2862                                         rstream *rsm = newfile(L.s);
2863                                         fflush(rsm->F);
2864                                 } else {
2865                                         fflush_all();
2866                                 }
2867                                 break;
2868
2869                         case F_cl: {
2870                                 rstream *rsm;
2871                                 int err = 0;
2872                                 rsm = (rstream *)hash_search(fdhash, L.s);
2873                                 debug_printf_eval("OC_FBLTIN F_cl rsm:%p\n", rsm);
2874                                 if (rsm) {
2875                                         debug_printf_eval("OC_FBLTIN F_cl "
2876                                                 "rsm->is_pipe:%d, ->F:%p\n",
2877                                                 rsm->is_pipe, rsm->F);
2878                                         /* Can be NULL if open failed. Example:
2879                                          * getline line <"doesnt_exist";
2880                                          * close("doesnt_exist"); <--- here rsm->F is NULL
2881                                          */
2882                                         if (rsm->F)
2883                                                 err = rsm->is_pipe ? pclose(rsm->F) : fclose(rsm->F);
2884                                         free(rsm->buffer);
2885                                         hash_remove(fdhash, L.s);
2886                                 }
2887                                 if (err)
2888                                         setvar_i(intvar[ERRNO], errno);
2889                                 R_d = (double)err;
2890                                 break;
2891                         }
2892                         } /* switch */
2893                         setvar_i(res, R_d);
2894                         break;
2895                 }
2896
2897                 case XC( OC_BUILTIN ):
2898                         res = exec_builtin(op, res);
2899                         break;
2900
2901                 case XC( OC_SPRINTF ):
2902                         setvar_p(res, awk_printf(op1));
2903                         break;
2904
2905                 case XC( OC_UNARY ): {
2906                         double Ld, R_d;
2907
2908                         Ld = R_d = getvar_i(R.v);
2909                         switch (opn) {
2910                         case 'P':
2911                                 Ld = ++R_d;
2912                                 goto r_op_change;
2913                         case 'p':
2914                                 R_d++;
2915                                 goto r_op_change;
2916                         case 'M':
2917                                 Ld = --R_d;
2918                                 goto r_op_change;
2919                         case 'm':
2920                                 R_d--;
2921  r_op_change:
2922                                 setvar_i(R.v, R_d);
2923                                 break;
2924                         case '!':
2925                                 Ld = !istrue(R.v);
2926                                 break;
2927                         case '-':
2928                                 Ld = -R_d;
2929                                 break;
2930                         }
2931                         setvar_i(res, Ld);
2932                         break;
2933                 }
2934
2935                 case XC( OC_FIELD ): {
2936                         int i = (int)getvar_i(R.v);
2937                         if (i == 0) {
2938                                 res = intvar[F0];
2939                         } else {
2940                                 split_f0();
2941                                 if (i > nfields)
2942                                         fsrealloc(i);
2943                                 res = &Fields[i - 1];
2944                         }
2945                         break;
2946                 }
2947
2948                 /* concatenation (" ") and index joining (",") */
2949                 case XC( OC_CONCAT ):
2950                 case XC( OC_COMMA ): {
2951                         const char *sep = "";
2952                         if ((opinfo & OPCLSMASK) == OC_COMMA)
2953                                 sep = getvar_s(intvar[SUBSEP]);
2954                         setvar_p(res, xasprintf("%s%s%s", L.s, sep, R.s));
2955                         break;
2956                 }
2957
2958                 case XC( OC_LAND ):
2959                         setvar_i(res, istrue(L.v) ? ptest(op->r.n) : 0);
2960                         break;
2961
2962                 case XC( OC_LOR ):
2963                         setvar_i(res, istrue(L.v) ? 1 : ptest(op->r.n));
2964                         break;
2965
2966                 case XC( OC_BINARY ):
2967                 case XC( OC_REPLACE ): {
2968                         double R_d = getvar_i(R.v);
2969                         debug_printf_eval("BINARY/REPLACE: R_d:%f opn:%c\n", R_d, opn);
2970                         switch (opn) {
2971                         case '+':
2972                                 L_d += R_d;
2973                                 break;
2974                         case '-':
2975                                 L_d -= R_d;
2976                                 break;
2977                         case '*':
2978                                 L_d *= R_d;
2979                                 break;
2980                         case '/':
2981                                 if (R_d == 0)
2982                                         syntax_error(EMSG_DIV_BY_ZERO);
2983                                 L_d /= R_d;
2984                                 break;
2985                         case '&':
2986                                 if (ENABLE_FEATURE_AWK_LIBM)
2987                                         L_d = pow(L_d, R_d);
2988                                 else
2989                                         syntax_error(EMSG_NO_MATH);
2990                                 break;
2991                         case '%':
2992                                 if (R_d == 0)
2993                                         syntax_error(EMSG_DIV_BY_ZERO);
2994                                 L_d -= (long long)(L_d / R_d) * R_d;
2995                                 break;
2996                         }
2997                         debug_printf_eval("BINARY/REPLACE result:%f\n", L_d);
2998                         res = setvar_i(((opinfo & OPCLSMASK) == OC_BINARY) ? res : L.v, L_d);
2999                         break;
3000                 }
3001
3002                 case XC( OC_COMPARE ): {
3003                         int i = i; /* for compiler */
3004                         double Ld;
3005
3006                         if (is_numeric(L.v) && is_numeric(R.v)) {
3007                                 Ld = getvar_i(L.v) - getvar_i(R.v);
3008                         } else {
3009                                 const char *l = getvar_s(L.v);
3010                                 const char *r = getvar_s(R.v);
3011                                 Ld = icase ? strcasecmp(l, r) : strcmp(l, r);
3012                         }
3013                         switch (opn & 0xfe) {
3014                         case 0:
3015                                 i = (Ld > 0);
3016                                 break;
3017                         case 2:
3018                                 i = (Ld >= 0);
3019                                 break;
3020                         case 4:
3021                                 i = (Ld == 0);
3022                                 break;
3023                         }
3024                         setvar_i(res, (i == 0) ^ (opn & 1));
3025                         break;
3026                 }
3027
3028                 default:
3029                         syntax_error(EMSG_POSSIBLE_ERROR);
3030                 }
3031                 if ((opinfo & OPCLSMASK) <= SHIFT_TIL_THIS)
3032                         op = op->a.n;
3033                 if ((opinfo & OPCLSMASK) >= RECUR_FROM_THIS)
3034                         break;
3035                 if (nextrec)
3036                         break;
3037         } /* while (op) */
3038
3039         nvfree(v1);
3040         debug_printf_eval("returning from %s(): %p\n", __func__, res);
3041         return res;
3042 #undef fnargs
3043 #undef seed
3044 #undef sreg
3045 }
3046
3047
3048 /* -------- main & co. -------- */
3049
3050 static int awk_exit(int r)
3051 {
3052         var tv;
3053         unsigned i;
3054         hash_item *hi;
3055
3056         zero_out_var(&tv);
3057
3058         if (!exiting) {
3059                 exiting = TRUE;
3060                 nextrec = FALSE;
3061                 evaluate(endseq.first, &tv);
3062         }
3063
3064         /* waiting for children */
3065         for (i = 0; i < fdhash->csize; i++) {
3066                 hi = fdhash->items[i];
3067                 while (hi) {
3068                         if (hi->data.rs.F && hi->data.rs.is_pipe)
3069                                 pclose(hi->data.rs.F);
3070                         hi = hi->next;
3071                 }
3072         }
3073
3074         exit(r);
3075 }
3076
3077 /* if expr looks like "var=value", perform assignment and return 1,
3078  * otherwise return 0 */
3079 static int is_assignment(const char *expr)
3080 {
3081         char *exprc, *val;
3082
3083         if (!isalnum_(*expr) || (val = strchr(expr, '=')) == NULL) {
3084                 return FALSE;
3085         }
3086
3087         exprc = xstrdup(expr);
3088         val = exprc + (val - expr);
3089         *val++ = '\0';
3090
3091         unescape_string_in_place(val);
3092         setvar_u(newvar(exprc), val);
3093         free(exprc);
3094         return TRUE;
3095 }
3096
3097 /* switch to next input file */
3098 static rstream *next_input_file(void)
3099 {
3100 #define rsm          (G.next_input_file__rsm)
3101 #define files_happen (G.next_input_file__files_happen)
3102
3103         FILE *F;
3104         const char *fname, *ind;
3105
3106         if (rsm.F)
3107                 fclose(rsm.F);
3108         rsm.F = NULL;
3109         rsm.pos = rsm.adv = 0;
3110
3111         for (;;) {
3112                 if (getvar_i(intvar[ARGIND])+1 >= getvar_i(intvar[ARGC])) {
3113                         if (files_happen)
3114                                 return NULL;
3115                         fname = "-";
3116                         F = stdin;
3117                         break;
3118                 }
3119                 ind = getvar_s(incvar(intvar[ARGIND]));
3120                 fname = getvar_s(findvar(iamarray(intvar[ARGV]), ind));
3121                 if (fname && *fname && !is_assignment(fname)) {
3122                         F = xfopen_stdin(fname);
3123                         break;
3124                 }
3125         }
3126
3127         files_happen = TRUE;
3128         setvar_s(intvar[FILENAME], fname);
3129         rsm.F = F;
3130         return &rsm;
3131 #undef rsm
3132 #undef files_happen
3133 }
3134
3135 int awk_main(int argc, char **argv) MAIN_EXTERNALLY_VISIBLE;
3136 int awk_main(int argc, char **argv)
3137 {
3138         unsigned opt;
3139         char *opt_F;
3140         llist_t *list_v = NULL;
3141         llist_t *list_f = NULL;
3142 #if ENABLE_FEATURE_AWK_GNU_EXTENSIONS
3143         llist_t *list_e = NULL;
3144 #endif
3145         int i, j;
3146         var *v;
3147         var tv;
3148         char **envp;
3149         char *vnames = (char *)vNames; /* cheat */
3150         char *vvalues = (char *)vValues;
3151
3152         INIT_G();
3153
3154         /* Undo busybox.c, or else strtod may eat ','! This breaks parsing:
3155          * $1,$2 == '$1,' '$2', NOT '$1' ',' '$2' */
3156         if (ENABLE_LOCALE_SUPPORT)
3157                 setlocale(LC_NUMERIC, "C");
3158
3159         zero_out_var(&tv);
3160
3161         /* allocate global buffer */
3162         g_buf = xmalloc(MAXVARFMT + 1);
3163
3164         vhash = hash_init();
3165         ahash = hash_init();
3166         fdhash = hash_init();
3167         fnhash = hash_init();
3168
3169         /* initialize variables */
3170         for (i = 0; *vnames; i++) {
3171                 intvar[i] = v = newvar(nextword(&vnames));
3172                 if (*vvalues != '\377')
3173                         setvar_s(v, nextword(&vvalues));
3174                 else
3175                         setvar_i(v, 0);
3176
3177                 if (*vnames == '*') {
3178                         v->type |= VF_SPECIAL;
3179                         vnames++;
3180                 }
3181         }
3182
3183         handle_special(intvar[FS]);
3184         handle_special(intvar[RS]);
3185
3186         newfile("/dev/stdin")->F = stdin;
3187         newfile("/dev/stdout")->F = stdout;
3188         newfile("/dev/stderr")->F = stderr;
3189
3190         /* Huh, people report that sometimes environ is NULL. Oh well. */
3191         if (environ) for (envp = environ; *envp; envp++) {
3192                 /* environ is writable, thus we don't strdup it needlessly */
3193                 char *s = *envp;
3194                 char *s1 = strchr(s, '=');
3195                 if (s1) {
3196                         *s1 = '\0';
3197                         /* Both findvar and setvar_u take const char*
3198                          * as 2nd arg -> environment is not trashed */
3199                         setvar_u(findvar(iamarray(intvar[ENVIRON]), s), s1 + 1);
3200                         *s1 = '=';
3201                 }
3202         }
3203         opt_complementary = OPTCOMPLSTR_AWK;
3204         opt = getopt32(argv, OPTSTR_AWK, &opt_F, &list_v, &list_f, IF_FEATURE_AWK_GNU_EXTENSIONS(&list_e,) NULL);
3205         argv += optind;
3206         argc -= optind;
3207         if (opt & OPT_W)
3208                 bb_error_msg("warning: option -W is ignored");
3209         if (opt & OPT_F) {
3210                 unescape_string_in_place(opt_F);
3211                 setvar_s(intvar[FS], opt_F);
3212         }
3213         while (list_v) {
3214                 if (!is_assignment(llist_pop(&list_v)))
3215                         bb_show_usage();
3216         }
3217         while (list_f) {
3218                 char *s = NULL;
3219                 FILE *from_file;
3220
3221                 g_progname = llist_pop(&list_f);
3222                 from_file = xfopen_stdin(g_progname);
3223                 /* one byte is reserved for some trick in next_token */
3224                 for (i = j = 1; j > 0; i += j) {
3225                         s = xrealloc(s, i + 4096);
3226                         j = fread(s + i, 1, 4094, from_file);
3227                 }
3228                 s[i] = '\0';
3229                 fclose(from_file);
3230                 parse_program(s + 1);
3231                 free(s);
3232         }
3233         g_progname = "cmd. line";
3234 #if ENABLE_FEATURE_AWK_GNU_EXTENSIONS
3235         while (list_e) {
3236                 parse_program(llist_pop(&list_e));
3237         }
3238 #endif
3239         if (!(opt & (OPT_f | OPT_e))) {
3240                 if (!*argv)
3241                         bb_show_usage();
3242                 parse_program(*argv++);
3243                 argc--;
3244         }
3245
3246         /* fill in ARGV array */
3247         setvar_i(intvar[ARGC], argc + 1);
3248         setari_u(intvar[ARGV], 0, "awk");
3249         i = 0;
3250         while (*argv)
3251                 setari_u(intvar[ARGV], ++i, *argv++);
3252
3253         evaluate(beginseq.first, &tv);
3254         if (!mainseq.first && !endseq.first)
3255                 awk_exit(EXIT_SUCCESS);
3256
3257         /* input file could already be opened in BEGIN block */
3258         if (!iF)
3259                 iF = next_input_file();
3260
3261         /* passing through input files */
3262         while (iF) {
3263                 nextfile = FALSE;
3264                 setvar_i(intvar[FNR], 0);
3265
3266                 while ((i = awk_getline(iF, intvar[F0])) > 0) {
3267                         nextrec = FALSE;
3268                         incvar(intvar[NR]);
3269                         incvar(intvar[FNR]);
3270                         evaluate(mainseq.first, &tv);
3271
3272                         if (nextfile)
3273                                 break;
3274                 }
3275
3276                 if (i < 0)
3277                         syntax_error(strerror(errno));
3278
3279                 iF = next_input_file();
3280         }
3281
3282         awk_exit(EXIT_SUCCESS);
3283         /*return 0;*/
3284 }