whitespace cleanup. no code changes
[platform/upstream/busybox.git] / editors / awk.c
1 /* vi: set sw=4 ts=4: */
2 /*
3  * awk implementation for busybox
4  *
5  * Copyright (C) 2002 by Dmitry Zakharov <dmit@crp.bank.gov.ua>
6  *
7  * Licensed under GPLv2 or later, see file LICENSE in this source tree.
8  */
9
10 //usage:#define awk_trivial_usage
11 //usage:       "[OPTIONS] [AWK_PROGRAM] [FILE]..."
12 //usage:#define awk_full_usage "\n\n"
13 //usage:       "        -v VAR=VAL      Set variable"
14 //usage:     "\n        -F SEP          Use SEP as field separator"
15 //usage:     "\n        -f FILE         Read program from FILE"
16
17 #include "libbb.h"
18 #include "xregex.h"
19 #include <math.h>
20
21 /* This is a NOEXEC applet. Be very careful! */
22
23
24 /* If you comment out one of these below, it will be #defined later
25  * to perform debug printfs to stderr: */
26 #define debug_printf_walker(...)  do {} while (0)
27 #define debug_printf_eval(...)  do {} while (0)
28 #define debug_printf_parse(...)  do {} while (0)
29
30 #ifndef debug_printf_walker
31 # define debug_printf_walker(...) (fprintf(stderr, __VA_ARGS__))
32 #endif
33 #ifndef debug_printf_eval
34 # define debug_printf_eval(...) (fprintf(stderr, __VA_ARGS__))
35 #endif
36 #ifndef debug_printf_parse
37 # define debug_printf_parse(...) (fprintf(stderr, __VA_ARGS__))
38 #endif
39
40
41
42 #define MAXVARFMT       240
43 #define MINNVBLOCK      64
44
45 /* variable flags */
46 #define VF_NUMBER       0x0001  /* 1 = primary type is number */
47 #define VF_ARRAY        0x0002  /* 1 = it's an array */
48
49 #define VF_CACHED       0x0100  /* 1 = num/str value has cached str/num eq */
50 #define VF_USER         0x0200  /* 1 = user input (may be numeric string) */
51 #define VF_SPECIAL      0x0400  /* 1 = requires extra handling when changed */
52 #define VF_WALK         0x0800  /* 1 = variable has alloc'd x.walker list */
53 #define VF_FSTR         0x1000  /* 1 = var::string points to fstring buffer */
54 #define VF_CHILD        0x2000  /* 1 = function arg; x.parent points to source */
55 #define VF_DIRTY        0x4000  /* 1 = variable was set explicitly */
56
57 /* these flags are static, don't change them when value is changed */
58 #define VF_DONTTOUCH    (VF_ARRAY | VF_SPECIAL | VF_WALK | VF_CHILD | VF_DIRTY)
59
/* One node of the list reachable through var.x.walker (see VF_WALK).
 * NOTE(review): wbuf is declared with a single element; presumably nodes are
 * over-allocated so that wbuf holds the walked data and cur/end point into
 * it - confirm against the code that builds the list (not in this section).
 */
60 typedef struct walker_list {
61         char *end;
62         char *cur;
63         struct walker_list *prev;       /* link to another walker_list node */
64         char wbuf[1];
65 } walker_list;
66
67 /* Variable */
/* A single awk value: VF_* flags plus a numeric and/or string representation.
 * getvar_s()/getvar_i() fill in the missing representation on demand and
 * mark it with VF_CACHED.
 */
68 typedef struct var_s {
69         unsigned type;            /* VF_* flags */
70         double number;            /* numeric value (primary if VF_NUMBER, else cached by getvar_i) */
71         char *string;             /* string value; NULL reads as "" in getvar_s; freed by clrvar unless VF_FSTR */
72         union {
73                 int aidx;               /* func arg idx (for compilation stage) */
74                 struct xhash_s *array;  /* array ptr (valid when VF_ARRAY is set, see iamarray) */
75                 struct var_s *parent;   /* for func args (VF_CHILD), ptr to actual parameter */
76                 walker_list *walker;    /* list of array elements (for..in) */
77         } x;
78 } var;
79
80 /* Node chain (pattern-action chain, BEGIN, END, function bodies) */
/* Holds pointers to the first and last node of a node sequence. */
81 typedef struct chain_s {
82         struct node_s *first;
83         struct node_s *last;
84         const char *programname; /* NOTE(review): presumably the name of the program source this chain was parsed from - confirm */
85 } chain;
86
87 /* Function */
/* Entry stored in the functions hash (hash_item.data.f, created via newfunc()). */
88 typedef struct func_s {
89         unsigned nargs;           /* NOTE(review): presumably the number of declared arguments - confirm */
90         struct chain_s body;      /* node chain of the function body */
91 } func;
92
93 /* I/O stream */
/* Entry stored in the redirect-streams hash (hash_item.data.rs, created via
 * newfile()); also the type of the current input stream pointer iF.
 * NOTE(review): the code using buffer/adv/size/pos is outside this section;
 * they look like read-buffer bookkeeping - confirm before relying on it.
 */
94 typedef struct rstream_s {
95         FILE *F;
96         char *buffer;
97         int adv;
98         int size;
99         int pos;
100         smallint is_pipe;
101 } rstream;
102
/* One entry of an xhash bucket chain.
 * "data" must remain the FIRST member: hash_search() returns &item->data and
 * hash_find() stores that pointer in a hash_item * and takes &hi->data of it
 * again, which only yields the same address while data sits at offset 0.
 */
103 typedef struct hash_item_s {
104         union {
105                 struct var_s v;         /* variable/array hash */
106                 struct rstream_s rs;    /* redirect streams hash */
107                 struct func_s f;        /* functions hash */
108         } data;
109         struct hash_item_s *next;       /* next in chain */
110         char name[1];                   /* key; really it's longer: hash_find allocates strlen(name)+1 extra bytes */
111 } hash_item;
112
/* Chained hash table keyed by NUL-terminated names (see hash_init/hash_find). */
113 typedef struct xhash_s {
114         unsigned nel;           /* num of elements */
115         unsigned csize;         /* current number of buckets in items[] */
116         unsigned nprime;        /* index in PRIMES[] of the next size hash_rebuild will use */
117         unsigned glen;          /* total length of item names, each counted with its NUL */
118         struct hash_item_s **items;     /* bucket array, csize chain heads */
119 } xhash;
120
121 /* Tree node */
/* Parse-tree node with three operand slots (l, r, a); each slot is a union.
 * NOTE(review): which union member is active is presumably determined by
 * "info"; the code that builds and walks nodes is outside this section.
 */
122 typedef struct node_s {
123         uint32_t info;          /* NOTE(review): presumably an OC_xxx/OF_xxx/P() word like those in tokeninfo[] - confirm */
124         unsigned lineno;
125         union {
126                 struct node_s *n;
127                 var *v;
128                 int aidx;
129                 char *new_progname;
130                 regex_t *re;
131         } l;
132         union {
133                 struct node_s *n;
134                 regex_t *ire;
135                 func *f;
136         } r;
137         union {
138                 struct node_s *n;
139         } a;
140 } node;
141
142 /* Block of temporary variables */
/* Blocks are linked through prev/next; nvalloc() walks ->next from g_cb
 * looking for a block with enough free slots.
 */
143 typedef struct nvblock_s {
144         int size;                 /* capacity of nv[] in vars (nvalloc checks (pos - nv) + n <= size) */
145         var *pos;                 /* position inside nv[]; (pos - nv) is the number of slots counted as used */
146         struct nvblock_s *prev;
147         struct nvblock_s *next;
148         var nv[];                 /* storage, allocated together with the header */
149 } nvblock;
150
/* A node bundled with two regex_t objects; used as the type of fsplitter,
 * rsplitter and exec_builtin__tspl in struct globals2.
 * NOTE(review): the role of the two re[] slots is not visible in this
 * section - confirm against the splitter setup code.
 */
151 typedef struct tsplitter_s {
152         node n;
153         regex_t re[2];
154 } tsplitter;
155
156 /* simple token classes */
157 /* Order and hex values are very important!!!  See next_token() */
158 #define TC_SEQSTART     1                       /* ( */
159 #define TC_SEQTERM      (1 << 1)                /* ) */
160 #define TC_REGEXP       (1 << 2)                /* /.../ */
161 #define TC_OUTRDR       (1 << 3)                /* | > >> */
162 #define TC_UOPPOST      (1 << 4)                /* unary postfix operator */
163 #define TC_UOPPRE1      (1 << 5)                /* unary prefix operator */
164 #define TC_BINOPX       (1 << 6)                /* two-opnd operator */
165 #define TC_IN           (1 << 7)
166 #define TC_COMMA        (1 << 8)
167 #define TC_PIPE         (1 << 9)                /* input redirection pipe */
168 #define TC_UOPPRE2      (1 << 10)               /* unary prefix operator */
169 #define TC_ARRTERM      (1 << 11)               /* ] */
170 #define TC_GRPSTART     (1 << 12)               /* { */
171 #define TC_GRPTERM      (1 << 13)               /* } */
172 #define TC_SEMICOL      (1 << 14)
173 #define TC_NEWLINE      (1 << 15)
174 #define TC_STATX        (1 << 16)               /* ctl statement (for, next...) */
175 #define TC_WHILE        (1 << 17)
176 #define TC_ELSE         (1 << 18)
177 #define TC_BUILTIN      (1 << 19)
178 #define TC_GETLINE      (1 << 20)
179 #define TC_FUNCDECL     (1 << 21)               /* `function' `func' */
180 #define TC_BEGIN        (1 << 22)
181 #define TC_END          (1 << 23)
182 #define TC_EOF          (1 << 24)
183 #define TC_VARIABLE     (1 << 25)
184 #define TC_ARRAY        (1 << 26)
185 #define TC_FUNCTION     (1 << 27)
186 #define TC_STRING       (1 << 28)
187 #define TC_NUMBER       (1 << 29)
188
189 #define TC_UOPPRE  (TC_UOPPRE1 | TC_UOPPRE2)
190
191 /* combined token classes */
192 #define TC_BINOP   (TC_BINOPX | TC_COMMA | TC_PIPE | TC_IN)
193 #define TC_UNARYOP (TC_UOPPRE | TC_UOPPOST)
194 #define TC_OPERAND (TC_VARIABLE | TC_ARRAY | TC_FUNCTION \
195                    | TC_BUILTIN | TC_GETLINE | TC_SEQSTART | TC_STRING | TC_NUMBER)
196
197 #define TC_STATEMNT (TC_STATX | TC_WHILE)
198 #define TC_OPTERM  (TC_SEMICOL | TC_NEWLINE)
199
200 /* word tokens, cannot mean something else if not expected */
201 #define TC_WORD    (TC_IN | TC_STATEMNT | TC_ELSE | TC_BUILTIN \
202                    | TC_GETLINE | TC_FUNCDECL | TC_BEGIN | TC_END)
203
204 /* discard newlines after these */
205 #define TC_NOTERM  (TC_COMMA | TC_GRPSTART | TC_GRPTERM \
206                    | TC_BINOP | TC_OPTERM)
207
208 /* what can expression begin with */
209 #define TC_OPSEQ   (TC_OPERAND | TC_UOPPRE | TC_REGEXP)
210 /* what can group begin with */
211 #define TC_GRPSEQ  (TC_OPSEQ | TC_OPTERM | TC_STATEMNT | TC_GRPSTART)
212
213 /* if previous token class is CONCAT1 and next is CONCAT2, concatenation */
214 /* operator is inserted between them */
215 #define TC_CONCAT1 (TC_VARIABLE | TC_ARRTERM | TC_SEQTERM \
216                    | TC_STRING | TC_NUMBER | TC_UOPPOST)
217 #define TC_CONCAT2 (TC_OPERAND | TC_UOPPRE)
218
219 #define OF_RES1    0x010000
220 #define OF_RES2    0x020000
221 #define OF_STR1    0x040000
222 #define OF_STR2    0x080000
223 #define OF_NUM1    0x100000
224 #define OF_CHECKED 0x200000
225
226 /* combined operator flags */
227 #define xx      0
228 #define xV      OF_RES2
229 #define xS      (OF_RES2 | OF_STR2)
230 #define Vx      OF_RES1
231 #define VV      (OF_RES1 | OF_RES2)
232 #define Nx      (OF_RES1 | OF_NUM1)
233 #define NV      (OF_RES1 | OF_NUM1 | OF_RES2)
234 #define Sx      (OF_RES1 | OF_STR1)
235 #define SV      (OF_RES1 | OF_STR1 | OF_RES2)
236 #define SS      (OF_RES1 | OF_STR1 | OF_RES2 | OF_STR2)
237
238 #define OPCLSMASK 0xFF00
239 #define OPNMASK   0x007F
240
241 /* operator priority is a highest byte (even: r->l, odd: l->r grouping)
242  * For builtins it has different meaning: n n s3 s2 s1 v3 v2 v1,
243  * n - min. number of args, vN - resolve Nth arg to var, sN - resolve to string
244  */
#undef P
#undef PRIMASK
#undef PRIMASK2
/* P(x) places x into the highest byte of a 32-bit info word.
 * The argument is parenthesized so that expressions such as P(a | b) expand
 * correctly, and it is converted to uint32_t before shifting: values with
 * bit 7 set (e.g. P(0x83) in tokeninfo[]) would otherwise shift a signed int
 * into its sign bit, which is undefined behavior.
 * PRIMASK selects the low 7 bits of that byte, PRIMASK2 the same without
 * the lowest bit.
 */
#define P(x)      ((uint32_t)(x) << 24)
#define PRIMASK   0x7F000000
#define PRIMASK2  0x7E000000
251
252 /* Operation classes */
253
254 #define SHIFT_TIL_THIS  0x0600
255 #define RECUR_FROM_THIS 0x1000
256
257 enum {
258         OC_DELETE = 0x0100,     OC_EXEC = 0x0200,       OC_NEWSOURCE = 0x0300,
259         OC_PRINT = 0x0400,      OC_PRINTF = 0x0500,     OC_WALKINIT = 0x0600,
260
261         OC_BR = 0x0700,         OC_BREAK = 0x0800,      OC_CONTINUE = 0x0900,
262         OC_EXIT = 0x0a00,       OC_NEXT = 0x0b00,       OC_NEXTFILE = 0x0c00,
263         OC_TEST = 0x0d00,       OC_WALKNEXT = 0x0e00,
264
265         OC_BINARY = 0x1000,     OC_BUILTIN = 0x1100,    OC_COLON = 0x1200,
266         OC_COMMA = 0x1300,      OC_COMPARE = 0x1400,    OC_CONCAT = 0x1500,
267         OC_FBLTIN = 0x1600,     OC_FIELD = 0x1700,      OC_FNARG = 0x1800,
268         OC_FUNC = 0x1900,       OC_GETLINE = 0x1a00,    OC_IN = 0x1b00,
269         OC_LAND = 0x1c00,       OC_LOR = 0x1d00,        OC_MATCH = 0x1e00,
270         OC_MOVE = 0x1f00,       OC_PGETLINE = 0x2000,   OC_REGEXP = 0x2100,
271         OC_REPLACE = 0x2200,    OC_RETURN = 0x2300,     OC_SPRINTF = 0x2400,
272         OC_TERNARY = 0x2500,    OC_UNARY = 0x2600,      OC_VAR = 0x2700,
273         OC_DONE = 0x2800,
274
275         ST_IF = 0x3000,         ST_DO = 0x3100,         ST_FOR = 0x3200,
276         ST_WHILE = 0x3300
277 };
278
279 /* simple builtins */
280 enum {
281         F_in,   F_rn,   F_co,   F_ex,   F_lg,   F_si,   F_sq,   F_sr,
282         F_ti,   F_le,   F_sy,   F_ff,   F_cl
283 };
284
285 /* builtins */
286 enum {
287         B_a2,   B_ix,   B_ma,   B_sp,   B_ss,   B_ti,   B_mt,   B_lo,   B_up,
288         B_ge,   B_gs,   B_su,
289         B_an,   B_co,   B_ls,   B_or,   B_rs,   B_xo,
290 };
291
292 /* tokens and their corresponding info values */
293
294 #define NTC     "\377"  /* switch to next token class (tc<<1) */
295 #define NTCC    '\377'
296
297 #define OC_B  OC_BUILTIN
298
299 static const char tokenlist[] ALIGN1 =
300         "\1("         NTC
301         "\1)"         NTC
302         "\1/"         NTC                                   /* REGEXP */
303         "\2>>"        "\1>"         "\1|"       NTC         /* OUTRDR */
304         "\2++"        "\2--"        NTC                     /* UOPPOST */
305         "\2++"        "\2--"        "\1$"       NTC         /* UOPPRE1 */
306         "\2=="        "\1="         "\2+="      "\2-="      /* BINOPX */
307         "\2*="        "\2/="        "\2%="      "\2^="
308         "\1+"         "\1-"         "\3**="     "\2**"
309         "\1/"         "\1%"         "\1^"       "\1*"
310         "\2!="        "\2>="        "\2<="      "\1>"
311         "\1<"         "\2!~"        "\1~"       "\2&&"
312         "\2||"        "\1?"         "\1:"       NTC
313         "\2in"        NTC
314         "\1,"         NTC
315         "\1|"         NTC
316         "\1+"         "\1-"         "\1!"       NTC         /* UOPPRE2 */
317         "\1]"         NTC
318         "\1{"         NTC
319         "\1}"         NTC
320         "\1;"         NTC
321         "\1\n"        NTC
322         "\2if"        "\2do"        "\3for"     "\5break"   /* STATX */
323         "\10continue" "\6delete"    "\5print"
324         "\6printf"    "\4next"      "\10nextfile"
325         "\6return"    "\4exit"      NTC
326         "\5while"     NTC
327         "\4else"      NTC
328
329         "\3and"       "\5compl"     "\6lshift"  "\2or"
330         "\6rshift"    "\3xor"
331         "\5close"     "\6system"    "\6fflush"  "\5atan2"   /* BUILTIN */
332         "\3cos"       "\3exp"       "\3int"     "\3log"
333         "\4rand"      "\3sin"       "\4sqrt"    "\5srand"
334         "\6gensub"    "\4gsub"      "\5index"   "\6length"
335         "\5match"     "\5split"     "\7sprintf" "\3sub"
336         "\6substr"    "\7systime"   "\10strftime" "\6mktime"
337         "\7tolower"   "\7toupper"   NTC
338         "\7getline"   NTC
339         "\4func"      "\10function" NTC
340         "\5BEGIN"     NTC
341         "\3END"
342         /* compiler adds trailing "\0" */
343         ;
344
345 static const uint32_t tokeninfo[] = {
346         0,
347         0,
348         OC_REGEXP,
349         xS|'a',                  xS|'w',                  xS|'|',
350         OC_UNARY|xV|P(9)|'p',    OC_UNARY|xV|P(9)|'m',
351         OC_UNARY|xV|P(9)|'P',    OC_UNARY|xV|P(9)|'M',    OC_FIELD|xV|P(5),
352         OC_COMPARE|VV|P(39)|5,   OC_MOVE|VV|P(74),        OC_REPLACE|NV|P(74)|'+', OC_REPLACE|NV|P(74)|'-',
353         OC_REPLACE|NV|P(74)|'*', OC_REPLACE|NV|P(74)|'/', OC_REPLACE|NV|P(74)|'%', OC_REPLACE|NV|P(74)|'&',
354         OC_BINARY|NV|P(29)|'+',  OC_BINARY|NV|P(29)|'-',  OC_REPLACE|NV|P(74)|'&', OC_BINARY|NV|P(15)|'&',
355         OC_BINARY|NV|P(25)|'/',  OC_BINARY|NV|P(25)|'%',  OC_BINARY|NV|P(15)|'&',  OC_BINARY|NV|P(25)|'*',
356         OC_COMPARE|VV|P(39)|4,   OC_COMPARE|VV|P(39)|3,   OC_COMPARE|VV|P(39)|0,   OC_COMPARE|VV|P(39)|1,
357         OC_COMPARE|VV|P(39)|2,   OC_MATCH|Sx|P(45)|'!',   OC_MATCH|Sx|P(45)|'~',   OC_LAND|Vx|P(55),
358         OC_LOR|Vx|P(59),         OC_TERNARY|Vx|P(64)|'?', OC_COLON|xx|P(67)|':',
359         OC_IN|SV|P(49), /* in */
360         OC_COMMA|SS|P(80),
361         OC_PGETLINE|SV|P(37),
362         OC_UNARY|xV|P(19)|'+',   OC_UNARY|xV|P(19)|'-',   OC_UNARY|xV|P(19)|'!',
363         0, /* ] */
364         0,
365         0,
366         0,
367         0, /* \n */
368         ST_IF,        ST_DO,        ST_FOR,      OC_BREAK,
369         OC_CONTINUE,  OC_DELETE|Vx, OC_PRINT,
370         OC_PRINTF,    OC_NEXT,      OC_NEXTFILE,
371         OC_RETURN|Vx, OC_EXIT|Nx,
372         ST_WHILE,
373         0, /* else */
374
375         OC_B|B_an|P(0x83), OC_B|B_co|P(0x41), OC_B|B_ls|P(0x83), OC_B|B_or|P(0x83),
376         OC_B|B_rs|P(0x83), OC_B|B_xo|P(0x83),
377         OC_FBLTIN|Sx|F_cl, OC_FBLTIN|Sx|F_sy, OC_FBLTIN|Sx|F_ff, OC_B|B_a2|P(0x83),
378         OC_FBLTIN|Nx|F_co, OC_FBLTIN|Nx|F_ex, OC_FBLTIN|Nx|F_in, OC_FBLTIN|Nx|F_lg,
379         OC_FBLTIN|F_rn,    OC_FBLTIN|Nx|F_si, OC_FBLTIN|Nx|F_sq, OC_FBLTIN|Nx|F_sr,
380         OC_B|B_ge|P(0xd6), OC_B|B_gs|P(0xb6), OC_B|B_ix|P(0x9b), OC_FBLTIN|Sx|F_le,
381         OC_B|B_ma|P(0x89), OC_B|B_sp|P(0x8b), OC_SPRINTF,        OC_B|B_su|P(0xb6),
382         OC_B|B_ss|P(0x8f), OC_FBLTIN|F_ti,    OC_B|B_ti|P(0x0b), OC_B|B_mt|P(0x0b),
383         OC_B|B_lo|P(0x49), OC_B|B_up|P(0x49),
384         OC_GETLINE|SV|P(0),
385         0,                 0,
386         0,
387         0 /* END */
388 };
389
390 /* internal variable names and their initial values       */
391 /* asterisk marks SPECIAL vars; $ is just no-named Field0 */
392 enum {
393         CONVFMT,    OFMT,       FS,         OFS,
394         ORS,        RS,         RT,         FILENAME,
395         SUBSEP,     F0,         ARGIND,     ARGC,
396         ARGV,       ERRNO,      FNR,        NR,
397         NF,         IGNORECASE, ENVIRON,    NUM_INTERNAL_VARS
398 };
399
400 static const char vNames[] ALIGN1 =
401         "CONVFMT\0" "OFMT\0"    "FS\0*"     "OFS\0"
402         "ORS\0"     "RS\0*"     "RT\0"      "FILENAME\0"
403         "SUBSEP\0"  "$\0*"      "ARGIND\0"  "ARGC\0"
404         "ARGV\0"    "ERRNO\0"   "FNR\0"     "NR\0"
405         "NF\0*"     "IGNORECASE\0*" "ENVIRON\0" "\0";
406
407 static const char vValues[] ALIGN1 =
408         "%.6g\0"    "%.6g\0"    " \0"       " \0"
409         "\n\0"      "\n\0"      "\0"        "\0"
410         "\034\0"    "\0"        "\377";
411
412 /* hash size may grow to these values */
413 #define FIRST_PRIME 61
414 static const uint16_t PRIMES[] ALIGN2 = { 251, 1021, 4093, 16381, 65521 };
415
416
417 /* Globals. Split in two parts so that first one is addressed
418  * with (mostly short) negative offsets.
419  * NB: it's unsafe to put members of type "double"
420  * into globals2 (gcc may fail to align them).
421  */
/* First part of the global state; reached through the G1 macro
 * (ptr_to_globals[-1]) and the per-field #defines that follow.
 */
422 struct globals {
423         double t_double;
424         chain beginseq, mainseq, endseq;
425         chain *seq;
426         node *break_ptr, *continue_ptr;
427         rstream *iF;
428         xhash *vhash, *ahash, *fdhash, *fnhash; /* vhash: newvar(), fdhash: newfile(), fnhash: newfunc() */
429         const char *g_progname;   /* printed by syntax_error() */
430         int g_lineno;             /* printed by syntax_error() */
431         int nfields;
432         int maxfields; /* used in fsrealloc() only */
433         var *Fields;
434         nvblock *g_cb;            /* block at which nvalloc() starts its search */
435         char *g_pos;
436         char *g_buf;              /* scratch buffer; getvar_s() formats numbers into it (MAXVARFMT bytes) */
437         smallint icase;
438         smallint exiting;
439         smallint nextrec;
440         smallint nextfile;
441         smallint is_f0_split;
442         smallint t_rollback;
443 };
/* Second part of the global state; reached through the G macro, which casts
 * ptr_to_globals itself to this type.
 */
444 struct globals2 {
445         uint32_t t_info; /* often used */
446         uint32_t t_tclass;
447         char *t_string;
448         int t_lineno;             /* incremented by skip_spaces() for each backslash-newline it skips */
449
450         var *intvar[NUM_INTERNAL_VARS]; /* often used; indexed by the CONVFMT..ENVIRON enum */
451
452         /* former statics from various functions */
453         char *split_f0__fstrings;
454
455         uint32_t next_token__save_tclass;
456         uint32_t next_token__save_info;
457         uint32_t next_token__ltclass;   /* set to TC_OPTERM by INIT_G() */
458         smallint next_token__concat_inserted;
459
460         smallint next_input_file__files_happen;
461         rstream next_input_file__rsm;
462
463         var *evaluate__fnargs;
464         unsigned evaluate__seed;        /* set to 1 by INIT_G() */
465         regex_t evaluate__sreg;
466
467         var ptest__v;
468
469         tsplitter exec_builtin__tspl;
470
471         /* biggest and least used members go last */
472         tsplitter fsplitter, rsplitter;
473 };
474 #define G1 (ptr_to_globals[-1])
475 #define G (*(struct globals2 *)ptr_to_globals)
476 /* For debug. nm --size-sort awk.o | grep -vi ' [tr] ' */
477 /*char G1size[sizeof(G1)]; - 0x74 */
478 /*char Gsize[sizeof(G)]; - 0x1c4 */
479 /* Trying to keep most of members accessible with short offsets: */
480 /*char Gofs_seed[offsetof(struct globals2, evaluate__seed)]; - 0x90 */
481 #define t_double     (G1.t_double    )
482 #define beginseq     (G1.beginseq    )
483 #define mainseq      (G1.mainseq     )
484 #define endseq       (G1.endseq      )
485 #define seq          (G1.seq         )
486 #define break_ptr    (G1.break_ptr   )
487 #define continue_ptr (G1.continue_ptr)
488 #define iF           (G1.iF          )
489 #define vhash        (G1.vhash       )
490 #define ahash        (G1.ahash       )
491 #define fdhash       (G1.fdhash      )
492 #define fnhash       (G1.fnhash      )
493 #define g_progname   (G1.g_progname  )
494 #define g_lineno     (G1.g_lineno    )
495 #define nfields      (G1.nfields     )
496 #define maxfields    (G1.maxfields   )
497 #define Fields       (G1.Fields      )
498 #define g_cb         (G1.g_cb        )
499 #define g_pos        (G1.g_pos       )
500 #define g_buf        (G1.g_buf       )
501 #define icase        (G1.icase       )
502 #define exiting      (G1.exiting     )
503 #define nextrec      (G1.nextrec     )
504 #define nextfile     (G1.nextfile    )
505 #define is_f0_split  (G1.is_f0_split )
506 #define t_rollback   (G1.t_rollback  )
507 #define t_info       (G.t_info      )
508 #define t_tclass     (G.t_tclass    )
509 #define t_string     (G.t_string    )
510 #define t_lineno     (G.t_lineno    )
511 #define intvar       (G.intvar      )
512 #define fsplitter    (G.fsplitter   )
513 #define rsplitter    (G.rsplitter   )
/* Allocate one zero-filled block of sizeof(G1)+sizeof(G) bytes and point
 * ptr_to_globals sizeof(G1) bytes into it, so that G1 (ptr_to_globals[-1])
 * occupies the first part and G starts at the pointer itself.
 * Then set the two members whose initial value is not zero.
 */
514 #define INIT_G() do { \
515         SET_PTR_TO_GLOBALS((char*)xzalloc(sizeof(G1)+sizeof(G)) + sizeof(G1)); \
516         G.next_token__ltclass = TC_OPTERM; \
517         G.evaluate__seed = 1; \
518 } while (0)
519
520
521 /* function prototypes */
522 static void handle_special(var *);
523 static node *parse_expr(uint32_t);
524 static void chain_group(void);
525 static var *evaluate(node *, var *);
526 static rstream *next_input_file(void);
527 static int fmt_num(char *, int, const char *, double, int);
528 static int awk_exit(int) NORETURN;
529
530 /* ---- error handling ---- */
531
532 static const char EMSG_INTERNAL_ERROR[] ALIGN1 = "Internal error";
533 static const char EMSG_UNEXP_EOS[] ALIGN1 = "Unexpected end of string";
534 static const char EMSG_UNEXP_TOKEN[] ALIGN1 = "Unexpected token";
535 static const char EMSG_DIV_BY_ZERO[] ALIGN1 = "Division by zero";
536 static const char EMSG_INV_FMT[] ALIGN1 = "Invalid format specifier";
537 static const char EMSG_TOO_FEW_ARGS[] ALIGN1 = "Too few arguments for builtin";
538 static const char EMSG_NOT_ARRAY[] ALIGN1 = "Not an array";
539 static const char EMSG_POSSIBLE_ERROR[] ALIGN1 = "Possible syntax error";
540 static const char EMSG_UNDEF_FUNC[] ALIGN1 = "Call to undefined function";
541 static const char EMSG_NO_MATH[] ALIGN1 = "Math support is not compiled in";
542
543 static void zero_out_var(var *vp)
544 {
545         memset(vp, 0, sizeof(*vp));
546 }
547
548 static void syntax_error(const char *message) NORETURN;
549 static void syntax_error(const char *message)
550 {
551         bb_error_msg_and_die("%s:%i: %s", g_progname, g_lineno, message);
552 }
553
554 /* ---- hash stuff ---- */
555
/* Hash a NUL-terminated name: h = h * 63 + c for every character, in
 * unsigned arithmetic. Callers reduce the result modulo the table size.
 */
static unsigned hashidx(const char *name)
{
        unsigned h;

        for (h = 0; *name != '\0'; name++)
                h = h * 63 + *name;
        return h;
}
564
565 /* create new hash */
566 static xhash *hash_init(void)
567 {
568         xhash *newhash;
569
570         newhash = xzalloc(sizeof(*newhash));
571         newhash->csize = FIRST_PRIME;
572         newhash->items = xzalloc(FIRST_PRIME * sizeof(newhash->items[0]));
573
574         return newhash;
575 }
576
577 /* find item in hash, return ptr to data, NULL if not found */
578 static void *hash_search(xhash *hash, const char *name)
579 {
580         hash_item *hi;
581
582         hi = hash->items[hashidx(name) % hash->csize];
583         while (hi) {
584                 if (strcmp(hi->name, name) == 0)
585                         return &hi->data;
586                 hi = hi->next;
587         }
588         return NULL;
589 }
590
591 /* grow hash if it becomes too big */
592 static void hash_rebuild(xhash *hash)
593 {
594         unsigned newsize, i, idx;
595         hash_item **newitems, *hi, *thi;
596
597         if (hash->nprime == ARRAY_SIZE(PRIMES))
598                 return;
599
600         newsize = PRIMES[hash->nprime++];
601         newitems = xzalloc(newsize * sizeof(newitems[0]));
602
603         for (i = 0; i < hash->csize; i++) {
604                 hi = hash->items[i];
605                 while (hi) {
606                         thi = hi;
607                         hi = thi->next;
608                         idx = hashidx(thi->name) % newsize;
609                         thi->next = newitems[idx];
610                         newitems[idx] = thi;
611                 }
612         }
613
614         free(hash->items);
615         hash->csize = newsize;
616         hash->items = newitems;
617 }
618
619 /* find item in hash, add it if necessary. Return ptr to data */
620 static void *hash_find(xhash *hash, const char *name)
621 {
622         hash_item *hi;
623         unsigned idx;
624         int l;
625
626         hi = hash_search(hash, name);
627         if (!hi) {
628                 if (++hash->nel / hash->csize > 10)
629                         hash_rebuild(hash);
630
631                 l = strlen(name) + 1;
632                 hi = xzalloc(sizeof(*hi) + l);
633                 strcpy(hi->name, name);
634
635                 idx = hashidx(name) % hash->csize;
636                 hi->next = hash->items[idx];
637                 hash->items[idx] = hi;
638                 hash->glen += l;
639         }
640         return &hi->data;
641 }
642
643 #define findvar(hash, name) ((var*)    hash_find((hash), (name)))
644 #define newvar(name)        ((var*)    hash_find(vhash, (name)))
645 #define newfile(name)       ((rstream*)hash_find(fdhash, (name)))
646 #define newfunc(name)       ((func*)   hash_find(fnhash, (name)))
647
648 static void hash_remove(xhash *hash, const char *name)
649 {
650         hash_item *hi, **phi;
651
652         phi = &hash->items[hashidx(name) % hash->csize];
653         while (*phi) {
654                 hi = *phi;
655                 if (strcmp(hi->name, name) == 0) {
656                         hash->glen -= (strlen(name) + 1);
657                         hash->nel--;
658                         *phi = hi->next;
659                         free(hi);
660                         break;
661                 }
662                 phi = &hi->next;
663         }
664 }
665
666 /* ------ some useful functions ------ */
667
668 static char *skip_spaces(char *p)
669 {
670         while (1) {
671                 if (*p == '\\' && p[1] == '\n') {
672                         p++;
673                         t_lineno++;
674                 } else if (*p != ' ' && *p != '\t') {
675                         break;
676                 }
677                 p++;
678         }
679         return p;
680 }
681
/* Returns the word *s points at and moves *s just past that word's
 * terminating NUL
 */
static char *nextword(char **s)
{
        char *word = *s;
        char *scan = word;

        while (*scan != '\0')
                scan++;
        *s = scan + 1;
        return word;
}
690
/* Fetch one (possibly backslash-escaped) character from *s and advance *s
 * past what was consumed.
 */
static char nextchar(char **s)
{
        char c;
        char *after_backslash;

        c = *(*s)++;
        if (c != '\\')
                return c;

        after_backslash = *s;
        c = bb_process_escape_sequence((const char**)s);
        if (c != '\\' || *s != after_backslash)
                return c;

        /* Escape was not recognized (result is still a backslash and nothing
         * was consumed). Example awk statement:
         * s = "abc\"def"
         * \" must be treated as ": return the character that follows the
         * backslash, consuming it unless it is the terminating NUL.
         */
        c = **s;
        if (c != '\0')
                (*s)++;
        return c;
}
710
/* Rewrite s1 in place, replacing each escape sequence with the character
 * nextchar() decodes for it; the terminating NUL is copied as well.
 * TODO: merge with strcpy_and_process_escape_sequences()?
 */
static void unescape_string_in_place(char *s1)
{
        char *src = s1;
        char *dst = s1;
        char c;

        do {
                c = nextchar(&src);
                *dst++ = c;
        } while (c != '\0');
}
719
720 static ALWAYS_INLINE int isalnum_(int c)
721 {
722         return (isalnum(c) || c == '_');
723 }
724
725 static double my_strtod(char **pp)
726 {
727         char *cp = *pp;
728         if (ENABLE_DESKTOP && cp[0] == '0') {
729                 /* Might be hex or octal integer: 0x123abc or 07777 */
730                 char c = (cp[1] | 0x20);
731                 if (c == 'x' || isdigit(cp[1])) {
732                         unsigned long long ull = strtoull(cp, pp, 0);
733                         if (c == 'x')
734                                 return ull;
735                         c = **pp;
736                         if (!isdigit(c) && c != '.')
737                                 return ull;
738                         /* else: it may be a floating number. Examples:
739                          * 009.123 (*pp points to '9')
740                          * 000.123 (*pp points to '.')
741                          * fall through to strtod.
742                          */
743                 }
744         }
745         return strtod(cp, pp);
746 }
747
748 /* -------- working with variables (set/get/copy/etc) -------- */
749
750 static xhash *iamarray(var *v)
751 {
752         var *a = v;
753
754         while (a->type & VF_CHILD)
755                 a = a->x.parent;
756
757         if (!(a->type & VF_ARRAY)) {
758                 a->type |= VF_ARRAY;
759                 a->x.array = hash_init();
760         }
761         return a->x.array;
762 }
763
764 static void clear_array(xhash *array)
765 {
766         unsigned i;
767         hash_item *hi, *thi;
768
769         for (i = 0; i < array->csize; i++) {
770                 hi = array->items[i];
771                 while (hi) {
772                         thi = hi;
773                         hi = hi->next;
774                         free(thi->data.v.string);
775                         free(thi);
776                 }
777                 array->items[i] = NULL;
778         }
779         array->glen = array->nel = 0;
780 }
781
782 /* clear a variable */
783 static var *clrvar(var *v)
784 {
785         if (!(v->type & VF_FSTR))
786                 free(v->string);
787
788         v->type &= VF_DONTTOUCH;
789         v->type |= VF_DIRTY;
790         v->string = NULL;
791         return v;
792 }
793
794 /* assign string value to variable */
795 static var *setvar_p(var *v, char *value)
796 {
797         clrvar(v);
798         v->string = value;
799         handle_special(v);
800         return v;
801 }
802
803 /* same as setvar_p but make a copy of string */
804 static var *setvar_s(var *v, const char *value)
805 {
806         return setvar_p(v, (value && *value) ? xstrdup(value) : NULL);
807 }
808
809 /* same as setvar_s but sets USER flag */
810 static var *setvar_u(var *v, const char *value)
811 {
812         v = setvar_s(v, value);
813         v->type |= VF_USER;
814         return v;
815 }
816
817 /* set array element to user string */
818 static void setari_u(var *a, int idx, const char *s)
819 {
820         var *v;
821
822         v = findvar(iamarray(a), itoa(idx));
823         setvar_u(v, s);
824 }
825
826 /* assign numeric value to variable */
827 static var *setvar_i(var *v, double value)
828 {
829         clrvar(v);
830         v->type |= VF_NUMBER;
831         v->number = value;
832         handle_special(v);
833         return v;
834 }
835
836 static const char *getvar_s(var *v)
837 {
838         /* if v is numeric and has no cached string, convert it to string */
839         if ((v->type & (VF_NUMBER | VF_CACHED)) == VF_NUMBER) {
840                 fmt_num(g_buf, MAXVARFMT, getvar_s(intvar[CONVFMT]), v->number, TRUE);
841                 v->string = xstrdup(g_buf);
842                 v->type |= VF_CACHED;
843         }
844         return (v->string == NULL) ? "" : v->string;
845 }
846
847 static double getvar_i(var *v)
848 {
849         char *s;
850
851         if ((v->type & (VF_NUMBER | VF_CACHED)) == 0) {
852                 v->number = 0;
853                 s = v->string;
854                 if (s && *s) {
855                         debug_printf_eval("getvar_i: '%s'->", s);
856                         v->number = my_strtod(&s);
857                         debug_printf_eval("%f (s:'%s')\n", v->number, s);
858                         if (v->type & VF_USER) {
859                                 s = skip_spaces(s);
860                                 if (*s != '\0')
861                                         v->type &= ~VF_USER;
862                         }
863                 } else {
864                         debug_printf_eval("getvar_i: '%s'->zero\n", s);
865                         v->type &= ~VF_USER;
866                 }
867                 v->type |= VF_CACHED;
868         }
869         debug_printf_eval("getvar_i: %f\n", v->number);
870         return v->number;
871 }
872
873 /* Used for operands of bitwise ops */
874 static unsigned long getvar_i_int(var *v)
875 {
876         double d = getvar_i(v);
877
878         /* Casting doubles to longs is undefined for values outside
879          * of target type range. Try to widen it as much as possible */
880         if (d >= 0)
881                 return (unsigned long)d;
882         /* Why? Think about d == -4294967295.0 (assuming 32bit longs) */
883         return - (long) (unsigned long) (-d);
884 }
885
886 static var *copyvar(var *dest, const var *src)
887 {
888         if (dest != src) {
889                 clrvar(dest);
890                 dest->type |= (src->type & ~(VF_DONTTOUCH | VF_FSTR));
891                 debug_printf_eval("copyvar: number:%f string:'%s'\n", src->number, src->string);
892                 dest->number = src->number;
893                 if (src->string)
894                         dest->string = xstrdup(src->string);
895         }
896         handle_special(dest);
897         return dest;
898 }
899
900 static var *incvar(var *v)
901 {
902         return setvar_i(v, getvar_i(v) + 1.0);
903 }
904
905 /* return true if v is number or numeric string */
906 static int is_numeric(var *v)
907 {
908         getvar_i(v);
909         return ((v->type ^ VF_DIRTY) & (VF_NUMBER | VF_USER | VF_DIRTY));
910 }
911
912 /* return 1 when value of v corresponds to true, 0 otherwise */
913 static int istrue(var *v)
914 {
915         if (is_numeric(v))
916                 return (v->number != 0);
917         return (v->string && v->string[0]);
918 }
919
920 /* temporary variables allocator. Last allocated should be first freed */
921 static var *nvalloc(int n)
922 {
923         nvblock *pb = NULL;
924         var *v, *r;
925         int size;
926
927         while (g_cb) {
928                 pb = g_cb;
929                 if ((g_cb->pos - g_cb->nv) + n <= g_cb->size)
930                         break;
931                 g_cb = g_cb->next;
932         }
933
934         if (!g_cb) {
935                 size = (n <= MINNVBLOCK) ? MINNVBLOCK : n;
936                 g_cb = xzalloc(sizeof(nvblock) + size * sizeof(var));
937                 g_cb->size = size;
938                 g_cb->pos = g_cb->nv;
939                 g_cb->prev = pb;
940                 /*g_cb->next = NULL; - xzalloc did it */
941                 if (pb)
942                         pb->next = g_cb;
943         }
944
945         v = r = g_cb->pos;
946         g_cb->pos += n;
947
948         while (v < g_cb->pos) {
949                 v->type = 0;
950                 v->string = NULL;
951                 v++;
952         }
953
954         return r;
955 }
956
957 static void nvfree(var *v)
958 {
959         var *p;
960
961         if (v < g_cb->nv || v >= g_cb->pos)
962                 syntax_error(EMSG_INTERNAL_ERROR);
963
964         for (p = v; p < g_cb->pos; p++) {
965                 if ((p->type & (VF_ARRAY | VF_CHILD)) == VF_ARRAY) {
966                         clear_array(iamarray(p));
967                         free(p->x.array->items);
968                         free(p->x.array);
969                 }
970                 if (p->type & VF_WALK) {
971                         walker_list *n;
972                         walker_list *w = p->x.walker;
973                         debug_printf_walker("nvfree: freeing walker @%p\n", &p->x.walker);
974                         p->x.walker = NULL;
975                         while (w) {
976                                 n = w->prev;
977                                 debug_printf_walker(" free(%p)\n", w);
978                                 free(w);
979                                 w = n;
980                         }
981                 }
982                 clrvar(p);
983         }
984
985         g_cb->pos = v;
986         while (g_cb->prev && g_cb->pos == g_cb->nv) {
987                 g_cb = g_cb->prev;
988         }
989 }
990
991 /* ------- awk program text parsing ------- */
992
993 /* Parse next token pointed by global pos, place results into global ttt.
994  * If token isn't expected, give away. Return token class
995  */
996 static uint32_t next_token(uint32_t expected)
997 {
998 #define concat_inserted (G.next_token__concat_inserted)
999 #define save_tclass     (G.next_token__save_tclass)
1000 #define save_info       (G.next_token__save_info)
1001 /* Initialized to TC_OPTERM: */
1002 #define ltclass         (G.next_token__ltclass)
1003
1004         char *p, *s;
1005         const char *tl;
1006         uint32_t tc;
1007         const uint32_t *ti;
1008
1009         if (t_rollback) {
1010                 t_rollback = FALSE;
1011
1012         } else if (concat_inserted) {
1013                 concat_inserted = FALSE;
1014                 t_tclass = save_tclass;
1015                 t_info = save_info;
1016
1017         } else {
1018                 p = g_pos;
1019  readnext:
1020                 p = skip_spaces(p);
1021                 g_lineno = t_lineno;
1022                 if (*p == '#')
1023                         while (*p != '\n' && *p != '\0')
1024                                 p++;
1025
1026                 if (*p == '\n')
1027                         t_lineno++;
1028
1029                 if (*p == '\0') {
1030                         tc = TC_EOF;
1031                         debug_printf_parse("%s: token found: TC_EOF\n", __func__);
1032
1033                 } else if (*p == '\"') {
1034                         /* it's a string */
1035                         t_string = s = ++p;
1036                         while (*p != '\"') {
1037                                 char *pp;
1038                                 if (*p == '\0' || *p == '\n')
1039                                         syntax_error(EMSG_UNEXP_EOS);
1040                                 pp = p;
1041                                 *s++ = nextchar(&pp);
1042                                 p = pp;
1043                         }
1044                         p++;
1045                         *s = '\0';
1046                         tc = TC_STRING;
1047                         debug_printf_parse("%s: token found:'%s' TC_STRING\n", __func__, t_string);
1048
1049                 } else if ((expected & TC_REGEXP) && *p == '/') {
1050                         /* it's regexp */
1051                         t_string = s = ++p;
1052                         while (*p != '/') {
1053                                 if (*p == '\0' || *p == '\n')
1054                                         syntax_error(EMSG_UNEXP_EOS);
1055                                 *s = *p++;
1056                                 if (*s++ == '\\') {
1057                                         char *pp = p;
1058                                         s[-1] = bb_process_escape_sequence((const char **)&pp);
1059                                         if (*p == '\\')
1060                                                 *s++ = '\\';
1061                                         if (pp == p)
1062                                                 *s++ = *p++;
1063                                         else
1064                                                 p = pp;
1065                                 }
1066                         }
1067                         p++;
1068                         *s = '\0';
1069                         tc = TC_REGEXP;
1070                         debug_printf_parse("%s: token found:'%s' TC_REGEXP\n", __func__, t_string);
1071
1072                 } else if (*p == '.' || isdigit(*p)) {
1073                         /* it's a number */
1074                         char *pp = p;
1075                         t_double = my_strtod(&pp);
1076                         p = pp;
1077                         if (*p == '.')
1078                                 syntax_error(EMSG_UNEXP_TOKEN);
1079                         tc = TC_NUMBER;
1080                         debug_printf_parse("%s: token found:%f TC_NUMBER\n", __func__, t_double);
1081
1082                 } else {
1083                         /* search for something known */
1084                         tl = tokenlist;
1085                         tc = 0x00000001;
1086                         ti = tokeninfo;
1087                         while (*tl) {
1088                                 int l = (unsigned char) *tl++;
1089                                 if (l == (unsigned char) NTCC) {
1090                                         tc <<= 1;
1091                                         continue;
1092                                 }
1093                                 /* if token class is expected,
1094                                  * token matches,
1095                                  * and it's not a longer word,
1096                                  */
1097                                 if ((tc & (expected | TC_WORD | TC_NEWLINE))
1098                                  && strncmp(p, tl, l) == 0
1099                                  && !((tc & TC_WORD) && isalnum_(p[l]))
1100                                 ) {
1101                                         /* then this is what we are looking for */
1102                                         t_info = *ti;
1103                                         debug_printf_parse("%s: token found:'%.*s' t_info:%x\n", __func__, l, p, t_info);
1104                                         p += l;
1105                                         goto token_found;
1106                                 }
1107                                 ti++;
1108                                 tl += l;
1109                         }
1110                         /* not a known token */
1111
1112                         /* is it a name? (var/array/function) */
1113                         if (!isalnum_(*p))
1114                                 syntax_error(EMSG_UNEXP_TOKEN); /* no */
1115                         /* yes */
1116                         t_string = --p;
1117                         while (isalnum_(*++p)) {
1118                                 p[-1] = *p;
1119                         }
1120                         p[-1] = '\0';
1121                         tc = TC_VARIABLE;
1122                         /* also consume whitespace between functionname and bracket */
1123                         if (!(expected & TC_VARIABLE) || (expected & TC_ARRAY))
1124                                 p = skip_spaces(p);
1125                         if (*p == '(') {
1126                                 tc = TC_FUNCTION;
1127                                 debug_printf_parse("%s: token found:'%s' TC_FUNCTION\n", __func__, t_string);
1128                         } else {
1129                                 if (*p == '[') {
1130                                         p++;
1131                                         tc = TC_ARRAY;
1132                                         debug_printf_parse("%s: token found:'%s' TC_ARRAY\n", __func__, t_string);
1133                                 } else
1134                                         debug_printf_parse("%s: token found:'%s' TC_VARIABLE\n", __func__, t_string);
1135                         }
1136                 }
1137  token_found:
1138                 g_pos = p;
1139
1140                 /* skipping newlines in some cases */
1141                 if ((ltclass & TC_NOTERM) && (tc & TC_NEWLINE))
1142                         goto readnext;
1143
1144                 /* insert concatenation operator when needed */
1145                 if ((ltclass & TC_CONCAT1) && (tc & TC_CONCAT2) && (expected & TC_BINOP)) {
1146                         concat_inserted = TRUE;
1147                         save_tclass = tc;
1148                         save_info = t_info;
1149                         tc = TC_BINOP;
1150                         t_info = OC_CONCAT | SS | P(35);
1151                 }
1152
1153                 t_tclass = tc;
1154         }
1155         ltclass = t_tclass;
1156
1157         /* Are we ready for this? */
1158         if (!(ltclass & expected))
1159                 syntax_error((ltclass & (TC_NEWLINE | TC_EOF)) ?
1160                                 EMSG_UNEXP_EOS : EMSG_UNEXP_TOKEN);
1161
1162         return ltclass;
1163 #undef concat_inserted
1164 #undef save_tclass
1165 #undef save_info
1166 #undef ltclass
1167 }
1168
1169 static void rollback_token(void)
1170 {
1171         t_rollback = TRUE;
1172 }
1173
1174 static node *new_node(uint32_t info)
1175 {
1176         node *n;
1177
1178         n = xzalloc(sizeof(node));
1179         n->info = info;
1180         n->lineno = g_lineno;
1181         return n;
1182 }
1183
1184 static void mk_re_node(const char *s, node *n, regex_t *re)
1185 {
1186         n->info = OC_REGEXP;
1187         n->l.re = re;
1188         n->r.ire = re + 1;
1189         xregcomp(re, s, REG_EXTENDED);
1190         xregcomp(re + 1, s, REG_EXTENDED | REG_ICASE);
1191 }
1192
1193 static node *condition(void)
1194 {
1195         next_token(TC_SEQSTART);
1196         return parse_expr(TC_SEQTERM);
1197 }
1198
1199 /* parse expression terminated by given argument, return ptr
1200  * to built subtree. Terminator is eaten by parse_expr */
1201 static node *parse_expr(uint32_t iexp)
1202 {
1203         node sn;
1204         node *cn = &sn;
1205         node *vn, *glptr;
1206         uint32_t tc, xtc;
1207         var *v;
1208
1209         debug_printf_parse("%s(%x)\n", __func__, iexp);
1210
1211         sn.info = PRIMASK;
1212         sn.r.n = glptr = NULL;
1213         xtc = TC_OPERAND | TC_UOPPRE | TC_REGEXP | iexp;
1214
1215         while (!((tc = next_token(xtc)) & iexp)) {
1216
1217                 if (glptr && (t_info == (OC_COMPARE | VV | P(39) | 2))) {
1218                         /* input redirection (<) attached to glptr node */
1219                         debug_printf_parse("%s: input redir\n", __func__);
1220                         cn = glptr->l.n = new_node(OC_CONCAT | SS | P(37));
1221                         cn->a.n = glptr;
1222                         xtc = TC_OPERAND | TC_UOPPRE;
1223                         glptr = NULL;
1224
1225                 } else if (tc & (TC_BINOP | TC_UOPPOST)) {
1226                         debug_printf_parse("%s: TC_BINOP | TC_UOPPOST\n", __func__);
1227                         /* for binary and postfix-unary operators, jump back over
1228                          * previous operators with higher priority */
1229                         vn = cn;
1230                         while (((t_info & PRIMASK) > (vn->a.n->info & PRIMASK2))
1231                             || ((t_info == vn->info) && ((t_info & OPCLSMASK) == OC_COLON))
1232                         ) {
1233                                 vn = vn->a.n;
1234                         }
1235                         if ((t_info & OPCLSMASK) == OC_TERNARY)
1236                                 t_info += P(6);
1237                         cn = vn->a.n->r.n = new_node(t_info);
1238                         cn->a.n = vn->a.n;
1239                         if (tc & TC_BINOP) {
1240                                 cn->l.n = vn;
1241                                 xtc = TC_OPERAND | TC_UOPPRE | TC_REGEXP;
1242                                 if ((t_info & OPCLSMASK) == OC_PGETLINE) {
1243                                         /* it's a pipe */
1244                                         next_token(TC_GETLINE);
1245                                         /* give maximum priority to this pipe */
1246                                         cn->info &= ~PRIMASK;
1247                                         xtc = TC_OPERAND | TC_UOPPRE | TC_BINOP | iexp;
1248                                 }
1249                         } else {
1250                                 cn->r.n = vn;
1251                                 xtc = TC_OPERAND | TC_UOPPRE | TC_BINOP | iexp;
1252                         }
1253                         vn->a.n = cn;
1254
1255                 } else {
1256                         debug_printf_parse("%s: other\n", __func__);
1257                         /* for operands and prefix-unary operators, attach them
1258                          * to last node */
1259                         vn = cn;
1260                         cn = vn->r.n = new_node(t_info);
1261                         cn->a.n = vn;
1262                         xtc = TC_OPERAND | TC_UOPPRE | TC_REGEXP;
1263                         if (tc & (TC_OPERAND | TC_REGEXP)) {
1264                                 debug_printf_parse("%s: TC_OPERAND | TC_REGEXP\n", __func__);
1265                                 xtc = TC_UOPPRE | TC_UOPPOST | TC_BINOP | TC_OPERAND | iexp;
1266                                 /* one should be very careful with switch on tclass -
1267                                  * only simple tclasses should be used! */
1268                                 switch (tc) {
1269                                 case TC_VARIABLE:
1270                                 case TC_ARRAY:
1271                                         debug_printf_parse("%s: TC_VARIABLE | TC_ARRAY\n", __func__);
1272                                         cn->info = OC_VAR;
1273                                         v = hash_search(ahash, t_string);
1274                                         if (v != NULL) {
1275                                                 cn->info = OC_FNARG;
1276                                                 cn->l.aidx = v->x.aidx;
1277                                         } else {
1278                                                 cn->l.v = newvar(t_string);
1279                                         }
1280                                         if (tc & TC_ARRAY) {
1281                                                 cn->info |= xS;
1282                                                 cn->r.n = parse_expr(TC_ARRTERM);
1283                                         }
1284                                         break;
1285
1286                                 case TC_NUMBER:
1287                                 case TC_STRING:
1288                                         debug_printf_parse("%s: TC_NUMBER | TC_STRING\n", __func__);
1289                                         cn->info = OC_VAR;
1290                                         v = cn->l.v = xzalloc(sizeof(var));
1291                                         if (tc & TC_NUMBER)
1292                                                 setvar_i(v, t_double);
1293                                         else
1294                                                 setvar_s(v, t_string);
1295                                         break;
1296
1297                                 case TC_REGEXP:
1298                                         debug_printf_parse("%s: TC_REGEXP\n", __func__);
1299                                         mk_re_node(t_string, cn, xzalloc(sizeof(regex_t)*2));
1300                                         break;
1301
1302                                 case TC_FUNCTION:
1303                                         debug_printf_parse("%s: TC_FUNCTION\n", __func__);
1304                                         cn->info = OC_FUNC;
1305                                         cn->r.f = newfunc(t_string);
1306                                         cn->l.n = condition();
1307                                         break;
1308
1309                                 case TC_SEQSTART:
1310                                         debug_printf_parse("%s: TC_SEQSTART\n", __func__);
1311                                         cn = vn->r.n = parse_expr(TC_SEQTERM);
1312                                         if (!cn)
1313                                                 syntax_error("Empty sequence");
1314                                         cn->a.n = vn;
1315                                         break;
1316
1317                                 case TC_GETLINE:
1318                                         debug_printf_parse("%s: TC_GETLINE\n", __func__);
1319                                         glptr = cn;
1320                                         xtc = TC_OPERAND | TC_UOPPRE | TC_BINOP | iexp;
1321                                         break;
1322
1323                                 case TC_BUILTIN:
1324                                         debug_printf_parse("%s: TC_BUILTIN\n", __func__);
1325                                         cn->l.n = condition();
1326                                         break;
1327                                 }
1328                         }
1329                 }
1330         }
1331
1332         debug_printf_parse("%s() returns %p\n", __func__, sn.r.n);
1333         return sn.r.n;
1334 }
1335
1336 /* add node to chain. Return ptr to alloc'd node */
1337 static node *chain_node(uint32_t info)
1338 {
1339         node *n;
1340
1341         if (!seq->first)
1342                 seq->first = seq->last = new_node(0);
1343
1344         if (seq->programname != g_progname) {
1345                 seq->programname = g_progname;
1346                 n = chain_node(OC_NEWSOURCE);
1347                 n->l.new_progname = xstrdup(g_progname);
1348         }
1349
1350         n = seq->last;
1351         n->info = info;
1352         seq->last = n->a.n = new_node(OC_DONE);
1353
1354         return n;
1355 }
1356
1357 static void chain_expr(uint32_t info)
1358 {
1359         node *n;
1360
1361         n = chain_node(info);
1362         n->l.n = parse_expr(TC_OPTERM | TC_GRPTERM);
1363         if (t_tclass & TC_GRPTERM)
1364                 rollback_token();
1365 }
1366
1367 static node *chain_loop(node *nn)
1368 {
1369         node *n, *n2, *save_brk, *save_cont;
1370
1371         save_brk = break_ptr;
1372         save_cont = continue_ptr;
1373
1374         n = chain_node(OC_BR | Vx);
1375         continue_ptr = new_node(OC_EXEC);
1376         break_ptr = new_node(OC_EXEC);
1377         chain_group();
1378         n2 = chain_node(OC_EXEC | Vx);
1379         n2->l.n = nn;
1380         n2->a.n = n;
1381         continue_ptr->a.n = n2;
1382         break_ptr->a.n = n->r.n = seq->last;
1383
1384         continue_ptr = save_cont;
1385         break_ptr = save_brk;
1386
1387         return n;
1388 }
1389
1390 /* parse group and attach it to chain */
1391 static void chain_group(void)
1392 {
1393         uint32_t c;
1394         node *n, *n2, *n3;
1395
1396         do {
1397                 c = next_token(TC_GRPSEQ);
1398         } while (c & TC_NEWLINE);
1399
1400         if (c & TC_GRPSTART) {
1401                 debug_printf_parse("%s: TC_GRPSTART\n", __func__);
1402                 while (next_token(TC_GRPSEQ | TC_GRPTERM) != TC_GRPTERM) {
1403                         debug_printf_parse("%s: !TC_GRPTERM\n", __func__);
1404                         if (t_tclass & TC_NEWLINE)
1405                                 continue;
1406                         rollback_token();
1407                         chain_group();
1408                 }
1409                 debug_printf_parse("%s: TC_GRPTERM\n", __func__);
1410         } else if (c & (TC_OPSEQ | TC_OPTERM)) {
1411                 debug_printf_parse("%s: TC_OPSEQ | TC_OPTERM\n", __func__);
1412                 rollback_token();
1413                 chain_expr(OC_EXEC | Vx);
1414         } else {
1415                 /* TC_STATEMNT */
1416                 debug_printf_parse("%s: TC_STATEMNT(?)\n", __func__);
1417                 switch (t_info & OPCLSMASK) {
1418                 case ST_IF:
1419                         debug_printf_parse("%s: ST_IF\n", __func__);
1420                         n = chain_node(OC_BR | Vx);
1421                         n->l.n = condition();
1422                         chain_group();
1423                         n2 = chain_node(OC_EXEC);
1424                         n->r.n = seq->last;
1425                         if (next_token(TC_GRPSEQ | TC_GRPTERM | TC_ELSE) == TC_ELSE) {
1426                                 chain_group();
1427                                 n2->a.n = seq->last;
1428                         } else {
1429                                 rollback_token();
1430                         }
1431                         break;
1432
1433                 case ST_WHILE:
1434                         debug_printf_parse("%s: ST_WHILE\n", __func__);
1435                         n2 = condition();
1436                         n = chain_loop(NULL);
1437                         n->l.n = n2;
1438                         break;
1439
1440                 case ST_DO:
1441                         debug_printf_parse("%s: ST_DO\n", __func__);
1442                         n2 = chain_node(OC_EXEC);
1443                         n = chain_loop(NULL);
1444                         n2->a.n = n->a.n;
1445                         next_token(TC_WHILE);
1446                         n->l.n = condition();
1447                         break;
1448
1449                 case ST_FOR:
1450                         debug_printf_parse("%s: ST_FOR\n", __func__);
1451                         next_token(TC_SEQSTART);
1452                         n2 = parse_expr(TC_SEMICOL | TC_SEQTERM);
1453                         if (t_tclass & TC_SEQTERM) {    /* for-in */
1454                                 if ((n2->info & OPCLSMASK) != OC_IN)
1455                                         syntax_error(EMSG_UNEXP_TOKEN);
1456                                 n = chain_node(OC_WALKINIT | VV);
1457                                 n->l.n = n2->l.n;
1458                                 n->r.n = n2->r.n;
1459                                 n = chain_loop(NULL);
1460                                 n->info = OC_WALKNEXT | Vx;
1461                                 n->l.n = n2->l.n;
1462                         } else {                        /* for (;;) */
1463                                 n = chain_node(OC_EXEC | Vx);
1464                                 n->l.n = n2;
1465                                 n2 = parse_expr(TC_SEMICOL);
1466                                 n3 = parse_expr(TC_SEQTERM);
1467                                 n = chain_loop(n3);
1468                                 n->l.n = n2;
1469                                 if (!n2)
1470                                         n->info = OC_EXEC;
1471                         }
1472                         break;
1473
1474                 case OC_PRINT:
1475                 case OC_PRINTF:
1476                         debug_printf_parse("%s: OC_PRINT[F]\n", __func__);
1477                         n = chain_node(t_info);
1478                         n->l.n = parse_expr(TC_OPTERM | TC_OUTRDR | TC_GRPTERM);
1479                         if (t_tclass & TC_OUTRDR) {
1480                                 n->info |= t_info;
1481                                 n->r.n = parse_expr(TC_OPTERM | TC_GRPTERM);
1482                         }
1483                         if (t_tclass & TC_GRPTERM)
1484                                 rollback_token();
1485                         break;
1486
1487                 case OC_BREAK:
1488                         debug_printf_parse("%s: OC_BREAK\n", __func__);
1489                         n = chain_node(OC_EXEC);
1490                         n->a.n = break_ptr;
1491                         break;
1492
1493                 case OC_CONTINUE:
1494                         debug_printf_parse("%s: OC_CONTINUE\n", __func__);
1495                         n = chain_node(OC_EXEC);
1496                         n->a.n = continue_ptr;
1497                         break;
1498
1499                 /* delete, next, nextfile, return, exit */
1500                 default:
1501                         debug_printf_parse("%s: default\n", __func__);
1502                         chain_expr(t_info);
1503                 }
1504         }
1505 }
1506
1507 static void parse_program(char *p)
1508 {
1509         uint32_t tclass;
1510         node *cn;
1511         func *f;
1512         var *v;
1513
1514         g_pos = p;
1515         t_lineno = 1;
1516         while ((tclass = next_token(TC_EOF | TC_OPSEQ | TC_GRPSTART |
1517                         TC_OPTERM | TC_BEGIN | TC_END | TC_FUNCDECL)) != TC_EOF) {
1518
1519                 if (tclass & TC_OPTERM) {
1520                         debug_printf_parse("%s: TC_OPTERM\n", __func__);
1521                         continue;
1522                 }
1523
1524                 seq = &mainseq;
1525                 if (tclass & TC_BEGIN) {
1526                         debug_printf_parse("%s: TC_BEGIN\n", __func__);
1527                         seq = &beginseq;
1528                         chain_group();
1529
1530                 } else if (tclass & TC_END) {
1531                         debug_printf_parse("%s: TC_END\n", __func__);
1532                         seq = &endseq;
1533                         chain_group();
1534
1535                 } else if (tclass & TC_FUNCDECL) {
1536                         debug_printf_parse("%s: TC_FUNCDECL\n", __func__);
1537                         next_token(TC_FUNCTION);
1538                         g_pos++;
1539                         f = newfunc(t_string);
1540                         f->body.first = NULL;
1541                         f->nargs = 0;
1542                         while (next_token(TC_VARIABLE | TC_SEQTERM) & TC_VARIABLE) {
1543                                 v = findvar(ahash, t_string);
1544                                 v->x.aidx = f->nargs++;
1545
1546                                 if (next_token(TC_COMMA | TC_SEQTERM) & TC_SEQTERM)
1547                                         break;
1548                         }
1549                         seq = &f->body;
1550                         chain_group();
1551                         clear_array(ahash);
1552
1553                 } else if (tclass & TC_OPSEQ) {
1554                         debug_printf_parse("%s: TC_OPSEQ\n", __func__);
1555                         rollback_token();
1556                         cn = chain_node(OC_TEST);
1557                         cn->l.n = parse_expr(TC_OPTERM | TC_EOF | TC_GRPSTART);
1558                         if (t_tclass & TC_GRPSTART) {
1559                                 debug_printf_parse("%s: TC_GRPSTART\n", __func__);
1560                                 rollback_token();
1561                                 chain_group();
1562                         } else {
1563                                 debug_printf_parse("%s: !TC_GRPSTART\n", __func__);
1564                                 chain_node(OC_PRINT);
1565                         }
1566                         cn->r.n = mainseq.last;
1567
1568                 } else /* if (tclass & TC_GRPSTART) */ {
1569                         debug_printf_parse("%s: TC_GRPSTART(?)\n", __func__);
1570                         rollback_token();
1571                         chain_group();
1572                 }
1573         }
1574         debug_printf_parse("%s: TC_EOF\n", __func__);
1575 }
1576
1577
1578 /* -------- program execution part -------- */
1579
1580 static node *mk_splitter(const char *s, tsplitter *spl)
1581 {
1582         regex_t *re, *ire;
1583         node *n;
1584
1585         re = &spl->re[0];
1586         ire = &spl->re[1];
1587         n = &spl->n;
1588         if ((n->info & OPCLSMASK) == OC_REGEXP) {
1589                 regfree(re);
1590                 regfree(ire); // TODO: nuke ire, use re+1?
1591         }
1592         if (s[0] && s[1]) { /* strlen(s) > 1 */
1593                 mk_re_node(s, n, re);
1594         } else {
1595                 n->info = (uint32_t) s[0];
1596         }
1597
1598         return n;
1599 }
1600
1601 /* use node as a regular expression. Supplied with node ptr and regex_t
1602  * storage space. Return ptr to regex (if result points to preg, it should
1603  * be later regfree'd manually
1604  */
1605 static regex_t *as_regex(node *op, regex_t *preg)
1606 {
1607         int cflags;
1608         var *v;
1609         const char *s;
1610
1611         if ((op->info & OPCLSMASK) == OC_REGEXP) {
1612                 return icase ? op->r.ire : op->l.re;
1613         }
1614         v = nvalloc(1);
1615         s = getvar_s(evaluate(op, v));
1616
1617         cflags = icase ? REG_EXTENDED | REG_ICASE : REG_EXTENDED;
1618         /* Testcase where REG_EXTENDED fails (unpaired '{'):
1619          * echo Hi | awk 'gsub("@(samp|code|file)\{","");'
1620          * gawk 3.1.5 eats this. We revert to ~REG_EXTENDED
1621          * (maybe gsub is not supposed to use REG_EXTENDED?).
1622          */
1623         if (regcomp(preg, s, cflags)) {
1624                 cflags &= ~REG_EXTENDED;
1625                 xregcomp(preg, s, cflags);
1626         }
1627         nvfree(v);
1628         return preg;
1629 }
1630
/* Gradually growing buffer.
 * Makes sure buffer b can hold more than n bytes: when b is NULL or
 * n >= *size, the buffer is reallocated to n + n/2 + 80 bytes and the
 * new capacity is stored in *size (*size is not read when b is NULL).
 * Because reallocation happens even if n == old size, there is always
 * at least one extra allocated byte beyond n.
 * Returns the (possibly moved) buffer.
 */
static char* qrealloc(char *b, int n, int *size)
{
        if (b && n < *size)
                return b; /* still large enough */

        *size = n + (n>>1) + 80;
        return xrealloc(b, *size);
}
1643
1644 /* resize field storage space */
1645 static void fsrealloc(int size)
1646 {
1647         int i;
1648
1649         if (size >= maxfields) {
1650                 i = maxfields;
1651                 maxfields = size + 16;
1652                 Fields = xrealloc(Fields, maxfields * sizeof(Fields[0]));
1653                 for (; i < maxfields; i++) {
1654                         Fields[i].type = VF_SPECIAL;
1655                         Fields[i].string = NULL;
1656                 }
1657         }
1658         /* if size < nfields, clear extra field variables */
1659         for (i = size; i < nfields; i++) {
1660                 clrvar(Fields + i);
1661         }
1662         nfields = size;
1663 }
1664
/* Split string s into fields according to splitter node spl.
 * The fields are stored back to back, each NUL-terminated, in a buffer
 * allocated here; its address is stored in *slist (the callers visible
 * in this file free() it later).
 * Returns the number of fields.
 *
 * The mode is selected by spl->info:
 *  - OC_REGEXP node:  split on regex matches
 *  - character '\0':  every character becomes its own field
 *  - character ' ':   split on runs of whitespace, skipping leading and
 *                     trailing whitespace
 *  - any other char:  split on that single character
 */
static int awk_split(const char *s, node *spl, char **slist)
{
        int l, n;
        char c[4];
        char *s1;
        regmatch_t pmatch[2]; // TODO: why [2]? [1] is enough...

        /* in worst case, each char would be a separate field */
        *slist = s1 = xzalloc(strlen(s) * 2 + 3);
        strcpy(s1, s);

        /* c[0], c[1]: separator char (later upper/lower case if icase);
         * c[2]: '\n' when RS is empty, so that a newline also acts as
         * a separator; c+2 alone is the "end of line" set for regex mode */
        c[0] = c[1] = (char)spl->info;
        c[2] = c[3] = '\0';
        if (*getvar_s(intvar[RS]) == '\0')
                c[2] = '\n';

        n = 0;
        if ((spl->info & OPCLSMASK) == OC_REGEXP) {  /* regex split */
                if (!*s)
                        return n; /* "": zero fields */
                n++; /* at least one field will be there */
                do {
                        l = strcspn(s, c+2); /* len till next NUL or \n */
                        /* accept a match only if it starts within
                         * the current segment */
                        if (regexec(icase ? spl->r.ire : spl->l.re, s, 1, pmatch, 0) == 0
                         && pmatch[0].rm_so <= l
                        ) {
                                l = pmatch[0].rm_so;
                                /* match ending at offset 0 is empty: consume
                                 * one char so that the loop makes progress */
                                if (pmatch[0].rm_eo == 0) {
                                        l++;
                                        pmatch[0].rm_eo++;
                                }
                                n++; /* we saw yet another delimiter */
                        } else {
                                /* no usable match: field runs to the end of
                                 * the segment; skip the '\n' if one stopped us */
                                pmatch[0].rm_eo = l;
                                if (s[l])
                                        pmatch[0].rm_eo++;
                        }
                        memcpy(s1, s, l);
                        /* make sure we remove *all* of the separator chars */
                        do {
                                s1[l] = '\0';
                        } while (++l < pmatch[0].rm_eo);
                        nextword(&s1);
                        s += pmatch[0].rm_eo;
                } while (*s);
                return n;
        }
        if (c[0] == '\0') {  /* null split */
                /* each char takes two bytes in the output: char + NUL */
                while (*s) {
                        *s1++ = *s++;
                        *s1++ = '\0';
                        n++;
                }
                return n;
        }
        if (c[0] != ' ') {  /* single-character split */
                if (icase) {
                        c[0] = toupper(c[0]);
                        c[1] = tolower(c[1]);
                }
                /* works in place on the copy of s made above:
                 * every separator found is overwritten with NUL */
                if (*s1)
                        n++;
                while ((s1 = strpbrk(s1, c)) != NULL) {
                        *s1++ = '\0';
                        n++;
                }
                return n;
        }
        /* space split */
        while (*s) {
                s = skip_whitespace(s);
                if (!*s)
                        break;
                n++;
                /* copy one whitespace-delimited word and terminate it */
                while (*s && !isspace(*s))
                        *s1++ = *s++;
                *s1++ = '\0';
        }
        return n;
}
1745
1746 static void split_f0(void)
1747 {
1748 /* static char *fstrings; */
1749 #define fstrings (G.split_f0__fstrings)
1750
1751         int i, n;
1752         char *s;
1753
1754         if (is_f0_split)
1755                 return;
1756
1757         is_f0_split = TRUE;
1758         free(fstrings);
1759         fsrealloc(0);
1760         n = awk_split(getvar_s(intvar[F0]), &fsplitter.n, &fstrings);
1761         fsrealloc(n);
1762         s = fstrings;
1763         for (i = 0; i < n; i++) {
1764                 Fields[i].string = nextword(&s);
1765                 Fields[i].type |= (VF_FSTR | VF_USER | VF_DIRTY);
1766         }
1767
1768         /* set NF manually to avoid side effects */
1769         clrvar(intvar[NF]);
1770         intvar[NF]->type = VF_NUMBER | VF_SPECIAL;
1771         intvar[NF]->number = nfields;
1772 #undef fstrings
1773 }
1774
1775 /* perform additional actions when some internal variables changed */
1776 static void handle_special(var *v)
1777 {
1778         int n;
1779         char *b;
1780         const char *sep, *s;
1781         int sl, l, len, i, bsize;
1782
1783         if (!(v->type & VF_SPECIAL))
1784                 return;
1785
1786         if (v == intvar[NF]) {
1787                 n = (int)getvar_i(v);
1788                 fsrealloc(n);
1789
1790                 /* recalculate $0 */
1791                 sep = getvar_s(intvar[OFS]);
1792                 sl = strlen(sep);
1793                 b = NULL;
1794                 len = 0;
1795                 for (i = 0; i < n; i++) {
1796                         s = getvar_s(&Fields[i]);
1797                         l = strlen(s);
1798                         if (b) {
1799                                 memcpy(b+len, sep, sl);
1800                                 len += sl;
1801                         }
1802                         b = qrealloc(b, len+l+sl, &bsize);
1803                         memcpy(b+len, s, l);
1804                         len += l;
1805                 }
1806                 if (b)
1807                         b[len] = '\0';
1808                 setvar_p(intvar[F0], b);
1809                 is_f0_split = TRUE;
1810
1811         } else if (v == intvar[F0]) {
1812                 is_f0_split = FALSE;
1813
1814         } else if (v == intvar[FS]) {
1815                 /*
1816                  * The POSIX-2008 standard says that changing FS should have no effect on the
1817                  * current input line, but only on the next one. The language is:
1818                  *
1819                  * > Before the first reference to a field in the record is evaluated, the record
1820                  * > shall be split into fields, according to the rules in Regular Expressions,
1821                  * > using the value of FS that was current at the time the record was read.
1822                  *
1823                  * So, split up current line before assignment to FS:
1824                  */
1825                 split_f0();
1826
1827                 mk_splitter(getvar_s(v), &fsplitter);
1828
1829         } else if (v == intvar[RS]) {
1830                 mk_splitter(getvar_s(v), &rsplitter);
1831
1832         } else if (v == intvar[IGNORECASE]) {
1833                 icase = istrue(v);
1834
1835         } else {                                /* $n */
1836                 n = getvar_i(intvar[NF]);
1837                 setvar_i(intvar[NF], n > v-Fields ? n : v-Fields+1);
1838                 /* right here v is invalid. Just to note... */
1839         }
1840 }
1841
1842 /* step through func/builtin/etc arguments */
1843 static node *nextarg(node **pn)
1844 {
1845         node *n;
1846
1847         n = *pn;
1848         if (n && (n->info & OPCLSMASK) == OC_COMMA) {
1849                 *pn = n->r.n;
1850                 n = n->l.n;
1851         } else {
1852                 *pn = NULL;
1853         }
1854         return n;
1855 }
1856
1857 static void hashwalk_init(var *v, xhash *array)
1858 {
1859         hash_item *hi;
1860         unsigned i;
1861         walker_list *w;
1862         walker_list *prev_walker;
1863
1864         if (v->type & VF_WALK) {
1865                 prev_walker = v->x.walker;
1866         } else {
1867                 v->type |= VF_WALK;
1868                 prev_walker = NULL;
1869         }
1870         debug_printf_walker("hashwalk_init: prev_walker:%p\n", prev_walker);
1871
1872         w = v->x.walker = xzalloc(sizeof(*w) + array->glen + 1); /* why + 1? */
1873         debug_printf_walker(" walker@%p=%p\n", &v->x.walker, w);
1874         w->cur = w->end = w->wbuf;
1875         w->prev = prev_walker;
1876         for (i = 0; i < array->csize; i++) {
1877                 hi = array->items[i];
1878                 while (hi) {
1879                         strcpy(w->end, hi->name);
1880                         nextword(&w->end);
1881                         hi = hi->next;
1882                 }
1883         }
1884 }
1885
1886 static int hashwalk_next(var *v)
1887 {
1888         walker_list *w = v->x.walker;
1889
1890         if (w->cur >= w->end) {
1891                 walker_list *prev_walker = w->prev;
1892
1893                 debug_printf_walker("end of iteration, free(walker@%p:%p), prev_walker:%p\n", &v->x.walker, w, prev_walker);
1894                 free(w);
1895                 v->x.walker = prev_walker;
1896                 return FALSE;
1897         }
1898
1899         setvar_s(v, nextword(&w->cur));
1900         return TRUE;
1901 }
1902
1903 /* evaluate node, return 1 when result is true, 0 otherwise */
1904 static int ptest(node *pattern)
1905 {
1906         /* ptest__v is "static": to save stack space? */
1907         return istrue(evaluate(pattern, &G.ptest__v));
1908 }
1909
/* Read the next record from stream rsm into variable v.
 *
 * The stream keeps its own buffer in rsm: 'buffer' is the allocation,
 * 'adv' the offset of the not yet consumed data inside it, 'pos' the
 * number of unconsumed bytes and 'size' the allocated size.
 * The record separator comes from rsplitter: a regex node, a single
 * character, or (character '\0') blank-line separated "paragraph" mode.
 * On success v receives the record text (flagged VF_USER) and RT the
 * separator text that terminated it.
 *
 * Returns 1 if a record was read, 0 at end of input, -1 on read error
 * (ERRNO is set to errno in that case).
 */
static int awk_getline(rstream *rsm, var *v)
{
        char *b;
        regmatch_t pmatch[2];
        int size, a, p, pp = 0;
        int fd, so, eo, r, rp;
        char c, *m, *s;

        debug_printf_eval("entered %s()\n", __func__);

        /* we're using our own buffer since we need access to accumulating
         * characters
         */
        fd = fileno(rsm->F);
        m = rsm->buffer;
        a = rsm->adv;
        p = rsm->pos;
        size = rsm->size;
        c = (char) rsplitter.n.info;
        rp = 0; /* offset of record start (leading '\n's skipped in paragraph mode) */

        if (!m)
                m = qrealloc(m, 256, &size);

        /* Each pass: look for a separator in the p buffered bytes at b;
         * if none is found, read more input and try again.
         * so..eo delimit the separator; both default to "end of data". */
        do {
                b = m + a;
                so = eo = p;
                r = 1;
                if (p > 0) {
                        if ((rsplitter.n.info & OPCLSMASK) == OC_REGEXP) {
                                if (regexec(icase ? rsplitter.n.r.ire : rsplitter.n.l.re,
                                                        b, 1, pmatch, 0) == 0) {
                                        so = pmatch[0].rm_so;
                                        eo = pmatch[0].rm_eo;
                                        /* a match reaching the end of the data
                                         * might grow with more input: keep reading */
                                        if (b[eo] != '\0')
                                                break;
                                }
                        } else if (c != '\0') {
                                /* single-char separator; only the bytes added by
                                 * the last read (from b+pp on) need scanning.
                                 * A NUL byte inside the data ends the record too */
                                s = strchr(b+pp, c);
                                if (!s)
                                        s = memchr(b+pp, '\0', p - pp);
                                if (s) {
                                        so = eo = s-b;
                                        eo++;
                                        break;
                                }
                        } else {
                                /* paragraph mode: skip leading newlines, the
                                 * record ends at the next run of 2+ newlines */
                                while (b[rp] == '\n')
                                        rp++;
                                s = strstr(b+rp, "\n\n");
                                if (s) {
                                        so = eo = s-b;
                                        while (b[eo] == '\n')
                                                eo++;
                                        /* run of newlines may continue in
                                         * data not read yet */
                                        if (b[eo] != '\0')
                                                break;
                                }
                        }
                }

                /* move pending data (and its NUL) to the buffer start */
                if (a > 0) {
                        memmove(m, m+a, p+1);
                        b = m;
                        a = 0;
                }

                m = qrealloc(m, a+p+128, &size);
                b = m + a;
                pp = p;
                p += safe_read(fd, b+p, size-p-1);
                if (p < pp) {
                        /* safe_read() returned a negative value: read error */
                        p = 0;
                        r = 0;
                        setvar_i(intvar[ERRNO], errno);
                }
                b[p] = '\0';

        } while (p > pp); /* stop once a read adds no new data */

        if (p == 0) {
                /* nothing buffered: 1 -> 0 (end of input), 0 -> -1 (error) */
                r--;
        } else {
                /* temporarily NUL-terminate at so / eo to copy out the
                 * record and the separator text, then restore the bytes */
                c = b[so]; b[so] = '\0';
                setvar_s(v, b+rp);
                v->type |= VF_USER;
                b[so] = c;
                c = b[eo]; b[eo] = '\0';
                setvar_s(intvar[RT], b+so);
                b[eo] = c;
        }

        /* remember buffer state; data after the separator stays pending */
        rsm->buffer = m;
        rsm->adv = a + eo;
        rsm->pos = p - eo;
        rsm->size = size;

        debug_printf_eval("returning from %s(): %d\n", __func__, r);

        return r;
}
2011
2012 static int fmt_num(char *b, int size, const char *format, double n, int int_as_int)
2013 {
2014         int r = 0;
2015         char c;
2016         const char *s = format;
2017
2018         if (int_as_int && n == (int)n) {
2019                 r = snprintf(b, size, "%d", (int)n);
2020         } else {
2021                 do { c = *s; } while (c && *++s);
2022                 if (strchr("diouxX", c)) {
2023                         r = snprintf(b, size, format, (int)n);
2024                 } else if (strchr("eEfgG", c)) {
2025                         r = snprintf(b, size, format, n);
2026                 } else {
2027                         syntax_error(EMSG_INV_FMT);
2028                 }
2029         }
2030         return r;
2031 }
2032
2033 /* formatted output into an allocated buffer, return ptr to buffer */
2034 static char *awk_printf(node *n)
2035 {
2036         char *b = NULL;
2037         char *fmt, *s, *f;
2038         const char *s1;
2039         int i, j, incr, bsize;
2040         char c, c1;
2041         var *v, *arg;
2042
2043         v = nvalloc(1);
2044         fmt = f = xstrdup(getvar_s(evaluate(nextarg(&n), v)));
2045
2046         i = 0;
2047         while (*f) {
2048                 s = f;
2049                 while (*f && (*f != '%' || *++f == '%'))
2050                         f++;
2051                 while (*f && !isalpha(*f)) {
2052                         if (*f == '*')
2053                                 syntax_error("%*x formats are not supported");
2054                         f++;
2055                 }
2056
2057                 incr = (f - s) + MAXVARFMT;
2058                 b = qrealloc(b, incr + i, &bsize);
2059                 c = *f;
2060                 if (c != '\0')
2061                         f++;
2062                 c1 = *f;
2063                 *f = '\0';
2064                 arg = evaluate(nextarg(&n), v);
2065
2066                 j = i;
2067                 if (c == 'c' || !c) {
2068                         i += sprintf(b+i, s, is_numeric(arg) ?
2069                                         (char)getvar_i(arg) : *getvar_s(arg));
2070                 } else if (c == 's') {
2071                         s1 = getvar_s(arg);
2072                         b = qrealloc(b, incr+i+strlen(s1), &bsize);
2073                         i += sprintf(b+i, s, s1);
2074                 } else {
2075                         i += fmt_num(b+i, incr, s, getvar_i(arg), FALSE);
2076                 }
2077                 *f = c1;
2078
2079                 /* if there was an error while sprintf, return value is negative */
2080                 if (i < j)
2081                         i = j;
2082         }
2083
2084         free(fmt);
2085         nvfree(v);
2086         b = xrealloc(b, i + 1);
2087         b[i] = '\0';
2088         return b;
2089 }
2090
/* Common substitution routine.
 * Replace (nm)'th substring of (src) that matches (rn) with (repl),
 * store result into (dest), return number of substitutions.
 * If nm = 0, replace all matches.
 * If src or dest is NULL, use $0.
 * If subexp != 0, enable subexpression matching (\1-\9).
 *
 * In (repl), '&' stands for the matched text; backslashes in front of
 * '&' (or of a digit, when subexp is set) are processed as described
 * at the replacement loop below.
 * Note: the return value is the number of matches seen (match_no),
 * which for nm > 1 also counts the matches skipped before the nm'th.
 */
static int awk_sub(node *rn, const char *repl, int nm, var *src, var *dest, int subexp)
{
        char *resbuf;
        const char *sp;
        int match_no, residx, replen, resbufsize;
        int regexec_flags;
        regmatch_t pmatch[10];
        regex_t sreg, *regex;

        resbuf = NULL;
        residx = 0;
        match_no = 0;
        regexec_flags = 0;
        /* may compile into sreg; in that case it is freed at "ret" */
        regex = as_regex(rn, &sreg);
        sp = getvar_s(src ? src : intvar[F0]);
        replen = strlen(repl);
        while (regexec(regex, sp, 10, pmatch, regexec_flags) == 0) {
                int so = pmatch[0].rm_so;
                int eo = pmatch[0].rm_eo;

                //bb_error_msg("match %u: [%u,%u] '%s'%p", match_no+1, so, eo, sp,sp);
                /* copy everything up to the end of the match ... */
                resbuf = qrealloc(resbuf, residx + eo + replen, &resbufsize);
                memcpy(resbuf + residx, sp, eo);
                residx += eo;
                if (++match_no >= nm) {
                        const char *s;
                        int nbs; /* backslashes seen right before current char */

                        /* replace */
                        /* ... then back up over the matched text itself */
                        residx -= (eo - so);
                        nbs = 0;
                        for (s = repl; *s; s++) {
                                char c = resbuf[residx++] = *s;
                                if (c == '\\') {
                                        nbs++;
                                        continue;
                                }
                                if (c == '&' || (subexp && c >= '0' && c <= '9')) {
                                        int j;
                                        /* un-copy c and about half of the
                                         * preceding backslashes: floor(nbs/2)
                                         * of them stay in the output */
                                        residx -= ((nbs + 3) >> 1);
                                        j = 0;
                                        if (c != '&') {
                                                /* digit: group number; parity is
                                                 * inverted, so that "\1" selects
                                                 * group 1 and a bare "1" is literal */
                                                j = c - '0';
                                                nbs++;
                                        }
                                        if (nbs % 2) {
                                                /* escaped: emit the char itself */
                                                resbuf[residx++] = c;
                                        } else {
                                                /* emit the text of match/group j */
                                                int n = pmatch[j].rm_eo - pmatch[j].rm_so;
                                                resbuf = qrealloc(resbuf, residx + replen + n, &resbufsize);
                                                memcpy(resbuf + residx, sp + pmatch[j].rm_so, n);
                                                residx += n;
                                        }
                                }
                                nbs = 0;
                        }
                }

                /* following searches do not start at beginning of line */
                regexec_flags = REG_NOTBOL;
                sp += eo;
                if (match_no == nm)
                        break;
                if (eo == so) {
                        /* Empty match (e.g. "b*" will match anywhere).
                         * Advance by one char. */
//BUG (bug 1333):
//gsub(/\<b*/,"") on "abc" will reach this point, advance to "bc"
//... and will erroneously match "b" even though it is NOT at the word start.
//we need REG_NOTBOW but it does not exist...
//TODO: if EXTRA_COMPAT=y, use GNU matching and re_search,
//it should be able to do it correctly.
                        /* Subtle: this is safe only because
                         * qrealloc allocated at least one extra byte */
                        resbuf[residx] = *sp;
                        /* at end of input the NUL just stored terminates
                         * the result: nothing left to append */
                        if (*sp == '\0')
                                goto ret;
                        sp++;
                        residx++;
                }
        }

        /* append the unprocessed remainder of the source string */
        resbuf = qrealloc(resbuf, residx + strlen(sp), &resbufsize);
        strcpy(resbuf + residx, sp);
 ret:
        //bb_error_msg("end sp:'%s'%p", sp,sp);
        setvar_p(dest ? dest : intvar[F0], resbuf);
        if (regex == &sreg)
                regfree(regex);
        return match_no;
}
2188
2189 static NOINLINE int do_mktime(const char *ds)
2190 {
2191         struct tm then;
2192         int count;
2193
2194         /*memset(&then, 0, sizeof(then)); - not needed */
2195         then.tm_isdst = -1; /* default is unknown */
2196
2197         /* manpage of mktime says these fields are ints,
2198          * so we can sscanf stuff directly into them */
2199         count = sscanf(ds, "%u %u %u %u %u %u %d",
2200                 &then.tm_year, &then.tm_mon, &then.tm_mday,
2201                 &then.tm_hour, &then.tm_min, &then.tm_sec,
2202                 &then.tm_isdst);
2203
2204         if (count < 6
2205          || (unsigned)then.tm_mon < 1
2206          || (unsigned)then.tm_year < 1900
2207         ) {
2208                 return -1;
2209         }
2210
2211         then.tm_mon -= 1;
2212         then.tm_year -= 1900;
2213
2214         return mktime(&then);
2215 }
2216
/* Execute the builtin function described by node op and store its
 * result in res; returns res.
 *
 * op->info encodes (as read by the code below):
 *  - top two bits (info >> 30): minimum number of arguments
 *  - bits tested through masks 0x09000000 / 0x08000000, shifted right
 *    once per argument: whether argument i is evaluated into av[i],
 *    and whether its string value is additionally fetched into as[i]
 *  - OPNMASK bits: which builtin (B_xx) to run
 * At most four arguments are collected; an[i] holds the argument nodes.
 */
static NOINLINE var *exec_builtin(node *op, var *res)
{
#define tspl (G.exec_builtin__tspl)

        var *tv;
        node *an[4];
        var *av[4];
        const char *as[4];
        regmatch_t pmatch[2];
        regex_t sreg, *re;
        node *spl;
        uint32_t isr, info;
        int nargs;
        time_t tt;
        int i, l, ll, n;

        /* temporaries receiving the evaluated arguments */
        tv = nvalloc(4);
        isr = info = op->info;
        op = op->l.n;

        av[2] = av[3] = NULL;
        for (i = 0; i < 4 && op; i++) {
                an[i] = nextarg(&op);
                if (isr & 0x09000000)
                        av[i] = evaluate(an[i], &tv[i]);
                if (isr & 0x08000000)
                        as[i] = getvar_s(av[i]);
                isr >>= 1;
        }

        nargs = i;
        if ((uint32_t)nargs < (info >> 30))
                syntax_error(EMSG_TOO_FEW_ARGS);

        info &= OPNMASK;
        switch (info) {

        /* atan2(av[0], av[1]); needs math library support */
        case B_a2:
                if (ENABLE_FEATURE_AWK_LIBM)
                        setvar_i(res, atan2(getvar_i(av[0]), getvar_i(av[1])));
                else
                        syntax_error(EMSG_NO_MATH);
                break;

        /* split string as[0] into array av[1]; the optional third
         * argument is the separator (regex node or string), default is
         * the current field splitter. Result: number of elements */
        case B_sp: {
                char *s, *s1;

                if (nargs > 2) {
                        spl = (an[2]->info & OPCLSMASK) == OC_REGEXP ?
                                an[2] : mk_splitter(getvar_s(evaluate(an[2], &tv[2])), &tspl);
                } else {
                        spl = &fsplitter.n;
                }

                n = awk_split(as[0], spl, &s);
                s1 = s; /* keep start of buffer for free() */
                clear_array(iamarray(av[1]));
                for (i = 1; i <= n; i++)
                        setari_u(av[1], i, nextword(&s));
                free(s1);
                setvar_i(res, n);
                break;
        }

        /* substring of as[0] starting at 1-based position av[1], with
         * optional length av[2]; start is clamped to [0, strlen] and a
         * negative length is treated as 0 */
        case B_ss: {
                char *s;

                l = strlen(as[0]);
                i = getvar_i(av[1]) - 1;
                if (i > l)
                        i = l;
                if (i < 0)
                        i = 0;
                n = (nargs > 2) ? getvar_i(av[2]) : l-i;
                if (n < 0)
                        n = 0;
                s = xstrndup(as[0]+i, n);
                setvar_p(res, s);
                break;
        }

        /* Bitwise ops must assume that operands are unsigned. GNU Awk 3.1.5:
         * awk '{ print or(-1,1) }' gives "4.29497e+09", not "-2.xxxe+09" */
        case B_an:
                setvar_i(res, getvar_i_int(av[0]) & getvar_i_int(av[1]));
                break;

        case B_co:
                setvar_i(res, ~getvar_i_int(av[0]));
                break;

        case B_ls:
                setvar_i(res, getvar_i_int(av[0]) << getvar_i_int(av[1]));
                break;

        case B_or:
                setvar_i(res, getvar_i_int(av[0]) | getvar_i_int(av[1]));
                break;

        case B_rs:
                setvar_i(res, getvar_i_int(av[0]) >> getvar_i_int(av[1]));
                break;

        case B_xo:
                setvar_i(res, getvar_i_int(av[0]) ^ getvar_i_int(av[1]));
                break;

        /* lower/upper-case copy of as[0]; only ASCII letters a-z/A-Z
         * are converted (done by flipping bit 0x20) */
        case B_lo:
        case B_up: {
                char *s, *s1;
                s1 = s = xstrdup(as[0]);
                while (*s1) {
                        //*s1 = (info == B_up) ? toupper(*s1) : tolower(*s1);
                        if ((unsigned char)((*s1 | 0x20) - 'a') <= ('z' - 'a'))
                                *s1 = (info == B_up) ? (*s1 & 0xdf) : (*s1 | 0x20);
                        s1++;
                }
                setvar_p(res, s);
                break;
        }

        /* 1-based position of the first occurrence of as[1] in as[0],
         * 0 if not found or as[1] is empty; case-insensitive when
         * icase is set */
        case B_ix:
                n = 0;
                ll = strlen(as[1]);
                l = strlen(as[0]) - ll;
                if (ll > 0 && l >= 0) {
                        if (!icase) {
                                char *s = strstr(as[0], as[1]);
                                if (s)
                                        n = (s - as[0]) + 1;
                        } else {
                                /* this piece of code is terribly slow and
                                 * really should be rewritten
                                 */
                                for (i = 0; i <= l; i++) {
                                        if (strncasecmp(as[0]+i, as[1], ll) == 0) {
                                                n = i+1;
                                                break;
                                        }
                                }
                        }
                }
                setvar_i(res, n);
                break;

        /* strftime() of local time: format as[0] (or a default),
         * timestamp av[1] (or the current time) */
        case B_ti:
                if (nargs > 1)
                        tt = getvar_i(av[1]);
                else
                        time(&tt);
                //s = (nargs > 0) ? as[0] : "%a %b %d %H:%M:%S %Z %Y";
                i = strftime(g_buf, MAXVARFMT,
                        ((nargs > 0) ? as[0] : "%a %b %d %H:%M:%S %Z %Y"),
                        localtime(&tt));
                g_buf[i] = '\0';
                setvar_s(res, g_buf);
                break;

        /* parse date specification as[0] into a timestamp */
        case B_mt:
                setvar_i(res, do_mktime(as[0]));
                break;

        /* match regex an[1] against as[0]: sets RSTART/RLENGTH to the
         * 1-based start and the length of the match (0 and -1 when
         * there is none) and returns RSTART */
        case B_ma:
                re = as_regex(an[1], &sreg);
                n = regexec(re, as[0], 1, pmatch, 0);
                if (n == 0) {
                        pmatch[0].rm_so++;
                        pmatch[0].rm_eo++;
                } else {
                        pmatch[0].rm_so = 0;
                        pmatch[0].rm_eo = -1;
                }
                setvar_i(newvar("RSTART"), pmatch[0].rm_so);
                setvar_i(newvar("RLENGTH"), pmatch[0].rm_eo - pmatch[0].rm_so);
                setvar_i(res, pmatch[0].rm_so);
                if (re == &sreg)
                        regfree(re);
                break;

        /* substitution with subexpression support: replace match
         * number av[2] (0 = all) in av[3] (or $0), result string goes
         * to res, the source is left untouched */
        case B_ge:
                awk_sub(an[0], as[1], getvar_i(av[2]), av[3], res, TRUE);
                break;

        /* replace all matches in place in av[2] (or $0); result is
         * the value returned by awk_sub() */
        case B_gs:
                setvar_i(res, awk_sub(an[0], as[1], 0, av[2], av[2], FALSE));
                break;

        /* same, but only the first match is replaced */
        case B_su:
                setvar_i(res, awk_sub(an[0], as[1], 1, av[2], av[2], FALSE));
                break;
        }

        nvfree(tv);
        return res;
#undef tspl
}
2413
2414 /*
2415  * Evaluate node - the heart of the program. Supplied with subtree
2416  * and place where to store result. returns ptr to result.
2417  */
2418 #define XC(n) ((n) >> 8)
2419
2420 static var *evaluate(node *op, var *res)
2421 {
2422 /* This procedure is recursive so we should count every byte */
2423 #define fnargs (G.evaluate__fnargs)
2424 /* seed is initialized to 1 */
2425 #define seed   (G.evaluate__seed)
2426 #define sreg   (G.evaluate__sreg)
2427
2428         var *v1;
2429
2430         if (!op)
2431                 return setvar_s(res, NULL);
2432
2433         debug_printf_eval("entered %s()\n", __func__);
2434
2435         v1 = nvalloc(2);
2436
2437         while (op) {
2438                 struct {
2439                         var *v;
2440                         const char *s;
2441                 } L = L; /* for compiler */
2442                 struct {
2443                         var *v;
2444                         const char *s;
2445                 } R = R;
2446                 double L_d = L_d;
2447                 uint32_t opinfo;
2448                 int opn;
2449                 node *op1;
2450
2451                 opinfo = op->info;
2452                 opn = (opinfo & OPNMASK);
2453                 g_lineno = op->lineno;
2454                 op1 = op->l.n;
2455                 debug_printf_eval("opinfo:%08x opn:%08x\n", opinfo, opn);
2456
2457                 /* execute inevitable things */
2458                 if (opinfo & OF_RES1)
2459                         L.v = evaluate(op1, v1);
2460                 if (opinfo & OF_RES2)
2461                         R.v = evaluate(op->r.n, v1+1);
2462                 if (opinfo & OF_STR1) {
2463                         L.s = getvar_s(L.v);
2464                         debug_printf_eval("L.s:'%s'\n", L.s);
2465                 }
2466                 if (opinfo & OF_STR2) {
2467                         R.s = getvar_s(R.v);
2468                         debug_printf_eval("R.s:'%s'\n", R.s);
2469                 }
2470                 if (opinfo & OF_NUM1) {
2471                         L_d = getvar_i(L.v);
2472                         debug_printf_eval("L_d:%f\n", L_d);
2473                 }
2474
2475                 debug_printf_eval("switch(0x%x)\n", XC(opinfo & OPCLSMASK));
2476                 switch (XC(opinfo & OPCLSMASK)) {
2477
2478                 /* -- iterative node type -- */
2479
2480                 /* test pattern */
2481                 case XC( OC_TEST ):
2482                         if ((op1->info & OPCLSMASK) == OC_COMMA) {
2483                                 /* it's range pattern */
2484                                 if ((opinfo & OF_CHECKED) || ptest(op1->l.n)) {
2485                                         op->info |= OF_CHECKED;
2486                                         if (ptest(op1->r.n))
2487                                                 op->info &= ~OF_CHECKED;
2488                                         op = op->a.n;
2489                                 } else {
2490                                         op = op->r.n;
2491                                 }
2492                         } else {
2493                                 op = ptest(op1) ? op->a.n : op->r.n;
2494                         }
2495                         break;
2496
2497                 /* just evaluate an expression, also used as unconditional jump */
2498                 case XC( OC_EXEC ):
2499                         break;
2500
2501                 /* branch, used in if-else and various loops */
2502                 case XC( OC_BR ):
2503                         op = istrue(L.v) ? op->a.n : op->r.n;
2504                         break;
2505
2506                 /* initialize for-in loop */
2507                 case XC( OC_WALKINIT ):
2508                         hashwalk_init(L.v, iamarray(R.v));
2509                         break;
2510
2511                 /* get next array item */
2512                 case XC( OC_WALKNEXT ):
2513                         op = hashwalk_next(L.v) ? op->a.n : op->r.n;
2514                         break;
2515
2516                 case XC( OC_PRINT ):
2517                 case XC( OC_PRINTF ): {
2518                         FILE *F = stdout;
2519
2520                         if (op->r.n) {
2521                                 rstream *rsm = newfile(R.s);
2522                                 if (!rsm->F) {
2523                                         if (opn == '|') {
2524                                                 rsm->F = popen(R.s, "w");
2525                                                 if (rsm->F == NULL)
2526                                                         bb_perror_msg_and_die("popen");
2527                                                 rsm->is_pipe = 1;
2528                                         } else {
2529                                                 rsm->F = xfopen(R.s, opn=='w' ? "w" : "a");
2530                                         }
2531                                 }
2532                                 F = rsm->F;
2533                         }
2534
2535                         if ((opinfo & OPCLSMASK) == OC_PRINT) {
2536                                 if (!op1) {
2537                                         fputs(getvar_s(intvar[F0]), F);
2538                                 } else {
2539                                         while (op1) {
2540                                                 var *v = evaluate(nextarg(&op1), v1);
2541                                                 if (v->type & VF_NUMBER) {
2542                                                         fmt_num(g_buf, MAXVARFMT, getvar_s(intvar[OFMT]),
2543                                                                         getvar_i(v), TRUE);
2544                                                         fputs(g_buf, F);
2545                                                 } else {
2546                                                         fputs(getvar_s(v), F);
2547                                                 }
2548
2549                                                 if (op1)
2550                                                         fputs(getvar_s(intvar[OFS]), F);
2551                                         }
2552                                 }
2553                                 fputs(getvar_s(intvar[ORS]), F);
2554
2555                         } else {        /* OC_PRINTF */
2556                                 char *s = awk_printf(op1);
2557                                 fputs(s, F);
2558                                 free(s);
2559                         }
2560                         fflush(F);
2561                         break;
2562                 }
2563
2564                 case XC( OC_DELETE ): {
2565                         uint32_t info = op1->info & OPCLSMASK;
2566                         var *v;
2567
2568                         if (info == OC_VAR) {
2569                                 v = op1->l.v;
2570                         } else if (info == OC_FNARG) {
2571                                 v = &fnargs[op1->l.aidx];
2572                         } else {
2573                                 syntax_error(EMSG_NOT_ARRAY);
2574                         }
2575
2576                         if (op1->r.n) {
2577                                 const char *s;
2578                                 clrvar(L.v);
2579                                 s = getvar_s(evaluate(op1->r.n, v1));
2580                                 hash_remove(iamarray(v), s);
2581                         } else {
2582                                 clear_array(iamarray(v));
2583                         }
2584                         break;
2585                 }
2586
2587                 case XC( OC_NEWSOURCE ):
2588                         g_progname = op->l.new_progname;
2589                         break;
2590
2591                 case XC( OC_RETURN ):
2592                         copyvar(res, L.v);
2593                         break;
2594
2595                 case XC( OC_NEXTFILE ):
2596                         nextfile = TRUE;
2597                 case XC( OC_NEXT ):
2598                         nextrec = TRUE;
2599                 case XC( OC_DONE ):
2600                         clrvar(res);
2601                         break;
2602
2603                 case XC( OC_EXIT ):
2604                         awk_exit(L_d);
2605
2606                 /* -- recursive node type -- */
2607
2608                 case XC( OC_VAR ):
2609                         L.v = op->l.v;
2610                         if (L.v == intvar[NF])
2611                                 split_f0();
2612                         goto v_cont;
2613
2614                 case XC( OC_FNARG ):
2615                         L.v = &fnargs[op->l.aidx];
2616  v_cont:
2617                         res = op->r.n ? findvar(iamarray(L.v), R.s) : L.v;
2618                         break;
2619
2620                 case XC( OC_IN ):
2621                         setvar_i(res, hash_search(iamarray(R.v), L.s) ? 1 : 0);
2622                         break;
2623
2624                 case XC( OC_REGEXP ):
2625                         op1 = op;
2626                         L.s = getvar_s(intvar[F0]);
2627                         goto re_cont;
2628
2629                 case XC( OC_MATCH ):
2630                         op1 = op->r.n;
2631  re_cont:
2632                         {
2633                                 regex_t *re = as_regex(op1, &sreg);
2634                                 int i = regexec(re, L.s, 0, NULL, 0);
2635                                 if (re == &sreg)
2636                                         regfree(re);
2637                                 setvar_i(res, (i == 0) ^ (opn == '!'));
2638                         }
2639                         break;
2640
2641                 case XC( OC_MOVE ):
2642                         debug_printf_eval("MOVE\n");
2643                         /* if source is a temporary string, jusk relink it to dest */
2644 //Disabled: if R.v is numeric but happens to have cached R.v->string,
2645 //then L.v ends up being a string, which is wrong
2646 //                      if (R.v == v1+1 && R.v->string) {
2647 //                              res = setvar_p(L.v, R.v->string);
2648 //                              R.v->string = NULL;
2649 //                      } else {
2650                                 res = copyvar(L.v, R.v);
2651 //                      }
2652                         break;
2653
2654                 case XC( OC_TERNARY ):
2655                         if ((op->r.n->info & OPCLSMASK) != OC_COLON)
2656                                 syntax_error(EMSG_POSSIBLE_ERROR);
2657                         res = evaluate(istrue(L.v) ? op->r.n->l.n : op->r.n->r.n, res);
2658                         break;
2659
2660                 case XC( OC_FUNC ): {
2661                         var *vbeg, *v;
2662                         const char *sv_progname;
2663
2664                         if (!op->r.f->body.first)
2665                                 syntax_error(EMSG_UNDEF_FUNC);
2666
2667                         vbeg = v = nvalloc(op->r.f->nargs + 1);
2668                         while (op1) {
2669                                 var *arg = evaluate(nextarg(&op1), v1);
2670                                 copyvar(v, arg);
2671                                 v->type |= VF_CHILD;
2672                                 v->x.parent = arg;
2673                                 if (++v - vbeg >= op->r.f->nargs)
2674                                         break;
2675                         }
2676
2677                         v = fnargs;
2678                         fnargs = vbeg;
2679                         sv_progname = g_progname;
2680
2681                         res = evaluate(op->r.f->body.first, res);
2682
2683                         g_progname = sv_progname;
2684                         nvfree(fnargs);
2685                         fnargs = v;
2686
2687                         break;
2688                 }
2689
2690                 case XC( OC_GETLINE ):
2691                 case XC( OC_PGETLINE ): {
2692                         rstream *rsm;
2693                         int i;
2694
2695                         if (op1) {
2696                                 rsm = newfile(L.s);
2697                                 if (!rsm->F) {
2698                                         if ((opinfo & OPCLSMASK) == OC_PGETLINE) {
2699                                                 rsm->F = popen(L.s, "r");
2700                                                 rsm->is_pipe = TRUE;
2701                                         } else {
2702                                                 rsm->F = fopen_for_read(L.s);  /* not xfopen! */
2703                                         }
2704                                 }
2705                         } else {
2706                                 if (!iF)
2707                                         iF = next_input_file();
2708                                 rsm = iF;
2709                         }
2710
2711                         if (!rsm || !rsm->F) {
2712                                 setvar_i(intvar[ERRNO], errno);
2713                                 setvar_i(res, -1);
2714                                 break;
2715                         }
2716
2717                         if (!op->r.n)
2718                                 R.v = intvar[F0];
2719
2720                         i = awk_getline(rsm, R.v);
2721                         if (i > 0 && !op1) {
2722                                 incvar(intvar[FNR]);
2723                                 incvar(intvar[NR]);
2724                         }
2725                         setvar_i(res, i);
2726                         break;
2727                 }
2728
2729                 /* simple builtins */
2730                 case XC( OC_FBLTIN ): {
2731                         double R_d = R_d; /* for compiler */
2732
2733                         switch (opn) {
2734                         case F_in:
2735                                 R_d = (int)L_d;
2736                                 break;
2737
2738                         case F_rn:
2739                                 R_d = (double)rand() / (double)RAND_MAX;
2740                                 break;
2741
2742                         case F_co:
2743                                 if (ENABLE_FEATURE_AWK_LIBM) {
2744                                         R_d = cos(L_d);
2745                                         break;
2746                                 }
2747
2748                         case F_ex:
2749                                 if (ENABLE_FEATURE_AWK_LIBM) {
2750                                         R_d = exp(L_d);
2751                                         break;
2752                                 }
2753
2754                         case F_lg:
2755                                 if (ENABLE_FEATURE_AWK_LIBM) {
2756                                         R_d = log(L_d);
2757                                         break;
2758                                 }
2759
2760                         case F_si:
2761                                 if (ENABLE_FEATURE_AWK_LIBM) {
2762                                         R_d = sin(L_d);
2763                                         break;
2764                                 }
2765
2766                         case F_sq:
2767                                 if (ENABLE_FEATURE_AWK_LIBM) {
2768                                         R_d = sqrt(L_d);
2769                                         break;
2770                                 }
2771
2772                                 syntax_error(EMSG_NO_MATH);
2773                                 break;
2774
2775                         case F_sr:
2776                                 R_d = (double)seed;
2777                                 seed = op1 ? (unsigned)L_d : (unsigned)time(NULL);
2778                                 srand(seed);
2779                                 break;
2780
2781                         case F_ti:
2782                                 R_d = time(NULL);
2783                                 break;
2784
2785                         case F_le:
2786                                 if (!op1)
2787                                         L.s = getvar_s(intvar[F0]);
2788                                 R_d = strlen(L.s);
2789                                 break;
2790
2791                         case F_sy:
2792                                 fflush_all();
2793                                 R_d = (ENABLE_FEATURE_ALLOW_EXEC && L.s && *L.s)
2794                                                 ? (system(L.s) >> 8) : 0;
2795                                 break;
2796
2797                         case F_ff:
2798                                 if (!op1) {
2799                                         fflush(stdout);
2800                                 } else if (L.s && *L.s) {
2801                                         rstream *rsm = newfile(L.s);
2802                                         fflush(rsm->F);
2803                                 } else {
2804                                         fflush_all();
2805                                 }
2806                                 break;
2807
2808                         case F_cl: {
2809                                 rstream *rsm;
2810                                 int err = 0;
2811                                 rsm = (rstream *)hash_search(fdhash, L.s);
2812                                 debug_printf_eval("OC_FBLTIN F_cl rsm:%p\n", rsm);
2813                                 if (rsm) {
2814                                         debug_printf_eval("OC_FBLTIN F_cl "
2815                                                 "rsm->is_pipe:%d, ->F:%p\n",
2816                                                 rsm->is_pipe, rsm->F);
2817                                         /* Can be NULL if open failed. Example:
2818                                          * getline line <"doesnt_exist";
2819                                          * close("doesnt_exist"); <--- here rsm->F is NULL
2820                                          */
2821                                         if (rsm->F)
2822                                                 err = rsm->is_pipe ? pclose(rsm->F) : fclose(rsm->F);
2823                                         free(rsm->buffer);
2824                                         hash_remove(fdhash, L.s);
2825                                 }
2826                                 if (err)
2827                                         setvar_i(intvar[ERRNO], errno);
2828                                 R_d = (double)err;
2829                                 break;
2830                         }
2831                         } /* switch */
2832                         setvar_i(res, R_d);
2833                         break;
2834                 }
2835
2836                 case XC( OC_BUILTIN ):
2837                         res = exec_builtin(op, res);
2838                         break;
2839
2840                 case XC( OC_SPRINTF ):
2841                         setvar_p(res, awk_printf(op1));
2842                         break;
2843
2844                 case XC( OC_UNARY ): {
2845                         double Ld, R_d;
2846
2847                         Ld = R_d = getvar_i(R.v);
2848                         switch (opn) {
2849                         case 'P':
2850                                 Ld = ++R_d;
2851                                 goto r_op_change;
2852                         case 'p':
2853                                 R_d++;
2854                                 goto r_op_change;
2855                         case 'M':
2856                                 Ld = --R_d;
2857                                 goto r_op_change;
2858                         case 'm':
2859                                 R_d--;
2860  r_op_change:
2861                                 setvar_i(R.v, R_d);
2862                                 break;
2863                         case '!':
2864                                 Ld = !istrue(R.v);
2865                                 break;
2866                         case '-':
2867                                 Ld = -R_d;
2868                                 break;
2869                         }
2870                         setvar_i(res, Ld);
2871                         break;
2872                 }
2873
2874                 case XC( OC_FIELD ): {
2875                         int i = (int)getvar_i(R.v);
2876                         if (i == 0) {
2877                                 res = intvar[F0];
2878                         } else {
2879                                 split_f0();
2880                                 if (i > nfields)
2881                                         fsrealloc(i);
2882                                 res = &Fields[i - 1];
2883                         }
2884                         break;
2885                 }
2886
2887                 /* concatenation (" ") and index joining (",") */
2888                 case XC( OC_CONCAT ):
2889                 case XC( OC_COMMA ): {
2890                         const char *sep = "";
2891                         if ((opinfo & OPCLSMASK) == OC_COMMA)
2892                                 sep = getvar_s(intvar[SUBSEP]);
2893                         setvar_p(res, xasprintf("%s%s%s", L.s, sep, R.s));
2894                         break;
2895                 }
2896
2897                 case XC( OC_LAND ):
2898                         setvar_i(res, istrue(L.v) ? ptest(op->r.n) : 0);
2899                         break;
2900
2901                 case XC( OC_LOR ):
2902                         setvar_i(res, istrue(L.v) ? 1 : ptest(op->r.n));
2903                         break;
2904
2905                 case XC( OC_BINARY ):
2906                 case XC( OC_REPLACE ): {
2907                         double R_d = getvar_i(R.v);
2908                         debug_printf_eval("BINARY/REPLACE: R_d:%f opn:%c\n", R_d, opn);
2909                         switch (opn) {
2910                         case '+':
2911                                 L_d += R_d;
2912                                 break;
2913                         case '-':
2914                                 L_d -= R_d;
2915                                 break;
2916                         case '*':
2917                                 L_d *= R_d;
2918                                 break;
2919                         case '/':
2920                                 if (R_d == 0)
2921                                         syntax_error(EMSG_DIV_BY_ZERO);
2922                                 L_d /= R_d;
2923                                 break;
2924                         case '&':
2925                                 if (ENABLE_FEATURE_AWK_LIBM)
2926                                         L_d = pow(L_d, R_d);
2927                                 else
2928                                         syntax_error(EMSG_NO_MATH);
2929                                 break;
2930                         case '%':
2931                                 if (R_d == 0)
2932                                         syntax_error(EMSG_DIV_BY_ZERO);
2933                                 L_d -= (int)(L_d / R_d) * R_d;
2934                                 break;
2935                         }
2936                         debug_printf_eval("BINARY/REPLACE result:%f\n", L_d);
2937                         res = setvar_i(((opinfo & OPCLSMASK) == OC_BINARY) ? res : L.v, L_d);
2938                         break;
2939                 }
2940
2941                 case XC( OC_COMPARE ): {
2942                         int i = i; /* for compiler */
2943                         double Ld;
2944
2945                         if (is_numeric(L.v) && is_numeric(R.v)) {
2946                                 Ld = getvar_i(L.v) - getvar_i(R.v);
2947                         } else {
2948                                 const char *l = getvar_s(L.v);
2949                                 const char *r = getvar_s(R.v);
2950                                 Ld = icase ? strcasecmp(l, r) : strcmp(l, r);
2951                         }
2952                         switch (opn & 0xfe) {
2953                         case 0:
2954                                 i = (Ld > 0);
2955                                 break;
2956                         case 2:
2957                                 i = (Ld >= 0);
2958                                 break;
2959                         case 4:
2960                                 i = (Ld == 0);
2961                                 break;
2962                         }
2963                         setvar_i(res, (i == 0) ^ (opn & 1));
2964                         break;
2965                 }
2966
2967                 default:
2968                         syntax_error(EMSG_POSSIBLE_ERROR);
2969                 }
2970                 if ((opinfo & OPCLSMASK) <= SHIFT_TIL_THIS)
2971                         op = op->a.n;
2972                 if ((opinfo & OPCLSMASK) >= RECUR_FROM_THIS)
2973                         break;
2974                 if (nextrec)
2975                         break;
2976         } /* while (op) */
2977
2978         nvfree(v1);
2979         debug_printf_eval("returning from %s(): %p\n", __func__, res);
2980         return res;
2981 #undef fnargs
2982 #undef seed
2983 #undef sreg
2984 }
2985
2986
2987 /* -------- main & co. -------- */
2988
2989 static int awk_exit(int r)
2990 {
2991         var tv;
2992         unsigned i;
2993         hash_item *hi;
2994
2995         zero_out_var(&tv);
2996
2997         if (!exiting) {
2998                 exiting = TRUE;
2999                 nextrec = FALSE;
3000                 evaluate(endseq.first, &tv);
3001         }
3002
3003         /* waiting for children */
3004         for (i = 0; i < fdhash->csize; i++) {
3005                 hi = fdhash->items[i];
3006                 while (hi) {
3007                         if (hi->data.rs.F && hi->data.rs.is_pipe)
3008                                 pclose(hi->data.rs.F);
3009                         hi = hi->next;
3010                 }
3011         }
3012
3013         exit(r);
3014 }
3015
3016 /* if expr looks like "var=value", perform assignment and return 1,
3017  * otherwise return 0 */
3018 static int is_assignment(const char *expr)
3019 {
3020         char *exprc, *val;
3021
3022         if (!isalnum_(*expr) || (val = strchr(expr, '=')) == NULL) {
3023                 return FALSE;
3024         }
3025
3026         exprc = xstrdup(expr);
3027         val = exprc + (val - expr);
3028         *val++ = '\0';
3029
3030         unescape_string_in_place(val);
3031         setvar_u(newvar(exprc), val);
3032         free(exprc);
3033         return TRUE;
3034 }
3035
3036 /* switch to next input file */
3037 static rstream *next_input_file(void)
3038 {
3039 #define rsm          (G.next_input_file__rsm)
3040 #define files_happen (G.next_input_file__files_happen)
3041
3042         FILE *F;
3043         const char *fname, *ind;
3044
3045         if (rsm.F)
3046                 fclose(rsm.F);
3047         rsm.F = NULL;
3048         rsm.pos = rsm.adv = 0;
3049
3050         for (;;) {
3051                 if (getvar_i(intvar[ARGIND])+1 >= getvar_i(intvar[ARGC])) {
3052                         if (files_happen)
3053                                 return NULL;
3054                         fname = "-";
3055                         F = stdin;
3056                         break;
3057                 }
3058                 ind = getvar_s(incvar(intvar[ARGIND]));
3059                 fname = getvar_s(findvar(iamarray(intvar[ARGV]), ind));
3060                 if (fname && *fname && !is_assignment(fname)) {
3061                         F = xfopen_stdin(fname);
3062                         break;
3063                 }
3064         }
3065
3066         files_happen = TRUE;
3067         setvar_s(intvar[FILENAME], fname);
3068         rsm.F = F;
3069         return &rsm;
3070 #undef rsm
3071 #undef files_happen
3072 }
3073
3074 int awk_main(int argc, char **argv) MAIN_EXTERNALLY_VISIBLE;
3075 int awk_main(int argc, char **argv)
3076 {
3077         unsigned opt;
3078         char *opt_F;
3079         llist_t *list_v = NULL;
3080         llist_t *list_f = NULL;
3081         int i, j;
3082         var *v;
3083         var tv;
3084         char **envp;
3085         char *vnames = (char *)vNames; /* cheat */
3086         char *vvalues = (char *)vValues;
3087
3088         INIT_G();
3089
3090         /* Undo busybox.c, or else strtod may eat ','! This breaks parsing:
3091          * $1,$2 == '$1,' '$2', NOT '$1' ',' '$2' */
3092         if (ENABLE_LOCALE_SUPPORT)
3093                 setlocale(LC_NUMERIC, "C");
3094
3095         zero_out_var(&tv);
3096
3097         /* allocate global buffer */
3098         g_buf = xmalloc(MAXVARFMT + 1);
3099
3100         vhash = hash_init();
3101         ahash = hash_init();
3102         fdhash = hash_init();
3103         fnhash = hash_init();
3104
3105         /* initialize variables */
3106         for (i = 0; *vnames; i++) {
3107                 intvar[i] = v = newvar(nextword(&vnames));
3108                 if (*vvalues != '\377')
3109                         setvar_s(v, nextword(&vvalues));
3110                 else
3111                         setvar_i(v, 0);
3112
3113                 if (*vnames == '*') {
3114                         v->type |= VF_SPECIAL;
3115                         vnames++;
3116                 }
3117         }
3118
3119         handle_special(intvar[FS]);
3120         handle_special(intvar[RS]);
3121
3122         newfile("/dev/stdin")->F = stdin;
3123         newfile("/dev/stdout")->F = stdout;
3124         newfile("/dev/stderr")->F = stderr;
3125
3126         /* Huh, people report that sometimes environ is NULL. Oh well. */
3127         if (environ) for (envp = environ; *envp; envp++) {
3128                 /* environ is writable, thus we don't strdup it needlessly */
3129                 char *s = *envp;
3130                 char *s1 = strchr(s, '=');
3131                 if (s1) {
3132                         *s1 = '\0';
3133                         /* Both findvar and setvar_u take const char*
3134                          * as 2nd arg -> environment is not trashed */
3135                         setvar_u(findvar(iamarray(intvar[ENVIRON]), s), s1 + 1);
3136                         *s1 = '=';
3137                 }
3138         }
3139         opt_complementary = "v::f::"; /* -v and -f can occur multiple times */
3140         opt = getopt32(argv, "F:v:f:W:", &opt_F, &list_v, &list_f, NULL);
3141         argv += optind;
3142         argc -= optind;
3143         if (opt & 0x1) { /* -F */
3144                 unescape_string_in_place(opt_F);
3145                 setvar_s(intvar[FS], opt_F);
3146         }
3147         while (list_v) { /* -v */
3148                 if (!is_assignment(llist_pop(&list_v)))
3149                         bb_show_usage();
3150         }
3151         if (list_f) { /* -f */
3152                 do {
3153                         char *s = NULL;
3154                         FILE *from_file;
3155
3156                         g_progname = llist_pop(&list_f);
3157                         from_file = xfopen_stdin(g_progname);
3158                         /* one byte is reserved for some trick in next_token */
3159                         for (i = j = 1; j > 0; i += j) {
3160                                 s = xrealloc(s, i + 4096);
3161                                 j = fread(s + i, 1, 4094, from_file);
3162                         }
3163                         s[i] = '\0';
3164                         fclose(from_file);
3165                         parse_program(s + 1);
3166                         free(s);
3167                 } while (list_f);
3168                 argc++;
3169         } else { // no -f: take program from 1st parameter
3170                 if (!argc)
3171                         bb_show_usage();
3172                 g_progname = "cmd. line";
3173                 parse_program(*argv++);
3174         }
3175         if (opt & 0x8) // -W
3176                 bb_error_msg("warning: option -W is ignored");
3177
3178         /* fill in ARGV array */
3179         setvar_i(intvar[ARGC], argc);
3180         setari_u(intvar[ARGV], 0, "awk");
3181         i = 0;
3182         while (*argv)
3183                 setari_u(intvar[ARGV], ++i, *argv++);
3184
3185         evaluate(beginseq.first, &tv);
3186         if (!mainseq.first && !endseq.first)
3187                 awk_exit(EXIT_SUCCESS);
3188
3189         /* input file could already be opened in BEGIN block */
3190         if (!iF)
3191                 iF = next_input_file();
3192
3193         /* passing through input files */
3194         while (iF) {
3195                 nextfile = FALSE;
3196                 setvar_i(intvar[FNR], 0);
3197
3198                 while ((i = awk_getline(iF, intvar[F0])) > 0) {
3199                         nextrec = FALSE;
3200                         incvar(intvar[NR]);
3201                         incvar(intvar[FNR]);
3202                         evaluate(mainseq.first, &tv);
3203
3204                         if (nextfile)
3205                                 break;
3206                 }
3207
3208                 if (i < 0)
3209                         syntax_error(strerror(errno));
3210
3211                 iF = next_input_file();
3212         }
3213
3214         awk_exit(EXIT_SUCCESS);
3215         /*return 0;*/
3216 }