1 /* vi: set sw=4 ts=4: */
3 * awk implementation for busybox
5 * Copyright (C) 2002 by Dmitry Zakharov <dmit@crp.bank.gov.ua>
7 * Licensed under the GPL v2 or later, see the file LICENSE in this tarball.
14 /* This is a NOEXEC applet. Be very careful! */
/*
 * Variable flags: bit values stored in and tested against var_s.type.
 */
#define VF_NUMBER    0x0001  /* 1 = primary type is number */
#define VF_ARRAY     0x0002  /* 1 = it's an array */

#define VF_CACHED    0x0100  /* 1 = num/str value has cached str/num eq */
#define VF_USER      0x0200  /* 1 = user input (may be numeric string) */
#define VF_SPECIAL   0x0400  /* 1 = requires extra handling when changed */
#define VF_WALK      0x0800  /* 1 = variable has alloc'd x.walker list */
#define VF_FSTR      0x1000  /* 1 = var::string points to fstring buffer */
#define VF_CHILD     0x2000  /* 1 = function arg; x.parent points to source */
#define VF_DIRTY     0x4000  /* 1 = variable was set explicitly */

/* these flags are static, don't change them when value is changed */
#define VF_DONTTOUCH (VF_ARRAY | VF_SPECIAL | VF_WALK | VF_CHILD | VF_DIRTY)
36 typedef struct var_s {
37 unsigned type; /* flags */
41 int aidx; /* func arg idx (for compilation stage) */
42 struct xhash_s *array; /* array ptr */
43 struct var_s *parent; /* for func args, ptr to actual parameter */
44 char **walker; /* list of array elements (for..in) */
48 /* Node chain (pattern-action chain, BEGIN, END, function bodies) */
49 typedef struct chain_s {
52 const char *programname;
56 typedef struct func_s {
62 typedef struct rstream_s {
71 typedef struct hash_item_s {
73 struct var_s v; /* variable/array hash */
74 struct rstream_s rs; /* redirect streams hash */
75 struct func_s f; /* functions hash */
77 struct hash_item_s *next; /* next in chain */
78 char name[1]; /* really it's longer */
81 typedef struct xhash_s {
82 unsigned nel; /* num of elements */
83 unsigned csize; /* current hash size */
84 unsigned nprime; /* next hash size in PRIMES[] */
85 unsigned glen; /* summary length of item names */
86 struct hash_item_s **items;
90 typedef struct node_s {
111 /* Block of temporary variables */
112 typedef struct nvblock_s {
115 struct nvblock_s *prev;
116 struct nvblock_s *next;
120 typedef struct tsplitter_s {
/* simple token classes */
/* Order and hex values are very important!!! See next_token() */
#define TC_SEQSTART  (1 << 0)   /* ( */
#define TC_SEQTERM   (1 << 1)   /* ) */
#define TC_REGEXP    (1 << 2)   /* /.../ */
#define TC_OUTRDR    (1 << 3)   /* | > >> */
#define TC_UOPPOST   (1 << 4)   /* unary postfix operator */
#define TC_UOPPRE1   (1 << 5)   /* unary prefix operator */
#define TC_BINOPX    (1 << 6)   /* two-opnd operator */
#define TC_IN        (1 << 7)
#define TC_COMMA     (1 << 8)
#define TC_PIPE      (1 << 9)   /* input redirection pipe */
#define TC_UOPPRE2   (1 << 10)  /* unary prefix operator */
#define TC_ARRTERM   (1 << 11)  /* ] */
#define TC_GRPSTART  (1 << 12)  /* { */
#define TC_GRPTERM   (1 << 13)  /* } */
#define TC_SEMICOL   (1 << 14)
#define TC_NEWLINE   (1 << 15)
#define TC_STATX     (1 << 16)  /* ctl statement (for, next...) */
#define TC_WHILE     (1 << 17)
#define TC_ELSE      (1 << 18)
#define TC_BUILTIN   (1 << 19)
#define TC_GETLINE   (1 << 20)
#define TC_FUNCDECL  (1 << 21)  /* `function' `func' */
#define TC_BEGIN     (1 << 22)
#define TC_END       (1 << 23)
#define TC_EOF       (1 << 24)
#define TC_VARIABLE  (1 << 25)
#define TC_ARRAY     (1 << 26)
#define TC_FUNCTION  (1 << 27)
#define TC_STRING    (1 << 28)
#define TC_NUMBER    (1 << 29)

/* either kind of unary prefix operator */
#define TC_UOPPRE    (TC_UOPPRE1 | TC_UOPPRE2)

/* combined token classes */
#define TC_BINOP     (TC_BINOPX | TC_COMMA | TC_PIPE | TC_IN)
#define TC_UNARYOP   (TC_UOPPRE | TC_UOPPOST)
#define TC_OPERAND   (TC_VARIABLE | TC_ARRAY | TC_FUNCTION \
                      | TC_BUILTIN | TC_GETLINE | TC_SEQSTART | TC_STRING | TC_NUMBER)

#define TC_STATEMNT  (TC_STATX | TC_WHILE)
#define TC_OPTERM    (TC_SEMICOL | TC_NEWLINE)

/* word tokens, cannot mean something else if not expected */
#define TC_WORD      (TC_IN | TC_STATEMNT | TC_ELSE | TC_BUILTIN \
                      | TC_GETLINE | TC_FUNCDECL | TC_BEGIN | TC_END)

/* discard newlines after these */
#define TC_NOTERM    (TC_COMMA | TC_GRPSTART | TC_GRPTERM \
                      | TC_BINOP | TC_OPTERM)

/* what can expression begin with */
#define TC_OPSEQ     (TC_OPERAND | TC_UOPPRE | TC_REGEXP)
/* what can group begin with */
#define TC_GRPSEQ    (TC_OPSEQ | TC_OPTERM | TC_STATEMNT | TC_GRPSTART)

/* if previous token class is CONCAT1 and next is CONCAT2, concatenation */
/* operator is inserted between them */
#define TC_CONCAT1   (TC_VARIABLE | TC_ARRTERM | TC_SEQTERM \
                      | TC_STRING | TC_NUMBER | TC_UOPPOST)
#define TC_CONCAT2   (TC_OPERAND | TC_UOPPRE)
/* operator flags (the OF_ bits occupy 0x010000..0x200000) */
#define OF_RES1    0x010000
#define OF_RES2    0x020000
#define OF_STR1    0x040000
#define OF_STR2    0x080000
#define OF_NUM1    0x100000
#define OF_CHECKED 0x200000

/* combined operator flags
 * Two-letter names: the first letter selects the OF_*1 bits, the second
 * the OF_*2 bits.  x = none, V = RES only, S = RES + STR, N = RES + NUM.
 * NOTE(review): "1"/"2" presumably mean first/second operand - confirm
 * against the evaluator.  The xx, xV and Vx combinations used by
 * tokeninfo[] are not part of this excerpt.
 */
#define xS (OF_RES2 | OF_STR2)

#define VV (OF_RES1 | OF_RES2)
#define Nx (OF_RES1 | OF_NUM1)
#define NV (OF_RES1 | OF_NUM1 | OF_RES2)
#define Sx (OF_RES1 | OF_STR1)
#define SV (OF_RES1 | OF_STR1 | OF_RES2)
#define SS (OF_RES1 | OF_STR1 | OF_RES2 | OF_STR2)
/* masks applied to an info word: operation class and low 7 bits */
#define OPCLSMASK 0xFF00
#define OPNMASK   0x007F

/* operator priority is the highest byte (even: r->l, odd: l->r grouping)
 * For builtins it has different meaning: n n s3 s2 s1 v3 v2 v1,
 * n - min. number of args, vN - resolve Nth arg to var, sN - resolve to string
 */
/* The argument is parenthesized and shifted as uint32_t: values such as
 * P(0x83) do not fit in a signed int once shifted, and an expression
 * argument must be shifted as a whole. */
#define P(x)      ((uint32_t)(x) << 24)
#define PRIMASK   0x7F000000
#define PRIMASK2  0x7E000000
/* Operation classes */

/* NOTE(review): these two limits have the same values as OC_WALKINIT
 * (0x0600) and OC_BINARY (0x1000) in the operation-class enum; the code
 * that compares against them is not part of this excerpt - confirm there. */
#define SHIFT_TIL_THIS  0x0600
#define RECUR_FROM_THIS 0x1000
224 OC_DELETE = 0x0100, OC_EXEC = 0x0200, OC_NEWSOURCE = 0x0300,
225 OC_PRINT = 0x0400, OC_PRINTF = 0x0500, OC_WALKINIT = 0x0600,
227 OC_BR = 0x0700, OC_BREAK = 0x0800, OC_CONTINUE = 0x0900,
228 OC_EXIT = 0x0a00, OC_NEXT = 0x0b00, OC_NEXTFILE = 0x0c00,
229 OC_TEST = 0x0d00, OC_WALKNEXT = 0x0e00,
231 OC_BINARY = 0x1000, OC_BUILTIN = 0x1100, OC_COLON = 0x1200,
232 OC_COMMA = 0x1300, OC_COMPARE = 0x1400, OC_CONCAT = 0x1500,
233 OC_FBLTIN = 0x1600, OC_FIELD = 0x1700, OC_FNARG = 0x1800,
234 OC_FUNC = 0x1900, OC_GETLINE = 0x1a00, OC_IN = 0x1b00,
235 OC_LAND = 0x1c00, OC_LOR = 0x1d00, OC_MATCH = 0x1e00,
236 OC_MOVE = 0x1f00, OC_PGETLINE = 0x2000, OC_REGEXP = 0x2100,
237 OC_REPLACE = 0x2200, OC_RETURN = 0x2300, OC_SPRINTF = 0x2400,
238 OC_TERNARY = 0x2500, OC_UNARY = 0x2600, OC_VAR = 0x2700,
241 ST_IF = 0x3000, ST_DO = 0x3100, ST_FOR = 0x3200,
245 /* simple builtins */
247 F_in, F_rn, F_co, F_ex, F_lg, F_si, F_sq, F_sr,
248 F_ti, F_le, F_sy, F_ff, F_cl
253 B_a2, B_ix, B_ma, B_sp, B_ss, B_ti, B_mt, B_lo, B_up,
255 B_an, B_co, B_ls, B_or, B_rs, B_xo,
258 /* tokens and their corresponding info values */
260 #define NTC "\377" /* switch to next token class (tc<<1) */
263 #define OC_B OC_BUILTIN
265 static const char tokenlist[] ALIGN1 =
268 "\1/" NTC /* REGEXP */
269 "\2>>" "\1>" "\1|" NTC /* OUTRDR */
270 "\2++" "\2--" NTC /* UOPPOST */
271 "\2++" "\2--" "\1$" NTC /* UOPPRE1 */
272 "\2==" "\1=" "\2+=" "\2-=" /* BINOPX */
273 "\2*=" "\2/=" "\2%=" "\2^="
274 "\1+" "\1-" "\3**=" "\2**"
275 "\1/" "\1%" "\1^" "\1*"
276 "\2!=" "\2>=" "\2<=" "\1>"
277 "\1<" "\2!~" "\1~" "\2&&"
278 "\2||" "\1?" "\1:" NTC
282 "\1+" "\1-" "\1!" NTC /* UOPPRE2 */
288 "\2if" "\2do" "\3for" "\5break" /* STATX */
289 "\10continue" "\6delete" "\5print"
290 "\6printf" "\4next" "\10nextfile"
291 "\6return" "\4exit" NTC
295 "\3and" "\5compl" "\6lshift" "\2or"
297 "\5close" "\6system" "\6fflush" "\5atan2" /* BUILTIN */
298 "\3cos" "\3exp" "\3int" "\3log"
299 "\4rand" "\3sin" "\4sqrt" "\5srand"
300 "\6gensub" "\4gsub" "\5index" "\6length"
301 "\5match" "\5split" "\7sprintf" "\3sub"
302 "\6substr" "\7systime" "\10strftime" "\6mktime"
303 "\7tolower" "\7toupper" NTC
305 "\4func" "\10function" NTC
310 static const uint32_t tokeninfo[] = {
314 xS|'a', xS|'w', xS|'|',
315 OC_UNARY|xV|P(9)|'p', OC_UNARY|xV|P(9)|'m',
316 OC_UNARY|xV|P(9)|'P', OC_UNARY|xV|P(9)|'M',
318 OC_COMPARE|VV|P(39)|5, OC_MOVE|VV|P(74),
319 OC_REPLACE|NV|P(74)|'+', OC_REPLACE|NV|P(74)|'-',
320 OC_REPLACE|NV|P(74)|'*', OC_REPLACE|NV|P(74)|'/',
321 OC_REPLACE|NV|P(74)|'%', OC_REPLACE|NV|P(74)|'&',
322 OC_BINARY|NV|P(29)|'+', OC_BINARY|NV|P(29)|'-',
323 OC_REPLACE|NV|P(74)|'&', OC_BINARY|NV|P(15)|'&',
324 OC_BINARY|NV|P(25)|'/', OC_BINARY|NV|P(25)|'%',
325 OC_BINARY|NV|P(15)|'&', OC_BINARY|NV|P(25)|'*',
326 OC_COMPARE|VV|P(39)|4, OC_COMPARE|VV|P(39)|3,
327 OC_COMPARE|VV|P(39)|0, OC_COMPARE|VV|P(39)|1,
328 OC_COMPARE|VV|P(39)|2, OC_MATCH|Sx|P(45)|'!',
329 OC_MATCH|Sx|P(45)|'~', OC_LAND|Vx|P(55),
330 OC_LOR|Vx|P(59), OC_TERNARY|Vx|P(64)|'?',
331 OC_COLON|xx|P(67)|':',
334 OC_PGETLINE|SV|P(37),
335 OC_UNARY|xV|P(19)|'+', OC_UNARY|xV|P(19)|'-',
336 OC_UNARY|xV|P(19)|'!',
342 ST_IF, ST_DO, ST_FOR, OC_BREAK,
343 OC_CONTINUE, OC_DELETE|Vx, OC_PRINT,
344 OC_PRINTF, OC_NEXT, OC_NEXTFILE,
345 OC_RETURN|Vx, OC_EXIT|Nx,
349 OC_B|B_an|P(0x83), OC_B|B_co|P(0x41), OC_B|B_ls|P(0x83), OC_B|B_or|P(0x83),
350 OC_B|B_rs|P(0x83), OC_B|B_xo|P(0x83),
351 OC_FBLTIN|Sx|F_cl, OC_FBLTIN|Sx|F_sy, OC_FBLTIN|Sx|F_ff, OC_B|B_a2|P(0x83),
352 OC_FBLTIN|Nx|F_co, OC_FBLTIN|Nx|F_ex, OC_FBLTIN|Nx|F_in, OC_FBLTIN|Nx|F_lg,
353 OC_FBLTIN|F_rn, OC_FBLTIN|Nx|F_si, OC_FBLTIN|Nx|F_sq, OC_FBLTIN|Nx|F_sr,
354 OC_B|B_ge|P(0xd6), OC_B|B_gs|P(0xb6), OC_B|B_ix|P(0x9b), OC_FBLTIN|Sx|F_le,
355 OC_B|B_ma|P(0x89), OC_B|B_sp|P(0x8b), OC_SPRINTF, OC_B|B_su|P(0xb6),
356 OC_B|B_ss|P(0x8f), OC_FBLTIN|F_ti, OC_B|B_ti|P(0x0b), OC_B|B_mt|P(0x0b),
357 OC_B|B_lo|P(0x49), OC_B|B_up|P(0x49),
364 /* internal variable names and their initial values */
365 /* asterisk marks SPECIAL vars; $ is just no-named Field0 */
367 CONVFMT, OFMT, FS, OFS,
368 ORS, RS, RT, FILENAME,
369 SUBSEP, F0, ARGIND, ARGC,
370 ARGV, ERRNO, FNR, NR,
371 NF, IGNORECASE, ENVIRON, NUM_INTERNAL_VARS
374 static const char vNames[] ALIGN1 =
375 "CONVFMT\0" "OFMT\0" "FS\0*" "OFS\0"
376 "ORS\0" "RS\0*" "RT\0" "FILENAME\0"
377 "SUBSEP\0" "$\0*" "ARGIND\0" "ARGC\0"
378 "ARGV\0" "ERRNO\0" "FNR\0" "NR\0"
379 "NF\0*" "IGNORECASE\0*" "ENVIRON\0" "\0";
381 static const char vValues[] ALIGN1 =
382 "%.6g\0" "%.6g\0" " \0" " \0"
383 "\n\0" "\n\0" "\0" "\0"
384 "\034\0" "\0" "\377";
386 /* hash size may grow to these values */
387 #define FIRST_PRIME 61
388 static const uint16_t PRIMES[] ALIGN2 = { 251, 1021, 4093, 16381, 65521 };
391 /* Globals. Split in two parts so that first one is addressed
392 * with (mostly short) negative offsets.
393 * NB: it's unsafe to put members of type "double"
394 * into globals2 (gcc may fail to align them).
398 chain beginseq, mainseq, endseq;
400 node *break_ptr, *continue_ptr;
402 xhash *vhash, *ahash, *fdhash, *fnhash;
403 const char *g_progname;
406 int maxfields; /* used in fsrealloc() only */
415 smallint is_f0_split;
418 uint32_t t_info; /* often used */
424 var *intvar[NUM_INTERNAL_VARS]; /* often used */
426 /* former statics from various functions */
427 char *split_f0__fstrings;
429 uint32_t next_token__save_tclass;
430 uint32_t next_token__save_info;
431 uint32_t next_token__ltclass;
432 smallint next_token__concat_inserted;
434 smallint next_input_file__files_happen;
435 rstream next_input_file__rsm;
437 var *evaluate__fnargs;
438 unsigned evaluate__seed;
439 regex_t evaluate__sreg;
443 tsplitter exec_builtin__tspl;
445 /* biggest and least used members go last */
446 tsplitter fsplitter, rsplitter;
/* G1 is the block placed immediately before the address held in
 * ptr_to_globals, G is the globals2 block at that address.  The macros
 * below let the rest of the file use plain names for their members. */
#define G1 (ptr_to_globals[-1])
#define G  (*(struct globals2 *)ptr_to_globals)
/* For debug. nm --size-sort awk.o | grep -vi ' [tr] ' */
/*char G1size[sizeof(G1)]; - 0x74 */
/*char Gsize[sizeof(G)]; - 0x1c4 */
/* Trying to keep most of members accessible with short offsets: */
/*char Gofs_seed[offsetof(struct globals2, evaluate__seed)]; - 0x90 */
#define t_double     (G1.t_double)
#define beginseq     (G1.beginseq)
#define mainseq      (G1.mainseq)
#define endseq       (G1.endseq)
#define seq          (G1.seq)
#define break_ptr    (G1.break_ptr)
#define continue_ptr (G1.continue_ptr)
#define vhash        (G1.vhash)
#define ahash        (G1.ahash)
#define fdhash       (G1.fdhash)
#define fnhash       (G1.fnhash)
#define g_progname   (G1.g_progname)
#define g_lineno     (G1.g_lineno)
#define nfields      (G1.nfields)
#define maxfields    (G1.maxfields)
#define Fields       (G1.Fields)
#define g_cb         (G1.g_cb)
#define g_pos        (G1.g_pos)
#define g_buf        (G1.g_buf)
#define icase        (G1.icase)
#define exiting      (G1.exiting)
#define nextrec      (G1.nextrec)
#define nextfile     (G1.nextfile)
#define is_f0_split  (G1.is_f0_split)
#define t_info       (G.t_info)
#define t_tclass     (G.t_tclass)
#define t_string     (G.t_string)
#define t_lineno     (G.t_lineno)
#define t_rollback   (G.t_rollback)
#define intvar       (G.intvar)
#define fsplitter    (G.fsplitter)
#define rsplitter    (G.rsplitter)
488 #define INIT_G() do { \
489 SET_PTR_TO_GLOBALS((char*)xzalloc(sizeof(G1)+sizeof(G)) + sizeof(G1)); \
490 G.next_token__ltclass = TC_OPTERM; \
491 G.evaluate__seed = 1; \
495 /* function prototypes */
496 static void handle_special(var *);
497 static node *parse_expr(uint32_t);
498 static void chain_group(void);
499 static var *evaluate(node *, var *);
500 static rstream *next_input_file(void);
501 static int fmt_num(char *, int, const char *, double, int);
502 static int awk_exit(int) NORETURN;
504 /* ---- error handling ---- */
506 static const char EMSG_INTERNAL_ERROR[] ALIGN1 = "Internal error";
507 static const char EMSG_UNEXP_EOS[] ALIGN1 = "Unexpected end of string";
508 static const char EMSG_UNEXP_TOKEN[] ALIGN1 = "Unexpected token";
509 static const char EMSG_DIV_BY_ZERO[] ALIGN1 = "Division by zero";
510 static const char EMSG_INV_FMT[] ALIGN1 = "Invalid format specifier";
511 static const char EMSG_TOO_FEW_ARGS[] ALIGN1 = "Too few arguments for builtin";
512 static const char EMSG_NOT_ARRAY[] ALIGN1 = "Not an array";
513 static const char EMSG_POSSIBLE_ERROR[] ALIGN1 = "Possible syntax error";
514 static const char EMSG_UNDEF_FUNC[] ALIGN1 = "Call to undefined function";
515 #if !ENABLE_FEATURE_AWK_LIBM
516 static const char EMSG_NO_MATH[] ALIGN1 = "Math support is not compiled in";
519 static void zero_out_var(var * vp)
521 memset(vp, 0, sizeof(*vp));
524 static void syntax_error(const char *message) NORETURN;
525 static void syntax_error(const char *message)
527 bb_error_msg_and_die("%s:%i: %s", g_progname, g_lineno, message);
530 /* ---- hash stuff ---- */
532 static unsigned hashidx(const char *name)
536 while (*name) idx = *name++ + (idx << 6) - idx;
540 /* create new hash */
541 static xhash *hash_init(void)
545 newhash = xzalloc(sizeof(xhash));
546 newhash->csize = FIRST_PRIME;
547 newhash->items = xzalloc(newhash->csize * sizeof(hash_item *));
552 /* find item in hash, return ptr to data, NULL if not found */
553 static void *hash_search(xhash *hash, const char *name)
557 hi = hash->items [ hashidx(name) % hash->csize ];
559 if (strcmp(hi->name, name) == 0)
566 /* grow hash if it becomes too big */
567 static void hash_rebuild(xhash *hash)
569 unsigned newsize, i, idx;
570 hash_item **newitems, *hi, *thi;
572 if (hash->nprime == ARRAY_SIZE(PRIMES))
575 newsize = PRIMES[hash->nprime++];
576 newitems = xzalloc(newsize * sizeof(hash_item *));
578 for (i = 0; i < hash->csize; i++) {
583 idx = hashidx(thi->name) % newsize;
584 thi->next = newitems[idx];
590 hash->csize = newsize;
591 hash->items = newitems;
594 /* find item in hash, add it if necessary. Return ptr to data */
595 static void *hash_find(xhash *hash, const char *name)
601 hi = hash_search(hash, name);
603 if (++hash->nel / hash->csize > 10)
606 l = strlen(name) + 1;
607 hi = xzalloc(sizeof(*hi) + l);
608 strcpy(hi->name, name);
610 idx = hashidx(name) % hash->csize;
611 hi->next = hash->items[idx];
612 hash->items[idx] = hi;
/* Typed wrappers around hash_find() (find item, add it if necessary):
 * each casts the returned data pointer to the type kept in that hash. */
#define findvar(hash, name) ((var*)     hash_find((hash), (name)))
#define newvar(name)        ((var*)     hash_find(vhash, (name)))
#define newfile(name)       ((rstream*) hash_find(fdhash, (name)))
#define newfunc(name)       ((func*)    hash_find(fnhash, (name)))
623 static void hash_remove(xhash *hash, const char *name)
625 hash_item *hi, **phi;
627 phi = &(hash->items[hashidx(name) % hash->csize]);
630 if (strcmp(hi->name, name) == 0) {
631 hash->glen -= (strlen(name) + 1);
641 /* ------ some useful functions ------ */
643 static void skip_spaces(char **s)
648 if (*p == '\\' && p[1] == '\n') {
651 } else if (*p != ' ' && *p != '\t') {
659 static char *nextword(char **s)
663 while (*(*s)++) /* */;
668 static char nextchar(char **s)
674 if (c == '\\') c = bb_process_escape_sequence((const char**)s);
675 if (c == '\\' && *s == pps) c = *((*s)++);
679 static ALWAYS_INLINE int isalnum_(int c)
681 return (isalnum(c) || c == '_');
684 static double my_strtod(char **pp)
688 && ((((*pp)[1] | 0x20) == 'x') || isdigit((*pp)[1]))
690 return strtoull(*pp, pp, 0);
693 return strtod(*pp, pp);
696 /* -------- working with variables (set/get/copy/etc) -------- */
698 static xhash *iamarray(var *v)
702 while (a->type & VF_CHILD)
705 if (!(a->type & VF_ARRAY)) {
707 a->x.array = hash_init();
712 static void clear_array(xhash *array)
717 for (i = 0; i < array->csize; i++) {
718 hi = array->items[i];
722 free(thi->data.v.string);
725 array->items[i] = NULL;
727 array->glen = array->nel = 0;
730 /* clear a variable */
731 static var *clrvar(var *v)
733 if (!(v->type & VF_FSTR))
736 v->type &= VF_DONTTOUCH;
742 /* assign string value to variable */
743 static var *setvar_p(var *v, char *value)
751 /* same as setvar_p but make a copy of string */
752 static var *setvar_s(var *v, const char *value)
754 return setvar_p(v, (value && *value) ? xstrdup(value) : NULL);
757 /* same as setvar_s but set USER flag */
758 static var *setvar_u(var *v, const char *value)
765 /* set array element to user string */
766 static void setari_u(var *a, int idx, const char *s)
770 v = findvar(iamarray(a), itoa(idx));
774 /* assign numeric value to variable */
775 static var *setvar_i(var *v, double value)
778 v->type |= VF_NUMBER;
784 static const char *getvar_s(var *v)
786 /* if v is numeric and has no cached string, convert it to string */
787 if ((v->type & (VF_NUMBER | VF_CACHED)) == VF_NUMBER) {
788 fmt_num(g_buf, MAXVARFMT, getvar_s(intvar[CONVFMT]), v->number, TRUE);
789 v->string = xstrdup(g_buf);
790 v->type |= VF_CACHED;
792 return (v->string == NULL) ? "" : v->string;
795 static double getvar_i(var *v)
799 if ((v->type & (VF_NUMBER | VF_CACHED)) == 0) {
803 v->number = my_strtod(&s);
804 if (v->type & VF_USER) {
812 v->type |= VF_CACHED;
817 /* Used for operands of bitwise ops */
818 static unsigned long getvar_i_int(var *v)
820 double d = getvar_i(v);
822 /* Casting doubles to longs is undefined for values outside
823 * of target type range. Try to widen it as much as possible */
825 return (unsigned long)d;
826 /* Why? Think about d == -4294967295.0 (assuming 32bit longs) */
827 return - (long) (unsigned long) (-d);
830 static var *copyvar(var *dest, const var *src)
834 dest->type |= (src->type & ~(VF_DONTTOUCH | VF_FSTR));
835 dest->number = src->number;
837 dest->string = xstrdup(src->string);
839 handle_special(dest);
843 static var *incvar(var *v)
845 return setvar_i(v, getvar_i(v) + 1.);
848 /* return true if v is number or numeric string */
849 static int is_numeric(var *v)
852 return ((v->type ^ VF_DIRTY) & (VF_NUMBER | VF_USER | VF_DIRTY));
855 /* return 1 when value of v corresponds to true, 0 otherwise */
856 static int istrue(var *v)
859 return (v->number == 0) ? 0 : 1;
860 return (v->string && *(v->string)) ? 1 : 0;
863 /* temporary variables allocator. Last allocated should be first freed */
864 static var *nvalloc(int n)
872 if ((g_cb->pos - g_cb->nv) + n <= g_cb->size) break;
877 size = (n <= MINNVBLOCK) ? MINNVBLOCK : n;
878 g_cb = xzalloc(sizeof(nvblock) + size * sizeof(var));
880 g_cb->pos = g_cb->nv;
882 /*g_cb->next = NULL; - xzalloc did it */
883 if (pb) pb->next = g_cb;
889 while (v < g_cb->pos) {
898 static void nvfree(var *v)
902 if (v < g_cb->nv || v >= g_cb->pos)
903 syntax_error(EMSG_INTERNAL_ERROR);
905 for (p = v; p < g_cb->pos; p++) {
906 if ((p->type & (VF_ARRAY | VF_CHILD)) == VF_ARRAY) {
907 clear_array(iamarray(p));
908 free(p->x.array->items);
911 if (p->type & VF_WALK)
918 while (g_cb->prev && g_cb->pos == g_cb->nv) {
923 /* ------- awk program text parsing ------- */
925 /* Parse next token pointed to by global pos, place results into global ttt.
926 * If token isn't expected, give up. Return token class
928 static uint32_t next_token(uint32_t expected)
930 #define concat_inserted (G.next_token__concat_inserted)
931 #define save_tclass (G.next_token__save_tclass)
932 #define save_info (G.next_token__save_info)
933 /* Initialized to TC_OPTERM: */
934 #define ltclass (G.next_token__ltclass)
945 } else if (concat_inserted) {
946 concat_inserted = FALSE;
947 t_tclass = save_tclass;
956 while (*p != '\n' && *p != '\0')
965 } else if (*p == '\"') {
969 if (*p == '\0' || *p == '\n')
970 syntax_error(EMSG_UNEXP_EOS);
971 *(s++) = nextchar(&p);
977 } else if ((expected & TC_REGEXP) && *p == '/') {
981 if (*p == '\0' || *p == '\n')
982 syntax_error(EMSG_UNEXP_EOS);
986 *(s-1) = bb_process_escape_sequence((const char **)&p);
997 } else if (*p == '.' || isdigit(*p)) {
999 t_double = my_strtod(&p);
1001 syntax_error(EMSG_UNEXP_TOKEN);
1005 /* search for something known */
1015 /* if token class is expected, token
1016 * matches and it's not a longer word,
1017 * then this is what we are looking for
1019 if ((tc & (expected | TC_WORD | TC_NEWLINE))
1020 && *tl == *p && strncmp(p, tl, l) == 0
1021 && !((tc & TC_WORD) && isalnum_(p[l]))
1032 /* it's a name (var/array/function),
1033 * otherwise it's something wrong
1036 syntax_error(EMSG_UNEXP_TOKEN);
1039 while (isalnum_(*(++p))) {
1044 /* also consume whitespace between functionname and bracket */
1045 if (!(expected & TC_VARIABLE))
1059 /* skipping newlines in some cases */
1060 if ((ltclass & TC_NOTERM) && (tc & TC_NEWLINE))
1063 /* insert concatenation operator when needed */
1064 if ((ltclass & TC_CONCAT1) && (tc & TC_CONCAT2) && (expected & TC_BINOP)) {
1065 concat_inserted = TRUE;
1069 t_info = OC_CONCAT | SS | P(35);
1076 /* Are we ready for this? */
1077 if (!(ltclass & expected))
1078 syntax_error((ltclass & (TC_NEWLINE | TC_EOF)) ?
1079 EMSG_UNEXP_EOS : EMSG_UNEXP_TOKEN);
1082 #undef concat_inserted
1088 static void rollback_token(void)
1093 static node *new_node(uint32_t info)
1097 n = xzalloc(sizeof(node));
1099 n->lineno = g_lineno;
1103 static node *mk_re_node(const char *s, node *n, regex_t *re)
1105 n->info = OC_REGEXP;
1108 xregcomp(re, s, REG_EXTENDED);
1109 xregcomp(re + 1, s, REG_EXTENDED | REG_ICASE);
1114 static node *condition(void)
1116 next_token(TC_SEQSTART);
1117 return parse_expr(TC_SEQTERM);
1120 /* parse expression terminated by given argument, return ptr
1121 * to built subtree. Terminator is eaten by parse_expr */
1122 static node *parse_expr(uint32_t iexp)
1131 sn.r.n = glptr = NULL;
1132 xtc = TC_OPERAND | TC_UOPPRE | TC_REGEXP | iexp;
1134 while (!((tc = next_token(xtc)) & iexp)) {
1135 if (glptr && (t_info == (OC_COMPARE | VV | P(39) | 2))) {
1136 /* input redirection (<) attached to glptr node */
1137 cn = glptr->l.n = new_node(OC_CONCAT | SS | P(37));
1139 xtc = TC_OPERAND | TC_UOPPRE;
1142 } else if (tc & (TC_BINOP | TC_UOPPOST)) {
1143 /* for binary and postfix-unary operators, jump back over
1144 * previous operators with higher priority */
1146 while ( ((t_info & PRIMASK) > (vn->a.n->info & PRIMASK2))
1147 || ((t_info == vn->info) && ((t_info & OPCLSMASK) == OC_COLON)) )
1149 if ((t_info & OPCLSMASK) == OC_TERNARY)
1151 cn = vn->a.n->r.n = new_node(t_info);
1153 if (tc & TC_BINOP) {
1155 xtc = TC_OPERAND | TC_UOPPRE | TC_REGEXP;
1156 if ((t_info & OPCLSMASK) == OC_PGETLINE) {
1158 next_token(TC_GETLINE);
1159 /* give maximum priority to this pipe */
1160 cn->info &= ~PRIMASK;
1161 xtc = TC_OPERAND | TC_UOPPRE | TC_BINOP | iexp;
1165 xtc = TC_OPERAND | TC_UOPPRE | TC_BINOP | iexp;
1170 /* for operands and prefix-unary operators, attach them
1173 cn = vn->r.n = new_node(t_info);
1175 xtc = TC_OPERAND | TC_UOPPRE | TC_REGEXP;
1176 if (tc & (TC_OPERAND | TC_REGEXP)) {
1177 xtc = TC_UOPPRE | TC_UOPPOST | TC_BINOP | TC_OPERAND | iexp;
1178 /* one should be very careful with switch on tclass -
1179 * only simple tclasses should be used! */
1184 v = hash_search(ahash, t_string);
1186 cn->info = OC_FNARG;
1187 cn->l.i = v->x.aidx;
1189 cn->l.v = newvar(t_string);
1191 if (tc & TC_ARRAY) {
1193 cn->r.n = parse_expr(TC_ARRTERM);
1200 v = cn->l.v = xzalloc(sizeof(var));
1202 setvar_i(v, t_double);
1204 setvar_s(v, t_string);
1208 mk_re_node(t_string, cn, xzalloc(sizeof(regex_t)*2));
1213 cn->r.f = newfunc(t_string);
1214 cn->l.n = condition();
1218 cn = vn->r.n = parse_expr(TC_SEQTERM);
1224 xtc = TC_OPERAND | TC_UOPPRE | TC_BINOP | iexp;
1228 cn->l.n = condition();
1237 /* add node to chain. Return ptr to alloc'd node */
1238 static node *chain_node(uint32_t info)
1243 seq->first = seq->last = new_node(0);
1245 if (seq->programname != g_progname) {
1246 seq->programname = g_progname;
1247 n = chain_node(OC_NEWSOURCE);
1248 n->l.s = xstrdup(g_progname);
1253 seq->last = n->a.n = new_node(OC_DONE);
1258 static void chain_expr(uint32_t info)
1262 n = chain_node(info);
1263 n->l.n = parse_expr(TC_OPTERM | TC_GRPTERM);
1264 if (t_tclass & TC_GRPTERM)
1268 static node *chain_loop(node *nn)
1270 node *n, *n2, *save_brk, *save_cont;
1272 save_brk = break_ptr;
1273 save_cont = continue_ptr;
1275 n = chain_node(OC_BR | Vx);
1276 continue_ptr = new_node(OC_EXEC);
1277 break_ptr = new_node(OC_EXEC);
1279 n2 = chain_node(OC_EXEC | Vx);
1282 continue_ptr->a.n = n2;
1283 break_ptr->a.n = n->r.n = seq->last;
1285 continue_ptr = save_cont;
1286 break_ptr = save_brk;
1291 /* parse group and attach it to chain */
1292 static void chain_group(void)
1298 c = next_token(TC_GRPSEQ);
1299 } while (c & TC_NEWLINE);
1301 if (c & TC_GRPSTART) {
1302 while (next_token(TC_GRPSEQ | TC_GRPTERM) != TC_GRPTERM) {
1303 if (t_tclass & TC_NEWLINE) continue;
1307 } else if (c & (TC_OPSEQ | TC_OPTERM)) {
1309 chain_expr(OC_EXEC | Vx);
1310 } else { /* TC_STATEMNT */
1311 switch (t_info & OPCLSMASK) {
1313 n = chain_node(OC_BR | Vx);
1314 n->l.n = condition();
1316 n2 = chain_node(OC_EXEC);
1318 if (next_token(TC_GRPSEQ | TC_GRPTERM | TC_ELSE) == TC_ELSE) {
1320 n2->a.n = seq->last;
1328 n = chain_loop(NULL);
1333 n2 = chain_node(OC_EXEC);
1334 n = chain_loop(NULL);
1336 next_token(TC_WHILE);
1337 n->l.n = condition();
1341 next_token(TC_SEQSTART);
1342 n2 = parse_expr(TC_SEMICOL | TC_SEQTERM);
1343 if (t_tclass & TC_SEQTERM) { /* for-in */
1344 if ((n2->info & OPCLSMASK) != OC_IN)
1345 syntax_error(EMSG_UNEXP_TOKEN);
1346 n = chain_node(OC_WALKINIT | VV);
1349 n = chain_loop(NULL);
1350 n->info = OC_WALKNEXT | Vx;
1352 } else { /* for (;;) */
1353 n = chain_node(OC_EXEC | Vx);
1355 n2 = parse_expr(TC_SEMICOL);
1356 n3 = parse_expr(TC_SEQTERM);
1366 n = chain_node(t_info);
1367 n->l.n = parse_expr(TC_OPTERM | TC_OUTRDR | TC_GRPTERM);
1368 if (t_tclass & TC_OUTRDR) {
1370 n->r.n = parse_expr(TC_OPTERM | TC_GRPTERM);
1372 if (t_tclass & TC_GRPTERM)
1377 n = chain_node(OC_EXEC);
1382 n = chain_node(OC_EXEC);
1383 n->a.n = continue_ptr;
1386 /* delete, next, nextfile, return, exit */
1393 static void parse_program(char *p)
1402 while ((tclass = next_token(TC_EOF | TC_OPSEQ | TC_GRPSTART |
1403 TC_OPTERM | TC_BEGIN | TC_END | TC_FUNCDECL)) != TC_EOF) {
1405 if (tclass & TC_OPTERM)
1409 if (tclass & TC_BEGIN) {
1413 } else if (tclass & TC_END) {
1417 } else if (tclass & TC_FUNCDECL) {
1418 next_token(TC_FUNCTION);
1420 f = newfunc(t_string);
1421 f->body.first = NULL;
1423 while (next_token(TC_VARIABLE | TC_SEQTERM) & TC_VARIABLE) {
1424 v = findvar(ahash, t_string);
1425 v->x.aidx = (f->nargs)++;
1427 if (next_token(TC_COMMA | TC_SEQTERM) & TC_SEQTERM)
1434 } else if (tclass & TC_OPSEQ) {
1436 cn = chain_node(OC_TEST);
1437 cn->l.n = parse_expr(TC_OPTERM | TC_EOF | TC_GRPSTART);
1438 if (t_tclass & TC_GRPSTART) {
1442 chain_node(OC_PRINT);
1444 cn->r.n = mainseq.last;
1446 } else /* if (tclass & TC_GRPSTART) */ {
1454 /* -------- program execution part -------- */
1456 static node *mk_splitter(const char *s, tsplitter *spl)
1464 if ((n->info & OPCLSMASK) == OC_REGEXP) {
1466 regfree(ire); // TODO: nuke ire, use re+1?
1468 if (strlen(s) > 1) {
1469 mk_re_node(s, n, re);
1471 n->info = (uint32_t) *s;
1477 /* use node as a regular expression. Supplied with node ptr and regex_t
1478 * storage space. Return ptr to regex (if result points to preg, it should
1479 * be regfree'd manually later)
1481 static regex_t *as_regex(node *op, regex_t *preg)
1487 if ((op->info & OPCLSMASK) == OC_REGEXP) {
1488 return icase ? op->r.ire : op->l.re;
1491 s = getvar_s(evaluate(op, v));
1493 cflags = icase ? REG_EXTENDED | REG_ICASE : REG_EXTENDED;
1494 /* Testcase where REG_EXTENDED fails (unpaired '{'):
1495 * echo Hi | awk 'gsub("@(samp|code|file)\{","");'
1496 * gawk 3.1.5 eats this. We revert to ~REG_EXTENDED
1497 * (maybe gsub is not supposed to use REG_EXTENDED?).
1499 if (regcomp(preg, s, cflags)) {
1500 cflags &= ~REG_EXTENDED;
1501 xregcomp(preg, s, cflags);
/* gradually increasing buffer */
/*
 * Make sure *b can hold more than n bytes.  When *b is NULL or n has
 * reached *size, the buffer is resized to n + n/2 + 80 bytes with
 * xrealloc() and *size is updated; otherwise nothing changes.
 */
static void qrealloc(char **b, int n, int *size)
{
	if (!*b || n >= *size) {
		*size = n + (n>>1) + 80;
		*b = xrealloc(*b, *size);
	}
}
1516 /* resize field storage space */
1517 static void fsrealloc(int size)
1521 if (size >= maxfields) {
1523 maxfields = size + 16;
1524 Fields = xrealloc(Fields, maxfields * sizeof(var));
1525 for (; i < maxfields; i++) {
1526 Fields[i].type = VF_SPECIAL;
1527 Fields[i].string = NULL;
1531 if (size < nfields) {
1532 for (i = size; i < nfields; i++) {
1539 static int awk_split(const char *s, node *spl, char **slist)
1544 regmatch_t pmatch[2]; // TODO: why [2]? [1] is enough...
1546 /* in worst case, each char would be a separate field */
1547 *slist = s1 = xzalloc(strlen(s) * 2 + 3);
1550 c[0] = c[1] = (char)spl->info;
1552 if (*getvar_s(intvar[RS]) == '\0')
1555 if ((spl->info & OPCLSMASK) == OC_REGEXP) { /* regex split */
1557 return n; /* "": zero fields */
1558 n++; /* at least one field will be there */
1560 l = strcspn(s, c+2); /* len till next NUL or \n */
1561 if (regexec(icase ? spl->r.ire : spl->l.re, s, 1, pmatch, 0) == 0
1562 && pmatch[0].rm_so <= l
1564 l = pmatch[0].rm_so;
1565 if (pmatch[0].rm_eo == 0) {
1569 n++; /* we saw yet another delimiter */
1571 pmatch[0].rm_eo = l;
1576 /* make sure we remove *all* of the separator chars */
1579 } while (++l < pmatch[0].rm_eo);
1581 s += pmatch[0].rm_eo;
1585 if (c[0] == '\0') { /* null split */
1593 if (c[0] != ' ') { /* single-character split */
1595 c[0] = toupper(c[0]);
1596 c[1] = tolower(c[1]);
1599 while ((s1 = strpbrk(s1, c))) {
1607 s = skip_whitespace(s);
1610 while (*s && !isspace(*s))
1617 static void split_f0(void)
1619 /* static char *fstrings; */
1620 #define fstrings (G.split_f0__fstrings)
1631 n = awk_split(getvar_s(intvar[F0]), &fsplitter.n, &fstrings);
1634 for (i = 0; i < n; i++) {
1635 Fields[i].string = nextword(&s);
1636 Fields[i].type |= (VF_FSTR | VF_USER | VF_DIRTY);
1639 /* set NF manually to avoid side effects */
1641 intvar[NF]->type = VF_NUMBER | VF_SPECIAL;
1642 intvar[NF]->number = nfields;
1646 /* perform additional actions when some internal variables changed */
/* v: the variable that was just modified; the action depends on which internal variable it is */
1647 static void handle_special(var *v)
1651 const char *sep, *s;
1652 int sl, l, len, i, bsize;
/* only variables flagged VF_SPECIAL are of interest */
1654 if (!(v->type & VF_SPECIAL))
/* NF changed: n is its new value as an int */
1657 if (v == intvar[NF]) {
1658 n = (int)getvar_i(v);
1661 /* recalculate $0 */
1662 sep = getvar_s(intvar[OFS]);
/* build the new record in b from the first n fields and the OFS string (sep) */
1666 for (i = 0; i < n; i++) {
1667 s = getvar_s(&Fields[i]);
1670 memcpy(b+len, sep, sl);
1673 qrealloc(&b, len+l+sl, &bsize);
1674 memcpy(b+len, s, l);
/* b becomes the value of $0 */
1679 setvar_p(intvar[F0], b);
/* $0 changed: the record is no longer marked as split into fields */
1682 } else if (v == intvar[F0]) {
1683 is_f0_split = FALSE;
/* FS changed: rebuild the field splitter from its string value */
1685 } else if (v == intvar[FS]) {
1686 mk_splitter(getvar_s(v), &fsplitter);
/* RS changed: rebuild the record splitter from its string value */
1688 } else if (v == intvar[RS]) {
1689 mk_splitter(getvar_s(v), &rsplitter);
1691 } else if (v == intvar[IGNORECASE]) {
/* v - Fields is used as a field index here: NF becomes the larger of n and that index + 1 */
1695 n = getvar_i(intvar[NF]);
1696 setvar_i(intvar[NF], n > v-Fields ? n : v-Fields+1);
1697 /* right here v is invalid. Just to note... */
1701 /* step through func/builtin/etc arguments */
/* pn: in/out pointer to the current node of the argument list */
1702 static node *nextarg(node **pn)
/* a non-NULL node of class OC_COMMA gets separate treatment */
1707 if (n && (n->info & OPCLSMASK) == OC_COMMA) {
/* Prepare variable v for walking over the item names of array (used to initialize a for-in loop). */
1716 static void hashwalk_init(var *v, xhash *array)
/* v already carries a walker list */
1722 if (v->type & VF_WALK)
/* one zeroed buffer: two char* slots followed by room for the names (glen is the summed length of item names) */
1726 w = v->x.walker = xzalloc(2 + 2*sizeof(char *) + array->glen);
/* both slots start out pointing at the storage right behind them */
1727 w[0] = w[1] = (char *)(w + 2);
/* go through every hash bucket and copy item names into the buffer */
1728 for (i = 0; i < array->csize; i++) {
1729 hi = array->items[i];
1731 strcpy(*w, hi->name);
/* Advance the walk attached to v; the OC_WALKNEXT handler branches on the returned int. */
1738 static int hashwalk_next(var *v)
/* the next stored name becomes the string value of v */
1746 setvar_s(v, nextword(w+1));
1750 /* evaluate node, return 1 when result is true, 0 otherwise */
/* pattern: node to evaluate; the result is stored in the shared variable G.ptest__v */
1751 static int ptest(node *pattern)
1753 /* ptest__v is "static": to save stack space? */
1754 return istrue(evaluate(pattern, &G.ptest__v));
1757 /* read next record from stream rsm into a variable v */
/* The main loop of this file treats a return value > 0 as "a record was read". */
1758 static int awk_getline(rstream *rsm, var *v)
/* pmatch receives the record-separator regex match */
1761 regmatch_t pmatch[2];
1762 int a, p, pp=0, size;
1763 int fd, so, eo, r, rp;
1766 /* we're using our own buffer since we need access to accumulating
1769 fd = fileno(rsm->F);
// c: low byte of the RS splitter info, used below as the separator character
1774 c = (char) rsplitter.n.info;
// make sure a buffer exists, starting with a request for 256 bytes
1777 if (!m) qrealloc(&m, 256, &size);
// RS is a regex: so and eo are the start and end offsets of the match in b
1783 if ((rsplitter.n.info & OPCLSMASK) == OC_REGEXP) {
1784 if (regexec(icase ? rsplitter.n.r.ire : rsplitter.n.l.re,
1785 b, 1, pmatch, 0) == 0) {
1786 so = pmatch[0].rm_so;
1787 eo = pmatch[0].rm_eo;
// RS is a single non-NUL character: look for it from offset pp, falling back to a NUL byte within the p - pp bytes
1791 } else if (c != '\0') {
1792 s = strchr(b+pp, c);
1793 if (!s) s = memchr(b+pp, '\0', p - pp);
// separator character is NUL: step over newlines at rp, then look for an empty line
1800 while (b[rp] == '\n')
1802 s = strstr(b+rp, "\n\n");
// extend eo over every newline that follows
1805 while (b[eo] == '\n') eo++;
// move p+1 bytes from offset a down to the start of the buffer
1813 memmove(m, (const void *)(m+a), p+1);
// grow the buffer
1818 qrealloc(&m, a+p+128, &size);
// read more input behind the p bytes already held, leaving one byte spare
1821 p += safe_read(fd, b+p, size-p-1);
// publish errno to the awk program through ERRNO
1825 setvar_i(intvar[ERRNO], errno);
// terminate the text at so, keeping the overwritten byte in c
1834 c = b[so]; b[so] = '\0';
// terminate at eo the same way so that RT gets exactly the text from so to eo
1838 c = b[eo]; b[eo] = '\0';
1839 setvar_s(intvar[RT], b+so);
// Format the number n into b (at most size bytes, via snprintf) using a printf-style format.
// With int_as_int set, a value that equals its own int conversion is printed with "%d" instead.
1851 static int fmt_num(char *b, int size, const char *format, double n, int int_as_int)
1855 const char *s = format;
1857 if (int_as_int && n == (int)n) {
1858 r = snprintf(b, size, "%d", (int)n);
// leave c holding the last character of format, i.e. the conversion letter
1860 do { c = *s; } while (c && *++s);
// integer conversions: n is passed as an int
// NOTE(review): strchr also matches c == '\0', so an empty format takes this branch
1861 if (strchr("diouxX", c)) {
1862 r = snprintf(b, size, format, (int)n);
// floating-point conversions: n is passed as a double
1863 } else if (strchr("eEfgG", c)) {
1864 r = snprintf(b, size, format, n);
// any other conversion letter is reported as an invalid format
1866 syntax_error(EMSG_INV_FMT);
1872 /* formatted output into an allocated buffer, return ptr to buffer */
/* n: argument list; the first argument evaluated supplies the format string */
1873 static char *awk_printf(node *n)
1878 int i, j, incr, bsize;
/* work on a private copy of the format string */
1883 fmt = f = xstrdup(getvar_s(evaluate(nextarg(&n), v)));
/* move f forward to the next '%' that is not followed by a second '%' */
1888 while (*f && (*f != '%' || *(++f) == '%'))
/* step over the non-alphabetic characters that precede the conversion letter */
1890 while (*f && !isalpha(*f)) {
1892 syntax_error("%*x formats are not supported");
/* make room for this piece of the format plus MAXVARFMT more bytes */
1896 incr = (f - s) + MAXVARFMT;
1897 qrealloc(&b, incr + i, &bsize);
/* fetch the next argument value */
1902 arg = evaluate(nextarg(&n), v);
/* 'c' (or no conversion letter): a numeric argument is cast to char, otherwise the first char of its string is used */
1905 if (c == 'c' || !c) {
1906 i += sprintf(b+i, s, is_numeric(arg) ?
1907 (char)getvar_i(arg) : *getvar_s(arg));
/* 's': enlarge the buffer by the length of the string before printing it */
1908 } else if (c == 's') {
1910 qrealloc(&b, incr+i+strlen(s1), &bsize);
1911 i += sprintf(b+i, s, s1);
/* numeric output goes through fmt_num with int_as_int off */
1913 i += fmt_num(b+i, incr, s, getvar_i(arg), FALSE);
1917 /* if there was an error while sprintf, return value is negative */
/* resize the buffer to i + 1 bytes */
1921 b = xrealloc(b, i + 1);
1928 /* common substitution routine
1929 * replace (nm) substring of (src) that match (n) with (repl), store
1930 * result into (dest), return number of substitutions. If nm=0, replace
1931 * all matches. If src or dst is NULL, use $0. If ex=TRUE, enable
1932 * subexpression matching (\1-\9)
1934 static int awk_sub(node *rn, const char *repl, int nm, var *src, var *dest, int ex)
1939 int c, i, j, di, rl, so, eo, nbs, n, dssize;
// room for the whole match plus nine subexpressions
1940 regmatch_t pmatch[10];
// obtain the regex for rn; sreg is the scratch regex_t that is freed at the end when it was the one used
1943 re = as_regex(rn, &sreg);
// NULL src or dest means $0
1944 if (!src) src = intvar[F0];
1945 if (!dest) dest = intvar[F0];
// match again and again; REG_NOTBOL is passed once sp no longer points at the start of the source string
1950 while (regexec(re, sp, 10, pmatch, sp==getvar_s(src) ? 0 : REG_NOTBOL) == 0) {
1951 so = pmatch[0].rm_so;
1952 eo = pmatch[0].rm_eo;
// copy the input up to the end of the match into the output buffer ds
1954 qrealloc(&ds, di + eo + rl, &dssize);
1955 memcpy(ds + di, sp, eo);
// go through the replacement text one character at a time
1961 for (s = repl; *s; s++) {
// '&' always, and a digit when ex is set, refers to matched text
1967 if (c == '&' || (ex && c >= '0' && c <= '9')) {
// move the output position back by (nbs + 3) / 2 -- presumably nbs counts preceding backslashes; confirm
1968 di -= ((nbs + 3) >> 1);
// append the text of submatch j
1977 n = pmatch[j].rm_eo - pmatch[j].rm_so;
1978 qrealloc(&ds, di + rl + n, &dssize);
1979 memcpy(ds + di, sp + pmatch[j].rm_so, n);
// stop once a NUL byte has been stepped over in ds
1991 if (!ds[di++]) break;
// append what is left of the input at sp
1995 qrealloc(&ds, di + strlen(sp), &dssize);
1996 strcpy(ds + di, sp);
// release the regex only when it lives in sreg
1998 if (re == &sreg) regfree(re);
// Parse whitespace-separated numbers from ds into a struct tm and convert it with mktime().
2002 static NOINLINE int do_mktime(const char *ds)
2007 /*memset(&then, 0, sizeof(then)); - not needed */
2008 then.tm_isdst = -1; /* default is unknown */
2010 /* manpage of mktime says these fields are ints,
2011 * so we can sscanf stuff directly into them */
/* six unsigned numbers in the order year, month, day, hour, minute, second, then one signed number */
2012 count = sscanf(ds, "%u %u %u %u %u %u %d",
2013 &then.tm_year, &then.tm_mon, &then.tm_mday,
2014 &then.tm_hour, &then.tm_min, &then.tm_sec,
/* terms of the validity test: the month and the year are compared as unsigned values */
2018 || (unsigned)then.tm_mon < 1
2019 || (unsigned)then.tm_year < 1900
/* struct tm counts years from 1900 */
2025 then.tm_year -= 1900;
2027 return mktime(&then);
/* Run the builtin function described by node op. res is the variable that receives the result; */
/* the caller in evaluate() assigns the returned pointer back to its own res. */
2030 static NOINLINE var *exec_builtin(node *op, var *res)
2032 #define tspl (G.exec_builtin__tspl)
2038 regmatch_t pmatch[2];
/* isr and info both start as the info word of the node */
2048 isr = info = op->info;
2051 av[2] = av[3] = NULL;
/* collect up to four arguments: an[] holds the nodes, av[] the evaluated values, as[] their strings; */
/* bits of isr decide whether an argument is evaluated and whether its string is fetched */
2052 for (i = 0; i < 4 && op; i++) {
2053 an[i] = nextarg(&op);
2054 if (isr & 0x09000000) av[i] = evaluate(an[i], &tv[i]);
2055 if (isr & 0x08000000) as[i] = getvar_s(av[i]);
/* the top two bits of info give the minimum number of arguments */
2060 if ((uint32_t)nargs < (info >> 30))
2061 syntax_error(EMSG_TOO_FEW_ARGS);
/* atan2 of the first two arguments, available only when the math library feature is enabled */
2067 #if ENABLE_FEATURE_AWK_LIBM
2068 setvar_i(res, atan2(getvar_i(av[0]), getvar_i(av[1])));
2070 syntax_error(EMSG_NO_MATH);
/* splitter: a regexp node given as third argument is used directly, anything else is evaluated and turned into a splitter in tspl */
2076 spl = (an[2]->info & OPCLSMASK) == OC_REGEXP ?
2077 an[2] : mk_splitter(getvar_s(evaluate(an[2], &tv[2])), &tspl);
/* split the first argument; n is the number of pieces */
2082 n = awk_split(as[0], spl, &s);
/* empty the target array, then store the pieces under indices 1..n */
2084 clear_array(iamarray(av[1]));
2085 for (i = 1; i <= n; i++)
2086 setari_u(av[1], i, nextword(&s1));
/* the start position argument is 1-based; i is 0-based */
2093 i = getvar_i(av[1]) - 1;
/* length: third argument when given, otherwise l-i */
2096 n = (nargs > 2) ? getvar_i(av[2]) : l-i;
2098 s = xstrndup(as[0]+i, n);
2102 /* Bitwise ops must assume that operands are unsigned. GNU Awk 3.1.5:
2103 * awk '{ print or(-1,1) }' gives "4.29497e+09", not "-2.xxxe+09" */
2105 setvar_i(res, getvar_i_int(av[0]) & getvar_i_int(av[1]));
2109 setvar_i(res, ~getvar_i_int(av[0]));
2113 setvar_i(res, getvar_i_int(av[0]) << getvar_i_int(av[1]));
2117 setvar_i(res, getvar_i_int(av[0]) | getvar_i_int(av[1]));
2121 setvar_i(res, getvar_i_int(av[0]) >> getvar_i_int(av[1]));
2125 setvar_i(res, getvar_i_int(av[0]) ^ getvar_i_int(av[1]));
/* case conversion works on a copy of the first argument */
2130 s1 = s = xstrdup(as[0]);
2132 //*s1 = (info == B_up) ? toupper(*s1) : tolower(*s1);
/* only bytes that are ASCII letters are changed, by clearing (B_up) or setting bit 0x20 */
2133 if ((unsigned char)((*s1 | 0x20) - 'a') <= ('z' - 'a'))
2134 *s1 = (info == B_up) ? (*s1 & 0xdf) : (*s1 | 0x20);
/* l: length of the first string minus ll */
2143 l = strlen(as[0]) - ll;
2144 if (ll > 0 && l >= 0) {
/* exact search; a hit is reported as a 1-based position */
2146 s = strstr(as[0], as[1]);
2147 if (s) n = (s - as[0]) + 1;
2149 /* this piece of code is terribly slow and
2150 * really should be rewritten
// case-insensitive search: compare ll bytes at every offset from 0 to l
2152 for (i=0; i<=l; i++) {
2153 if (strncasecmp(as[0]+i, as[1], ll) == 0) {
// tt: numeric value of the second argument
2165 tt = getvar_i(av[1]);
2168 //s = (nargs > 0) ? as[0] : "%a %b %d %H:%M:%S %Z %Y";
// format into g_buf; the first argument is the format when one was given, otherwise the default below
2169 i = strftime(g_buf, MAXVARFMT,
2170 ((nargs > 0) ? as[0] : "%a %b %d %H:%M:%S %Z %Y"),
2173 setvar_s(res, g_buf);
2177 setvar_i(res, do_mktime(as[0]));
// regex match of the second argument against the first string; n is the regexec result
2181 re = as_regex(an[1], &sreg);
2182 n = regexec(re, as[0], 1, pmatch, 0);
2187 pmatch[0].rm_so = 0;
2188 pmatch[0].rm_eo = -1;
// RSTART and RLENGTH are set from pmatch[0]; the result is rm_so
2190 setvar_i(newvar("RSTART"), pmatch[0].rm_so);
2191 setvar_i(newvar("RLENGTH"), pmatch[0].rm_eo - pmatch[0].rm_so);
2192 setvar_i(res, pmatch[0].rm_so);
2193 if (re == &sreg) regfree(re);
// awk_sub with subexpression matching on: nm from the third argument, source from the fourth, output into res
2197 awk_sub(an[0], as[1], getvar_i(av[2]), av[3], res, TRUE);
// nm = 0 (all matches), in place on the third argument; the result is the substitution count
2201 setvar_i(res, awk_sub(an[0], as[1], 0, av[2], av[2], FALSE));
// nm = 1, in place on the third argument; the result is the substitution count
2205 setvar_i(res, awk_sub(an[0], as[1], 1, av[2], av[2], FALSE));
2215 * Evaluate node - the heart of the program. Supplied with subtree
2216 * and place where to store result. returns ptr to result.
2218 #define XC(n) ((n) >> 8)
2220 static var *evaluate(node *op, var *res)
// op: node to start from; res: variable that receives the result
2222 /* This procedure is recursive so we should count every byte */
2223 #define fnargs (G.evaluate__fnargs)
2224 /* seed is initialized to 1 */
2225 #define seed (G.evaluate__seed)
2226 #define sreg (G.evaluate__sreg)
2248 return setvar_s(res, NULL);
/* opn: the OPNMASK bits of the node info; g_lineno records the line of the node being run */
2254 opn = (opinfo & OPNMASK);
2255 g_lineno = op->lineno;
2257 /* execute inevitable things */
/* OF_* bits of opinfo ask for the operands to be evaluated (into v1 and v1+1) and converted up front */
2259 if (opinfo & OF_RES1) X.v = L.v = evaluate(op1, v1);
2260 if (opinfo & OF_RES2) R.v = evaluate(op->r.n, v1+1);
2261 if (opinfo & OF_STR1) L.s = getvar_s(L.v);
2262 if (opinfo & OF_STR2) R.s = getvar_s(R.v);
2263 if (opinfo & OF_NUM1) L.d = getvar_i(L.v);
2265 switch (XC(opinfo & OPCLSMASK)) {
2267 /* -- iterative node type -- */
2271 if ((op1->info & OPCLSMASK) == OC_COMMA) {
2272 /* it's range pattern */
/* OF_CHECKED on the node remembers that the first pattern matched; it is cleared when the second one matches */
2273 if ((opinfo & OF_CHECKED) || ptest(op1->l.n)) {
2274 op->info |= OF_CHECKED;
2275 if (ptest(op1->r.n))
2276 op->info &= ~OF_CHECKED;
/* plain pattern: continue at a.n when it is true, at r.n otherwise */
2283 op = (ptest(op1)) ? op->a.n : op->r.n;
2287 /* just evaluate an expression, also used as unconditional jump */
2291 /* branch, used in if-else and various loops */
2293 op = istrue(L.v) ? op->a.n : op->r.n;
2296 /* initialize for-in loop */
2297 case XC( OC_WALKINIT ):
2298 hashwalk_init(L.v, iamarray(R.v));
2301 /* get next array item */
2302 case XC( OC_WALKNEXT ):
2303 op = hashwalk_next(L.v) ? op->a.n : op->r.n;
2306 case XC( OC_PRINT ):
2307 case XC( OC_PRINTF ):
/* output target: the stream entry named by R.s */
2310 X.rsm = newfile(R.s);
/* pipe target: start the command with popen, giving up on failure */
2313 X.rsm->F = popen(R.s, "w");
2314 if (X.rsm->F == NULL)
2315 bb_perror_msg_and_die("popen");
/* file target: opn 'w' opens for writing, anything else for appending */
2318 X.rsm->F = xfopen(R.s, opn=='w' ? "w" : "a");
2324 if ((opinfo & OPCLSMASK) == OC_PRINT) {
/* print $0 */
2326 fputs(getvar_s(intvar[F0]), X.F);
/* print the next argument: numbers are formatted with OFMT (int_as_int on), other values as strings */
2329 L.v = evaluate(nextarg(&op1), v1);
2330 if (L.v->type & VF_NUMBER) {
2331 fmt_num(g_buf, MAXVARFMT, getvar_s(intvar[OFMT]),
2332 getvar_i(L.v), TRUE);
2335 fputs(getvar_s(L.v), X.F);
/* OFS is written while more arguments remain */
2338 if (op1) fputs(getvar_s(intvar[OFS]), X.F);
/* ORS is written to the same stream */
2341 fputs(getvar_s(intvar[ORS]), X.F);
2343 } else { /* OC_PRINTF */
2344 L.s = awk_printf(op1);
2351 case XC( OC_DELETE ):
/* the operand has to be a variable or a function argument */
2352 X.info = op1->info & OPCLSMASK;
2353 if (X.info == OC_VAR) {
2355 } else if (X.info == OC_FNARG) {
2356 R.v = &fnargs[op1->l.i];
2358 syntax_error(EMSG_NOT_ARRAY);
/* remove the single element named by the evaluated index ... */
2363 L.s = getvar_s(evaluate(op1->r.n, v1));
2364 hash_remove(iamarray(R.v), L.s);
/* ... or empty the whole array */
2366 clear_array(iamarray(R.v));
2370 case XC( OC_NEWSOURCE ):
2371 g_progname = op->l.s;
2374 case XC( OC_RETURN ):
2378 case XC( OC_NEXTFILE ):
2389 /* -- recursive node type -- */
2393 if (L.v == intvar[NF])
2397 case XC( OC_FNARG ):
/* function arguments are looked up by index in fnargs */
2398 L.v = &fnargs[op->l.i];
/* with an index expression the result is the array element, otherwise the variable itself */
2400 res = op->r.n ? findvar(iamarray(L.v), R.s) : L.v;
/* membership test: 1 when key L.s exists in the array, 0 otherwise */
2404 setvar_i(res, hash_search(iamarray(R.v), L.s) ? 1 : 0);
2407 case XC( OC_REGEXP ):
/* the subject string is $0 */
2409 L.s = getvar_s(intvar[F0]);
2412 case XC( OC_MATCH ):
/* result is 1 on a match, inverted when opn is '!' */
2415 X.re = as_regex(op1, &sreg);
2416 R.i = regexec(X.re, L.s, 0, NULL, 0);
2417 if (X.re == &sreg) regfree(X.re);
2418 setvar_i(res, (R.i == 0 ? 1 : 0) ^ (opn == '!' ? 1 : 0));
2422 /* if source is a temporary string, just relink it to dest */
2423 if (R.v == v1+1 && R.v->string) {
2424 res = setvar_p(L.v, R.v->string);
2427 res = copyvar(L.v, R.v);
2431 case XC( OC_TERNARY ):
/* the right operand has to be an OC_COLON node holding the two alternatives */
2432 if ((op->r.n->info & OPCLSMASK) != OC_COLON)
2433 syntax_error(EMSG_POSSIBLE_ERROR);
2434 res = evaluate(istrue(L.v) ? op->r.n->l.n : op->r.n->r.n, res);
/* a function whose body has no first node is reported as undefined */
2438 if (!op->r.f->body.first)
2439 syntax_error(EMSG_UNDEF_FUNC);
/* allocate nargs+1 variables for the call */
2441 X.v = R.v = nvalloc(op->r.f->nargs+1);
/* evaluate each actual argument; the new variable is marked VF_CHILD with x.parent set to the evaluated value */
2443 L.v = evaluate(nextarg(&op1), v1);
2445 R.v->type |= VF_CHILD;
2446 R.v->x.parent = L.v;
2447 if (++R.v - X.v >= op->r.f->nargs)
/* run the function body */
2455 res = evaluate(op->r.f->body.first, res);
2462 case XC( OC_GETLINE ):
2463 case XC( OC_PGETLINE ):
/* input source: the stream entry named by L.s, opened with popen for OC_PGETLINE or as a file otherwise */
2465 X.rsm = newfile(L.s);
2467 if ((opinfo & OPCLSMASK) == OC_PGETLINE) {
2468 X.rsm->F = popen(L.s, "r");
2469 X.rsm->is_pipe = TRUE;
2471 X.rsm->F = fopen_for_read(L.s); /* not xfopen! */
/* use the current input file, opening the next one when none is open */
2475 if (!iF) iF = next_input_file();
2480 setvar_i(intvar[ERRNO], errno);
/* read one record into R.v */
2488 L.i = awk_getline(X.rsm, R.v);
2491 incvar(intvar[FNR]);
2498 /* simple builtins */
2499 case XC( OC_FBLTIN ):
/* random value: rand() divided by RAND_MAX */
2507 R.d = (double)rand() / (double)RAND_MAX;
2509 #if ENABLE_FEATURE_AWK_LIBM
2535 syntax_error(EMSG_NO_MATH);
/* new seed: the argument when there is one, otherwise the current time */
2540 seed = op1 ? (unsigned)L.d : (unsigned)time(NULL);
2550 L.s = getvar_s(intvar[F0]);
/* run the command only when the feature is enabled and the string is non-empty; the result is system() >> 8 */
2556 R.d = (ENABLE_FEATURE_ALLOW_EXEC && L.s && *L.s)
2557 ? (system(L.s) >> 8) : 0;
2565 X.rsm = newfile(L.s);
/* close: look the stream up by name, close it as pipe or file, free its buffer and drop the entry */
2574 X.rsm = (rstream *)hash_search(fdhash, L.s);
2576 R.i = X.rsm->is_pipe ? pclose(X.rsm->F) : fclose(X.rsm->F);
2577 free(X.rsm->buffer);
2578 hash_remove(fdhash, L.s);
2581 setvar_i(intvar[ERRNO], errno);
2588 case XC( OC_BUILTIN ):
2589 res = exec_builtin(op, res);
2592 case XC( OC_SPRINTF ):
2593 setvar_p(res, awk_printf(op1));
2596 case XC( OC_UNARY ):
2598 L.d = R.d = getvar_i(R.v);
/* logical not */
2613 L.d = istrue(X.v) ? 0 : 1;
2624 case XC( OC_FIELD ):
/* field number as an int; Fields[] is indexed with that number minus one */
2625 R.i = (int)getvar_i(R.v);
2632 res = &Fields[R.i - 1];
2636 /* concatenation (" ") and index joining (",") */
2637 case XC( OC_CONCAT ):
2638 case XC( OC_COMMA ):
/* opn is reused here as the size needed for both strings plus two bytes */
2639 opn = strlen(L.s) + strlen(R.s) + 2;
/* index joining also needs room for the SUBSEP string */
2642 if ((opinfo & OPCLSMASK) == OC_COMMA) {
2643 L.s = getvar_s(intvar[SUBSEP]);
2644 X.s = xrealloc(X.s, opn + strlen(L.s));
/* the right operand is tested only when the left one is true */
2652 setvar_i(res, istrue(L.v) ? ptest(op->r.n) : 0);
/* the right operand is tested only when the left one is false */
2656 setvar_i(res, istrue(L.v) ? 1 : ptest(op->r.n));
2659 case XC( OC_BINARY ):
2660 case XC( OC_REPLACE ):
2661 R.d = getvar_i(R.v);
2673 if (R.d == 0) syntax_error(EMSG_DIV_BY_ZERO);
2677 #if ENABLE_FEATURE_AWK_LIBM
2678 L.d = pow(L.d, R.d);
2680 syntax_error(EMSG_NO_MATH);
2684 if (R.d == 0) syntax_error(EMSG_DIV_BY_ZERO);
/* remainder, computed with the quotient converted to int */
2685 L.d -= (int)(L.d / R.d) * R.d;
/* OC_BINARY stores into res, OC_REPLACE stores into X.v */
2688 res = setvar_i(((opinfo & OPCLSMASK) == OC_BINARY) ? res : X.v, L.d);
2691 case XC( OC_COMPARE ):
/* numeric comparison when both sides are numeric, otherwise string comparison (case-insensitive when icase is set) */
2692 if (is_numeric(L.v) && is_numeric(R.v)) {
2693 L.d = getvar_i(L.v) - getvar_i(R.v);
2695 L.s = getvar_s(L.v);
2696 R.s = getvar_s(R.v);
2697 L.d = icase ? strcasecmp(L.s, R.s) : strcmp(L.s, R.s);
/* the low bit of opn selects R.i or its negation as the result; the remaining bits select the case */
2699 switch (opn & 0xfe) {
2710 setvar_i(res, (opn & 0x1 ? R.i : !R.i) ? 1 : 0);
2714 syntax_error(EMSG_POSSIBLE_ERROR);
/* the operation class is compared against SHIFT_TIL_THIS and RECUR_FROM_THIS */
2716 if ((opinfo & OPCLSMASK) <= SHIFT_TIL_THIS)
2718 if ((opinfo & OPCLSMASK) >= RECUR_FROM_THIS)
2731 /* -------- main & co. -------- */
/* Finishing steps of the program; r is the value passed in by callers such as awk_exit(EXIT_SUCCESS). */
2733 static int awk_exit(int r)
/* run the END sequence */
2744 evaluate(endseq.first, &tv);
2747 /* waiting for children */
/* go through every bucket of the stream hash and pclose() the entries that are open pipes */
2748 for (i = 0; i < fdhash->csize; i++) {
2749 hi = fdhash->items[i];
2751 if (hi->data.rs.F && hi->data.rs.is_pipe)
2752 pclose(hi->data.rs.F);
2760 /* if expr looks like "var=value", perform assignment and return 1,
2761 * otherwise return 0 */
2762 static int is_assignment(const char *expr)
2764 char *exprc, *s, *s0, *s1;
/* work on a private copy of expr */
2766 exprc = xstrdup(expr);
/* not an assignment unless the first character passes isalnum_() and a '=' is present */
2767 if (!isalnum_(*exprc) || (s = strchr(exprc, '=')) == NULL) {
/* characters are fetched through nextchar() and stored at s1 */
2775 *(s1++) = nextchar(&s);
/* create or look up the variable named by exprc and give it the value at s0 */
2778 setvar_u(newvar(exprc), s0);
2783 /* switch to next input file */
/* Uses the ARGV array together with ARGIND and ARGC to find the next file name. */
2784 static rstream *next_input_file(void)
2786 #define rsm (G.next_input_file__rsm)
2787 #define files_happen (G.next_input_file__files_happen)
2790 const char *fname, *ind;
/* close the stream that is still open and reset the buffer positions */
2792 if (rsm.F) fclose(rsm.F);
2794 rsm.pos = rsm.adv = 0;
/* ARGIND+1 has reached ARGC: no further ARGV entry to look at */
2797 if (getvar_i(intvar[ARGIND])+1 >= getvar_i(intvar[ARGC])) {
/* increment ARGIND and use its string value as the key into ARGV */
2803 ind = getvar_s(incvar(intvar[ARGIND]));
2804 fname = getvar_s(findvar(iamarray(intvar[ARGV]), ind));
/* open non-empty entries that is_assignment() does not accept as "var=value" */
2805 if (fname && *fname && !is_assignment(fname))
2806 F = xfopen_stdin(fname);
/* record that a file was used and publish its name in FILENAME */
2810 files_happen = TRUE;
2811 setvar_s(intvar[FILENAME], fname);
/* Applet entry point: sets up the global state, parses the options and the program, then processes the input. */
2818 int awk_main(int argc, char **argv) MAIN_EXTERNALLY_VISIBLE;
2819 int awk_main(int argc, char **argv)
2822 char *opt_F, *opt_W;
2823 llist_t *list_v = NULL;
2824 llist_t *list_f = NULL;
2829 char *vnames = (char *)vNames; /* cheat */
2830 char *vvalues = (char *)vValues;
2834 /* Undo busybox.c, or else strtod may eat ','! This breaks parsing:
2835 * $1,$2 == '$1,' '$2', NOT '$1' ',' '$2' */
2836 if (ENABLE_LOCALE_SUPPORT)
2837 setlocale(LC_NUMERIC, "C");
2841 /* allocate global buffer */
2842 g_buf = xmalloc(MAXVARFMT + 1);
/* four hash tables: vhash, ahash, fdhash (streams) and fnhash */
2844 vhash = hash_init();
2845 ahash = hash_init();
2846 fdhash = hash_init();
2847 fnhash = hash_init();
2849 /* initialize variables */
/* names come from vnames word by word; a string value is taken from vvalues unless the entry starts with '\377' */
2850 for (i = 0; *vnames; i++) {
2851 intvar[i] = v = newvar(nextword(&vnames));
2852 if (*vvalues != '\377')
2853 setvar_s(v, nextword(&vvalues));
/* a '*' at the current position in vnames marks the variable just created as VF_SPECIAL */
2857 if (*vnames == '*') {
2858 v->type |= VF_SPECIAL;
/* let handle_special() build the splitters for the initial FS and RS values */
2863 handle_special(intvar[FS]);
2864 handle_special(intvar[RS]);
/* register the three standard streams under their /dev names */
2866 newfile("/dev/stdin")->F = stdin;
2867 newfile("/dev/stdout")->F = stdout;
2868 newfile("/dev/stderr")->F = stderr;
2870 /* Huh, people report that sometimes environ is NULL. Oh well. */
2871 if (environ) for (envp = environ; *envp; envp++) {
2872 /* environ is writable, thus we don't strdup it needlessly */
/* s1 points at the '=' of the entry */
2874 char *s1 = strchr(s, '=');
2877 /* Both findvar and setvar_u take const char*
2878 * as 2nd arg -> environment is not trashed */
/* the element of ENVIRON keyed by s receives the text after the '=' */
2879 setvar_u(findvar(iamarray(intvar[ENVIRON]), s), s1 + 1);
2883 opt_complementary = "v::f::"; /* -v and -f can occur multiple times */
2884 opt = getopt32(argv, "F:v:f:W:", &opt_F, &list_v, &list_f, &opt_W);
2888 setvar_s(intvar[FS], opt_F); // -F
2889 while (list_v) { /* -v */
/* each -v argument has to be accepted by is_assignment() */
2890 if (!is_assignment(llist_pop(&list_v)))
2893 if (list_f) { /* -f */
/* read the whole program file into s, requesting at most 4094 bytes per fread, starting at offset 1 */
2898 g_progname = llist_pop(&list_f);
2899 from_file = xfopen_stdin(g_progname);
2900 /* one byte is reserved for some trick in next_token */
2901 for (i = j = 1; j > 0; i += j) {
2902 s = xrealloc(s, i + 4096);
2903 j = fread(s + i, 1, 4094, from_file);
/* the program text begins behind the reserved byte */
2907 parse_program(s + 1);
2911 } else { // no -f: take program from 1st parameter
2914 g_progname = "cmd. line";
2915 parse_program(*argv++);
2917 if (opt & 0x8) // -W
2918 bb_error_msg("warning: unrecognized option '-W %s' ignored", opt_W);
2920 /* fill in ARGV array */
/* ARGV[0] is always "awk"; the remaining arguments follow under increasing indices */
2921 setvar_i(intvar[ARGC], argc);
2922 setari_u(intvar[ARGV], 0, "awk");
2925 setari_u(intvar[ARGV], ++i, *argv++);
/* run the BEGIN sequence; with neither a main nor an END sequence awk_exit is called right away */
2927 evaluate(beginseq.first, &tv);
2928 if (!mainseq.first && !endseq.first)
2929 awk_exit(EXIT_SUCCESS);
2931 /* input file could already be opened in BEGIN block */
2932 if (!iF) iF = next_input_file();
2934 /* passing through input files */
/* FNR is set to 0 here */
2937 setvar_i(intvar[FNR], 0);
/* read records into $0 for as long as awk_getline returns a positive value */
2939 while ((i = awk_getline(iF, intvar[F0])) > 0) {
2942 incvar(intvar[FNR]);
2943 evaluate(mainseq.first, &tv);
/* report the current errno text through syntax_error */
2950 syntax_error(strerror(errno));
2952 iF = next_input_file();
2955 awk_exit(EXIT_SUCCESS);