1 /* vi: set sw=4 ts=4: */
3 * awk implementation for busybox
5 * Copyright (C) 2002 by Dmitry Zakharov <dmit@crp.bank.gov.ua>
7 * Licensed under GPLv2 or later, see file LICENSE in this source tree.
10 //usage:#define awk_trivial_usage
11 //usage: "[OPTIONS] [AWK_PROGRAM] [FILE]..."
12 //usage:#define awk_full_usage "\n\n"
14 //usage: "\n -v VAR=VAL Set variable"
15 //usage: "\n -F SEP Use SEP as field separator"
16 //usage: "\n -f FILE Read program from FILE"
22 /* This is a NOEXEC applet. Be very careful! */
25 /* If you comment out one of these below, it will be #defined later
26 * to perform debug printfs to stderr: */
27 #define debug_printf_walker(...) do {} while (0)
28 #define debug_printf_eval(...) do {} while (0)
30 #ifndef debug_printf_walker
31 # define debug_printf_walker(...) (fprintf(stderr, __VA_ARGS__))
33 #ifndef debug_printf_eval
34 # define debug_printf_eval(...) (fprintf(stderr, __VA_ARGS__))
43 #define VF_NUMBER 0x0001 /* 1 = primary type is number */
44 #define VF_ARRAY 0x0002 /* 1 = it's an array */
46 #define VF_CACHED 0x0100 /* 1 = num/str value has cached str/num eq */
47 #define VF_USER 0x0200 /* 1 = user input (may be numeric string) */
48 #define VF_SPECIAL 0x0400 /* 1 = requires extra handling when changed */
49 #define VF_WALK 0x0800 /* 1 = variable has alloc'd x.walker list */
50 #define VF_FSTR 0x1000 /* 1 = var::string points to fstring buffer */
51 #define VF_CHILD 0x2000 /* 1 = function arg; x.parent points to source */
52 #define VF_DIRTY 0x4000 /* 1 = variable was set explicitly */
54 /* these flags are static, don't change them when value is changed */
55 #define VF_DONTTOUCH (VF_ARRAY | VF_SPECIAL | VF_WALK | VF_CHILD | VF_DIRTY)
57 typedef struct walker_list {
60 struct walker_list *prev;
65 typedef struct var_s {
66 unsigned type; /* flags */
70 int aidx; /* func arg idx (for compilation stage) */
71 struct xhash_s *array; /* array ptr */
72 struct var_s *parent; /* for func args, ptr to actual parameter */
73 walker_list *walker; /* list of array elements (for..in) */
77 /* Node chain (pattern-action chain, BEGIN, END, function bodies) */
78 typedef struct chain_s {
81 const char *programname;
85 typedef struct func_s {
91 typedef struct rstream_s {
100 typedef struct hash_item_s {
102 struct var_s v; /* variable/array hash */
103 struct rstream_s rs; /* redirect streams hash */
104 struct func_s f; /* functions hash */
106 struct hash_item_s *next; /* next in chain */
107 char name[1]; /* really it's longer */
110 typedef struct xhash_s {
111 unsigned nel; /* num of elements */
112 unsigned csize; /* current hash size */
113 unsigned nprime; /* next hash size in PRIMES[] */
114 unsigned glen; /* summary length of item names */
115 struct hash_item_s **items;
119 typedef struct node_s {
139 /* Block of temporary variables */
140 typedef struct nvblock_s {
143 struct nvblock_s *prev;
144 struct nvblock_s *next;
148 typedef struct tsplitter_s {
153 /* simple token classes */
154 /* Order and hex values are very important!!! See next_token() */
155 #define TC_SEQSTART 1 /* ( */
156 #define TC_SEQTERM (1 << 1) /* ) */
157 #define TC_REGEXP (1 << 2) /* /.../ */
158 #define TC_OUTRDR (1 << 3) /* | > >> */
159 #define TC_UOPPOST (1 << 4) /* unary postfix operator */
160 #define TC_UOPPRE1 (1 << 5) /* unary prefix operator */
161 #define TC_BINOPX (1 << 6) /* two-opnd operator */
162 #define TC_IN (1 << 7)
163 #define TC_COMMA (1 << 8)
164 #define TC_PIPE (1 << 9) /* input redirection pipe */
165 #define TC_UOPPRE2 (1 << 10) /* unary prefix operator */
166 #define TC_ARRTERM (1 << 11) /* ] */
167 #define TC_GRPSTART (1 << 12) /* { */
168 #define TC_GRPTERM (1 << 13) /* } */
169 #define TC_SEMICOL (1 << 14)
170 #define TC_NEWLINE (1 << 15)
171 #define TC_STATX (1 << 16) /* ctl statement (for, next...) */
172 #define TC_WHILE (1 << 17)
173 #define TC_ELSE (1 << 18)
174 #define TC_BUILTIN (1 << 19)
175 #define TC_GETLINE (1 << 20)
176 #define TC_FUNCDECL (1 << 21) /* `function' `func' */
177 #define TC_BEGIN (1 << 22)
178 #define TC_END (1 << 23)
179 #define TC_EOF (1 << 24)
180 #define TC_VARIABLE (1 << 25)
181 #define TC_ARRAY (1 << 26)
182 #define TC_FUNCTION (1 << 27)
183 #define TC_STRING (1 << 28)
184 #define TC_NUMBER (1 << 29)
186 #define TC_UOPPRE (TC_UOPPRE1 | TC_UOPPRE2)
188 /* combined token classes */
189 #define TC_BINOP (TC_BINOPX | TC_COMMA | TC_PIPE | TC_IN)
190 #define TC_UNARYOP (TC_UOPPRE | TC_UOPPOST)
191 #define TC_OPERAND (TC_VARIABLE | TC_ARRAY | TC_FUNCTION \
192 | TC_BUILTIN | TC_GETLINE | TC_SEQSTART | TC_STRING | TC_NUMBER)
194 #define TC_STATEMNT (TC_STATX | TC_WHILE)
195 #define TC_OPTERM (TC_SEMICOL | TC_NEWLINE)
197 /* word tokens, cannot mean something else if not expected */
198 #define TC_WORD (TC_IN | TC_STATEMNT | TC_ELSE | TC_BUILTIN \
199 | TC_GETLINE | TC_FUNCDECL | TC_BEGIN | TC_END)
201 /* discard newlines after these */
202 #define TC_NOTERM (TC_COMMA | TC_GRPSTART | TC_GRPTERM \
203 | TC_BINOP | TC_OPTERM)
205 /* what can expression begin with */
206 #define TC_OPSEQ (TC_OPERAND | TC_UOPPRE | TC_REGEXP)
207 /* what can group begin with */
208 #define TC_GRPSEQ (TC_OPSEQ | TC_OPTERM | TC_STATEMNT | TC_GRPSTART)
210 /* if previous token class is CONCAT1 and next is CONCAT2, concatenation */
211 /* operator is inserted between them */
212 #define TC_CONCAT1 (TC_VARIABLE | TC_ARRTERM | TC_SEQTERM \
213 | TC_STRING | TC_NUMBER | TC_UOPPOST)
214 #define TC_CONCAT2 (TC_OPERAND | TC_UOPPRE)
216 #define OF_RES1 0x010000
217 #define OF_RES2 0x020000
218 #define OF_STR1 0x040000
219 #define OF_STR2 0x080000
220 #define OF_NUM1 0x100000
221 #define OF_CHECKED 0x200000
223 /* combined operator flags */
226 #define xS (OF_RES2 | OF_STR2)
228 #define VV (OF_RES1 | OF_RES2)
229 #define Nx (OF_RES1 | OF_NUM1)
230 #define NV (OF_RES1 | OF_NUM1 | OF_RES2)
231 #define Sx (OF_RES1 | OF_STR1)
232 #define SV (OF_RES1 | OF_STR1 | OF_RES2)
233 #define SS (OF_RES1 | OF_STR1 | OF_RES2 | OF_STR2)
235 #define OPCLSMASK 0xFF00
236 #define OPNMASK 0x007F
238 /* operator priority is the highest byte (even: r->l, odd: l->r grouping)
239 * For builtins it has a different meaning: n n s3 s2 s1 v3 v2 v1,
240 * n - min. number of args, vN - resolve Nth arg to var, sN - resolve to string
242 #define P(x) (x << 24)
243 #define PRIMASK 0x7F000000
244 #define PRIMASK2 0x7E000000
246 /* Operation classes */
248 #define SHIFT_TIL_THIS 0x0600
249 #define RECUR_FROM_THIS 0x1000
252 OC_DELETE = 0x0100, OC_EXEC = 0x0200, OC_NEWSOURCE = 0x0300,
253 OC_PRINT = 0x0400, OC_PRINTF = 0x0500, OC_WALKINIT = 0x0600,
255 OC_BR = 0x0700, OC_BREAK = 0x0800, OC_CONTINUE = 0x0900,
256 OC_EXIT = 0x0a00, OC_NEXT = 0x0b00, OC_NEXTFILE = 0x0c00,
257 OC_TEST = 0x0d00, OC_WALKNEXT = 0x0e00,
259 OC_BINARY = 0x1000, OC_BUILTIN = 0x1100, OC_COLON = 0x1200,
260 OC_COMMA = 0x1300, OC_COMPARE = 0x1400, OC_CONCAT = 0x1500,
261 OC_FBLTIN = 0x1600, OC_FIELD = 0x1700, OC_FNARG = 0x1800,
262 OC_FUNC = 0x1900, OC_GETLINE = 0x1a00, OC_IN = 0x1b00,
263 OC_LAND = 0x1c00, OC_LOR = 0x1d00, OC_MATCH = 0x1e00,
264 OC_MOVE = 0x1f00, OC_PGETLINE = 0x2000, OC_REGEXP = 0x2100,
265 OC_REPLACE = 0x2200, OC_RETURN = 0x2300, OC_SPRINTF = 0x2400,
266 OC_TERNARY = 0x2500, OC_UNARY = 0x2600, OC_VAR = 0x2700,
269 ST_IF = 0x3000, ST_DO = 0x3100, ST_FOR = 0x3200,
273 /* simple builtins */
275 F_in, F_rn, F_co, F_ex, F_lg, F_si, F_sq, F_sr,
276 F_ti, F_le, F_sy, F_ff, F_cl
281 B_a2, B_ix, B_ma, B_sp, B_ss, B_ti, B_mt, B_lo, B_up,
283 B_an, B_co, B_ls, B_or, B_rs, B_xo,
286 /* tokens and their corresponding info values */
288 #define NTC "\377" /* switch to next token class (tc<<1) */
291 #define OC_B OC_BUILTIN
293 static const char tokenlist[] ALIGN1 =
296 "\1/" NTC /* REGEXP */
297 "\2>>" "\1>" "\1|" NTC /* OUTRDR */
298 "\2++" "\2--" NTC /* UOPPOST */
299 "\2++" "\2--" "\1$" NTC /* UOPPRE1 */
300 "\2==" "\1=" "\2+=" "\2-=" /* BINOPX */
301 "\2*=" "\2/=" "\2%=" "\2^="
302 "\1+" "\1-" "\3**=" "\2**"
303 "\1/" "\1%" "\1^" "\1*"
304 "\2!=" "\2>=" "\2<=" "\1>"
305 "\1<" "\2!~" "\1~" "\2&&"
306 "\2||" "\1?" "\1:" NTC
310 "\1+" "\1-" "\1!" NTC /* UOPPRE2 */
316 "\2if" "\2do" "\3for" "\5break" /* STATX */
317 "\10continue" "\6delete" "\5print"
318 "\6printf" "\4next" "\10nextfile"
319 "\6return" "\4exit" NTC
323 "\3and" "\5compl" "\6lshift" "\2or"
325 "\5close" "\6system" "\6fflush" "\5atan2" /* BUILTIN */
326 "\3cos" "\3exp" "\3int" "\3log"
327 "\4rand" "\3sin" "\4sqrt" "\5srand"
328 "\6gensub" "\4gsub" "\5index" "\6length"
329 "\5match" "\5split" "\7sprintf" "\3sub"
330 "\6substr" "\7systime" "\10strftime" "\6mktime"
331 "\7tolower" "\7toupper" NTC
333 "\4func" "\10function" NTC
336 /* compiler adds trailing "\0" */
339 static const uint32_t tokeninfo[] = {
343 xS|'a', xS|'w', xS|'|',
344 OC_UNARY|xV|P(9)|'p', OC_UNARY|xV|P(9)|'m',
345 OC_UNARY|xV|P(9)|'P', OC_UNARY|xV|P(9)|'M', OC_FIELD|xV|P(5),
346 OC_COMPARE|VV|P(39)|5, OC_MOVE|VV|P(74), OC_REPLACE|NV|P(74)|'+', OC_REPLACE|NV|P(74)|'-',
347 OC_REPLACE|NV|P(74)|'*', OC_REPLACE|NV|P(74)|'/', OC_REPLACE|NV|P(74)|'%', OC_REPLACE|NV|P(74)|'&',
348 OC_BINARY|NV|P(29)|'+', OC_BINARY|NV|P(29)|'-', OC_REPLACE|NV|P(74)|'&', OC_BINARY|NV|P(15)|'&',
349 OC_BINARY|NV|P(25)|'/', OC_BINARY|NV|P(25)|'%', OC_BINARY|NV|P(15)|'&', OC_BINARY|NV|P(25)|'*',
350 OC_COMPARE|VV|P(39)|4, OC_COMPARE|VV|P(39)|3, OC_COMPARE|VV|P(39)|0, OC_COMPARE|VV|P(39)|1,
351 OC_COMPARE|VV|P(39)|2, OC_MATCH|Sx|P(45)|'!', OC_MATCH|Sx|P(45)|'~', OC_LAND|Vx|P(55),
352 OC_LOR|Vx|P(59), OC_TERNARY|Vx|P(64)|'?', OC_COLON|xx|P(67)|':',
353 OC_IN|SV|P(49), /* in */
355 OC_PGETLINE|SV|P(37),
356 OC_UNARY|xV|P(19)|'+', OC_UNARY|xV|P(19)|'-', OC_UNARY|xV|P(19)|'!',
362 ST_IF, ST_DO, ST_FOR, OC_BREAK,
363 OC_CONTINUE, OC_DELETE|Vx, OC_PRINT,
364 OC_PRINTF, OC_NEXT, OC_NEXTFILE,
365 OC_RETURN|Vx, OC_EXIT|Nx,
369 OC_B|B_an|P(0x83), OC_B|B_co|P(0x41), OC_B|B_ls|P(0x83), OC_B|B_or|P(0x83),
370 OC_B|B_rs|P(0x83), OC_B|B_xo|P(0x83),
371 OC_FBLTIN|Sx|F_cl, OC_FBLTIN|Sx|F_sy, OC_FBLTIN|Sx|F_ff, OC_B|B_a2|P(0x83),
372 OC_FBLTIN|Nx|F_co, OC_FBLTIN|Nx|F_ex, OC_FBLTIN|Nx|F_in, OC_FBLTIN|Nx|F_lg,
373 OC_FBLTIN|F_rn, OC_FBLTIN|Nx|F_si, OC_FBLTIN|Nx|F_sq, OC_FBLTIN|Nx|F_sr,
374 OC_B|B_ge|P(0xd6), OC_B|B_gs|P(0xb6), OC_B|B_ix|P(0x9b), OC_FBLTIN|Sx|F_le,
375 OC_B|B_ma|P(0x89), OC_B|B_sp|P(0x8b), OC_SPRINTF, OC_B|B_su|P(0xb6),
376 OC_B|B_ss|P(0x8f), OC_FBLTIN|F_ti, OC_B|B_ti|P(0x0b), OC_B|B_mt|P(0x0b),
377 OC_B|B_lo|P(0x49), OC_B|B_up|P(0x49),
384 /* internal variable names and their initial values */
385 /* asterisk marks SPECIAL vars; $ is just no-named Field0 */
387 CONVFMT, OFMT, FS, OFS,
388 ORS, RS, RT, FILENAME,
389 SUBSEP, F0, ARGIND, ARGC,
390 ARGV, ERRNO, FNR, NR,
391 NF, IGNORECASE, ENVIRON, NUM_INTERNAL_VARS
394 static const char vNames[] ALIGN1 =
395 "CONVFMT\0" "OFMT\0" "FS\0*" "OFS\0"
396 "ORS\0" "RS\0*" "RT\0" "FILENAME\0"
397 "SUBSEP\0" "$\0*" "ARGIND\0" "ARGC\0"
398 "ARGV\0" "ERRNO\0" "FNR\0" "NR\0"
399 "NF\0*" "IGNORECASE\0*" "ENVIRON\0" "\0";
401 static const char vValues[] ALIGN1 =
402 "%.6g\0" "%.6g\0" " \0" " \0"
403 "\n\0" "\n\0" "\0" "\0"
404 "\034\0" "\0" "\377";
406 /* hash size may grow to these values */
407 #define FIRST_PRIME 61
408 static const uint16_t PRIMES[] ALIGN2 = { 251, 1021, 4093, 16381, 65521 };
411 /* Globals. Split in two parts so that first one is addressed
412 * with (mostly short) negative offsets.
413 * NB: it's unsafe to put members of type "double"
414 * into globals2 (gcc may fail to align them).
418 chain beginseq, mainseq, endseq;
420 node *break_ptr, *continue_ptr;
422 xhash *vhash, *ahash, *fdhash, *fnhash;
423 const char *g_progname;
426 int maxfields; /* used in fsrealloc() only */
435 smallint is_f0_split;
438 uint32_t t_info; /* often used */
444 var *intvar[NUM_INTERNAL_VARS]; /* often used */
446 /* former statics from various functions */
447 char *split_f0__fstrings;
449 uint32_t next_token__save_tclass;
450 uint32_t next_token__save_info;
451 uint32_t next_token__ltclass;
452 smallint next_token__concat_inserted;
454 smallint next_input_file__files_happen;
455 rstream next_input_file__rsm;
457 var *evaluate__fnargs;
458 unsigned evaluate__seed;
459 regex_t evaluate__sreg;
463 tsplitter exec_builtin__tspl;
465 /* biggest and least used members go last */
466 tsplitter fsplitter, rsplitter;
468 #define G1 (ptr_to_globals[-1])
469 #define G (*(struct globals2 *)ptr_to_globals)
470 /* For debug. nm --size-sort awk.o | grep -vi ' [tr] ' */
471 /*char G1size[sizeof(G1)]; - 0x74 */
472 /*char Gsize[sizeof(G)]; - 0x1c4 */
473 /* Trying to keep most of members accessible with short offsets: */
474 /*char Gofs_seed[offsetof(struct globals2, evaluate__seed)]; - 0x90 */
475 #define t_double (G1.t_double )
476 #define beginseq (G1.beginseq )
477 #define mainseq (G1.mainseq )
478 #define endseq (G1.endseq )
479 #define seq (G1.seq )
480 #define break_ptr (G1.break_ptr )
481 #define continue_ptr (G1.continue_ptr)
483 #define vhash (G1.vhash )
484 #define ahash (G1.ahash )
485 #define fdhash (G1.fdhash )
486 #define fnhash (G1.fnhash )
487 #define g_progname (G1.g_progname )
488 #define g_lineno (G1.g_lineno )
489 #define nfields (G1.nfields )
490 #define maxfields (G1.maxfields )
491 #define Fields (G1.Fields )
492 #define g_cb (G1.g_cb )
493 #define g_pos (G1.g_pos )
494 #define g_buf (G1.g_buf )
495 #define icase (G1.icase )
496 #define exiting (G1.exiting )
497 #define nextrec (G1.nextrec )
498 #define nextfile (G1.nextfile )
499 #define is_f0_split (G1.is_f0_split )
500 #define t_info (G.t_info )
501 #define t_tclass (G.t_tclass )
502 #define t_string (G.t_string )
503 #define t_lineno (G.t_lineno )
504 #define t_rollback (G.t_rollback )
505 #define intvar (G.intvar )
506 #define fsplitter (G.fsplitter )
507 #define rsplitter (G.rsplitter )
508 #define INIT_G() do { \
509 SET_PTR_TO_GLOBALS((char*)xzalloc(sizeof(G1)+sizeof(G)) + sizeof(G1)); \
510 G.next_token__ltclass = TC_OPTERM; \
511 G.evaluate__seed = 1; \
515 /* function prototypes */
516 static void handle_special(var *);
517 static node *parse_expr(uint32_t);
518 static void chain_group(void);
519 static var *evaluate(node *, var *);
520 static rstream *next_input_file(void);
521 static int fmt_num(char *, int, const char *, double, int);
522 static int awk_exit(int) NORETURN;
524 /* ---- error handling ---- */
526 static const char EMSG_INTERNAL_ERROR[] ALIGN1 = "Internal error";
527 static const char EMSG_UNEXP_EOS[] ALIGN1 = "Unexpected end of string";
528 static const char EMSG_UNEXP_TOKEN[] ALIGN1 = "Unexpected token";
529 static const char EMSG_DIV_BY_ZERO[] ALIGN1 = "Division by zero";
530 static const char EMSG_INV_FMT[] ALIGN1 = "Invalid format specifier";
531 static const char EMSG_TOO_FEW_ARGS[] ALIGN1 = "Too few arguments for builtin";
532 static const char EMSG_NOT_ARRAY[] ALIGN1 = "Not an array";
533 static const char EMSG_POSSIBLE_ERROR[] ALIGN1 = "Possible syntax error";
534 static const char EMSG_UNDEF_FUNC[] ALIGN1 = "Call to undefined function";
535 static const char EMSG_NO_MATH[] ALIGN1 = "Math support is not compiled in";
537 static void zero_out_var(var *vp)
539 memset(vp, 0, sizeof(*vp));
542 static void syntax_error(const char *message) NORETURN;
543 static void syntax_error(const char *message)
545 bb_error_msg_and_die("%s:%i: %s", g_progname, g_lineno, message);
548 /* ---- hash stuff ---- */
550 static unsigned hashidx(const char *name)
555 idx = *name++ + (idx << 6) - idx;
559 /* create new hash */
560 static xhash *hash_init(void)
564 newhash = xzalloc(sizeof(*newhash));
565 newhash->csize = FIRST_PRIME;
566 newhash->items = xzalloc(FIRST_PRIME * sizeof(newhash->items[0]));
571 /* find item in hash, return ptr to data, NULL if not found */
572 static void *hash_search(xhash *hash, const char *name)
576 hi = hash->items[hashidx(name) % hash->csize];
578 if (strcmp(hi->name, name) == 0)
585 /* grow hash if it becomes too big */
586 static void hash_rebuild(xhash *hash)
588 unsigned newsize, i, idx;
589 hash_item **newitems, *hi, *thi;
591 if (hash->nprime == ARRAY_SIZE(PRIMES))
594 newsize = PRIMES[hash->nprime++];
595 newitems = xzalloc(newsize * sizeof(newitems[0]));
597 for (i = 0; i < hash->csize; i++) {
602 idx = hashidx(thi->name) % newsize;
603 thi->next = newitems[idx];
609 hash->csize = newsize;
610 hash->items = newitems;
613 /* find item in hash, add it if necessary. Return ptr to data */
614 static void *hash_find(xhash *hash, const char *name)
620 hi = hash_search(hash, name);
622 if (++hash->nel / hash->csize > 10)
625 l = strlen(name) + 1;
626 hi = xzalloc(sizeof(*hi) + l);
627 strcpy(hi->name, name);
629 idx = hashidx(name) % hash->csize;
630 hi->next = hash->items[idx];
631 hash->items[idx] = hi;
637 #define findvar(hash, name) ((var*) hash_find((hash), (name)))
638 #define newvar(name) ((var*) hash_find(vhash, (name)))
639 #define newfile(name) ((rstream*)hash_find(fdhash, (name)))
640 #define newfunc(name) ((func*) hash_find(fnhash, (name)))
642 static void hash_remove(xhash *hash, const char *name)
644 hash_item *hi, **phi;
646 phi = &hash->items[hashidx(name) % hash->csize];
649 if (strcmp(hi->name, name) == 0) {
650 hash->glen -= (strlen(name) + 1);
660 /* ------ some useful functions ------ */
662 static char *skip_spaces(char *p)
665 if (*p == '\\' && p[1] == '\n') {
668 } else if (*p != ' ' && *p != '\t') {
676 /* returns old *s, advances *s past word and terminating NUL */
677 static char *nextword(char **s)
680 while (*(*s)++ != '\0')
685 static char nextchar(char **s)
692 c = bb_process_escape_sequence((const char**)s);
693 if (c == '\\' && *s == pps) { /* unrecognized \z? */
694 c = *(*s); /* yes, fetch z */
696 (*s)++; /* advance unless z = NUL */
701 static ALWAYS_INLINE int isalnum_(int c)
703 return (isalnum(c) || c == '_');
706 static double my_strtod(char **pp)
709 if (ENABLE_DESKTOP && cp[0] == '0') {
710 /* Might be hex or octal integer: 0x123abc or 07777 */
711 char c = (cp[1] | 0x20);
712 if (c == 'x' || isdigit(cp[1])) {
713 unsigned long long ull = strtoull(cp, pp, 0);
717 if (!isdigit(c) && c != '.')
719 /* else: it may be a floating number. Examples:
720 * 009.123 (*pp points to '9')
721 * 000.123 (*pp points to '.')
722 * fall through to strtod.
726 return strtod(cp, pp);
729 /* -------- working with variables (set/get/copy/etc) -------- */
731 static xhash *iamarray(var *v)
735 while (a->type & VF_CHILD)
738 if (!(a->type & VF_ARRAY)) {
740 a->x.array = hash_init();
745 static void clear_array(xhash *array)
750 for (i = 0; i < array->csize; i++) {
751 hi = array->items[i];
755 free(thi->data.v.string);
758 array->items[i] = NULL;
760 array->glen = array->nel = 0;
763 /* clear a variable */
764 static var *clrvar(var *v)
766 if (!(v->type & VF_FSTR))
769 v->type &= VF_DONTTOUCH;
775 /* assign string value to variable */
776 static var *setvar_p(var *v, char *value)
784 /* same as setvar_p but make a copy of string */
785 static var *setvar_s(var *v, const char *value)
787 return setvar_p(v, (value && *value) ? xstrdup(value) : NULL);
790 /* same as setvar_s but sets USER flag */
791 static var *setvar_u(var *v, const char *value)
793 v = setvar_s(v, value);
798 /* set array element to user string */
799 static void setari_u(var *a, int idx, const char *s)
803 v = findvar(iamarray(a), itoa(idx));
807 /* assign numeric value to variable */
808 static var *setvar_i(var *v, double value)
811 v->type |= VF_NUMBER;
817 static const char *getvar_s(var *v)
819 /* if v is numeric and has no cached string, convert it to string */
820 if ((v->type & (VF_NUMBER | VF_CACHED)) == VF_NUMBER) {
821 fmt_num(g_buf, MAXVARFMT, getvar_s(intvar[CONVFMT]), v->number, TRUE);
822 v->string = xstrdup(g_buf);
823 v->type |= VF_CACHED;
825 return (v->string == NULL) ? "" : v->string;
828 static double getvar_i(var *v)
832 if ((v->type & (VF_NUMBER | VF_CACHED)) == 0) {
836 debug_printf_eval("getvar_i: '%s'->", s);
837 v->number = my_strtod(&s);
838 debug_printf_eval("%f (s:'%s')\n", v->number, s);
839 if (v->type & VF_USER) {
845 debug_printf_eval("getvar_i: '%s'->zero\n", s);
848 v->type |= VF_CACHED;
850 debug_printf_eval("getvar_i: %f\n", v->number);
854 /* Used for operands of bitwise ops */
855 static unsigned long getvar_i_int(var *v)
857 double d = getvar_i(v);
859 /* Casting doubles to longs is undefined for values outside
860 * of target type range. Try to widen it as much as possible */
862 return (unsigned long)d;
863 /* Why? Think about d == -4294967295.0 (assuming 32bit longs) */
864 return - (long) (unsigned long) (-d);
867 static var *copyvar(var *dest, const var *src)
871 dest->type |= (src->type & ~(VF_DONTTOUCH | VF_FSTR));
872 debug_printf_eval("copyvar: number:%f string:'%s'\n", src->number, src->string);
873 dest->number = src->number;
875 dest->string = xstrdup(src->string);
877 handle_special(dest);
881 static var *incvar(var *v)
883 return setvar_i(v, getvar_i(v) + 1.0);
886 /* return true if v is number or numeric string */
887 static int is_numeric(var *v)
890 return ((v->type ^ VF_DIRTY) & (VF_NUMBER | VF_USER | VF_DIRTY));
893 /* return 1 when value of v corresponds to true, 0 otherwise */
894 static int istrue(var *v)
897 return (v->number != 0);
898 return (v->string && v->string[0]);
901 /* temporary variables allocator. Last allocated should be first freed */
902 static var *nvalloc(int n)
910 if ((g_cb->pos - g_cb->nv) + n <= g_cb->size)
916 size = (n <= MINNVBLOCK) ? MINNVBLOCK : n;
917 g_cb = xzalloc(sizeof(nvblock) + size * sizeof(var));
919 g_cb->pos = g_cb->nv;
921 /*g_cb->next = NULL; - xzalloc did it */
929 while (v < g_cb->pos) {
938 static void nvfree(var *v)
942 if (v < g_cb->nv || v >= g_cb->pos)
943 syntax_error(EMSG_INTERNAL_ERROR);
945 for (p = v; p < g_cb->pos; p++) {
946 if ((p->type & (VF_ARRAY | VF_CHILD)) == VF_ARRAY) {
947 clear_array(iamarray(p));
948 free(p->x.array->items);
951 if (p->type & VF_WALK) {
953 walker_list *w = p->x.walker;
954 debug_printf_walker("nvfree: freeing walker @%p\n", &p->x.walker);
958 debug_printf_walker(" free(%p)\n", w);
967 while (g_cb->prev && g_cb->pos == g_cb->nv) {
972 /* ------- awk program text parsing ------- */
974 /* Parse the next token of the program text and place the results into the t_* globals (t_tclass, t_info, ...).
975 * If the token class is not among the expected ones, report a syntax error. Return the token class
977 static uint32_t next_token(uint32_t expected)
979 #define concat_inserted (G.next_token__concat_inserted)
980 #define save_tclass (G.next_token__save_tclass)
981 #define save_info (G.next_token__save_info)
982 /* Initialized to TC_OPTERM: */
983 #define ltclass (G.next_token__ltclass)
993 } else if (concat_inserted) {
994 concat_inserted = FALSE;
995 t_tclass = save_tclass;
1002 g_lineno = t_lineno;
1004 while (*p != '\n' && *p != '\0')
1013 } else if (*p == '\"') {
1016 while (*p != '\"') {
1018 if (*p == '\0' || *p == '\n')
1019 syntax_error(EMSG_UNEXP_EOS);
1021 *s++ = nextchar(&pp);
1028 } else if ((expected & TC_REGEXP) && *p == '/') {
1032 if (*p == '\0' || *p == '\n')
1033 syntax_error(EMSG_UNEXP_EOS);
1037 s[-1] = bb_process_escape_sequence((const char **)&pp);
1050 } else if (*p == '.' || isdigit(*p)) {
1053 t_double = my_strtod(&pp);
1056 syntax_error(EMSG_UNEXP_TOKEN);
1060 /* search for something known */
1065 int l = (unsigned char) *tl++;
1066 if (l == (unsigned char) NTCC) {
1070 /* if token class is expected,
1072 * and it's not a longer word,
1074 if ((tc & (expected | TC_WORD | TC_NEWLINE))
1075 && strncmp(p, tl, l) == 0
1076 && !((tc & TC_WORD) && isalnum_(p[l]))
1078 /* then this is what we are looking for */
1086 /* not a known token */
1088 /* is it a name? (var/array/function) */
1090 syntax_error(EMSG_UNEXP_TOKEN); /* no */
1093 while (isalnum_(*++p)) {
1098 /* also consume whitespace between functionname and bracket */
1099 if (!(expected & TC_VARIABLE) || (expected & TC_ARRAY))
1113 /* skipping newlines in some cases */
1114 if ((ltclass & TC_NOTERM) && (tc & TC_NEWLINE))
1117 /* insert concatenation operator when needed */
1118 if ((ltclass & TC_CONCAT1) && (tc & TC_CONCAT2) && (expected & TC_BINOP)) {
1119 concat_inserted = TRUE;
1123 t_info = OC_CONCAT | SS | P(35);
1130 /* Are we ready for this? */
1131 if (!(ltclass & expected))
1132 syntax_error((ltclass & (TC_NEWLINE | TC_EOF)) ?
1133 EMSG_UNEXP_EOS : EMSG_UNEXP_TOKEN);
1136 #undef concat_inserted
1142 static void rollback_token(void)
1147 static node *new_node(uint32_t info)
1151 n = xzalloc(sizeof(node));
1153 n->lineno = g_lineno;
1157 static void mk_re_node(const char *s, node *n, regex_t *re)
1159 n->info = OC_REGEXP;
1162 xregcomp(re, s, REG_EXTENDED);
1163 xregcomp(re + 1, s, REG_EXTENDED | REG_ICASE);
1166 static node *condition(void)
1168 next_token(TC_SEQSTART);
1169 return parse_expr(TC_SEQTERM);
1172 /* parse expression terminated by given argument, return ptr
1173 * to built subtree. Terminator is eaten by parse_expr */
1174 static node *parse_expr(uint32_t iexp)
1183 sn.r.n = glptr = NULL;
1184 xtc = TC_OPERAND | TC_UOPPRE | TC_REGEXP | iexp;
1186 while (!((tc = next_token(xtc)) & iexp)) {
1188 if (glptr && (t_info == (OC_COMPARE | VV | P(39) | 2))) {
1189 /* input redirection (<) attached to glptr node */
1190 cn = glptr->l.n = new_node(OC_CONCAT | SS | P(37));
1192 xtc = TC_OPERAND | TC_UOPPRE;
1195 } else if (tc & (TC_BINOP | TC_UOPPOST)) {
1196 /* for binary and postfix-unary operators, jump back over
1197 * previous operators with higher priority */
1199 while (((t_info & PRIMASK) > (vn->a.n->info & PRIMASK2))
1200 || ((t_info == vn->info) && ((t_info & OPCLSMASK) == OC_COLON))
1204 if ((t_info & OPCLSMASK) == OC_TERNARY)
1206 cn = vn->a.n->r.n = new_node(t_info);
1208 if (tc & TC_BINOP) {
1210 xtc = TC_OPERAND | TC_UOPPRE | TC_REGEXP;
1211 if ((t_info & OPCLSMASK) == OC_PGETLINE) {
1213 next_token(TC_GETLINE);
1214 /* give maximum priority to this pipe */
1215 cn->info &= ~PRIMASK;
1216 xtc = TC_OPERAND | TC_UOPPRE | TC_BINOP | iexp;
1220 xtc = TC_OPERAND | TC_UOPPRE | TC_BINOP | iexp;
1225 /* for operands and prefix-unary operators, attach them
1228 cn = vn->r.n = new_node(t_info);
1230 xtc = TC_OPERAND | TC_UOPPRE | TC_REGEXP;
1231 if (tc & (TC_OPERAND | TC_REGEXP)) {
1232 xtc = TC_UOPPRE | TC_UOPPOST | TC_BINOP | TC_OPERAND | iexp;
1233 /* one should be very careful with switch on tclass -
1234 * only simple tclasses should be used! */
1239 v = hash_search(ahash, t_string);
1241 cn->info = OC_FNARG;
1242 cn->l.aidx = v->x.aidx;
1244 cn->l.v = newvar(t_string);
1246 if (tc & TC_ARRAY) {
1248 cn->r.n = parse_expr(TC_ARRTERM);
1255 v = cn->l.v = xzalloc(sizeof(var));
1257 setvar_i(v, t_double);
1259 setvar_s(v, t_string);
1263 mk_re_node(t_string, cn, xzalloc(sizeof(regex_t)*2));
1268 cn->r.f = newfunc(t_string);
1269 cn->l.n = condition();
1273 cn = vn->r.n = parse_expr(TC_SEQTERM);
1279 xtc = TC_OPERAND | TC_UOPPRE | TC_BINOP | iexp;
1283 cn->l.n = condition();
1292 /* add node to chain. Return ptr to alloc'd node */
1293 static node *chain_node(uint32_t info)
1298 seq->first = seq->last = new_node(0);
1300 if (seq->programname != g_progname) {
1301 seq->programname = g_progname;
1302 n = chain_node(OC_NEWSOURCE);
1303 n->l.new_progname = xstrdup(g_progname);
1308 seq->last = n->a.n = new_node(OC_DONE);
1313 static void chain_expr(uint32_t info)
1317 n = chain_node(info);
1318 n->l.n = parse_expr(TC_OPTERM | TC_GRPTERM);
1319 if (t_tclass & TC_GRPTERM)
1323 static node *chain_loop(node *nn)
1325 node *n, *n2, *save_brk, *save_cont;
1327 save_brk = break_ptr;
1328 save_cont = continue_ptr;
1330 n = chain_node(OC_BR | Vx);
1331 continue_ptr = new_node(OC_EXEC);
1332 break_ptr = new_node(OC_EXEC);
1334 n2 = chain_node(OC_EXEC | Vx);
1337 continue_ptr->a.n = n2;
1338 break_ptr->a.n = n->r.n = seq->last;
1340 continue_ptr = save_cont;
1341 break_ptr = save_brk;
1346 /* parse group and attach it to chain */
1347 static void chain_group(void)
1353 c = next_token(TC_GRPSEQ);
1354 } while (c & TC_NEWLINE);
1356 if (c & TC_GRPSTART) {
1357 while (next_token(TC_GRPSEQ | TC_GRPTERM) != TC_GRPTERM) {
1358 if (t_tclass & TC_NEWLINE)
1363 } else if (c & (TC_OPSEQ | TC_OPTERM)) {
1365 chain_expr(OC_EXEC | Vx);
1366 } else { /* TC_STATEMNT */
1367 switch (t_info & OPCLSMASK) {
1369 n = chain_node(OC_BR | Vx);
1370 n->l.n = condition();
1372 n2 = chain_node(OC_EXEC);
1374 if (next_token(TC_GRPSEQ | TC_GRPTERM | TC_ELSE) == TC_ELSE) {
1376 n2->a.n = seq->last;
1384 n = chain_loop(NULL);
1389 n2 = chain_node(OC_EXEC);
1390 n = chain_loop(NULL);
1392 next_token(TC_WHILE);
1393 n->l.n = condition();
1397 next_token(TC_SEQSTART);
1398 n2 = parse_expr(TC_SEMICOL | TC_SEQTERM);
1399 if (t_tclass & TC_SEQTERM) { /* for-in */
1400 if ((n2->info & OPCLSMASK) != OC_IN)
1401 syntax_error(EMSG_UNEXP_TOKEN);
1402 n = chain_node(OC_WALKINIT | VV);
1405 n = chain_loop(NULL);
1406 n->info = OC_WALKNEXT | Vx;
1408 } else { /* for (;;) */
1409 n = chain_node(OC_EXEC | Vx);
1411 n2 = parse_expr(TC_SEMICOL);
1412 n3 = parse_expr(TC_SEQTERM);
1422 n = chain_node(t_info);
1423 n->l.n = parse_expr(TC_OPTERM | TC_OUTRDR | TC_GRPTERM);
1424 if (t_tclass & TC_OUTRDR) {
1426 n->r.n = parse_expr(TC_OPTERM | TC_GRPTERM);
1428 if (t_tclass & TC_GRPTERM)
1433 n = chain_node(OC_EXEC);
1438 n = chain_node(OC_EXEC);
1439 n->a.n = continue_ptr;
1442 /* delete, next, nextfile, return, exit */
1449 static void parse_program(char *p)
1458 while ((tclass = next_token(TC_EOF | TC_OPSEQ | TC_GRPSTART |
1459 TC_OPTERM | TC_BEGIN | TC_END | TC_FUNCDECL)) != TC_EOF) {
1461 if (tclass & TC_OPTERM)
1465 if (tclass & TC_BEGIN) {
1469 } else if (tclass & TC_END) {
1473 } else if (tclass & TC_FUNCDECL) {
1474 next_token(TC_FUNCTION);
1476 f = newfunc(t_string);
1477 f->body.first = NULL;
1479 while (next_token(TC_VARIABLE | TC_SEQTERM) & TC_VARIABLE) {
1480 v = findvar(ahash, t_string);
1481 v->x.aidx = f->nargs++;
1483 if (next_token(TC_COMMA | TC_SEQTERM) & TC_SEQTERM)
1490 } else if (tclass & TC_OPSEQ) {
1492 cn = chain_node(OC_TEST);
1493 cn->l.n = parse_expr(TC_OPTERM | TC_EOF | TC_GRPSTART);
1494 if (t_tclass & TC_GRPSTART) {
1498 chain_node(OC_PRINT);
1500 cn->r.n = mainseq.last;
1502 } else /* if (tclass & TC_GRPSTART) */ {
1510 /* -------- program execution part -------- */
1512 static node *mk_splitter(const char *s, tsplitter *spl)
1520 if ((n->info & OPCLSMASK) == OC_REGEXP) {
1522 regfree(ire); // TODO: nuke ire, use re+1?
1524 if (s[0] && s[1]) { /* strlen(s) > 1 */
1525 mk_re_node(s, n, re);
1527 n->info = (uint32_t) s[0];
1533 /* use node as a regular expression. Supplied with node ptr and regex_t
1534 * storage space. Return ptr to regex (if result points to preg, it should
1535 * be later regfree'd manually)
1537 static regex_t *as_regex(node *op, regex_t *preg)
1543 if ((op->info & OPCLSMASK) == OC_REGEXP) {
1544 return icase ? op->r.ire : op->l.re;
1547 s = getvar_s(evaluate(op, v));
1549 cflags = icase ? REG_EXTENDED | REG_ICASE : REG_EXTENDED;
1550 /* Testcase where REG_EXTENDED fails (unpaired '{'):
1551 * echo Hi | awk 'gsub("@(samp|code|file)\{","");'
1552 * gawk 3.1.5 eats this. We revert to ~REG_EXTENDED
1553 * (maybe gsub is not supposed to use REG_EXTENDED?).
1555 if (regcomp(preg, s, cflags)) {
1556 cflags &= ~REG_EXTENDED;
1557 xregcomp(preg, s, cflags);
1563 /* gradually increasing buffer.
1564 * note that we reallocate even if n == old_size,
1565 * and thus there is at least one extra allocated byte.
1567 static char* qrealloc(char *b, int n, int *size)
1569 if (!b || n >= *size) {
1570 *size = n + (n>>1) + 80;
1571 b = xrealloc(b, *size);
1576 /* resize field storage space */
1577 static void fsrealloc(int size)
1581 if (size >= maxfields) {
1583 maxfields = size + 16;
1584 Fields = xrealloc(Fields, maxfields * sizeof(Fields[0]));
1585 for (; i < maxfields; i++) {
1586 Fields[i].type = VF_SPECIAL;
1587 Fields[i].string = NULL;
1590 /* if size < nfields, clear extra field variables */
1591 for (i = size; i < nfields; i++) {
// Split string s using splitter node spl; the output buffer is allocated here and handed back through *slist.
// The visible code counts fields in n and returns it.
// NOTE(review): many lines of this function are missing from the listing; notes describe only what is shown.
1597 static int awk_split(const char *s, node *spl, char **slist)
1602 regmatch_t pmatch[2]; // TODO: why [2]? [1] is enough...
1604 /* in worst case, each char would be a separate field */
// The zeroed output buffer is sized at twice the input length plus 3 bytes.
1605 *slist = s1 = xzalloc(strlen(s) * 2 + 3);
// c[0] and c[1] both start out as the low byte of spl->info.
1608 c[0] = c[1] = (char)spl->info;
// Special case tested here: the RS variable is an empty string (the action taken is not shown).
1610 if (*getvar_s(intvar[RS]) == '\0')
1614 if ((spl->info & OPCLSMASK) == OC_REGEXP) { /* regex split */
1616 return n; /* "": zero fields */
1617 n++; /* at least one field will be there */
1619 l = strcspn(s, c+2); /* len till next NUL or \n */
// The regex variant follows icase; a match counts only if it starts at or before l.
1620 if (regexec(icase ? spl->r.ire : spl->l.re, s, 1, pmatch, 0) == 0
1621 && pmatch[0].rm_so <= l
1623 l = pmatch[0].rm_so;
// A match whose end offset is 0 gets special treatment (the statements are not shown).
1624 if (pmatch[0].rm_eo == 0) {
1628 n++; /* we saw yet another delimiter */
1630 pmatch[0].rm_eo = l;
1635 /* make sure we remove *all* of the separator chars */
1638 } while (++l < pmatch[0].rm_eo);
// Continue scanning after the end of the matched separator.
1640 s += pmatch[0].rm_eo;
1644 if (c[0] == '\0') { /* null split */
1652 if (c[0] != ' ') { /* single-character split */
// c[0] becomes the upper-case and c[1] the lower-case form of the separator.
// NOTE(review): presumably guarded by icase; the condition line is not shown.
1654 c[0] = toupper(c[0]);
1655 c[1] = tolower(c[1]);
// Each occurrence of either char in c is located with strpbrk.
1659 while ((s1 = strpbrk(s1, c)) != NULL) {
// Remaining visible lines: skip leading whitespace, then advance over one run of non-whitespace chars.
1667 s = skip_whitespace(s);
1671 while (*s && !isspace(*s))
// Split the value of intvar[F0] with fsplitter and publish the pieces through Fields[] and NF.
// NOTE(review): several lines are missing from this listing (guards, locals, the fsrealloc call if any).
1678 static void split_f0(void)
1680 /* static char *fstrings; */
// fstrings lives in the globals struct G rather than in a function-local static.
1681 #define fstrings (G.split_f0__fstrings)
// awk_split stores the split words in fstrings and returns their count in n.
1692 n = awk_split(getvar_s(intvar[F0]), &fsplitter.n, &fstrings);
// Each field variable points at the next word of that buffer and gains the VF_FSTR, VF_USER and VF_DIRTY flags.
1695 for (i = 0; i < n; i++) {
1696 Fields[i].string = nextword(&s);
1697 Fields[i].type |= (VF_FSTR | VF_USER | VF_DIRTY);
1700 /* set NF manually to avoid side effects */
// type and number are written directly instead of going through a setvar call.
1702 intvar[NF]->type = VF_NUMBER | VF_SPECIAL;
1703 intvar[NF]->number = nfields;
1707 /* perform additional actions when some internal variables changed */
// NOTE(review): several lines of this function are missing from the listing; notes describe only what is shown.
1708 static void handle_special(var *v)
1712 const char *sep, *s;
1713 int sl, l, len, i, bsize;
// Only variables flagged VF_SPECIAL are of interest (the early exit itself is not shown).
1715 if (!(v->type & VF_SPECIAL))
// Case: v is NF. The record text is rebuilt from the first n fields.
1718 if (v == intvar[NF]) {
1719 n = (int)getvar_i(v);
1722 /* recalculate $0 */
1723 sep = getvar_s(intvar[OFS]);
// For each field, the OFS string (sep, sl bytes) and the field text (s, l bytes) are copied into the growing buffer b.
1727 for (i = 0; i < n; i++) {
1728 s = getvar_s(&Fields[i]);
1731 memcpy(b+len, sep, sl);
1734 b = qrealloc(b, len+l+sl, &bsize);
1735 memcpy(b+len, s, l);
// The rebuilt buffer is handed to the F0 variable through setvar_p.
1740 setvar_p(intvar[F0], b);
// Case: v is F0. The is_f0_split flag is cleared.
1743 } else if (v == intvar[F0]) {
1744 is_f0_split = FALSE;
// Cases: v is FS or RS. The matching splitter is rebuilt from the new string value.
1746 } else if (v == intvar[FS]) {
1747 mk_splitter(getvar_s(v), &fsplitter);
1749 } else if (v == intvar[RS]) {
1750 mk_splitter(getvar_s(v), &rsplitter);
// Case: v is IGNORECASE (the action taken is not shown).
1752 } else if (v == intvar[IGNORECASE]) {
// NF is set to v-Fields+1 unless the current NF is already greater than v-Fields.
// NOTE(review): presumably reached when v is an element of Fields; the guarding branch is not shown.
1756 n = getvar_i(intvar[NF]);
1757 setvar_i(intvar[NF], n > v-Fields ? n : v-Fields+1);
1758 /* right here v is invalid. Just to note... */
1762 /* step through func/builtin/etc arguments */
// pn points at the caller's cursor into the argument list.
// NOTE(review): the lines that load n, advance *pn and return are missing from this listing.
1763 static node *nextarg(node **pn)
// A non-NULL node of class OC_COMMA is the case handled by this branch.
1768 if (n && (n->info & OPCLSMASK) == OC_COMMA) {
// Set up a walker on variable v that holds the item names of array.
// NOTE(review): several lines are missing from this listing (locals, the inner item loop, advancing w->end).
1777 static void hashwalk_init(var *v, xhash *array)
1782 walker_list *prev_walker;
// If v already has a walker (VF_WALK set), it is remembered so the new one can link back to it.
1784 if (v->type & VF_WALK) {
1785 prev_walker = v->x.walker;
1790 debug_printf_walker("hashwalk_init: prev_walker:%p\n", prev_walker);
// One zeroed allocation holds the walker header plus array->glen + 1 bytes of name storage.
1792 w = v->x.walker = xzalloc(sizeof(*w) + array->glen + 1); /* why + 1? */
1793 debug_printf_walker(" walker@%p=%p\n", &v->x.walker, w);
// cur and end both start at the beginning of the name buffer.
1794 w->cur = w->end = w->wbuf;
1795 w->prev = prev_walker;
// Walk every bucket of the hash; each item name is copied to the position w->end.
1796 for (i = 0; i < array->csize; i++) {
1797 hi = array->items[i];
1799 strcpy(w->end, hi->name);
// Advance the walker attached to v by one name.
// NOTE(review): the return statements and some other lines are missing from this listing.
1806 static int hashwalk_next(var *v)
1808 walker_list *w = v->x.walker;
// cur reaching end means the stored names are exhausted.
1810 if (w->cur >= w->end) {
1811 walker_list *prev_walker = w->prev;
1813 debug_printf_walker("end of iteration, free(walker@%p:%p), prev_walker:%p\n", &v->x.walker, w, prev_walker);
// The previous walker becomes current again.
// NOTE(review): the debug text mentions free; presumably w is freed on a line not shown.
1815 v->x.walker = prev_walker;
// Otherwise v receives the next stored name as its string value.
1819 setvar_s(v, nextword(&w->cur));
1823 /* evaluate node, return 1 when result is true, 0 otherwise */
1824 static int ptest(node *pattern)
1826 /* ptest__v is "static": to save stack space? */
// The result variable is the shared G.ptest__v rather than a local; truth is decided by istrue.
1827 return istrue(evaluate(pattern, &G.ptest__v));
1830 /* read next record from stream rsm into a variable v */
// NOTE(review): many lines of this function are missing from the listing; notes describe only what is shown.
1831 static int awk_getline(rstream *rsm, var *v)
1834 regmatch_t pmatch[2];
1835 int size, a, p, pp = 0;
1836 int fd, so, eo, r, rp;
1839 debug_printf_eval("entered %s()\n", __func__);
1841 /* we're using our own buffer since we need access to accumulating
// Reads go through the raw descriptor of the stream, not through stdio.
1844 fd = fileno(rsm->F);
// c is the low byte of the record splitter info value.
1849 c = (char) rsplitter.n.info;
// Make sure a buffer of at least 256 bytes exists.
1853 m = qrealloc(m, 256, &size);
// Terminator search, regex splitter: so and eo are the match start and end offsets in b.
1860 if ((rsplitter.n.info & OPCLSMASK) == OC_REGEXP) {
1861 if (regexec(icase ? rsplitter.n.r.ire : rsplitter.n.l.re,
1862 b, 1, pmatch, 0) == 0) {
1863 so = pmatch[0].rm_so;
1864 eo = pmatch[0].rm_eo;
// Terminator search, non-NUL c: look for c starting at offset pp.
// A NUL search over the remaining p - pp bytes follows (its guarding condition is not shown).
1868 } else if (c != '\0') {
1869 s = strchr(b+pp, c);
1871 s = memchr(b+pp, '\0', p - pp);
// Remaining case: runs of newline chars are stepped over and two consecutive newlines are searched for.
1878 while (b[rp] == '\n')
1880 s = strstr(b+rp, "\n\n");
1883 while (b[eo] == '\n')
// Buffer upkeep: p+1 bytes are moved from offset a down to the start of m, and m is grown before reading more.
1892 memmove(m, m+a, p+1);
1897 m = qrealloc(m, a+p+128, &size);
// More input is appended at b+p; the size-p-1 limit leaves one byte of the buffer unused by the read.
1900 p += safe_read(fd, b+p, size-p-1);
// errno is copied into the ERRNO variable (the failure test itself is not shown).
1904 setvar_i(intvar[ERRNO], errno);
// The byte at so, then the byte at eo, is saved in c and overwritten with NUL.
1913 c = b[so]; b[so] = '\0';
1917 c = b[eo]; b[eo] = '\0';
// RT receives the text starting at so, which now ends at the NUL written at eo.
1918 setvar_s(intvar[RT], b+so);
1927 debug_printf_eval("returning from %s(): %d\n", __func__, r);
// Format number n into buffer b (capacity size) according to format; r holds the snprintf result.
// NOTE(review): locals, else lines and the return are missing from this listing.
1932 static int fmt_num(char *b, int size, const char *format, double n, int int_as_int)
1936 const char *s = format;
// With int_as_int set and n equal to its int conversion, the value is printed with %d and format is ignored.
// NOTE(review): the (int)n conversions here and below are only meaningful while n fits in int - confirm wider values are handled as intended.
1938 if (int_as_int && n == (int)n) {
1939 r = snprintf(b, size, "%d", (int)n);
// Scan to the last char of format; c ends up as that char, or NUL if format is empty.
1941 do { c = *s; } while (c && *++s);
// Integer conversions get (int)n.
// NOTE(review): strchr also matches the terminator, so c equal to NUL takes this branch - confirm intended.
1942 if (strchr("diouxX", c)) {
1943 r = snprintf(b, size, format, (int)n);
// Floating conversions get n unchanged.
1944 } else if (strchr("eEfgG", c)) {
1945 r = snprintf(b, size, format, n);
// Any other final char is reported via syntax_error with EMSG_INV_FMT.
1947 syntax_error(EMSG_INV_FMT);
1953 /* formatted output into an allocated buffer, return ptr to buffer */
// NOTE(review): many lines of this function are missing from the listing; notes describe only what is shown.
1954 static char *awk_printf(node *n)
1959 int i, j, incr, bsize;
// The first argument is evaluated and duplicated to serve as the format string.
1964 fmt = f = xstrdup(getvar_s(evaluate(nextarg(&n), v)));
// Advance f to the next conversion; a doubled percent sign does not stop the scan.
1969 while (*f && (*f != '%' || *++f == '%'))
// Then step over non-alphabetic chars up to the conversion letter.
1971 while (*f && !isalpha(*f)) {
1973 syntax_error("%*x formats are not supported");
// Reserve the length of this format piece plus MAXVARFMT beyond the i bytes already written.
1977 incr = (f - s) + MAXVARFMT;
1978 b = qrealloc(b, incr + i, &bsize);
// The next argument supplies the value for this conversion.
1984 arg = evaluate(nextarg(&n), v);
// Conversion c (or no conversion letter at all): a numeric arg is cast to char, otherwise the first char of its string is used.
1987 if (c == 'c' || !c) {
1988 i += sprintf(b+i, s, is_numeric(arg) ?
1989 (char)getvar_i(arg) : *getvar_s(arg));
// Conversion s: the buffer is grown by the length of the string s1 before formatting.
1990 } else if (c == 's') {
1992 b = qrealloc(b, incr+i+strlen(s1), &bsize);
1993 i += sprintf(b+i, s, s1);
// Everything else is delegated to fmt_num with int_as_int off.
1995 i += fmt_num(b+i, incr, s, getvar_i(arg), FALSE);
1999 /* if there was an error while sprintf, return value is negative */
// Final step shown: trim the allocation to i + 1 bytes.
2006 b = xrealloc(b, i + 1);
2011 /* Common substitution routine.
2012 * Replace (nm)'th substring of (src) that matches (rn) with (repl),
2013 * store result into (dest), return number of substitutions.
2014 * If nm = 0, replace all matches.
2015 * If src or dst is NULL, use $0.
2016 * If subexp != 0, enable subexpression matching (\1-\9).
// NOTE(review): the closing line of the comment above and many lines of the function body are missing from this listing.
2018 static int awk_sub(node *rn, const char *repl, int nm, var *src, var *dest, int subexp)
2022 int match_no, residx, replen, resbufsize;
// Ten slots: the whole match plus nine subexpressions.
2024 regmatch_t pmatch[10];
2025 regex_t sreg, *regex;
// as_regex may compile into sreg or return a regex stored in the node.
2031 regex = as_regex(rn, &sreg);
// A NULL src means the text comes from intvar[F0].
2032 sp = getvar_s(src ? src : intvar[F0]);
2033 replen = strlen(repl);
// One iteration per match found in the remaining text sp.
2034 while (regexec(regex, sp, 10, pmatch, regexec_flags) == 0) {
2035 int so = pmatch[0].rm_so;
2036 int eo = pmatch[0].rm_eo;
2038 //bb_error_msg("match %u: [%u,%u] '%s'%p", match_no+1, so, eo, sp,sp);
// Copy everything up to the end of the match into resbuf, growing it first.
2039 resbuf = qrealloc(resbuf, residx + eo + replen, &resbufsize);
2040 memcpy(resbuf + residx, sp, eo);
// Count the match and compare the count with nm.
2042 if (++match_no >= nm) {
// Step the output index back over the matched text so the replacement overwrites it.
2047 residx -= (eo - so);
// Emit the replacement text char by char.
2049 for (s = repl; *s; s++) {
2050 char c = resbuf[residx++] = *s;
// An ampersand, or a digit when subexp is on, refers back to matched text.
2055 if (c == '&' || (subexp && c >= '0' && c <= '9')) {
// nbs is maintained on lines not shown.
// NOTE(review): presumably a count of preceding backslashes - confirm.
2057 residx -= ((nbs + 3) >> 1);
2064 resbuf[residx++] = c;
// Insert the text of match slot j (n bytes) at the current output position.
2066 int n = pmatch[j].rm_eo - pmatch[j].rm_so;
2067 resbuf = qrealloc(resbuf, residx + replen + n, &resbufsize);
2068 memcpy(resbuf + residx, sp + pmatch[j].rm_so, n);
// Later matches are run with REG_NOTBOL.
2076 regexec_flags = REG_NOTBOL;
2081 /* Empty match (e.g. "b*" will match anywhere).
2082 * Advance by one char. */
2084 //gsub(/\<b*/,"") on "abc" will reach this point, advance to "bc"
2085 //... and will erroneously match "b" even though it is NOT at the word start.
2086 //we need REG_NOTBOW but it does not exist...
2087 //TODO: if EXTRA_COMPAT=y, use GNU matching and re_search,
2088 //it should be able to do it correctly.
2089 /* Subtle: this is safe only because
2090 * qrealloc allocated at least one extra byte */
2091 resbuf[residx] = *sp;
// Append the unmatched tail of the input.
2099 resbuf = qrealloc(resbuf, residx + strlen(sp), &resbufsize);
2100 strcpy(resbuf + residx, sp);
2102 //bb_error_msg("end sp:'%s'%p", sp,sp);
// A NULL dest means the result is stored into intvar[F0].
2103 setvar_p(dest ? dest : intvar[F0], resbuf);
// Parse the date string ds into a struct tm and convert it with mktime.
// NOTE(review): locals, parts of the validity check and its failure return are missing from this listing.
2109 static NOINLINE int do_mktime(const char *ds)
2114 /*memset(&then, 0, sizeof(then)); - not needed */
2115 then.tm_isdst = -1; /* default is unknown */
2117 /* manpage of mktime says these fields are ints,
2118 * so we can sscanf stuff directly into them */
// Six unsigned fields (year month day hour minute second) and one signed field are parsed; the seventh target is on a line not shown.
// NOTE(review): %u formally expects unsigned int pointers while these members are int - confirm this is acceptable on all targets.
2119 count = sscanf(ds, "%u %u %u %u %u %u %d",
2120 &then.tm_year, &then.tm_mon, &then.tm_mday,
2121 &then.tm_hour, &then.tm_min, &then.tm_sec,
// Visible validity tests: month below 1 and year below 1900.
2125 || (unsigned)then.tm_mon < 1
2126 || (unsigned)then.tm_year < 1900
// The year is rebased by subtracting 1900 before the mktime call.
2132 then.tm_year -= 1900;
// The mktime result is returned through the int return type of this function.
2134 return mktime(&then);
// Execute the builtin described by node op, storing or returning the result via res.
// NOTE(review): the switch header, its case labels and many other lines are missing from this listing; notes describe only what is shown.
2137 static NOINLINE var *exec_builtin(node *op, var *res)
// tspl lives in the globals struct G.
2139 #define tspl (G.exec_builtin__tspl)
2145 regmatch_t pmatch[2];
2154 isr = info = op->info;
2157 av[2] = av[3] = NULL;
// Collect up to four arguments: an[] holds nodes, av[] evaluated values, as[] string values.
// Evaluation happens when isr has any of the bits 0x09000000; the string is fetched when bit 0x08000000 is set.
2158 for (i = 0; i < 4 && op; i++) {
2159 an[i] = nextarg(&op);
2160 if (isr & 0x09000000)
2161 av[i] = evaluate(an[i], &tv[i]);
2162 if (isr & 0x08000000)
2163 as[i] = getvar_s(av[i]);
// The top two bits of info give the minimum argument count.
2168 if ((uint32_t)nargs < (info >> 30))
2169 syntax_error(EMSG_TOO_FEW_ARGS);
// atan2 is available only with ENABLE_FEATURE_AWK_LIBM; otherwise EMSG_NO_MATH is raised.
2175 if (ENABLE_FEATURE_AWK_LIBM)
2176 setvar_i(res, atan2(getvar_i(av[0]), getvar_i(av[1])));
2178 syntax_error(EMSG_NO_MATH);
// Splitting: a third argument that is a regexp node is used as is; otherwise its string value is turned into a splitter.
2185 spl = (an[2]->info & OPCLSMASK) == OC_REGEXP ?
2186 an[2] : mk_splitter(getvar_s(evaluate(an[2], &tv[2])), &tspl);
2191 n = awk_split(as[0], spl, &s);
// The target array is emptied, then filled with the pieces under indices 1 to n.
2193 clear_array(iamarray(av[1]));
2194 for (i = 1; i <= n; i++)
2195 setari_u(av[1], i, nextword(&s));
// Substring: the second argument minus 1 gives the zero-based start i.
2205 i = getvar_i(av[1]) - 1;
// The length defaults to l-i when no third argument was given.
2210 n = (nargs > 2) ? getvar_i(av[2]) : l-i;
2213 s = xstrndup(as[0]+i, n);
2218 /* Bitwise ops must assume that operands are unsigned. GNU Awk 3.1.5:
2219 * awk '{ print or(-1,1) }' gives "4.29497e+09", not "-2.xxxe+09" */
2221 setvar_i(res, getvar_i_int(av[0]) & getvar_i_int(av[1]));
2225 setvar_i(res, ~getvar_i_int(av[0]));
2229 setvar_i(res, getvar_i_int(av[0]) << getvar_i_int(av[1]));
2233 setvar_i(res, getvar_i_int(av[0]) | getvar_i_int(av[1]));
2237 setvar_i(res, getvar_i_int(av[0]) >> getvar_i_int(av[1]));
2241 setvar_i(res, getvar_i_int(av[0]) ^ getvar_i_int(av[1]));
// Case conversion works on a private copy of the string.
2247 s1 = s = xstrdup(as[0]);
2249 //*s1 = (info == B_up) ? toupper(*s1) : tolower(*s1);
// Only the letters a-z and A-Z are touched: bit 0x20 is cleared for upper case and set for lower case.
2250 if ((unsigned char)((*s1 | 0x20) - 'a') <= ('z' - 'a'))
2251 *s1 = (info == B_up) ? (*s1 & 0xdf) : (*s1 | 0x20);
// Substring search: l is the length difference between haystack as[0] and needle length ll.
2261 l = strlen(as[0]) - ll;
2262 if (ll > 0 && l >= 0) {
// One visible path uses strstr; the result n is a one-based position.
2264 char *s = strstr(as[0], as[1]);
2266 n = (s - as[0]) + 1;
2268 /* this piece of code is terribly slow and
2269 * really should be rewritten
// The other visible path compares at every offset with strncasecmp.
2271 for (i = 0; i <= l; i++) {
2272 if (strncasecmp(as[0]+i, as[1], ll) == 0) {
2284 tt = getvar_i(av[1]);
2287 //s = (nargs > 0) ? as[0] : "%a %b %d %H:%M:%S %Z %Y";
// Time formatting writes at most MAXVARFMT bytes into g_buf; the default format applies when no argument was given.
2288 i = strftime(g_buf, MAXVARFMT,
2289 ((nargs > 0) ? as[0] : "%a %b %d %H:%M:%S %Z %Y"),
2292 setvar_s(res, g_buf);
2296 setvar_i(res, do_mktime(as[0]));
// Regex match of the second argument against the first string.
2300 re = as_regex(an[1], &sreg);
2301 n = regexec(re, as[0], 1, pmatch, 0);
// These offsets make the RSTART value 0 and the RLENGTH value -1.
// NOTE(review): presumably the no-match case; the guarding branch is not shown.
2306 pmatch[0].rm_so = 0;
2307 pmatch[0].rm_eo = -1;
2309 setvar_i(newvar("RSTART"), pmatch[0].rm_so);
2310 setvar_i(newvar("RLENGTH"), pmatch[0].rm_eo - pmatch[0].rm_so);
2311 setvar_i(res, pmatch[0].rm_so);
// Three awk_sub callers: counted replacement with subexpressions into res, replace-all in place, replace-first in place.
2317 awk_sub(an[0], as[1], getvar_i(av[2]), av[3], res, TRUE);
2321 setvar_i(res, awk_sub(an[0], as[1], 0, av[2], av[2], FALSE));
2325 setvar_i(res, awk_sub(an[0], as[1], 1, av[2], av[2], FALSE));
2335 * Evaluate node - the heart of the program. Supplied with subtree
2336 * and place where to store result. returns ptr to result.
// NOTE(review): the opening and closing lines of the comment above, most case labels and many statements are missing from this listing; notes describe only what is shown.
// XC shifts an opcode class right by 8 bits for use as a switch label.
2338 #define XC(n) ((n) >> 8)
2340 static var *evaluate(node *op, var *res)
2342 /* This procedure is recursive so we should count every byte */
// fnargs, seed and sreg live in the globals struct G rather than on the stack.
2343 #define fnargs (G.evaluate__fnargs)
2344 /* seed is initialized to 1 */
2345 #define seed (G.evaluate__seed)
2346 #define sreg (G.evaluate__sreg)
// Early exit shown here: res is set to a NULL string and returned (the condition is not shown).
2351 return setvar_s(res, NULL);
2353 debug_printf_eval("entered %s()\n", __func__);
2361 } L = L; /* for compiler */
2372 opn = (opinfo & OPNMASK);
// The current line number is recorded in g_lineno.
2373 g_lineno = op->lineno;
2375 debug_printf_eval("opinfo:%08x opn:%08x\n", opinfo, opn);
2377 /* execute inevitable things */
// Flag bits in opinfo request pre-evaluation of the operands (OF_RES1, OF_RES2) and their string (OF_STR1, OF_STR2) or numeric (OF_NUM1) values.
2378 if (opinfo & OF_RES1)
2379 L.v = evaluate(op1, v1);
2380 if (opinfo & OF_RES2)
2381 R.v = evaluate(op->r.n, v1+1);
2382 if (opinfo & OF_STR1) {
2383 L.s = getvar_s(L.v);
2384 debug_printf_eval("L.s:'%s'\n", L.s);
2386 if (opinfo & OF_STR2) {
2387 R.s = getvar_s(R.v);
2388 debug_printf_eval("R.s:'%s'\n", R.s);
2390 if (opinfo & OF_NUM1) {
2391 L_d = getvar_i(L.v);
2392 debug_printf_eval("L_d:%f\n", L_d);
2395 debug_printf_eval("switch(0x%x)\n", XC(opinfo & OPCLSMASK));
// Dispatch on the opcode class.
2396 switch (XC(opinfo & OPCLSMASK)) {
2398 /* -- iterative node type -- */
2402 if ((op1->info & OPCLSMASK) == OC_COMMA) {
2403 /* it's range pattern */
// OF_CHECKED on the node records that a range is open: set once the left pattern matches, cleared when the right pattern matches.
2404 if ((opinfo & OF_CHECKED) || ptest(op1->l.n)) {
2405 op->info |= OF_CHECKED;
2406 if (ptest(op1->r.n))
2407 op->info &= ~OF_CHECKED;
// Plain pattern: continue at a.n when it tests true, at r.n otherwise.
2413 op = ptest(op1) ? op->a.n : op->r.n;
2417 /* just evaluate an expression, also used as unconditional jump */
2421 /* branch, used in if-else and various loops */
2423 op = istrue(L.v) ? op->a.n : op->r.n;
2426 /* initialize for-in loop */
2427 case XC( OC_WALKINIT ):
2428 hashwalk_init(L.v, iamarray(R.v));
2431 /* get next array item */
2432 case XC( OC_WALKNEXT ):
2433 op = hashwalk_next(L.v) ? op->a.n : op->r.n;
2436 case XC( OC_PRINT ):
2437 case XC( OC_PRINTF ): {
// Output redirection: R.s names the target; it is either opened as a pipe with popen or as a file in write or append mode.
2441 rstream *rsm = newfile(R.s);
2444 rsm->F = popen(R.s, "w");
2446 bb_perror_msg_and_die("popen");
2449 rsm->F = xfopen(R.s, opn=='w' ? "w" : "a");
2455 if ((opinfo & OPCLSMASK) == OC_PRINT) {
// One visible path writes the value of intvar[F0].
2457 fputs(getvar_s(intvar[F0]), F);
// Otherwise each argument is evaluated; numbers are formatted through fmt_num with the OFMT format, other values are written as strings.
2460 var *v = evaluate(nextarg(&op1), v1);
2461 if (v->type & VF_NUMBER) {
2462 fmt_num(g_buf, MAXVARFMT, getvar_s(intvar[OFMT]),
2466 fputs(getvar_s(v), F);
// The OFS string is written here and the ORS string below (the conditions around them are not shown).
2470 fputs(getvar_s(intvar[OFS]), F);
2473 fputs(getvar_s(intvar[ORS]), F);
2475 } else { /* OC_PRINTF */
2476 char *s = awk_printf(op1);
2484 case XC( OC_DELETE ): {
2485 uint32_t info = op1->info & OPCLSMASK;
// The delete target must be a plain variable or a function argument; anything else is a syntax error.
2488 if (info == OC_VAR) {
2490 } else if (info == OC_FNARG) {
2491 v = &fnargs[op1->l.aidx];
2493 syntax_error(EMSG_NOT_ARRAY);
// Either a single element named by s is removed, or the whole array is cleared.
2499 s = getvar_s(evaluate(op1->r.n, v1));
2500 hash_remove(iamarray(v), s);
2502 clear_array(iamarray(v));
2507 case XC( OC_NEWSOURCE ):
2508 g_progname = op->l.new_progname;
2511 case XC( OC_RETURN ):
2515 case XC( OC_NEXTFILE ):
2526 /* -- recursive node type -- */
2530 if (L.v == intvar[NF])
2534 case XC( OC_FNARG ):
2535 L.v = &fnargs[op->l.aidx];
// With a subscript node present, the element is looked up in the array; otherwise the variable itself is the result.
2537 res = op->r.n ? findvar(iamarray(L.v), R.s) : L.v;
// Membership test: 1 when key L.s exists in the array R.v, else 0.
2541 setvar_i(res, hash_search(iamarray(R.v), L.s) ? 1 : 0);
2544 case XC( OC_REGEXP ):
2546 L.s = getvar_s(intvar[F0]);
2549 case XC( OC_MATCH ):
2553 regex_t *re = as_regex(op1, &sreg);
2554 int i = regexec(re, L.s, 0, NULL, 0);
// The match outcome is inverted when opn is the exclamation mark.
2557 setvar_i(res, (i == 0) ^ (opn == '!'));
2562 debug_printf_eval("MOVE\n");
2563 /* if source is a temporary string, just relink it to dest */
2564 //Disabled: if R.v is numeric but happens to have cached R.v->string,
2565 //then L.v ends up being a string, which is wrong
2566 // if (R.v == v1+1 && R.v->string) {
2567 // res = setvar_p(L.v, R.v->string);
2568 // R.v->string = NULL;
2570 res = copyvar(L.v, R.v);
2574 case XC( OC_TERNARY ):
// The right child must be an OC_COLON node holding the two alternatives.
2575 if ((op->r.n->info & OPCLSMASK) != OC_COLON)
2576 syntax_error(EMSG_POSSIBLE_ERROR);
2577 res = evaluate(istrue(L.v) ? op->r.n->l.n : op->r.n->r.n, res);
2580 case XC( OC_FUNC ): {
2582 const char *sv_progname;
// Calling a function with no body is an error.
2584 if (!op->r.f->body.first)
2585 syntax_error(EMSG_UNDEF_FUNC);
// One extra slot beyond the declared argument count is allocated.
2587 vbeg = v = nvalloc(op->r.f->nargs + 1);
2589 var *arg = evaluate(nextarg(&op1), v1);
2591 v->type |= VF_CHILD;
// Stop once the declared number of arguments has been filled.
2593 if (++v - vbeg >= op->r.f->nargs)
// g_progname is saved before the body runs and restored after it.
2599 sv_progname = g_progname;
2601 res = evaluate(op->r.f->body.first, res);
2603 g_progname = sv_progname;
2610 case XC( OC_GETLINE ):
2611 case XC( OC_PGETLINE ): {
// The source is a pipe opened with popen for OC_PGETLINE, otherwise a file opened for reading.
2618 if ((opinfo & OPCLSMASK) == OC_PGETLINE) {
2619 rsm->F = popen(L.s, "r");
2620 rsm->is_pipe = TRUE;
2622 rsm->F = fopen_for_read(L.s); /* not xfopen! */
2627 iF = next_input_file();
2632 setvar_i(intvar[ERRNO], errno);
2640 i = awk_getline(rsm, R.v);
// FNR is incremented only for a successful read with no op1.
2641 if (i > 0 && !op1) {
2642 incvar(intvar[FNR]);
2649 /* simple builtins */
2650 case XC( OC_FBLTIN ): {
2651 double R_d = R_d; /* for compiler */
// rand result scaled by RAND_MAX.
2659 R_d = (double)rand() / (double)RAND_MAX;
// Five guards on ENABLE_FEATURE_AWK_LIBM follow (their bodies are not shown); EMSG_NO_MATH is raised on a later visible line.
2663 if (ENABLE_FEATURE_AWK_LIBM) {
2669 if (ENABLE_FEATURE_AWK_LIBM) {
2675 if (ENABLE_FEATURE_AWK_LIBM) {
2681 if (ENABLE_FEATURE_AWK_LIBM) {
2687 if (ENABLE_FEATURE_AWK_LIBM) {
2692 syntax_error(EMSG_NO_MATH);
// The seed comes from the argument when one is present, else from the current time.
2697 seed = op1 ? (unsigned)L_d : (unsigned)time(NULL);
2707 L.s = getvar_s(intvar[F0]);
// system is invoked only when ENABLE_FEATURE_ALLOW_EXEC is on and the command string is non-empty; its status is shifted right by 8 bits.
2713 R_d = (ENABLE_FEATURE_ALLOW_EXEC && L.s && *L.s)
2714 ? (system(L.s) >> 8) : 0;
2720 } else if (L.s && *L.s) {
2721 rstream *rsm = newfile(L.s);
// Close: the stream record is looked up by name in fdhash.
2731 rsm = (rstream *)hash_search(fdhash, L.s);
2732 debug_printf_eval("OC_FBLTIN F_cl rsm:%p\n", rsm);
2734 debug_printf_eval("OC_FBLTIN F_cl "
2735 "rsm->is_pipe:%d, ->F:%p\n",
2736 rsm->is_pipe, rsm->F);
2737 /* Can be NULL if open failed. Example:
2738 * getline line <"doesnt_exist";
2739 * close("doesnt_exist"); <--- here rsm->F is NULL
// Pipes are closed with pclose, files with fclose; the record is then removed from fdhash.
2742 err = rsm->is_pipe ? pclose(rsm->F) : fclose(rsm->F);
2744 hash_remove(fdhash, L.s);
2747 setvar_i(intvar[ERRNO], errno);
2756 case XC( OC_BUILTIN ):
2757 res = exec_builtin(op, res);
2760 case XC( OC_SPRINTF ):
2761 setvar_p(res, awk_printf(op1));
2764 case XC( OC_UNARY ): {
2767 Ld = R_d = getvar_i(R.v);
2794 case XC( OC_FIELD ): {
2795 int i = (int)getvar_i(R.v);
// Field variables are stored zero-based, so index i maps to Fields[i - 1].
2802 res = &Fields[i - 1];
2807 /* concatenation (" ") and index joining (",") */
2808 case XC( OC_CONCAT ):
2809 case XC( OC_COMMA ): {
2810 const char *sep = "";
// Index joining puts the SUBSEP string between the two parts; plain concatenation uses an empty separator.
2811 if ((opinfo & OPCLSMASK) == OC_COMMA)
2812 sep = getvar_s(intvar[SUBSEP]);
2813 setvar_p(res, xasprintf("%s%s%s", L.s, sep, R.s));
// Short-circuit logic: the right operand is tested only when the left one does not already decide the result.
2818 setvar_i(res, istrue(L.v) ? ptest(op->r.n) : 0);
2822 setvar_i(res, istrue(L.v) ? 1 : ptest(op->r.n));
2825 case XC( OC_BINARY ):
2826 case XC( OC_REPLACE ): {
2827 double R_d = getvar_i(R.v);
2828 debug_printf_eval("BINARY/REPLACE: R_d:%f opn:%c\n", R_d, opn);
2841 syntax_error(EMSG_DIV_BY_ZERO);
// Exponentiation needs ENABLE_FEATURE_AWK_LIBM.
2845 if (ENABLE_FEATURE_AWK_LIBM)
2846 L_d = pow(L_d, R_d);
2848 syntax_error(EMSG_NO_MATH);
2852 syntax_error(EMSG_DIV_BY_ZERO);
// Remainder computed by truncating the quotient to int.
2853 L_d -= (int)(L_d / R_d) * R_d;
2856 debug_printf_eval("BINARY/REPLACE result:%f\n", L_d);
// OC_BINARY stores into res; OC_REPLACE stores back into the left operand.
2857 res = setvar_i(((opinfo & OPCLSMASK) == OC_BINARY) ? res : L.v, L_d);
2861 case XC( OC_COMPARE ): {
2862 int i = i; /* for compiler */
// Numeric comparison when both sides are numeric; otherwise string comparison, case-insensitive when icase is set.
2865 if (is_numeric(L.v) && is_numeric(R.v)) {
2866 Ld = getvar_i(L.v) - getvar_i(R.v);
2868 const char *l = getvar_s(L.v);
2869 const char *r = getvar_s(R.v);
2870 Ld = icase ? strcasecmp(l, r) : strcmp(l, r);
// The low bit of opn is masked off for the switch and used below to invert the outcome.
2872 switch (opn & 0xfe) {
2883 setvar_i(res, (i == 0) ^ (opn & 1));
2888 syntax_error(EMSG_POSSIBLE_ERROR);
// Two boundary tests on the opcode class (the actions they trigger are not shown).
2890 if ((opinfo & OPCLSMASK) <= SHIFT_TIL_THIS)
2892 if ((opinfo & OPCLSMASK) >= RECUR_FROM_THIS)
2899 debug_printf_eval("returning from %s(): %p\n", __func__, res);
2907 /* -------- main & co. -------- */
// Termination path taking status r.
// NOTE(review): locals, guards and the final exit call are missing from this listing.
2909 static int awk_exit(int r)
// The node sequence starting at endseq.first is evaluated with tv as the result slot.
2920 evaluate(endseq.first, &tv);
2923 /* waiting for children */
// Every fdhash entry that has an open stream flagged as a pipe is closed with pclose.
2924 for (i = 0; i < fdhash->csize; i++) {
2925 hi = fdhash->items[i];
2927 if (hi->data.rs.F && hi->data.rs.is_pipe)
2928 pclose(hi->data.rs.F);
2936 /* if expr looks like "var=value", perform assignment and return 1,
2937 * otherwise return 0 */
// NOTE(review): the return statements and a few other lines are missing from this listing.
2938 static int is_assignment(const char *expr)
2940 char *exprc, *val, *s, *s1;
// Rejected when the first char fails isalnum_ or when expr has no equals sign.
2942 if (!isalnum_(*expr) || (val = strchr(expr, '=')) == NULL) {
// Work on a private copy; val is re-pointed at the same offset inside that copy.
2946 exprc = xstrdup(expr);
2947 val = exprc + (val - expr);
// The value is rewritten in place with the chars produced by nextchar, up to the terminating NUL.
2951 while ((*s1 = nextchar(&s)) != '\0')
// The variable named by exprc is created or looked up and given the value.
2954 setvar_u(newvar(exprc), val);
2959 /* switch to next input file */
// NOTE(review): the loop structure, the stdin fallback if any and the returns are missing from this listing.
2960 static rstream *next_input_file(void)
// rsm and files_happen live in the globals struct G.
2962 #define rsm (G.next_input_file__rsm)
2963 #define files_happen (G.next_input_file__files_happen)
2966 const char *fname, *ind;
// Reset the stream buffer offsets.
2971 rsm.pos = rsm.adv = 0;
// Tested here: whether ARGIND + 1 has reached ARGC.
2974 if (getvar_i(intvar[ARGIND])+1 >= getvar_i(intvar[ARGC])) {
// ARGIND is incremented and its string form indexes the ARGV array.
2980 ind = getvar_s(incvar(intvar[ARGIND]));
2981 fname = getvar_s(findvar(iamarray(intvar[ARGV]), ind));
// A non-empty entry that is not consumed by is_assignment is opened as an input file.
2982 if (fname && *fname && !is_assignment(fname))
2983 F = xfopen_stdin(fname);
// files_happen is set and FILENAME receives the name.
2987 files_happen = TRUE;
2988 setvar_s(intvar[FILENAME], fname);
// Applet entry point.
// NOTE(review): many lines of this function are missing from the listing, and its end lies beyond the visible text; notes describe only what is shown.
2995 int awk_main(int argc, char **argv) MAIN_EXTERNALLY_VISIBLE;
2996 int awk_main(int argc, char **argv)
2999 char *opt_F, *opt_W;
3000 llist_t *list_v = NULL;
3001 llist_t *list_f = NULL;
// The name and value tables are walked through non-const pointers.
3006 char *vnames = (char *)vNames; /* cheat */
3007 char *vvalues = (char *)vValues;
3011 /* Undo busybox.c, or else strtod may eat ','! This breaks parsing:
3012 * $1,$2 == '$1,' '$2', NOT '$1' ',' '$2' */
3013 if (ENABLE_LOCALE_SUPPORT)
3014 setlocale(LC_NUMERIC, "C");
3018 /* allocate global buffer */
3019 g_buf = xmalloc(MAXVARFMT + 1);
// Four hash tables are created: vhash, ahash, fdhash, fnhash.
3021 vhash = hash_init();
3022 ahash = hash_init();
3023 fdhash = hash_init();
3024 fnhash = hash_init();
3026 /* initialize variables */
// One intvar entry per name in vnames; a value is assigned unless the value table holds the marker byte 0377 at that position.
3027 for (i = 0; *vnames; i++) {
3028 intvar[i] = v = newvar(nextword(&vnames));
3029 if (*vvalues != '\377')
3030 setvar_s(v, nextword(&vvalues));
// A star in the name table marks the variable as VF_SPECIAL.
3034 if (*vnames == '*') {
3035 v->type |= VF_SPECIAL;
// Run the special handling once for FS and RS.
3040 handle_special(intvar[FS]);
3041 handle_special(intvar[RS]);
// Register the three standard streams under their /dev names.
3043 newfile("/dev/stdin")->F = stdin;
3044 newfile("/dev/stdout")->F = stdout;
3045 newfile("/dev/stderr")->F = stderr;
3047 /* Huh, people report that sometimes environ is NULL. Oh well. */
3048 if (environ) for (envp = environ; *envp; envp++) {
3049 /* environ is writable, thus we don't strdup it needlessly */
3051 char *s1 = strchr(s, '=');
3054 /* Both findvar and setvar_u take const char*
3055 * as 2nd arg -> environment is not trashed */
// The text after the equals sign becomes the value of the ENVIRON element keyed by s.
3056 setvar_u(findvar(iamarray(intvar[ENVIRON]), s), s1 + 1);
3060 opt_complementary = "v::f::"; /* -v and -f can occur multiple times */
3061 opt = getopt32(argv, "F:v:f:W:", &opt_F, &list_v, &list_f, &opt_W);
3065 setvar_s(intvar[FS], opt_F); // -F
// Each -v argument is run through is_assignment (the failure action is not shown).
3066 while (list_v) { /* -v */
3067 if (!is_assignment(llist_pop(&list_v)))
3070 if (list_f) { /* -f */
3075 g_progname = llist_pop(&list_f);
3076 from_file = xfopen_stdin(g_progname);
3077 /* one byte is reserved for some trick in next_token */
// The program file is read in pieces of up to 4094 bytes into a buffer grown by 4096 each round, starting at offset 1.
3078 for (i = j = 1; j > 0; i += j) {
3079 s = xrealloc(s, i + 4096);
3080 j = fread(s + i, 1, 4094, from_file);
3084 parse_program(s + 1);
3088 } else { // no -f: take program from 1st parameter
3091 g_progname = "cmd. line";
3092 parse_program(*argv++);
// Bit 0x8 of opt corresponds to -W, which is reported and ignored.
3094 if (opt & 0x8) // -W
3095 bb_error_msg("warning: unrecognized option '-W %s' ignored", opt_W);
3097 /* fill in ARGV array */
3098 setvar_i(intvar[ARGC], argc);
3099 setari_u(intvar[ARGV], 0, "awk");
3102 setari_u(intvar[ARGV], ++i, *argv++);
// The node sequence at beginseq.first is evaluated; with no main and no end sequence the program exits right away.
3104 evaluate(beginseq.first, &tv);
3105 if (!mainseq.first && !endseq.first)
3106 awk_exit(EXIT_SUCCESS);
3108 /* input file could already be opened in BEGIN block */
3110 iF = next_input_file();
3112 /* passing through input files */
// FNR is reset to 0 here (presumably once per input file; the enclosing loop is not shown).
3115 setvar_i(intvar[FNR], 0);
// Each record read into F0 bumps FNR and runs the main sequence.
3117 while ((i = awk_getline(iF, intvar[F0])) > 0) {
3120 incvar(intvar[FNR]);
3121 evaluate(mainseq.first, &tv);
// The errno text is reported as a syntax error (the condition is not shown).
3128 syntax_error(strerror(errno));
3130 iF = next_input_file();
3133 awk_exit(EXIT_SUCCESS);