1 /* vi: set sw=4 ts=4: */
3 * awk implementation for busybox
5 * Copyright (C) 2002 by Dmitry Zakharov <dmit@crp.bank.gov.ua>
7 * Licensed under GPLv2 or later, see file LICENSE in this source tree.
10 //usage:#define awk_trivial_usage
11 //usage: "[OPTIONS] [AWK_PROGRAM] [FILE]..."
12 //usage:#define awk_full_usage "\n\n"
13 //usage: " -v VAR=VAL Set variable"
14 //usage: "\n -F SEP Use SEP as field separator"
15 //usage: "\n -f FILE Read program from FILE"
21 /* This is a NOEXEC applet. Be very careful! */
24 /* If you comment out one of these below, it will be #defined later
25 * to perform debug printfs to stderr: */
// Debug hooks. Both are defined as no-ops here, so the #ifndef fallbacks
// below (which print to stderr through fprintf) are not selected.
26 #define debug_printf_walker(...) do {} while (0)
27 #define debug_printf_eval(...) do {} while (0)
// Fallback used only when the no-op definition above is removed.
29 #ifndef debug_printf_walker
30 # define debug_printf_walker(...) (fprintf(stderr, __VA_ARGS__))
// Fallback used only when the no-op definition above is removed.
32 #ifndef debug_printf_eval
33 # define debug_printf_eval(...) (fprintf(stderr, __VA_ARGS__))
// Bit flags kept in the "type" member of a variable (var_s).
42 #define VF_NUMBER 0x0001 /* 1 = primary type is number */
43 #define VF_ARRAY 0x0002 /* 1 = it's an array */
45 #define VF_CACHED 0x0100 /* 1 = num/str value has cached str/num eq */
46 #define VF_USER 0x0200 /* 1 = user input (may be numeric string) */
47 #define VF_SPECIAL 0x0400 /* 1 = requires extra handling when changed */
48 #define VF_WALK 0x0800 /* 1 = variable has alloc'd x.walker list */
49 #define VF_FSTR 0x1000 /* 1 = var::string points to fstring buffer */
50 #define VF_CHILD 0x2000 /* 1 = function arg; x.parent points to source */
51 #define VF_DIRTY 0x4000 /* 1 = variable was set explicitly */
53 /* these flags are static, don't change them when value is changed */
// Used as a mask: code that resets a value keeps only these bits of "type".
54 #define VF_DONTTOUCH (VF_ARRAY | VF_SPECIAL | VF_WALK | VF_CHILD | VF_DIRTY)
// List node reached through a variable's "walker" member (for..in support).
56 typedef struct walker_list {
59 struct walker_list *prev;
// A variable: flag word plus the members selected by the VF_* flags.
64 typedef struct var_s {
65 unsigned type; /* flags */
69 int aidx; /* func arg idx (for compilation stage) */
70 struct xhash_s *array; /* array ptr */
71 struct var_s *parent; /* for func args, ptr to actual parameter */
72 walker_list *walker; /* list of array elements (for..in) */
76 /* Node chain (pattern-action chain, BEGIN, END, function bodies) */
77 typedef struct chain_s {
// Compared against g_progname when nodes are appended to the chain.
80 const char *programname;
// Function record (stored in the functions hash).
84 typedef struct func_s {
// Redirect stream record (stored in the redirect streams hash).
90 typedef struct rstream_s {
// One hash entry: payload, chain link and the item name stored inline.
99 typedef struct hash_item_s {
101 struct var_s v; /* variable/array hash */
102 struct rstream_s rs; /* redirect streams hash */
103 struct func_s f; /* functions hash */
105 struct hash_item_s *next; /* next in chain */
106 char name[1]; /* really it's longer */
// Hash table header.
109 typedef struct xhash_s {
110 unsigned nel; /* num of elements */
111 unsigned csize; /* current hash size */
112 unsigned nprime; /* next hash size in PRIMES[] */
113 unsigned glen; /* summary length of item names */
// Bucket array with csize entries; each bucket is a chain linked via "next".
114 struct hash_item_s **items;
// Program node as allocated by new_node().
118 typedef struct node_s {
138 /* Block of temporary variables */
139 typedef struct nvblock_s {
142 struct nvblock_s *prev;
143 struct nvblock_s *next;
// Splitter state; the globals keep one for fields and one for records
// (fsplitter, rsplitter).
147 typedef struct tsplitter_s {
152 /* simple token classes */
153 /* Order and hex values are very important!!! See next_token() */
// Every simple class is a distinct bit, so classes can be OR-ed into the
// "expected" masks handed to next_token().
154 #define TC_SEQSTART 1 /* ( */
155 #define TC_SEQTERM (1 << 1) /* ) */
156 #define TC_REGEXP (1 << 2) /* /.../ */
157 #define TC_OUTRDR (1 << 3) /* | > >> */
158 #define TC_UOPPOST (1 << 4) /* unary postfix operator */
159 #define TC_UOPPRE1 (1 << 5) /* unary prefix operator */
160 #define TC_BINOPX (1 << 6) /* two-opnd operator */
161 #define TC_IN (1 << 7)
162 #define TC_COMMA (1 << 8)
163 #define TC_PIPE (1 << 9) /* input redirection pipe */
164 #define TC_UOPPRE2 (1 << 10) /* unary prefix operator */
165 #define TC_ARRTERM (1 << 11) /* ] */
166 #define TC_GRPSTART (1 << 12) /* { */
167 #define TC_GRPTERM (1 << 13) /* } */
168 #define TC_SEMICOL (1 << 14)
169 #define TC_NEWLINE (1 << 15)
170 #define TC_STATX (1 << 16) /* ctl statement (for, next...) */
171 #define TC_WHILE (1 << 17)
172 #define TC_ELSE (1 << 18)
173 #define TC_BUILTIN (1 << 19)
174 #define TC_GETLINE (1 << 20)
175 #define TC_FUNCDECL (1 << 21) /* `function' `func' */
176 #define TC_BEGIN (1 << 22)
177 #define TC_END (1 << 23)
178 #define TC_EOF (1 << 24)
179 #define TC_VARIABLE (1 << 25)
180 #define TC_ARRAY (1 << 26)
181 #define TC_FUNCTION (1 << 27)
182 #define TC_STRING (1 << 28)
183 #define TC_NUMBER (1 << 29)
// Either of the two prefix-operator classes.
185 #define TC_UOPPRE (TC_UOPPRE1 | TC_UOPPRE2)
187 /* combined token classes */
188 #define TC_BINOP (TC_BINOPX | TC_COMMA | TC_PIPE | TC_IN)
189 #define TC_UNARYOP (TC_UOPPRE | TC_UOPPOST)
190 #define TC_OPERAND (TC_VARIABLE | TC_ARRAY | TC_FUNCTION \
191 | TC_BUILTIN | TC_GETLINE | TC_SEQSTART | TC_STRING | TC_NUMBER)
193 #define TC_STATEMNT (TC_STATX | TC_WHILE)
194 #define TC_OPTERM (TC_SEMICOL | TC_NEWLINE)
196 /* word tokens, cannot mean something else if not expected */
197 #define TC_WORD (TC_IN | TC_STATEMNT | TC_ELSE | TC_BUILTIN \
198 | TC_GETLINE | TC_FUNCDECL | TC_BEGIN | TC_END)
200 /* discard newlines after these */
201 #define TC_NOTERM (TC_COMMA | TC_GRPSTART | TC_GRPTERM \
202 | TC_BINOP | TC_OPTERM)
204 /* what can expression begin with */
205 #define TC_OPSEQ (TC_OPERAND | TC_UOPPRE | TC_REGEXP)
206 /* what can group begin with */
207 #define TC_GRPSEQ (TC_OPSEQ | TC_OPTERM | TC_STATEMNT | TC_GRPSTART)
209 /* if previous token class is CONCAT1 and next is CONCAT2, concatenation */
210 /* operator is inserted between them */
211 #define TC_CONCAT1 (TC_VARIABLE | TC_ARRTERM | TC_SEQTERM \
212 | TC_STRING | TC_NUMBER | TC_UOPPOST)
213 #define TC_CONCAT2 (TC_OPERAND | TC_UOPPRE)
// Operand-handling flags; they are OR-ed into the info words of tokeninfo[].
// NOTE(review): judging by the names, the 1/2 suffix selects the first or
// second operand and RES/STR/NUM the kind of pre-processing - confirm.
215 #define OF_RES1 0x010000
216 #define OF_RES2 0x020000
217 #define OF_STR1 0x040000
218 #define OF_STR2 0x080000
219 #define OF_NUM1 0x100000
220 #define OF_CHECKED 0x200000
222 /* combined operator flags */
225 #define xS (OF_RES2 | OF_STR2)
227 #define VV (OF_RES1 | OF_RES2)
228 #define Nx (OF_RES1 | OF_NUM1)
229 #define NV (OF_RES1 | OF_NUM1 | OF_RES2)
230 #define Sx (OF_RES1 | OF_STR1)
231 #define SV (OF_RES1 | OF_STR1 | OF_RES2)
232 #define SS (OF_RES1 | OF_STR1 | OF_RES2 | OF_STR2)
// OPCLSMASK isolates the operation class (the OC_ and ST_ values) in an info
// word; OPNMASK isolates the low seven bits.
234 #define OPCLSMASK 0xFF00
235 #define OPNMASK 0x007F
237 /* Operator priority is stored in the highest byte (even: r->l, odd: l->r grouping).
238 * For builtins that byte has a different meaning: n n s3 s2 s1 v3 v2 v1,
239 * n - min. number of args, vN - resolve Nth arg to var, sN - resolve Nth arg to string */
// P(x) moves a priority value into bits 24 and up of an info word.
241 #define P(x) (x << 24)
// PRIMASK selects the priority bits; PRIMASK2 is the same mask with the
// lowest priority bit cleared.
242 #define PRIMASK 0x7F000000
243 #define PRIMASK2 0x7E000000
245 /* Operation classes */
247 #define SHIFT_TIL_THIS 0x0600
248 #define RECUR_FROM_THIS 0x1000
// Class values occupy the byte selected by OPCLSMASK.
251 OC_DELETE = 0x0100, OC_EXEC = 0x0200, OC_NEWSOURCE = 0x0300,
252 OC_PRINT = 0x0400, OC_PRINTF = 0x0500, OC_WALKINIT = 0x0600,
254 OC_BR = 0x0700, OC_BREAK = 0x0800, OC_CONTINUE = 0x0900,
255 OC_EXIT = 0x0a00, OC_NEXT = 0x0b00, OC_NEXTFILE = 0x0c00,
256 OC_TEST = 0x0d00, OC_WALKNEXT = 0x0e00,
258 OC_BINARY = 0x1000, OC_BUILTIN = 0x1100, OC_COLON = 0x1200,
259 OC_COMMA = 0x1300, OC_COMPARE = 0x1400, OC_CONCAT = 0x1500,
260 OC_FBLTIN = 0x1600, OC_FIELD = 0x1700, OC_FNARG = 0x1800,
261 OC_FUNC = 0x1900, OC_GETLINE = 0x1a00, OC_IN = 0x1b00,
262 OC_LAND = 0x1c00, OC_LOR = 0x1d00, OC_MATCH = 0x1e00,
263 OC_MOVE = 0x1f00, OC_PGETLINE = 0x2000, OC_REGEXP = 0x2100,
264 OC_REPLACE = 0x2200, OC_RETURN = 0x2300, OC_SPRINTF = 0x2400,
265 OC_TERNARY = 0x2500, OC_UNARY = 0x2600, OC_VAR = 0x2700,
// Statement codes used while parsing if, do and for.
268 ST_IF = 0x3000, ST_DO = 0x3100, ST_FOR = 0x3200,
272 /* simple builtins */
// Codes OR-ed into OC_FBLTIN info words in tokeninfo[].
274 F_in, F_rn, F_co, F_ex, F_lg, F_si, F_sq, F_sr,
275 F_ti, F_le, F_sy, F_ff, F_cl
// Codes OR-ed into OC_BUILTIN (OC_B) info words in tokeninfo[].
280 B_a2, B_ix, B_ma, B_sp, B_ss, B_ti, B_mt, B_lo, B_up,
282 B_an, B_co, B_ls, B_or, B_rs, B_xo,
285 /* tokens and their corresponding info values */
287 #define NTC "\377" /* switch to next token class (tc<<1) */
290 #define OC_B OC_BUILTIN
// Token spellings grouped by class. Each entry is one length byte (written
// as an octal escape) followed by that many characters; NTC ends a class.
292 static const char tokenlist[] ALIGN1 =
295 "\1/" NTC /* REGEXP */
296 "\2>>" "\1>" "\1|" NTC /* OUTRDR */
297 "\2++" "\2--" NTC /* UOPPOST */
298 "\2++" "\2--" "\1$" NTC /* UOPPRE1 */
299 "\2==" "\1=" "\2+=" "\2-=" /* BINOPX */
300 "\2*=" "\2/=" "\2%=" "\2^="
301 "\1+" "\1-" "\3**=" "\2**"
302 "\1/" "\1%" "\1^" "\1*"
303 "\2!=" "\2>=" "\2<=" "\1>"
304 "\1<" "\2!~" "\1~" "\2&&"
305 "\2||" "\1?" "\1:" NTC
309 "\1+" "\1-" "\1!" NTC /* UOPPRE2 */
315 "\2if" "\2do" "\3for" "\5break" /* STATX */
316 "\10continue" "\6delete" "\5print"
317 "\6printf" "\4next" "\10nextfile"
318 "\6return" "\4exit" NTC
322 "\3and" "\5compl" "\6lshift" "\2or"
324 "\5close" "\6system" "\6fflush" "\5atan2" /* BUILTIN */
325 "\3cos" "\3exp" "\3int" "\3log"
326 "\4rand" "\3sin" "\4sqrt" "\5srand"
327 "\6gensub" "\4gsub" "\5index" "\6length"
328 "\5match" "\5split" "\7sprintf" "\3sub"
329 "\6substr" "\7systime" "\10strftime" "\6mktime"
330 "\7tolower" "\7toupper" NTC
332 "\4func" "\10function" NTC
335 /* compiler adds trailing "\0" */
// Info word for each token, listed in tokenlist order: operation class,
// operand flags, priority (or builtin argument description) and a sub-code.
338 static const uint32_t tokeninfo[] = {
342 xS|'a', xS|'w', xS|'|',
343 OC_UNARY|xV|P(9)|'p', OC_UNARY|xV|P(9)|'m',
344 OC_UNARY|xV|P(9)|'P', OC_UNARY|xV|P(9)|'M', OC_FIELD|xV|P(5),
345 OC_COMPARE|VV|P(39)|5, OC_MOVE|VV|P(74), OC_REPLACE|NV|P(74)|'+', OC_REPLACE|NV|P(74)|'-',
346 OC_REPLACE|NV|P(74)|'*', OC_REPLACE|NV|P(74)|'/', OC_REPLACE|NV|P(74)|'%', OC_REPLACE|NV|P(74)|'&',
347 OC_BINARY|NV|P(29)|'+', OC_BINARY|NV|P(29)|'-', OC_REPLACE|NV|P(74)|'&', OC_BINARY|NV|P(15)|'&',
348 OC_BINARY|NV|P(25)|'/', OC_BINARY|NV|P(25)|'%', OC_BINARY|NV|P(15)|'&', OC_BINARY|NV|P(25)|'*',
349 OC_COMPARE|VV|P(39)|4, OC_COMPARE|VV|P(39)|3, OC_COMPARE|VV|P(39)|0, OC_COMPARE|VV|P(39)|1,
350 OC_COMPARE|VV|P(39)|2, OC_MATCH|Sx|P(45)|'!', OC_MATCH|Sx|P(45)|'~', OC_LAND|Vx|P(55),
351 OC_LOR|Vx|P(59), OC_TERNARY|Vx|P(64)|'?', OC_COLON|xx|P(67)|':',
352 OC_IN|SV|P(49), /* in */
354 OC_PGETLINE|SV|P(37),
355 OC_UNARY|xV|P(19)|'+', OC_UNARY|xV|P(19)|'-', OC_UNARY|xV|P(19)|'!',
361 ST_IF, ST_DO, ST_FOR, OC_BREAK,
362 OC_CONTINUE, OC_DELETE|Vx, OC_PRINT,
363 OC_PRINTF, OC_NEXT, OC_NEXTFILE,
364 OC_RETURN|Vx, OC_EXIT|Nx,
368 OC_B|B_an|P(0x83), OC_B|B_co|P(0x41), OC_B|B_ls|P(0x83), OC_B|B_or|P(0x83),
369 OC_B|B_rs|P(0x83), OC_B|B_xo|P(0x83),
370 OC_FBLTIN|Sx|F_cl, OC_FBLTIN|Sx|F_sy, OC_FBLTIN|Sx|F_ff, OC_B|B_a2|P(0x83),
371 OC_FBLTIN|Nx|F_co, OC_FBLTIN|Nx|F_ex, OC_FBLTIN|Nx|F_in, OC_FBLTIN|Nx|F_lg,
372 OC_FBLTIN|F_rn, OC_FBLTIN|Nx|F_si, OC_FBLTIN|Nx|F_sq, OC_FBLTIN|Nx|F_sr,
373 OC_B|B_ge|P(0xd6), OC_B|B_gs|P(0xb6), OC_B|B_ix|P(0x9b), OC_FBLTIN|Sx|F_le,
374 OC_B|B_ma|P(0x89), OC_B|B_sp|P(0x8b), OC_SPRINTF, OC_B|B_su|P(0xb6),
375 OC_B|B_ss|P(0x8f), OC_FBLTIN|F_ti, OC_B|B_ti|P(0x0b), OC_B|B_mt|P(0x0b),
376 OC_B|B_lo|P(0x49), OC_B|B_up|P(0x49),
383 /* internal variable names and their initial values */
384 /* asterisk marks SPECIAL vars; $ is just no-named Field0 */
// Indexes into intvar[]; the order matches the names in vNames below.
386 CONVFMT, OFMT, FS, OFS,
387 ORS, RS, RT, FILENAME,
388 SUBSEP, F0, ARGIND, ARGC,
389 ARGV, ERRNO, FNR, NR,
390 NF, IGNORECASE, ENVIRON, NUM_INTERNAL_VARS
// NUL-separated names; a '*' in front of the following name marks the
// preceding variable as SPECIAL. An empty name ends the list.
393 static const char vNames[] ALIGN1 =
394 "CONVFMT\0" "OFMT\0" "FS\0*" "OFS\0"
395 "ORS\0" "RS\0*" "RT\0" "FILENAME\0"
396 "SUBSEP\0" "$\0*" "ARGIND\0" "ARGC\0"
397 "ARGV\0" "ERRNO\0" "FNR\0" "NR\0"
398 "NF\0*" "IGNORECASE\0*" "ENVIRON\0" "\0";
// NUL-separated initial string values in the same order as vNames.
// NOTE(review): the final "\377" presumably tells the initialiser that the
// remaining variables do not get a string value - confirm.
400 static const char vValues[] ALIGN1 =
401 "%.6g\0" "%.6g\0" " \0" " \0"
402 "\n\0" "\n\0" "\0" "\0"
403 "\034\0" "\0" "\377";
405 /* hash size may grow to these values */
// A new hash starts with FIRST_PRIME buckets; hash_rebuild() then steps
// through PRIMES[] using the nprime index of the hash.
406 #define FIRST_PRIME 61
407 static const uint16_t PRIMES[] ALIGN2 = { 251, 1021, 4093, 16381, 65521 };
410 /* Globals. Split into two parts so that the first one is addressed
411 * with (mostly short) negative offsets.
412 * NB: it's unsafe to put members of type "double"
413 * into globals2 (gcc may fail to align them). */
// Members of the first part; they are reached through the G1 accessor
// macros further down.
417 chain beginseq, mainseq, endseq;
419 node *break_ptr, *continue_ptr;
421 xhash *vhash, *ahash, *fdhash, *fnhash;
422 const char *g_progname;
425 int maxfields; /* used in fsrealloc() only */
434 smallint is_f0_split;
// Members of the second part; they are reached through G.
437 uint32_t t_info; /* often used */
443 var *intvar[NUM_INTERNAL_VARS]; /* often used */
445 /* former statics from various functions */
446 char *split_f0__fstrings;
448 uint32_t next_token__save_tclass;
449 uint32_t next_token__save_info;
450 uint32_t next_token__ltclass;
451 smallint next_token__concat_inserted;
453 smallint next_input_file__files_happen;
454 rstream next_input_file__rsm;
456 var *evaluate__fnargs;
457 unsigned evaluate__seed;
458 regex_t evaluate__sreg;
462 tsplitter exec_builtin__tspl;
464 /* biggest and least used members go last */
465 tsplitter fsplitter, rsplitter;
// ptr_to_globals points at the second part; the first part sits directly in
// front of it, hence the index of -1.
467 #define G1 (ptr_to_globals[-1])
468 #define G (*(struct globals2 *)ptr_to_globals)
469 /* For debug. nm --size-sort awk.o | grep -vi ' [tr] ' */
470 /*char G1size[sizeof(G1)]; - 0x74 */
471 /*char Gsize[sizeof(G)]; - 0x1c4 */
472 /* Trying to keep most of members accessible with short offsets: */
473 /*char Gofs_seed[offsetof(struct globals2, evaluate__seed)]; - 0x90 */
// Accessor macros: the rest of the file uses the bare member names.
474 #define t_double (G1.t_double )
475 #define beginseq (G1.beginseq )
476 #define mainseq (G1.mainseq )
477 #define endseq (G1.endseq )
478 #define seq (G1.seq )
479 #define break_ptr (G1.break_ptr )
480 #define continue_ptr (G1.continue_ptr)
482 #define vhash (G1.vhash )
483 #define ahash (G1.ahash )
484 #define fdhash (G1.fdhash )
485 #define fnhash (G1.fnhash )
486 #define g_progname (G1.g_progname )
487 #define g_lineno (G1.g_lineno )
488 #define nfields (G1.nfields )
489 #define maxfields (G1.maxfields )
490 #define Fields (G1.Fields )
491 #define g_cb (G1.g_cb )
492 #define g_pos (G1.g_pos )
493 #define g_buf (G1.g_buf )
494 #define icase (G1.icase )
495 #define exiting (G1.exiting )
496 #define nextrec (G1.nextrec )
497 #define nextfile (G1.nextfile )
498 #define is_f0_split (G1.is_f0_split )
499 #define t_info (G.t_info )
500 #define t_tclass (G.t_tclass )
501 #define t_string (G.t_string )
502 #define t_lineno (G.t_lineno )
503 #define t_rollback (G.t_rollback )
504 #define intvar (G.intvar )
505 #define fsplitter (G.fsplitter )
506 #define rsplitter (G.rsplitter )
// Allocates both parts zeroed in one chunk and points ptr_to_globals just
// past the first part. The only non-zero defaults are the initial ltclass of
// next_token() (TC_OPTERM) and the seed (1).
507 #define INIT_G() do { \
508 SET_PTR_TO_GLOBALS((char*)xzalloc(sizeof(G1)+sizeof(G)) + sizeof(G1)); \
509 G.next_token__ltclass = TC_OPTERM; \
510 G.evaluate__seed = 1; \
514 /* function prototypes */
// Forward declarations for functions that are called before their definition.
515 static void handle_special(var *);
516 static node *parse_expr(uint32_t);
517 static void chain_group(void);
518 static var *evaluate(node *, var *);
519 static rstream *next_input_file(void);
520 static int fmt_num(char *, int, const char *, double, int);
// Declared NORETURN: awk_exit() does not come back to its caller.
521 static int awk_exit(int) NORETURN;
523 /* ---- error handling ---- */
// Message texts handed to syntax_error(), which prefixes them with the
// program name and line number.
525 static const char EMSG_INTERNAL_ERROR[] ALIGN1 = "Internal error";
526 static const char EMSG_UNEXP_EOS[] ALIGN1 = "Unexpected end of string";
527 static const char EMSG_UNEXP_TOKEN[] ALIGN1 = "Unexpected token";
528 static const char EMSG_DIV_BY_ZERO[] ALIGN1 = "Division by zero";
529 static const char EMSG_INV_FMT[] ALIGN1 = "Invalid format specifier";
530 static const char EMSG_TOO_FEW_ARGS[] ALIGN1 = "Too few arguments for builtin";
531 static const char EMSG_NOT_ARRAY[] ALIGN1 = "Not an array";
532 static const char EMSG_POSSIBLE_ERROR[] ALIGN1 = "Possible syntax error";
533 static const char EMSG_UNDEF_FUNC[] ALIGN1 = "Call to undefined function";
534 static const char EMSG_NO_MATH[] ALIGN1 = "Math support is not compiled in";
536 static void zero_out_var(var *vp)
538 memset(vp, 0, sizeof(*vp));
541 static void syntax_error(const char *message) NORETURN;
542 static void syntax_error(const char *message)
544 bb_error_msg_and_die("%s:%i: %s", g_progname, g_lineno, message);
547 /* ---- hash stuff ---- */
/* Hash NAME by folding each character into the running value:
 * hash = hash * 63 + character, written as (hash << 6) - hash in unsigned
 * arithmetic. The empty string hashes to 0. Callers reduce the result
 * modulo the table size. */
static unsigned hashidx(const char *name)
{
	unsigned hash = 0;
	const char *p;

	for (p = name; *p != '\0'; p++)
		hash = (hash << 6) - hash + *p;
	return hash;
}
558 /* create new hash */
559 static xhash *hash_init(void)
563 newhash = xzalloc(sizeof(*newhash));
564 newhash->csize = FIRST_PRIME;
565 newhash->items = xzalloc(FIRST_PRIME * sizeof(newhash->items[0]));
570 /* find item in hash, return ptr to data, NULL if not found */
571 static void *hash_search(xhash *hash, const char *name)
575 hi = hash->items[hashidx(name) % hash->csize];
577 if (strcmp(hi->name, name) == 0)
584 /* grow hash if it becomes too big */
585 static void hash_rebuild(xhash *hash)
587 unsigned newsize, i, idx;
588 hash_item **newitems, *hi, *thi;
// Every size in PRIMES[] has already been used.
// NOTE(review): presumably the function gives up in that case - confirm.
590 if (hash->nprime == ARRAY_SIZE(PRIMES))
// Take the next table size and allocate a zeroed bucket array for it.
593 newsize = PRIMES[hash->nprime++];
594 newitems = xzalloc(newsize * sizeof(newitems[0]));
// Visit every old bucket; each item is rehashed modulo the new size and
// linked in front of its new chain.
596 for (i = 0; i < hash->csize; i++) {
601 idx = hashidx(thi->name) % newsize;
602 thi->next = newitems[idx];
// Switch the hash over to the new bucket array.
608 hash->csize = newsize;
609 hash->items = newitems;
612 /* find item in hash, add it if necessary. Return ptr to data */
613 static void *hash_find(xhash *hash, const char *name)
619 hi = hash_search(hash, name);
// Count the new element; the test is true once there are more than about
// ten elements per bucket on average.
// NOTE(review): presumably this triggers hash_rebuild() - confirm.
621 if (++hash->nel / hash->csize > 10)
// Allocate the item with room for the name and its NUL, then copy the name.
624 l = strlen(name) + 1;
625 hi = xzalloc(sizeof(*hi) + l);
626 strcpy(hi->name, name);
// Link the item in front of its bucket chain.
628 idx = hashidx(name) % hash->csize;
629 hi->next = hash->items[idx];
630 hash->items[idx] = hi;
// Typed wrappers around hash_find(), which creates the item when it is
// missing: findvar takes an explicit hash, the others use vhash, fdhash and
// fnhash respectively.
636 #define findvar(hash, name) ((var*) hash_find((hash), (name)))
637 #define newvar(name) ((var*) hash_find(vhash, (name)))
638 #define newfile(name) ((rstream*)hash_find(fdhash, (name)))
639 #define newfunc(name) ((func*) hash_find(fnhash, (name)))
// Handle the item called name in hash: its bucket chain is searched by
// name, and on a match glen is reduced by the name length plus one.
// NOTE(review): presumably the item is also unlinked and freed and nel is
// decremented - confirm.
641 static void hash_remove(xhash *hash, const char *name)
643 hash_item *hi, **phi;
// phi starts at the bucket slot, so the link that points to the current item
// can be rewritten.
645 phi = &hash->items[hashidx(name) % hash->csize];
648 if (strcmp(hi->name, name) == 0) {
649 hash->glen -= (strlen(name) + 1);
659 /* ------ some useful functions ------ */
// Examine the characters at p: a backslash directly followed by a newline
// is handled by the first branch; the second branch fires on anything other
// than a blank or a tab.
// NOTE(review): presumably returns the first position that is not
// skippable - confirm.
661 static char *skip_spaces(char *p)
664 if (*p == '\\' && p[1] == '\n') {
667 } else if (*p != ' ' && *p != '\t') {
675 /* returns old *s, advances *s past word and terminating NUL */
676 static char *nextword(char **s)
// Step *s forward until the NUL itself has been consumed.
679 while (*(*s)++ != '\0')
// Fetch one character from *s, decoding a backslash escape with
// bb_process_escape_sequence(), and advance *s.
// NOTE(review): pps presumably records the position right after the
// backslash, so an unmoved *s means the escape was not recognised - confirm.
684 static char nextchar(char **s)
691 c = bb_process_escape_sequence((const char**)s);
692 if (c == '\\' && *s == pps) { /* unrecognized \z? */
693 c = *(*s); /* yes, fetch z */
695 (*s)++; /* advance unless z = NUL */
700 static ALWAYS_INLINE int isalnum_(int c)
702 return (isalnum(c) || c == '_');
// Parse a number starting at *pp. The strtoull() and strtod() calls below
// store the end of the parsed text back through pp.
705 static double my_strtod(char **pp)
// With ENABLE_DESKTOP, a leading '0' may introduce a hex or octal integer.
708 if (ENABLE_DESKTOP && cp[0] == '0') {
709 /* Might be hex or octal integer: 0x123abc or 07777 */
// OR-ing with 0x20 folds 'X' to 'x'.
710 char c = (cp[1] | 0x20);
711 if (c == 'x' || isdigit(cp[1])) {
// Base 0 lets strtoull() choose hex or octal from the prefix.
712 unsigned long long ull = strtoull(cp, pp, 0);
// NOTE(review): c presumably holds the character where strtoull() stopped
// at this point; anything but a digit or '.' means the integer stands - confirm.
716 if (!isdigit(c) && c != '.')
718 /* else: it may be a floating number. Examples:
719 * 009.123 (*pp points to '9')
720 * 000.123 (*pp points to '.')
721 * fall through to strtod.
725 return strtod(cp, pp);
728 /* -------- working with variables (set/get/copy/etc) -------- */
// Return the hash that backs v as an array. Function-argument links
// (VF_CHILD) are followed first; a variable that is not yet an array gets a
// freshly created hash.
// NOTE(review): presumably VF_ARRAY is set at the same time - confirm.
730 static xhash *iamarray(var *v)
734 while (a->type & VF_CHILD)
737 if (!(a->type & VF_ARRAY)) {
739 a->x.array = hash_init();
// Empty array: every bucket is walked, the string value of each item is
// freed, the bucket slot is reset to NULL and the glen and nel counters are
// zeroed. The bucket array itself is not freed here.
// NOTE(review): presumably each item is freed along with its string - confirm.
744 static void clear_array(xhash *array)
749 for (i = 0; i < array->csize; i++) {
750 hi = array->items[i];
754 free(thi->data.v.string);
757 array->items[i] = NULL;
759 array->glen = array->nel = 0;
762 /* clear a variable */
763 static var *clrvar(var *v)
// A VF_FSTR string points into the shared fstring buffer rather than being
// owned by the variable.
// NOTE(review): presumably the string is freed only when VF_FSTR is clear - confirm.
765 if (!(v->type & VF_FSTR))
// Keep only the static flags.
768 v->type &= VF_DONTTOUCH;
774 /* assign string value to variable */
// Takes value as given; setvar_s() is the variant that hands in a private
// copy (or NULL for an empty string).
// NOTE(review): v presumably takes ownership of value - confirm.
775 static var *setvar_p(var *v, char *value)
783 /* same as setvar_p but make a copy of string */
784 static var *setvar_s(var *v, const char *value)
786 return setvar_p(v, (value && *value) ? xstrdup(value) : NULL);
789 /* same as setvar_s but sets USER flag */
790 static var *setvar_u(var *v, const char *value)
792 v = setvar_s(v, value);
797 /* set array element to user string */
798 static void setari_u(var *a, int idx, const char *s)
// The element key is the text form of idx as produced by itoa(); findvar()
// creates the element in the hash of a when it does not exist yet.
802 v = findvar(iamarray(a), itoa(idx));
806 /* assign numeric value to variable */
807 static var *setvar_i(var *v, double value)
// Mark the primary type as number.
810 v->type |= VF_NUMBER;
// Return the value of v as a string; never NULL, because a missing string
// reads as "".
816 static const char *getvar_s(var *v)
818 /* if v is numeric and has no cached string, convert it to string */
819 if ((v->type & (VF_NUMBER | VF_CACHED)) == VF_NUMBER) {
// The number is formatted into g_buf using the current CONVFMT value; a copy
// is then cached in v->string and VF_CACHED records that.
820 fmt_num(g_buf, MAXVARFMT, getvar_s(intvar[CONVFMT]), v->number, TRUE);
821 v->string = xstrdup(g_buf);
822 v->type |= VF_CACHED;
824 return (v->string == NULL) ? "" : v->string;
// Return the value of v as a number. A value that is neither a number nor
// cached is parsed from its string with my_strtod(); the result is kept in
// v->number and VF_CACHED is set.
827 static double getvar_i(var *v)
831 if ((v->type & (VF_NUMBER | VF_CACHED)) == 0) {
835 debug_printf_eval("getvar_i: '%s'->", s);
836 v->number = my_strtod(&s);
837 debug_printf_eval("%f (s:'%s')\n", v->number, s);
// Extra handling for user input.
// NOTE(review): presumably checks what follows the parsed number in s - confirm.
838 if (v->type & VF_USER) {
844 debug_printf_eval("getvar_i: '%s'->zero\n", s);
847 v->type |= VF_CACHED;
849 debug_printf_eval("getvar_i: %f\n", v->number);
853 /* Used for operands of bitwise ops */
854 static unsigned long getvar_i_int(var *v)
856 double d = getvar_i(v);
858 /* Casting doubles to longs is undefined for values outside
859 * of target type range. Try to widen it as much as possible */
// NOTE(review): presumably taken only for non-negative d - confirm.
861 return (unsigned long)d;
862 /* Why? Think about d == -4294967295.0 (assuming 32bit longs) */
// Otherwise convert -d and negate the converted result.
863 return - (long) (unsigned long) (-d);
// Copy the value of src into dest: the non-static type flags (without
// VF_FSTR, since dest receives its own duplicate of the string), the number
// and the string. handle_special() then runs for dest.
866 static var *copyvar(var *dest, const var *src)
870 dest->type |= (src->type & ~(VF_DONTTOUCH | VF_FSTR));
871 debug_printf_eval("copyvar: number:%f string:'%s'\n", src->number, src->string);
872 dest->number = src->number;
874 dest->string = xstrdup(src->string);
876 handle_special(dest);
880 static var *incvar(var *v)
882 return setvar_i(v, getvar_i(v) + 1.0);
885 /* return true if v is number or numeric string */
886 static int is_numeric(var *v)
// The XOR inverts the VF_DIRTY bit before masking, so the result is nonzero
// when VF_NUMBER or VF_USER is set, or when VF_DIRTY is clear.
889 return ((v->type ^ VF_DIRTY) & (VF_NUMBER | VF_USER | VF_DIRTY));
892 /* return 1 when value of v corresponds to true, 0 otherwise */
893 static int istrue(var *v)
// Numeric test: true when the number is non-zero.
// NOTE(review): presumably selected when v is numeric - confirm.
896 return (v->number != 0);
// String test: true when a string is present and not empty.
897 return (v->string && v->string[0]);
900 /* temporary variables allocator. Last allocated should be first freed */
901 static var *nvalloc(int n)
// Does the current block still have room for n more variables?
909 if ((g_cb->pos - g_cb->nv) + n <= g_cb->size)
// A new block holds at least MINNVBLOCK variables and is allocated zeroed
// together with its header.
915 size = (n <= MINNVBLOCK) ? MINNVBLOCK : n;
916 g_cb = xzalloc(sizeof(nvblock) + size * sizeof(var));
// The new block starts out empty.
918 g_cb->pos = g_cb->nv;
920 /*g_cb->next = NULL; - xzalloc did it */
// NOTE(review): presumably initialises the variables just handed out, up to
// the new pos - confirm.
928 while (v < g_cb->pos) {
// Release the temporaries from v up to the current position of the block.
937 static void nvfree(var *v)
// v has to lie inside the used part of the current block; anything else is
// reported as an internal error.
941 if (v < g_cb->nv || v >= g_cb->pos)
942 syntax_error(EMSG_INTERNAL_ERROR);
944 for (p = v; p < g_cb->pos; p++) {
// An array that belongs to the temporary itself (VF_ARRAY without VF_CHILD)
// is emptied and its bucket array freed.
945 if ((p->type & (VF_ARRAY | VF_CHILD)) == VF_ARRAY) {
946 clear_array(iamarray(p));
947 free(p->x.array->items);
// A pending for..in walker list is released as well.
950 if (p->type & VF_WALK) {
952 walker_list *w = p->x.walker;
953 debug_printf_walker("nvfree: freeing walker @%p\n", &p->x.walker);
957 debug_printf_walker(" free(%p)\n", w);
// While the current block is empty and has a predecessor...
// NOTE(review): presumably steps g_cb back to the previous block - confirm.
966 while (g_cb->prev && g_cb->pos == g_cb->nv) {
971 /* ------- awk program text parsing ------- */
973 /* Parse next token pointed to by global pos, place results into global ttt.
974 * If token isn't expected, give up. Return token class. */
// Scan the next token, or replay a saved one, and check it against the
// expected class mask. Unterminated strings or regexps and unexpected tokens
// end in syntax_error().
976 static uint32_t next_token(uint32_t expected)
// Local aliases for state that is kept in G between calls.
978 #define concat_inserted (G.next_token__concat_inserted)
979 #define save_tclass (G.next_token__save_tclass)
980 #define save_info (G.next_token__save_info)
981 /* Initialized to TC_OPTERM: */
982 #define ltclass (G.next_token__ltclass)
// The previous call handed out an inserted concatenation operator: restore
// the token class that was saved at that time.
992 } else if (concat_inserted) {
993 concat_inserted = FALSE;
994 t_tclass = save_tclass;
1001 g_lineno = t_lineno;
// Run to the end of the line.
// NOTE(review): presumably this skips a comment - confirm.
1003 while (*p != '\n' && *p != '\0')
// String literal: characters are copied through nextchar(), which decodes
// escapes; NUL or newline before the closing quote is an error.
1012 } else if (*p == '\"') {
1015 while (*p != '\"') {
1017 if (*p == '\0' || *p == '\n')
1018 syntax_error(EMSG_UNEXP_EOS);
1020 *s++ = nextchar(&pp);
// Regexp literal, recognised only where a regexp is expected.
1027 } else if ((expected & TC_REGEXP) && *p == '/') {
1031 if (*p == '\0' || *p == '\n')
1032 syntax_error(EMSG_UNEXP_EOS);
1036 s[-1] = bb_process_escape_sequence((const char **)&pp);
// Number: the value is parsed into t_double.
1049 } else if (*p == '.' || isdigit(*p)) {
1052 t_double = my_strtod(&pp);
1055 syntax_error(EMSG_UNEXP_TOKEN);
1059 /* search for something known */
// Each tokenlist entry starts with its length byte; the NTC marker switches
// to the next class.
1064 int l = (unsigned char) *tl++;
1065 if (l == (unsigned char) NTCC) {
1069 /* if token class is expected,
1071 * and it's not a longer word,
1073 if ((tc & (expected | TC_WORD | TC_NEWLINE))
1074 && strncmp(p, tl, l) == 0
1075 && !((tc & TC_WORD) && isalnum_(p[l]))
1077 /* then this is what we are looking for */
1085 /* not a known token */
1087 /* is it a name? (var/array/function) */
1089 syntax_error(EMSG_UNEXP_TOKEN); /* no */
1092 while (isalnum_(*++p)) {
1097 /* also consume whitespace between functionname and bracket */
1098 if (!(expected & TC_VARIABLE) || (expected & TC_ARRAY))
1112 /* skipping newlines in some cases */
1113 if ((ltclass & TC_NOTERM) && (tc & TC_NEWLINE))
1116 /* insert concatenation operator when needed */
1117 if ((ltclass & TC_CONCAT1) && (tc & TC_CONCAT2) && (expected & TC_BINOP)) {
1118 concat_inserted = TRUE;
// The inserted operator is reported as OC_CONCAT with priority 35.
1122 t_info = OC_CONCAT | SS | P(35);
1129 /* Are we ready for this? */
// The message depends on whether the offending token is a newline or EOF.
1130 if (!(ltclass & expected))
1131 syntax_error((ltclass & (TC_NEWLINE | TC_EOF)) ?
1132 EMSG_UNEXP_EOS : EMSG_UNEXP_TOKEN);
1135 #undef concat_inserted
// NOTE(review): presumably arranges for the next next_token() call to hand
// out the current token again (see the t_rollback global) - confirm.
1141 static void rollback_token(void)
// Allocate a zeroed node and stamp it with the current line number.
// NOTE(review): presumably info is stored in the node as well and the node
// is returned - confirm.
1146 static node *new_node(uint32_t info)
1150 n = xzalloc(sizeof(node));
1152 n->lineno = g_lineno;
// Turn n into an OC_REGEXP node for pattern s. re must provide room for two
// regex_t objects: re[0] is compiled case-sensitively, re[1] with REG_ICASE.
1156 static void mk_re_node(const char *s, node *n, regex_t *re)
1158 n->info = OC_REGEXP;
1161 xregcomp(re, s, REG_EXTENDED);
1162 xregcomp(re + 1, s, REG_EXTENDED | REG_ICASE);
1165 static node *condition(void)
1167 next_token(TC_SEQSTART);
1168 return parse_expr(TC_SEQTERM);
1171 /* parse expression terminated by given argument, return ptr
1172 * to built subtree. Terminator is eaten by parse_expr */
1173 static node *parse_expr(uint32_t iexp)
1182 sn.r.n = glptr = NULL;
// xtc is the set of token classes that may come next; it is recomputed
// after every token.
1183 xtc = TC_OPERAND | TC_UOPPRE | TC_REGEXP | iexp;
// Consume tokens until one of the terminators in iexp shows up.
1185 while (!((tc = next_token(xtc)) & iexp)) {
// The info word tested here is the one tokeninfo[] assigns to "<".
1187 if (glptr && (t_info == (OC_COMPARE | VV | P(39) | 2))) {
1188 /* input redirection (<) attached to glptr node */
1189 cn = glptr->l.n = new_node(OC_CONCAT | SS | P(37));
1191 xtc = TC_OPERAND | TC_UOPPRE;
1194 } else if (tc & (TC_BINOP | TC_UOPPOST)) {
1195 /* for binary and postfix-unary operators, jump back over
1196 * previous operators with higher priority */
1198 while (((t_info & PRIMASK) > (vn->a.n->info & PRIMASK2))
1199 || ((t_info == vn->info) && ((t_info & OPCLSMASK) == OC_COLON))
1203 if ((t_info & OPCLSMASK) == OC_TERNARY)
1205 cn = vn->a.n->r.n = new_node(t_info);
1207 if (tc & TC_BINOP) {
// After a binary operator an operand, a prefix operator or a regexp may follow.
1209 xtc = TC_OPERAND | TC_UOPPRE | TC_REGEXP;
1210 if ((t_info & OPCLSMASK) == OC_PGETLINE) {
1212 next_token(TC_GETLINE);
1213 /* give maximum priority to this pipe */
1214 cn->info &= ~PRIMASK;
1215 xtc = TC_OPERAND | TC_UOPPRE | TC_BINOP | iexp;
1219 xtc = TC_OPERAND | TC_UOPPRE | TC_BINOP | iexp;
1224 /* for operands and prefix-unary operators, attach them
1227 cn = vn->r.n = new_node(t_info);
1229 xtc = TC_OPERAND | TC_UOPPRE | TC_REGEXP;
1230 if (tc & (TC_OPERAND | TC_REGEXP)) {
1231 xtc = TC_UOPPRE | TC_UOPPOST | TC_BINOP | TC_OPERAND | iexp;
1232 /* one should be very careful with switch on tclass -
1233 * only simple tclasses should be used! */
// A name found in ahash (filled while a function header is parsed) becomes
// an OC_FNARG reference by index; any other name is looked up or created in vhash.
1238 v = hash_search(ahash, t_string);
1240 cn->info = OC_FNARG;
1241 cn->l.aidx = v->x.aidx;
1243 cn->l.v = newvar(t_string);
// Array subscript: parsed recursively up to the closing bracket.
1245 if (tc & TC_ARRAY) {
1247 cn->r.n = parse_expr(TC_ARRTERM);
// Constants get a variable of their own, set from t_double or t_string.
1254 v = cn->l.v = xzalloc(sizeof(var));
1256 setvar_i(v, t_double);
1258 setvar_s(v, t_string);
// Regexp literal: room for both the case-sensitive and the icase regex.
1262 mk_re_node(t_string, cn, xzalloc(sizeof(regex_t)*2));
// Function call: the function is looked up or created by name, the
// arguments are parsed as a parenthesized expression.
1267 cn->r.f = newfunc(t_string);
1268 cn->l.n = condition();
1272 cn = vn->r.n = parse_expr(TC_SEQTERM);
1278 xtc = TC_OPERAND | TC_UOPPRE | TC_BINOP | iexp;
1282 cn->l.n = condition();
1291 /* add node to chain. Return ptr to alloc'd node */
1292 static node *chain_node(uint32_t info)
// NOTE(review): presumably done only for a chain that is still empty - confirm.
1297 seq->first = seq->last = new_node(0);
// The program name changed since the last node of this chain: record it and
// emit an OC_NEWSOURCE node that carries a copy of the name.
1299 if (seq->programname != g_progname) {
1300 seq->programname = g_progname;
1301 n = chain_node(OC_NEWSOURCE);
1302 n->l.new_progname = xstrdup(g_progname);
// A fresh OC_DONE node is linked after n and becomes the new end of the chain.
1307 seq->last = n->a.n = new_node(OC_DONE);
// Append a node of class info whose left operand is the expression parsed
// up to a statement terminator or a closing brace.
1312 static void chain_expr(uint32_t info)
1316 n = chain_node(info);
1317 n->l.n = parse_expr(TC_OPTERM | TC_GRPTERM);
// NOTE(review): the closing brace is presumably handed back to the
// tokenizer here - confirm.
1318 if (t_tclass & TC_GRPTERM)
// Build a loop in the current chain. The enclosing break and continue
// targets are saved and restored around it, so loops can nest.
1322 static node *chain_loop(node *nn)
1324 node *n, *n2, *save_brk, *save_cont;
1326 save_brk = break_ptr;
1327 save_cont = continue_ptr;
// n is the branch node of the loop; the two OC_EXEC nodes serve as jump
// targets for continue and break while the body is parsed.
1329 n = chain_node(OC_BR | Vx);
1330 continue_ptr = new_node(OC_EXEC);
1331 break_ptr = new_node(OC_EXEC);
1333 n2 = chain_node(OC_EXEC | Vx);
// continue leads to n2; break and the right link of the branch node lead to
// the current end of the chain.
1336 continue_ptr->a.n = n2;
1337 break_ptr->a.n = n->r.n = seq->last;
1339 continue_ptr = save_cont;
1340 break_ptr = save_brk;
1345 /* parse group and attach it to chain */
1346 static void chain_group(void)
// Leading newlines are skipped.
1352 c = next_token(TC_GRPSEQ);
1353 } while (c & TC_NEWLINE);
// Braced group: continues until the closing brace.
1355 if (c & TC_GRPSTART) {
1356 while (next_token(TC_GRPSEQ | TC_GRPTERM) != TC_GRPTERM) {
1357 if (t_tclass & TC_NEWLINE)
// Plain expression statement.
1362 } else if (c & (TC_OPSEQ | TC_OPTERM)) {
1364 chain_expr(OC_EXEC | Vx);
1365 } else { /* TC_STATEMNT */
1366 switch (t_info & OPCLSMASK) {
// if: a branch node on the condition, with an optional else part.
1368 n = chain_node(OC_BR | Vx);
1369 n->l.n = condition();
1371 n2 = chain_node(OC_EXEC);
1373 if (next_token(TC_GRPSEQ | TC_GRPTERM | TC_ELSE) == TC_ELSE) {
1375 n2->a.n = seq->last;
// while loop.
// NOTE(review): the case labels are assumed from the statements that follow - confirm.
1383 n = chain_loop(NULL);
// do ... while: the condition is read after the body.
1388 n2 = chain_node(OC_EXEC);
1389 n = chain_loop(NULL);
1391 next_token(TC_WHILE);
1392 n->l.n = condition();
// for: the first clause decides between the for-in and the three-clause form.
1396 next_token(TC_SEQSTART);
1397 n2 = parse_expr(TC_SEMICOL | TC_SEQTERM);
1398 if (t_tclass & TC_SEQTERM) { /* for-in */
1399 if ((n2->info & OPCLSMASK) != OC_IN)
1400 syntax_error(EMSG_UNEXP_TOKEN);
1401 n = chain_node(OC_WALKINIT | VV);
1404 n = chain_loop(NULL);
1405 n->info = OC_WALKNEXT | Vx;
1407 } else { /* for (;;) */
1408 n = chain_node(OC_EXEC | Vx);
1410 n2 = parse_expr(TC_SEMICOL);
1411 n3 = parse_expr(TC_SEQTERM);
// print and printf: expression list plus an optional output redirection.
1421 n = chain_node(t_info);
1422 n->l.n = parse_expr(TC_OPTERM | TC_OUTRDR | TC_GRPTERM);
1423 if (t_tclass & TC_OUTRDR) {
1425 n->r.n = parse_expr(TC_OPTERM | TC_GRPTERM);
1427 if (t_tclass & TC_GRPTERM)
// break and continue become OC_EXEC nodes; the one shown jumps to
// continue_ptr, the target set up by chain_loop().
1432 n = chain_node(OC_EXEC);
1437 n = chain_node(OC_EXEC);
1438 n->a.n = continue_ptr;
1441 /* delete, next, nextfile, return, exit */
// Parse the program text p at top level: BEGIN and END blocks, function
// declarations, pattern-action rules and bare groups, until end of input.
1448 static void parse_program(char *p)
1457 while ((tclass = next_token(TC_EOF | TC_OPSEQ | TC_GRPSTART |
1458 TC_OPTERM | TC_BEGIN | TC_END | TC_FUNCDECL)) != TC_EOF) {
// NOTE(review): stray terminators between items are presumably skipped - confirm.
1460 if (tclass & TC_OPTERM)
1464 if (tclass & TC_BEGIN) {
1468 } else if (tclass & TC_END) {
1472 } else if (tclass & TC_FUNCDECL) {
1473 next_token(TC_FUNCTION);
// The function is looked up or created by name and its body chain is reset.
1475 f = newfunc(t_string);
1476 f->body.first = NULL;
// Parameters: each name is entered into ahash and numbered in declaration order.
1478 while (next_token(TC_VARIABLE | TC_SEQTERM) & TC_VARIABLE) {
1479 v = findvar(ahash, t_string);
1480 v->x.aidx = f->nargs++;
1482 if (next_token(TC_COMMA | TC_SEQTERM) & TC_SEQTERM)
// Rule with a pattern: an OC_TEST node on the pattern, followed either by an
// action group or by an implicit print.
1489 } else if (tclass & TC_OPSEQ) {
1491 cn = chain_node(OC_TEST);
1492 cn->l.n = parse_expr(TC_OPTERM | TC_EOF | TC_GRPSTART);
1493 if (t_tclass & TC_GRPSTART) {
1497 chain_node(OC_PRINT);
1499 cn->r.n = mainseq.last;
1501 } else /* if (tclass & TC_GRPSTART) */ {
1509 /* -------- program execution part -------- */
// Set up spl for separator s. A separator longer than one character is
// compiled as a regular expression; otherwise its single character (or NUL
// for an empty string) is stored directly in the info word of the node.
1511 static node *mk_splitter(const char *s, tsplitter *spl)
// A regexp left over from the previous setting is released first.
1519 if ((n->info & OPCLSMASK) == OC_REGEXP) {
1521 regfree(ire); // TODO: nuke ire, use re+1?
1523 if (s[0] && s[1]) { /* strlen(s) > 1 */
1524 mk_re_node(s, n, re);
1526 n->info = (uint32_t) s[0];
1532 /* use node as a regular expression. Supplied with node ptr and regex_t
1533 * storage space. Return ptr to regex (if result points to preg, it should
1534 * be later regfree'd manually) */
1536 static regex_t *as_regex(node *op, regex_t *preg)
// A regexp node already carries compiled forms; the one matching the current
// icase setting is returned as is.
1542 if ((op->info & OPCLSMASK) == OC_REGEXP) {
1543 return icase ? op->r.ire : op->l.re;
// Otherwise the node is evaluated and its string value is compiled into preg.
1546 s = getvar_s(evaluate(op, v));
1548 cflags = icase ? REG_EXTENDED | REG_ICASE : REG_EXTENDED;
1549 /* Testcase where REG_EXTENDED fails (unpaired '{'):
1550 * echo Hi | awk 'gsub("@(samp|code|file)\{","");'
1551 * gawk 3.1.5 eats this. We revert to ~REG_EXTENDED
1552 * (maybe gsub is not supposed to use REG_EXTENDED?).
// Second attempt without REG_EXTENDED; xregcomp() handles a failure itself.
1554 if (regcomp(preg, s, cflags)) {
1555 cflags &= ~REG_EXTENDED;
1556 xregcomp(preg, s, cflags);
1562 /* gradually increasing buffer.
1563 * note that we reallocate even if n == old_size,
1564 * and thus there is at least one extra allocated byte.
1566 static char* qrealloc(char *b, int n, int *size)
1568 if (!b || n >= *size) {
1569 *size = n + (n>>1) + 80;
1570 b = xrealloc(b, *size);
1575 /* resize field storage space */
1576 static void fsrealloc(int size)
// Grow the Fields array with 16 entries of headroom; every new entry starts
// as a SPECIAL variable without a string.
1580 if (size >= maxfields) {
1582 maxfields = size + 16;
1583 Fields = xrealloc(Fields, maxfields * sizeof(Fields[0]));
1584 for (; i < maxfields; i++) {
1585 Fields[i].type = VF_SPECIAL;
1586 Fields[i].string = NULL;
1589 /* if size < nfields, clear extra field variables */
1590 for (i = size; i < nfields; i++) {
// Split s according to the splitter node spl. The pieces are written into a
// newly allocated buffer stored in *slist; n counts the fields.
1596 static int awk_split(const char *s, node *spl, char **slist)
1601 regmatch_t pmatch[2]; // TODO: why [2]? [1] is enough...
1603 /* in worst case, each char would be a separate field */
1604 *slist = s1 = xzalloc(strlen(s) * 2 + 3);
// For a non-regexp splitter the info word holds the separator character.
1607 c[0] = c[1] = (char)spl->info;
// Special case for an empty RS value.
1609 if (*getvar_s(intvar[RS]) == '\0')
1613 if ((spl->info & OPCLSMASK) == OC_REGEXP) { /* regex split */
1615 return n; /* "": zero fields */
1616 n++; /* at least one field will be there */
1618 l = strcspn(s, c+2); /* len till next NUL or \n */
// A match counts only if it starts within the first l characters.
1619 if (regexec(icase ? spl->r.ire : spl->l.re, s, 1, pmatch, 0) == 0
1620 && pmatch[0].rm_so <= l
1622 l = pmatch[0].rm_so;
1623 if (pmatch[0].rm_eo == 0) {
1627 n++; /* we saw yet another delimiter */
1629 pmatch[0].rm_eo = l;
1634 /* make sure we remove *all* of the separator chars */
1637 } while (++l < pmatch[0].rm_eo);
// Continue behind the separator.
1639 s += pmatch[0].rm_eo;
1643 if (c[0] == '\0') { /* null split */
1651 if (c[0] != ' ') { /* single-character split */
// Both letter cases of the separator go into c.
// NOTE(review): presumably done only when icase is set - confirm.
1653 c[0] = toupper(c[0]);
1654 c[1] = tolower(c[1]);
1658 while ((s1 = strpbrk(s1, c)) != NULL) {
// Remaining case (separator is a blank): fields are runs of non-space characters.
1666 s = skip_whitespace(s);
1670 while (*s && !isspace(*s))
// Split the current record (intvar[F0]) into the Fields array using the
// field splitter fsplitter, then store the resulting count in NF.
1677 static void split_f0(void)
1679 /* static char *fstrings; */
1680 #define fstrings (G.split_f0__fstrings)
// awk_split() writes the pieces into fstrings and returns their count.
1691 n = awk_split(getvar_s(intvar[F0]), &fsplitter.n, &fstrings);
// Each field points into the split buffer (hence VF_FSTR) and is flagged
// as user input that has been set.
1694 for (i = 0; i < n; i++) {
1695 Fields[i].string = nextword(&s);
1696 Fields[i].type |= (VF_FSTR | VF_USER | VF_DIRTY);
1699 /* set NF manually to avoid side effects */
1701 intvar[NF]->type = VF_NUMBER | VF_SPECIAL;
1702 intvar[NF]->number = nfields;
1706 /* perform additional actions when some internal variables changed */
// v: the variable that was just modified. Variables without VF_SPECIAL
// are filtered by the first test. The visible branches are:
//   NF         - rebuild the record from the first n fields joined by OFS
//   F0         - mark the record as not yet split into fields
//   FS / RS    - rebuild the field / record splitter from the new value
//   IGNORECASE - handled in a branch whose body is not visible here
//   otherwise  - v is taken to be an element of Fields; NF is raised so
//                that it covers the index of v
1707 static void handle_special(var *v)
1711 const char *sep, *s;
1712 int sl, l, len, i, bsize;
1714 if (!(v->type & VF_SPECIAL))
1717 if (v == intvar[NF]) {
1718 n = (int)getvar_i(v);
1721 /* recalculate $0 */
1722 sep = getvar_s(intvar[OFS]);
1726 for (i = 0; i < n; i++) {
1727 s = getvar_s(&Fields[i]);
// Separator bytes (sl of them) are copied into the output buffer b.
1730 memcpy(b+len, sep, sl);
// Make room for the field text plus one separator, then append the field.
1733 b = qrealloc(b, len+l+sl, &bsize);
1734 memcpy(b+len, s, l);
// The assembled buffer b is stored as the new value of the record.
1739 setvar_p(intvar[F0], b);
1742 } else if (v == intvar[F0]) {
1743 is_f0_split = FALSE;
1745 } else if (v == intvar[FS]) {
1746 mk_splitter(getvar_s(v), &fsplitter);
1748 } else if (v == intvar[RS]) {
1749 mk_splitter(getvar_s(v), &rsplitter);
1751 } else if (v == intvar[IGNORECASE]) {
// v-Fields is the zero-based index of the field that was assigned; NF
// becomes that index plus one unless it is already larger.
1755 n = getvar_i(intvar[NF]);
1756 setvar_i(intvar[NF], n > v-Fields ? n : v-Fields+1);
1757 /* right here v is invalid. Just to note... */
1761 /* step through func/builtin/etc arguments */
// pn: in/out cursor into an argument list. An OC_COMMA node is treated
// specially by the test below; presumably it chains one argument to the
// rest of the list - the body of that branch is not visible here.
1762 static node *nextarg(node **pn)
1767 if (n && (n->info & OPCLSMASK) == OC_COMMA) {
// Start a walk over the keys of array, attaching the walker to v.
// v:     loop variable; v->x.walker receives the new walker.
// array: hash whose item names are copied into the walker buffer.
// A walker already attached to v (VF_WALK set) is remembered in
// prev_walker and linked through w->prev.
1776 static void hashwalk_init(var *v, xhash *array)
1781 walker_list *prev_walker;
1783 if (v->type & VF_WALK) {
1784 prev_walker = v->x.walker;
1789 debug_printf_walker("hashwalk_init: prev_walker:%p\n", prev_walker);
// One allocation holds the walker header and array->glen + 1 bytes of
// name storage.
1791 w = v->x.walker = xzalloc(sizeof(*w) + array->glen + 1); /* why + 1? */
1792 debug_printf_walker(" walker@%p=%p\n", &v->x.walker, w);
1793 w->cur = w->end = w->wbuf;
1794 w->prev = prev_walker;
// Visit every bucket and copy item names to the end of the buffer.
1795 for (i = 0; i < array->csize; i++) {
1796 hi = array->items[i];
1798 strcpy(w->end, hi->name);
// Advance the walk attached to v by one key.
// When the cursor has reached the end of the stored names, the walker
// saved in w->prev is reinstated on v. Otherwise v is set to the next
// stored name. The return values are not visible here; evaluate() treats
// a nonzero result as meaning that another item was produced.
1805 static int hashwalk_next(var *v)
1807 walker_list *w = v->x.walker;
1809 if (w->cur >= w->end) {
1810 walker_list *prev_walker = w->prev;
1812 debug_printf_walker("end of iteration, free(walker@%p:%p), prev_walker:%p\n", &v->x.walker, w, prev_walker);
1814 v->x.walker = prev_walker;
1818 setvar_s(v, nextword(&w->cur));
1822 /* evaluate node, return 1 when result is true, 0 otherwise */
// pattern: node to evaluate. The result is written into the shared
// variable G.ptest__v and its truth value is obtained with istrue().
1823 static int ptest(node *pattern)
1825 /* ptest__v is "static": to save stack space? */
1826 return istrue(evaluate(pattern, &G.ptest__v));
1829 /* read next record from stream rsm into a variable v */
// rsm: input stream descriptor; data is read from the file descriptor
//      underlying rsm->F.
// v:   variable that receives the record.
// The record terminator comes from rsplitter: a regular expression, a
// single character, or a NUL separator character (see the branches below).
// The value r is what the function reports back, as the final debug
// print shows.
1830 static int awk_getline(rstream *rsm, var *v)
1833 regmatch_t pmatch[2];
1834 int size, a, p, pp = 0;
1835 int fd, so, eo, r, rp;
1838 debug_printf_eval("entered %s()\n", __func__);
1840 /* we're using our own buffer since we need access to accumulating
1843 fd = fileno(rsm->F);
1848 c = (char) rsplitter.n.info;
1852 m = qrealloc(m, 256, &size);
// Regex terminator: so and eo become the start and end offsets of the
// match inside the buffer b.
1859 if ((rsplitter.n.info & OPCLSMASK) == OC_REGEXP) {
1860 if (regexec(icase ? rsplitter.n.r.ire : rsplitter.n.l.re,
1861 b, 1, pmatch, 0) == 0) {
1862 so = pmatch[0].rm_so;
1863 eo = pmatch[0].rm_eo;
// Single-character terminator: search from offset pp onwards.
1867 } else if (c != '\0') {
1868 s = strchr(b+pp, c);
1870 s = memchr(b+pp, '\0', p - pp);
// NUL separator character: leading newlines are skipped, the record ends
// at a blank line, and the newlines that follow are consumed.
1877 while (b[rp] == '\n')
1879 s = strstr(b+rp, "\n\n");
1882 while (b[eo] == '\n')
// No terminator found yet: compact the buffer, grow it, and read more.
1891 memmove(m, m+a, p+1);
1896 m = qrealloc(m, a+p+128, &size);
1899 p += safe_read(fd, b+p, size-p-1);
1903 setvar_i(intvar[ERRNO], errno);
// The buffer is cut at so and at eo by writing a NUL there; c keeps the
// overwritten byte. RT is set to the text starting at so.
1912 c = b[so]; b[so] = '\0';
1916 c = b[eo]; b[eo] = '\0';
1917 setvar_s(intvar[RT], b+so);
1926 debug_printf_eval("returning from %s(): %d\n", __func__, r);
// Format number n into buffer b of capacity size.
// format:     printf-style format; its last character selects how n is
//             passed to snprintf.
// int_as_int: when nonzero and n equals its own int truncation, n is
//             printed with the plain integer format instead.
// r holds the snprintf result.
// NOTE(review): n == (int)n converts a double to int; for values outside
// the int range that conversion is not defined by the C standard -
// confirm whether callers can pass such values.
1931 static int fmt_num(char *b, int size, const char *format, double n, int int_as_int)
1935 const char *s = format;
1937 if (int_as_int && n == (int)n) {
1938 r = snprintf(b, size, "%d", (int)n);
// Walk to the end of format; c ends up as its last character, or NUL
// when format is empty.
1940 do { c = *s; } while (c && *++s);
// NOTE(review): strchr also matches the terminating NUL, so an empty
// format would take the integer branch - confirm this cannot happen.
1941 if (strchr("diouxX", c)) {
1942 r = snprintf(b, size, format, (int)n);
1943 } else if (strchr("eEfgG", c)) {
1944 r = snprintf(b, size, format, n);
// Any other conversion character is rejected.
1946 syntax_error(EMSG_INV_FMT);
1952 /* formatted output into an allocated buffer, return ptr to buffer */
// n: argument list node; the first argument is the format string and the
//    following ones supply values for each conversion.
// The format is copied with xstrdup() and processed one conversion at a
// time; output accumulates in b at offset i.
1953 static char *awk_printf(node *n)
1958 int i, j, incr, bsize;
1963 fmt = f = xstrdup(getvar_s(evaluate(nextarg(&n), v)));
// Skip literal text up to the next conversion; a doubled percent sign
// does not start a conversion.
1968 while (*f && (*f != '%' || *++f == '%'))
// Scan flags, width and precision up to the conversion letter.
1970 while (*f && !isalpha(*f)) {
1972 syntax_error("%*x formats are not supported");
// Reserve room for this format segment plus MAXVARFMT bytes of output.
1976 incr = (f - s) + MAXVARFMT;
1977 b = qrealloc(b, incr + i, &bsize);
1983 arg = evaluate(nextarg(&n), v);
// c conversion (or end of format): a numeric argument is converted to a
// character code, otherwise the first character of the string is used.
1986 if (c == 'c' || !c) {
1987 i += sprintf(b+i, s, is_numeric(arg) ?
1988 (char)getvar_i(arg) : *getvar_s(arg));
1989 } else if (c == 's') {
// Strings can be arbitrarily long, so the buffer is grown by their length.
1991 b = qrealloc(b, incr+i+strlen(s1), &bsize);
1992 i += sprintf(b+i, s, s1);
// Everything else is handed to fmt_num() as a numeric conversion.
1994 i += fmt_num(b+i, incr, s, getvar_i(arg), FALSE);
1998 /* if there was an error while sprintf, return value is negative */
// Trim the buffer to the bytes actually written plus the terminator.
2005 b = xrealloc(b, i + 1);
2010 /* Common substitution routine.
2011 * Replace (nm)'th substring of (src) that matches (rn) with (repl),
2012 * store result into (dest), return number of substitutions.
2013 * If nm = 0, replace all matches.
2014 * If src or dest is NULL, use $0.
2015 * If subexp != 0, enable subexpression matching (\1-\9).
// Working state: sp is the unprocessed tail of the source string, resbuf
// is the growing result, and residx is the write offset into it.
2017 static int awk_sub(node *rn, const char *repl, int nm, var *src, var *dest, int subexp)
2021 int match_no, residx, replen, resbufsize;
2023 regmatch_t pmatch[10];
2024 regex_t sreg, *regex;
2030 regex = as_regex(rn, &sreg);
2031 sp = getvar_s(src ? src : intvar[F0]);
2032 replen = strlen(repl);
// Up to ten match slots are requested so that group references can be
// resolved from pmatch.
2033 while (regexec(regex, sp, 10, pmatch, regexec_flags) == 0) {
2034 int so = pmatch[0].rm_so;
2035 int eo = pmatch[0].rm_eo;
2037 //bb_error_msg("match %u: [%u,%u] '%s'%p", match_no+1, so, eo, sp,sp);
// Copy everything up to the end of the match; the matched part is
// dropped again below when this match is to be replaced.
2038 resbuf = qrealloc(resbuf, residx + eo + replen, &resbufsize);
2039 memcpy(resbuf + residx, sp, eo);
2041 if (++match_no >= nm) {
// Back up over the matched text so the replacement overwrites it.
2046 residx -= (eo - so);
// Copy the replacement, expanding the ampersand and, when subexp is set,
// digit references. nbs appears to count preceding backslashes.
2048 for (s = repl; *s; s++) {
2049 char c = resbuf[residx++] = *s;
2054 if (c == '&' || (subexp && c >= '0' && c <= '9')) {
2056 residx -= ((nbs + 3) >> 1);
2063 resbuf[residx++] = c;
// Insert the text of match group j taken from the source string.
2065 int n = pmatch[j].rm_eo - pmatch[j].rm_so;
2066 resbuf = qrealloc(resbuf, residx + replen + n, &resbufsize);
2067 memcpy(resbuf + residx, sp + pmatch[j].rm_so, n);
// Later searches must not treat the current position as a line start.
2075 regexec_flags = REG_NOTBOL;
2080 /* Empty match (e.g. "b*" will match anywhere).
2081 * Advance by one char. */
2083 //gsub(/\<b*/,"") on "abc" will reach this point, advance to "bc"
2084 //... and will erroneously match "b" even though it is NOT at the word start.
2085 //we need REG_NOTBOW but it does not exist...
2086 //TODO: if EXTRA_COMPAT=y, use GNU matching and re_search,
2087 //it should be able to do it correctly.
2088 /* Subtle: this is safe only because
2089 * qrealloc allocated at least one extra byte */
2090 resbuf[residx] = *sp;
// Append whatever remains of the source after the last match.
2098 resbuf = qrealloc(resbuf, residx + strlen(sp), &resbufsize);
2099 strcpy(resbuf + residx, sp);
2101 //bb_error_msg("end sp:'%s'%p", sp,sp);
2102 setvar_p(dest ? dest : intvar[F0], resbuf);
// Convert a date specification string into a time value via mktime().
// ds: whitespace-separated numbers scanned in the order year, month,
//     day, hour, minute, second, plus one further signed value.
// Month values below 1 and years below 1900 are among the inputs that
// the validity test singles out; what happens then is not visible here.
2108 static NOINLINE int do_mktime(const char *ds)
2113 /*memset(&then, 0, sizeof(then)); - not needed */
2114 then.tm_isdst = -1; /* default is unknown */
2116 /* manpage of mktime says these fields are ints,
2117 * so we can sscanf stuff directly into them */
2118 count = sscanf(ds, "%u %u %u %u %u %u %d",
2119 &then.tm_year, &then.tm_mon, &then.tm_mday,
2120 &then.tm_hour, &then.tm_min, &then.tm_sec,
2124 || (unsigned)then.tm_mon < 1
2125 || (unsigned)then.tm_year < 1900
// struct tm counts years from 1900.
2131 then.tm_year -= 1900;
2133 return mktime(&then);
// Execute a builtin function call described by node op, storing the
// result in res.
// Up to four arguments are collected: an[] holds the argument nodes,
// av[] their evaluated values and as[] their string forms. Bits of the
// node info select which of these are computed.
2136 static NOINLINE var *exec_builtin(node *op, var *res)
2138 #define tspl (G.exec_builtin__tspl)
2144 regmatch_t pmatch[2];
2153 isr = info = op->info;
2156 av[2] = av[3] = NULL;
2157 for (i = 0; i < 4 && op; i++) {
2158 an[i] = nextarg(&op);
2159 if (isr & 0x09000000)
2160 av[i] = evaluate(an[i], &tv[i]);
2161 if (isr & 0x08000000)
2162 as[i] = getvar_s(av[i]);
// The top two bits of info give the minimum number of arguments.
2167 if ((uint32_t)nargs < (info >> 30))
2168 syntax_error(EMSG_TOO_FEW_ARGS);
// Two-argument arctangent, available only with libm support.
2174 if (ENABLE_FEATURE_AWK_LIBM)
2175 setvar_i(res, atan2(getvar_i(av[0]), getvar_i(av[1])));
2177 syntax_error(EMSG_NO_MATH);
// Split: the third argument is either a regex node used as is, or a
// string turned into a temporary splitter.
2184 spl = (an[2]->info & OPCLSMASK) == OC_REGEXP ?
2185 an[2] : mk_splitter(getvar_s(evaluate(an[2], &tv[2])), &tspl);
2190 n = awk_split(as[0], spl, &s);
// The target array is emptied and refilled with indices 1..n.
2192 clear_array(iamarray(av[1]));
2193 for (i = 1; i <= n; i++)
2194 setari_u(av[1], i, nextword(&s));
// Substring: the start argument is one-based; a missing length means
// the rest of the string.
2204 i = getvar_i(av[1]) - 1;
2209 n = (nargs > 2) ? getvar_i(av[2]) : l-i;
2212 s = xstrndup(as[0]+i, n);
2217 /* Bitwise ops must assume that operands are unsigned. GNU Awk 3.1.5:
2218 * awk '{ print or(-1,1) }' gives "4.29497e+09", not "-2.xxxe+09" */
2220 setvar_i(res, getvar_i_int(av[0]) & getvar_i_int(av[1]));
2224 setvar_i(res, ~getvar_i_int(av[0]));
2228 setvar_i(res, getvar_i_int(av[0]) << getvar_i_int(av[1]));
2232 setvar_i(res, getvar_i_int(av[0]) | getvar_i_int(av[1]));
2236 setvar_i(res, getvar_i_int(av[0]) >> getvar_i_int(av[1]));
2240 setvar_i(res, getvar_i_int(av[0]) ^ getvar_i_int(av[1]));
// Case conversion works on a private copy and only touches the ASCII
// letters a-z and A-Z, by setting or clearing bit 0x20.
2246 s1 = s = xstrdup(as[0]);
2248 //*s1 = (info == B_up) ? toupper(*s1) : tolower(*s1);
2249 if ((unsigned char)((*s1 | 0x20) - 'a') <= ('z' - 'a'))
2250 *s1 = (info == B_up) ? (*s1 & 0xdf) : (*s1 | 0x20);
// Substring search: l is the last offset at which the needle of length
// ll can still fit. Two search strategies are visible, strstr() and a
// strncasecmp() scan; the condition choosing between them is not shown.
2260 l = strlen(as[0]) - ll;
2261 if (ll > 0 && l >= 0) {
2263 char *s = strstr(as[0], as[1]);
2265 n = (s - as[0]) + 1;
2267 /* this piece of code is terribly slow and
2268 * really should be rewritten
2270 for (i = 0; i <= l; i++) {
2271 if (strncasecmp(as[0]+i, as[1], ll) == 0) {
// Time formatting: uses the given format or a default one and writes
// into the shared buffer g_buf.
2283 tt = getvar_i(av[1]);
2286 //s = (nargs > 0) ? as[0] : "%a %b %d %H:%M:%S %Z %Y";
2287 i = strftime(g_buf, MAXVARFMT,
2288 ((nargs > 0) ? as[0] : "%a %b %d %H:%M:%S %Z %Y"),
2291 setvar_s(res, g_buf);
2295 setvar_i(res, do_mktime(as[0]));
// Regex match: RSTART and RLENGTH are set from the match offsets. The
// assignments of 0 and -1 below yield RSTART 0 and RLENGTH -1; they are
// presumably the no-match case, but their guard is not visible here.
2299 re = as_regex(an[1], &sreg);
2300 n = regexec(re, as[0], 1, pmatch, 0);
2305 pmatch[0].rm_so = 0;
2306 pmatch[0].rm_eo = -1;
2308 setvar_i(newvar("RSTART"), pmatch[0].rm_so);
2309 setvar_i(newvar("RLENGTH"), pmatch[0].rm_eo - pmatch[0].rm_so);
2310 setvar_i(res, pmatch[0].rm_so);
// Substitution builtins, all routed through awk_sub(): the first form
// writes into res with group references enabled, the other two replace
// all matches (count 0) or only the first (count 1) in place.
2316 awk_sub(an[0], as[1], getvar_i(av[2]), av[3], res, TRUE);
2320 setvar_i(res, awk_sub(an[0], as[1], 0, av[2], av[2], FALSE));
2324 setvar_i(res, awk_sub(an[0], as[1], 1, av[2], av[2], FALSE));
2334 * Evaluate node - the heart of the program. Supplied with subtree
2335 * and place where to store result. returns ptr to result.
// XC maps an opcode class value to a compact case label by shifting it
// right by eight bits.
2337 #define XC(n) ((n) >> 8)
// op:  node to evaluate.
// res: variable that receives the result.
// Operands are evaluated up front according to the OF_ flags of the node
// and the work is then dispatched on the opcode class.
2339 static var *evaluate(node *op, var *res)
2341 /* This procedure is recursive so we should count every byte */
2342 #define fnargs (G.evaluate__fnargs)
2343 /* seed is initialized to 1 */
2344 #define seed (G.evaluate__seed)
2345 #define sreg (G.evaluate__sreg)
2350 return setvar_s(res, NULL);
2352 debug_printf_eval("entered %s()\n", __func__);
2360 } L = L; /* for compiler */
2371 opn = (opinfo & OPNMASK);
2372 g_lineno = op->lineno;
2374 debug_printf_eval("opinfo:%08x opn:%08x\n", opinfo, opn);
2376 /* execute inevitable things */
// OF_RES1 / OF_RES2: evaluate the left / right operand.
// OF_STR1 / OF_STR2: fetch the string form of that operand.
// OF_NUM1: fetch the numeric form of the left operand.
2377 if (opinfo & OF_RES1)
2378 L.v = evaluate(op1, v1);
2379 if (opinfo & OF_RES2)
2380 R.v = evaluate(op->r.n, v1+1);
2381 if (opinfo & OF_STR1) {
2382 L.s = getvar_s(L.v);
2383 debug_printf_eval("L.s:'%s'\n", L.s);
2385 if (opinfo & OF_STR2) {
2386 R.s = getvar_s(R.v);
2387 debug_printf_eval("R.s:'%s'\n", R.s);
2389 if (opinfo & OF_NUM1) {
2390 L_d = getvar_i(L.v);
2391 debug_printf_eval("L_d:%f\n", L_d);
2394 debug_printf_eval("switch(0x%x)\n", XC(opinfo & OPCLSMASK));
2395 switch (XC(opinfo & OPCLSMASK)) {
2397 /* -- iterative node type -- */
// Pattern test. For a range pattern, OF_CHECKED on the node records that
// the start pattern has matched and the end pattern has not matched yet.
2401 if ((op1->info & OPCLSMASK) == OC_COMMA) {
2402 /* it's range pattern */
2403 if ((opinfo & OF_CHECKED) || ptest(op1->l.n)) {
2404 op->info |= OF_CHECKED;
2405 if (ptest(op1->r.n))
2406 op->info &= ~OF_CHECKED;
2412 op = ptest(op1) ? op->a.n : op->r.n;
2416 /* just evaluate an expression, also used as unconditional jump */
2420 /* branch, used in if-else and various loops */
2422 op = istrue(L.v) ? op->a.n : op->r.n;
2425 /* initialize for-in loop */
2426 case XC( OC_WALKINIT ):
2427 hashwalk_init(L.v, iamarray(R.v));
2430 /* get next array item */
2431 case XC( OC_WALKNEXT ):
2432 op = hashwalk_next(L.v) ? op->a.n : op->r.n;
// Output statements. With a redirection, the stream is looked up or
// created by name and opened as a pipe or as a file in write or append
// mode.
2435 case XC( OC_PRINT ):
2436 case XC( OC_PRINTF ): {
2440 rstream *rsm = newfile(R.s);
2443 rsm->F = popen(R.s, "w");
2445 bb_perror_msg_and_die("popen");
2448 rsm->F = xfopen(R.s, opn=='w' ? "w" : "a");
2454 if ((opinfo & OPCLSMASK) == OC_PRINT) {
// One visible path writes the whole record; the other evaluates each
// argument in turn, formatting numbers with OFMT and separating items
// with OFS. ORS is written as well.
2456 fputs(getvar_s(intvar[F0]), F);
2459 var *v = evaluate(nextarg(&op1), v1);
2460 if (v->type & VF_NUMBER) {
2461 fmt_num(g_buf, MAXVARFMT, getvar_s(intvar[OFMT]),
2465 fputs(getvar_s(v), F);
2469 fputs(getvar_s(intvar[OFS]), F);
2472 fputs(getvar_s(intvar[ORS]), F);
2474 } else { /* OC_PRINTF */
2475 char *s = awk_printf(op1);
// Delete: the operand must be a variable or a function argument. With an
// index a single element is removed, otherwise the whole array is
// cleared.
2483 case XC( OC_DELETE ): {
2484 uint32_t info = op1->info & OPCLSMASK;
2487 if (info == OC_VAR) {
2489 } else if (info == OC_FNARG) {
2490 v = &fnargs[op1->l.aidx];
2492 syntax_error(EMSG_NOT_ARRAY);
2498 s = getvar_s(evaluate(op1->r.n, v1));
2499 hash_remove(iamarray(v), s);
2501 clear_array(iamarray(v));
2506 case XC( OC_NEWSOURCE ):
2507 g_progname = op->l.new_progname;
2510 case XC( OC_RETURN ):
2514 case XC( OC_NEXTFILE ):
2525 /* -- recursive node type -- */
2529 if (L.v == intvar[NF])
2533 case XC( OC_FNARG ):
2534 L.v = &fnargs[op->l.aidx];
// With an index expression the result is the array element, otherwise
// the variable itself.
2536 res = op->r.n ? findvar(iamarray(L.v), R.s) : L.v;
// Membership test: 1 when the key L.s exists in the array, else 0.
2540 setvar_i(res, hash_search(iamarray(R.v), L.s) ? 1 : 0);
2543 case XC( OC_REGEXP ):
2545 L.s = getvar_s(intvar[F0]);
2548 case XC( OC_MATCH ):
// The result is inverted when the operator is the negated match.
2552 regex_t *re = as_regex(op1, &sreg);
2553 int i = regexec(re, L.s, 0, NULL, 0);
2556 setvar_i(res, (i == 0) ^ (opn == '!'));
2561 debug_printf_eval("MOVE\n");
2562 /* if source is a temporary string, just relink it to dest */
2563 //Disabled: if R.v is numeric but happens to have cached R.v->string,
2564 //then L.v ends up being a string, which is wrong
2565 // if (R.v == v1+1 && R.v->string) {
2566 // res = setvar_p(L.v, R.v->string);
2567 // R.v->string = NULL;
2569 res = copyvar(L.v, R.v);
// Ternary: the right child must be an OC_COLON node holding both arms.
2573 case XC( OC_TERNARY ):
2574 if ((op->r.n->info & OPCLSMASK) != OC_COLON)
2575 syntax_error(EMSG_POSSIBLE_ERROR);
2576 res = evaluate(istrue(L.v) ? op->r.n->l.n : op->r.n->r.n, res);
// User function call: a fresh block of nargs + 1 variables is allocated,
// arguments are evaluated into it, and the body is evaluated with
// g_progname saved beforehand and restored afterwards.
2579 case XC( OC_FUNC ): {
2581 const char *sv_progname;
2583 if (!op->r.f->body.first)
2584 syntax_error(EMSG_UNDEF_FUNC);
2586 vbeg = v = nvalloc(op->r.f->nargs + 1);
2588 var *arg = evaluate(nextarg(&op1), v1);
2590 v->type |= VF_CHILD;
2592 if (++v - vbeg >= op->r.f->nargs)
2598 sv_progname = g_progname;
2600 res = evaluate(op->r.f->body.first, res);
2602 g_progname = sv_progname;
// getline: the source is a pipe (OC_PGETLINE), a named file, or the
// next main input file. A failed open stores errno in ERRNO.
2609 case XC( OC_GETLINE ):
2610 case XC( OC_PGETLINE ): {
2617 if ((opinfo & OPCLSMASK) == OC_PGETLINE) {
2618 rsm->F = popen(L.s, "r");
2619 rsm->is_pipe = TRUE;
2621 rsm->F = fopen_for_read(L.s); /* not xfopen! */
2626 iF = next_input_file();
2630 if (!rsm || !rsm->F) {
2631 setvar_i(intvar[ERRNO], errno);
// FNR is incremented only when reading from the main input (no op1).
2639 i = awk_getline(rsm, R.v);
2640 if (i > 0 && !op1) {
2641 incvar(intvar[FNR]);
2648 /* simple builtins */
2649 case XC( OC_FBLTIN ): {
2650 double R_d = R_d; /* for compiler */
// Random value scaled by RAND_MAX.
2658 R_d = (double)rand() / (double)RAND_MAX;
// Math builtins are only available with ENABLE_FEATURE_AWK_LIBM.
2662 if (ENABLE_FEATURE_AWK_LIBM) {
2668 if (ENABLE_FEATURE_AWK_LIBM) {
2674 if (ENABLE_FEATURE_AWK_LIBM) {
2680 if (ENABLE_FEATURE_AWK_LIBM) {
2686 if (ENABLE_FEATURE_AWK_LIBM) {
2691 syntax_error(EMSG_NO_MATH);
// Reseed from the argument if one was given, otherwise from the clock.
2696 seed = op1 ? (unsigned)L_d : (unsigned)time(NULL);
2706 L.s = getvar_s(intvar[F0]);
// Shell command execution, compiled in only with
// ENABLE_FEATURE_ALLOW_EXEC; the status is shifted right by eight bits.
2712 R_d = (ENABLE_FEATURE_ALLOW_EXEC && L.s && *L.s)
2713 ? (system(L.s) >> 8) : 0;
2719 } else if (L.s && *L.s) {
2720 rstream *rsm = newfile(L.s);
// Close: look the stream up by name, close it as a pipe or as a file,
// and drop it from the table.
2730 rsm = (rstream *)hash_search(fdhash, L.s);
2731 debug_printf_eval("OC_FBLTIN F_cl rsm:%p\n", rsm);
2733 debug_printf_eval("OC_FBLTIN F_cl "
2734 "rsm->is_pipe:%d, ->F:%p\n",
2735 rsm->is_pipe, rsm->F);
2736 /* Can be NULL if open failed. Example:
2737 * getline line <"doesnt_exist";
2738 * close("doesnt_exist"); <--- here rsm->F is NULL
2741 err = rsm->is_pipe ? pclose(rsm->F) : fclose(rsm->F);
2743 hash_remove(fdhash, L.s);
2746 setvar_i(intvar[ERRNO], errno);
2755 case XC( OC_BUILTIN ):
2756 res = exec_builtin(op, res);
2759 case XC( OC_SPRINTF ):
2760 setvar_p(res, awk_printf(op1));
2763 case XC( OC_UNARY ): {
2766 Ld = R_d = getvar_i(R.v);
// Field reference: field i is stored at index i - 1 of Fields.
2793 case XC( OC_FIELD ): {
2794 int i = (int)getvar_i(R.v);
2801 res = &Fields[i - 1];
2806 /* concatenation (" ") and index joining (",") */
2807 case XC( OC_CONCAT ):
2808 case XC( OC_COMMA ): {
2809 const char *sep = "";
2810 if ((opinfo & OPCLSMASK) == OC_COMMA)
2811 sep = getvar_s(intvar[SUBSEP]);
2812 setvar_p(res, xasprintf("%s%s%s", L.s, sep, R.s));
// Short-circuit logic: the right operand is only tested when needed.
2817 setvar_i(res, istrue(L.v) ? ptest(op->r.n) : 0);
2821 setvar_i(res, istrue(L.v) ? 1 : ptest(op->r.n));
// Arithmetic. OC_BINARY stores into res, OC_REPLACE stores back into the
// left operand (compound assignment).
2824 case XC( OC_BINARY ):
2825 case XC( OC_REPLACE ): {
2826 double R_d = getvar_i(R.v);
2827 debug_printf_eval("BINARY/REPLACE: R_d:%f opn:%c\n", R_d, opn);
2840 syntax_error(EMSG_DIV_BY_ZERO);
2844 if (ENABLE_FEATURE_AWK_LIBM)
2845 L_d = pow(L_d, R_d);
2847 syntax_error(EMSG_NO_MATH);
2851 syntax_error(EMSG_DIV_BY_ZERO);
// Remainder computed from the int-truncated quotient.
2852 L_d -= (int)(L_d / R_d) * R_d;
2855 debug_printf_eval("BINARY/REPLACE result:%f\n", L_d);
2856 res = setvar_i(((opinfo & OPCLSMASK) == OC_BINARY) ? res : L.v, L_d);
// Comparison: numeric when both operands are numeric, otherwise a string
// comparison that honours icase. Bit 0 of opn inverts the result.
2860 case XC( OC_COMPARE ): {
2861 int i = i; /* for compiler */
2864 if (is_numeric(L.v) && is_numeric(R.v)) {
2865 Ld = getvar_i(L.v) - getvar_i(R.v);
2867 const char *l = getvar_s(L.v);
2868 const char *r = getvar_s(R.v);
2869 Ld = icase ? strcasecmp(l, r) : strcmp(l, r);
2871 switch (opn & 0xfe) {
2882 setvar_i(res, (i == 0) ^ (opn & 1));
2887 syntax_error(EMSG_POSSIBLE_ERROR);
// Opcode classes are ordered: values up to SHIFT_TIL_THIS and values
// from RECUR_FROM_THIS upwards are tested separately after the switch.
2889 if ((opinfo & OPCLSMASK) <= SHIFT_TIL_THIS)
2891 if ((opinfo & OPCLSMASK) >= RECUR_FROM_THIS)
2898 debug_printf_eval("returning from %s(): %p\n", __func__, res);
2906 /* -------- main & co. -------- */
// Final program actions before leaving the applet.
// r: presumably the exit status to use - how it is consumed is not
//    visible here.
// Visible steps: the END sequence is evaluated, then every stream in
// fdhash that is an open pipe is closed with pclose().
2908 static int awk_exit(int r)
2919 evaluate(endseq.first, &tv);
2922 /* waiting for children */
2923 for (i = 0; i < fdhash->csize; i++) {
2924 hi = fdhash->items[i];
2926 if (hi->data.rs.F && hi->data.rs.is_pipe)
2927 pclose(hi->data.rs.F);
2935 /* if expr looks like "var=value", perform assignment and return 1,
2936 * otherwise return 0 */
// expr: candidate string. It qualifies only if its first character
// passes isalnum_() and it contains an equals sign.
// The work is done on a private copy so that expr itself stays intact.
2937 static int is_assignment(const char *expr)
2939 char *exprc, *val, *s, *s1;
2941 if (!isalnum_(*expr) || (val = strchr(expr, '=')) == NULL) {
2945 exprc = xstrdup(expr);
// Re-point val at the same offset inside the copy.
2946 val = exprc + (val - expr);
// The value is rewritten in place, one character at a time, through
// nextchar().
2950 while ((*s1 = nextchar(&s)) != '\0')
2953 setvar_u(newvar(exprc), val);
2958 /* switch to next input file */
// Walks ARGV starting after ARGIND. Entries that are empty or that
// is_assignment() accepts as var=value are not opened as files.
// State is kept in the shared rsm stream and the files_happen flag.
2959 static rstream *next_input_file(void)
2961 #define rsm (G.next_input_file__rsm)
2962 #define files_happen (G.next_input_file__files_happen)
2965 const char *fname, *ind;
2970 rsm.pos = rsm.adv = 0;
// Test for having run out of ARGV entries.
2973 if (getvar_i(intvar[ARGIND])+1 >= getvar_i(intvar[ARGC])) {
// ARGIND is incremented and its string form is used as the ARGV key.
2980 ind = getvar_s(incvar(intvar[ARGIND]));
2981 fname = getvar_s(findvar(iamarray(intvar[ARGV]), ind));
2982 if (fname && *fname && !is_assignment(fname)) {
2983 F = xfopen_stdin(fname);
2988 files_happen = TRUE;
2989 setvar_s(intvar[FILENAME], fname);
// Applet entry point. Visible phases: set up globals and builtin
// variables, import the environment, parse options and the program
// text, run the BEGIN sequence, process the input files record by
// record, and finish through awk_exit().
2996 int awk_main(int argc, char **argv) MAIN_EXTERNALLY_VISIBLE;
2997 int awk_main(int argc, char **argv)
3000 char *opt_F, *opt_W;
3001 llist_t *list_v = NULL;
3002 llist_t *list_f = NULL;
3007 char *vnames = (char *)vNames; /* cheat */
3008 char *vvalues = (char *)vValues;
3012 /* Undo busybox.c, or else strtod may eat ','! This breaks parsing:
3013 * $1,$2 == '$1,' '$2', NOT '$1' ',' '$2' */
3014 if (ENABLE_LOCALE_SUPPORT)
3015 setlocale(LC_NUMERIC, "C");
3019 /* allocate global buffer */
3020 g_buf = xmalloc(MAXVARFMT + 1);
// Hash tables kept in the globals vhash, ahash, fdhash and fnhash.
3022 vhash = hash_init();
3023 ahash = hash_init();
3024 fdhash = hash_init();
3025 fnhash = hash_init();
3027 /* initialize variables */
// vNames and vValues are parallel packed string lists. A value entry
// starting with byte 0377 means that no string default is assigned
// here; a name entry starting with an asterisk marks the variable as
// VF_SPECIAL.
3028 for (i = 0; *vnames; i++) {
3029 intvar[i] = v = newvar(nextword(&vnames));
3030 if (*vvalues != '\377')
3031 setvar_s(v, nextword(&vvalues));
3035 if (*vnames == '*') {
3036 v->type |= VF_SPECIAL;
// Build the initial field and record splitters from the defaults.
3041 handle_special(intvar[FS]);
3042 handle_special(intvar[RS]);
// Register the standard streams under their device names.
3044 newfile("/dev/stdin")->F = stdin;
3045 newfile("/dev/stdout")->F = stdout;
3046 newfile("/dev/stderr")->F = stderr;
3048 /* Huh, people report that sometimes environ is NULL. Oh well. */
3049 if (environ) for (envp = environ; *envp; envp++) {
3050 /* environ is writable, thus we don't strdup it needlessly */
3052 char *s1 = strchr(s, '=');
3055 /* Both findvar and setvar_u take const char*
3056 * as 2nd arg -> environment is not trashed */
3057 setvar_u(findvar(iamarray(intvar[ENVIRON]), s), s1 + 1);
3061 opt_complementary = "v::f::"; /* -v and -f can occur multiple times */
3062 opt = getopt32(argv, "F:v:f:W:", &opt_F, &list_v, &list_f, &opt_W);
3066 setvar_s(intvar[FS], opt_F); // -F
3067 while (list_v) { /* -v */
3068 if (!is_assignment(llist_pop(&list_v)))
3071 if (list_f) { /* -f */
// Each program file is read in full, in chunks of up to 4094 bytes, into
// a buffer that grows with every chunk. The text starts at offset 1.
3076 g_progname = llist_pop(&list_f);
3077 from_file = xfopen_stdin(g_progname);
3078 /* one byte is reserved for some trick in next_token */
3079 for (i = j = 1; j > 0; i += j) {
3080 s = xrealloc(s, i + 4096);
3081 j = fread(s + i, 1, 4094, from_file);
3085 parse_program(s + 1);
3089 } else { // no -f: take program from 1st parameter
3092 g_progname = "cmd. line";
3093 parse_program(*argv++);
3095 if (opt & 0x8) // -W
3096 bb_error_msg("warning: unrecognized option '-W %s' ignored", opt_W);
3098 /* fill in ARGV array */
3099 setvar_i(intvar[ARGC], argc);
3100 setari_u(intvar[ARGV], 0, "awk");
3103 setari_u(intvar[ARGV], ++i, *argv++);
// Run BEGIN. With neither main rules nor END rules there is no input to
// process, so the program finishes right away.
3105 evaluate(beginseq.first, &tv);
3106 if (!mainseq.first && !endseq.first)
3107 awk_exit(EXIT_SUCCESS);
3109 /* input file could already be opened in BEGIN block */
3111 iF = next_input_file();
3113 /* passing through input files */
// FNR restarts at zero for each file and is incremented for every record
// before the main rules are evaluated.
3116 setvar_i(intvar[FNR], 0);
3118 while ((i = awk_getline(iF, intvar[F0])) > 0) {
3121 incvar(intvar[FNR]);
3122 evaluate(mainseq.first, &tv);
// The errno text is reported through syntax_error(); the guarding
// condition is not visible here.
3129 syntax_error(strerror(errno));
3131 iF = next_input_file();
3134 awk_exit(EXIT_SUCCESS);