1 /* vi: set sw=4 ts=4: */
3 * awk implementation for busybox
5 * Copyright (C) 2002 by Dmitry Zakharov <dmit@crp.bank.gov.ua>
7 * Licensed under the GPL v2 or later, see the file LICENSE in this tarball.
/* Bit flags kept in var.type */
19 #define VF_NUMBER 0x0001 /* 1 = primary type is number */
20 #define VF_ARRAY 0x0002 /* 1 = it's an array */
22 #define VF_CACHED 0x0100 /* 1 = num/str value has a cached str/num equivalent */
23 #define VF_USER 0x0200 /* 1 = user input (may be numeric string) */
24 #define VF_SPECIAL 0x0400 /* 1 = requires extra handling when changed */
25 #define VF_WALK 0x0800 /* 1 = variable has alloc'd x.walker list */
26 #define VF_FSTR 0x1000 /* 1 = string points to fstring buffer */
27 #define VF_CHILD 0x2000 /* 1 = function arg; x.parent points to source */
28 #define VF_DIRTY 0x4000 /* 1 = variable was set explicitly */
30 /* Sticky flags: they describe the variable rather than its current value,
 * so clrvar() keeps them when it resets type and copyvar() never copies
 * them over from the source variable. */
31 #define VF_DONTTOUCH (VF_ARRAY | VF_SPECIAL | VF_WALK | VF_CHILD | VF_DIRTY)
34 typedef struct var_s {
35 unsigned short type; /* flags */
39 int aidx; /* func arg idx (for compilation stage) */
40 struct xhash_s *array; /* array ptr */
41 struct var_s *parent; /* for func args, ptr to actual parameter */
42 char **walker; /* list of array elements (for..in) */
46 /* Node chain (pattern-action chain, BEGIN, END, function bodies) */
47 typedef struct chain_s {
54 typedef struct func_s {
60 typedef struct rstream_s {
66 unsigned short is_pipe;
69 typedef struct hash_item_s {
71 struct var_s v; /* variable/array hash */
72 struct rstream_s rs; /* redirect streams hash */
73 struct func_s f; /* functions hash */
75 struct hash_item_s *next; /* next in chain */
76 char name[1]; /* really it's longer */
/* Chained hash table keyed by item name */
79 typedef struct xhash_s {
80 unsigned int nel; /* number of stored items (bumped by hash_find) */
81 unsigned int csize; /* current number of buckets in items[] */
82 unsigned int nprime; /* index of the next table size in PRIMES[] */
83 unsigned int glen; /* total length of all item names, each counted with its NUL */
84 struct hash_item_s **items; /* bucket array; each slot heads a chain linked by ->next */
88 typedef struct node_s {
90 unsigned short lineno;
109 /* Block of temporary variables */
110 typedef struct nvblock_s {
113 struct nvblock_s *prev;
114 struct nvblock_s *next;
118 typedef struct tsplitter_s {
123 /* simple token classes */
124 /* Order and hex values are very important!!! See next_token() */
125 #define TC_SEQSTART 1 /* ( */
126 #define TC_SEQTERM (1 << 1) /* ) */
127 #define TC_REGEXP (1 << 2) /* /.../ */
128 #define TC_OUTRDR (1 << 3) /* | > >> */
129 #define TC_UOPPOST (1 << 4) /* unary postfix operator */
130 #define TC_UOPPRE1 (1 << 5) /* unary prefix operator */
131 #define TC_BINOPX (1 << 6) /* two-opnd operator */
132 #define TC_IN (1 << 7)
133 #define TC_COMMA (1 << 8)
134 #define TC_PIPE (1 << 9) /* input redirection pipe */
135 #define TC_UOPPRE2 (1 << 10) /* unary prefix operator */
136 #define TC_ARRTERM (1 << 11) /* ] */
137 #define TC_GRPSTART (1 << 12) /* { */
138 #define TC_GRPTERM (1 << 13) /* } */
139 #define TC_SEMICOL (1 << 14)
140 #define TC_NEWLINE (1 << 15)
141 #define TC_STATX (1 << 16) /* ctl statement (for, next...) */
142 #define TC_WHILE (1 << 17)
143 #define TC_ELSE (1 << 18)
144 #define TC_BUILTIN (1 << 19)
145 #define TC_GETLINE (1 << 20)
146 #define TC_FUNCDECL (1 << 21) /* `function' `func' */
147 #define TC_BEGIN (1 << 22)
148 #define TC_END (1 << 23)
149 #define TC_EOF (1 << 24)
150 #define TC_VARIABLE (1 << 25)
151 #define TC_ARRAY (1 << 26)
152 #define TC_FUNCTION (1 << 27)
153 #define TC_STRING (1 << 28)
154 #define TC_NUMBER (1 << 29)
156 #define TC_UOPPRE (TC_UOPPRE1 | TC_UOPPRE2)
158 /* combined token classes */
159 #define TC_BINOP (TC_BINOPX | TC_COMMA | TC_PIPE | TC_IN)
160 #define TC_UNARYOP (TC_UOPPRE | TC_UOPPOST)
161 #define TC_OPERAND (TC_VARIABLE | TC_ARRAY | TC_FUNCTION | \
162 TC_BUILTIN | TC_GETLINE | TC_SEQSTART | TC_STRING | TC_NUMBER)
164 #define TC_STATEMNT (TC_STATX | TC_WHILE)
165 #define TC_OPTERM (TC_SEMICOL | TC_NEWLINE)
167 /* word tokens, cannot mean something else if not expected */
168 #define TC_WORD (TC_IN | TC_STATEMNT | TC_ELSE | TC_BUILTIN | \
169 TC_GETLINE | TC_FUNCDECL | TC_BEGIN | TC_END)
171 /* discard newlines after these */
172 #define TC_NOTERM (TC_COMMA | TC_GRPSTART | TC_GRPTERM | \
173 TC_BINOP | TC_OPTERM)
175 /* what can expression begin with */
176 #define TC_OPSEQ (TC_OPERAND | TC_UOPPRE | TC_REGEXP)
177 /* what can group begin with */
178 #define TC_GRPSEQ (TC_OPSEQ | TC_OPTERM | TC_STATEMNT | TC_GRPSTART)
180 /* if previous token class is CONCAT1 and next is CONCAT2, concatenation */
181 /* operator is inserted between them */
182 #define TC_CONCAT1 (TC_VARIABLE | TC_ARRTERM | TC_SEQTERM | \
183 TC_STRING | TC_NUMBER | TC_UOPPOST)
184 #define TC_CONCAT2 (TC_OPERAND | TC_UOPPRE)
186 #define OF_RES1 0x010000
187 #define OF_RES2 0x020000
188 #define OF_STR1 0x040000
189 #define OF_STR2 0x080000
190 #define OF_NUM1 0x100000
191 #define OF_CHECKED 0x200000
193 /* combined operator flags */
196 #define xS (OF_RES2 | OF_STR2)
198 #define VV (OF_RES1 | OF_RES2)
199 #define Nx (OF_RES1 | OF_NUM1)
200 #define NV (OF_RES1 | OF_NUM1 | OF_RES2)
201 #define Sx (OF_RES1 | OF_STR1)
202 #define SV (OF_RES1 | OF_STR1 | OF_RES2)
203 #define SS (OF_RES1 | OF_STR1 | OF_RES2 | OF_STR2)
205 #define OPCLSMASK 0xFF00
206 #define OPNMASK 0x007F
208 /* operator priority is a highest byte (even: r->l, odd: l->r grouping)
209 * For builtins it has different meaning: n n s3 s2 s1 v3 v2 v1,
210 * n - min. number of args, vN - resolve Nth arg to var, sN - resolve to string
/* Place x in the top byte of a 32-bit info word. The argument is
 * parenthesized so expressions such as P(a | b) expand correctly, and it is
 * widened to uint32_t first so values >= 0x80 are not shifted into the sign
 * bit of a signed int. */
#define P(x) ((uint32_t)(x) << 24)
213 #define PRIMASK 0x7F000000
214 #define PRIMASK2 0x7E000000
216 /* Operation classes */
218 #define SHIFT_TIL_THIS 0x0600
219 #define RECUR_FROM_THIS 0x1000
222 OC_DELETE=0x0100, OC_EXEC=0x0200, OC_NEWSOURCE=0x0300,
223 OC_PRINT=0x0400, OC_PRINTF=0x0500, OC_WALKINIT=0x0600,
225 OC_BR=0x0700, OC_BREAK=0x0800, OC_CONTINUE=0x0900,
226 OC_EXIT=0x0a00, OC_NEXT=0x0b00, OC_NEXTFILE=0x0c00,
227 OC_TEST=0x0d00, OC_WALKNEXT=0x0e00,
229 OC_BINARY=0x1000, OC_BUILTIN=0x1100, OC_COLON=0x1200,
230 OC_COMMA=0x1300, OC_COMPARE=0x1400, OC_CONCAT=0x1500,
231 OC_FBLTIN=0x1600, OC_FIELD=0x1700, OC_FNARG=0x1800,
232 OC_FUNC=0x1900, OC_GETLINE=0x1a00, OC_IN=0x1b00,
233 OC_LAND=0x1c00, OC_LOR=0x1d00, OC_MATCH=0x1e00,
234 OC_MOVE=0x1f00, OC_PGETLINE=0x2000, OC_REGEXP=0x2100,
235 OC_REPLACE=0x2200, OC_RETURN=0x2300, OC_SPRINTF=0x2400,
236 OC_TERNARY=0x2500, OC_UNARY=0x2600, OC_VAR=0x2700,
239 ST_IF=0x3000, ST_DO=0x3100, ST_FOR=0x3200,
243 /* simple builtins */
245 F_in=0, F_rn, F_co, F_ex, F_lg, F_si, F_sq, F_sr,
246 F_ti, F_le, F_sy, F_ff, F_cl
251 B_a2=0, B_ix, B_ma, B_sp, B_ss, B_ti, B_lo, B_up,
253 B_an, B_co, B_ls, B_or, B_rs, B_xo,
256 /* tokens and their corresponding info values */
258 #define NTC "\377" /* switch to next token class (tc<<1) */
261 #define OC_B OC_BUILTIN
263 static char * const tokenlist =
266 "\1/" NTC /* REGEXP */
267 "\2>>" "\1>" "\1|" NTC /* OUTRDR */
268 "\2++" "\2--" NTC /* UOPPOST */
269 "\2++" "\2--" "\1$" NTC /* UOPPRE1 */
270 "\2==" "\1=" "\2+=" "\2-=" /* BINOPX */
271 "\2*=" "\2/=" "\2%=" "\2^="
272 "\1+" "\1-" "\3**=" "\2**"
273 "\1/" "\1%" "\1^" "\1*"
274 "\2!=" "\2>=" "\2<=" "\1>"
275 "\1<" "\2!~" "\1~" "\2&&"
276 "\2||" "\1?" "\1:" NTC
280 "\1+" "\1-" "\1!" NTC /* UOPPRE2 */
286 "\2if" "\2do" "\3for" "\5break" /* STATX */
287 "\10continue" "\6delete" "\5print"
288 "\6printf" "\4next" "\10nextfile"
289 "\6return" "\4exit" NTC
293 "\3and" "\5compl" "\6lshift" "\2or"
295 "\5close" "\6system" "\6fflush" "\5atan2" /* BUILTIN */
296 "\3cos" "\3exp" "\3int" "\3log"
297 "\4rand" "\3sin" "\4sqrt" "\5srand"
298 "\6gensub" "\4gsub" "\5index" "\6length"
299 "\5match" "\5split" "\7sprintf" "\3sub"
300 "\6substr" "\7systime" "\10strftime"
301 "\7tolower" "\7toupper" NTC
303 "\4func" "\10function" NTC
308 static const uint32_t tokeninfo[] = {
313 xS|'a', xS|'w', xS|'|',
314 OC_UNARY|xV|P(9)|'p', OC_UNARY|xV|P(9)|'m',
315 OC_UNARY|xV|P(9)|'P', OC_UNARY|xV|P(9)|'M',
317 OC_COMPARE|VV|P(39)|5, OC_MOVE|VV|P(74),
318 OC_REPLACE|NV|P(74)|'+', OC_REPLACE|NV|P(74)|'-',
319 OC_REPLACE|NV|P(74)|'*', OC_REPLACE|NV|P(74)|'/',
320 OC_REPLACE|NV|P(74)|'%', OC_REPLACE|NV|P(74)|'&',
321 OC_BINARY|NV|P(29)|'+', OC_BINARY|NV|P(29)|'-',
322 OC_REPLACE|NV|P(74)|'&', OC_BINARY|NV|P(15)|'&',
323 OC_BINARY|NV|P(25)|'/', OC_BINARY|NV|P(25)|'%',
324 OC_BINARY|NV|P(15)|'&', OC_BINARY|NV|P(25)|'*',
325 OC_COMPARE|VV|P(39)|4, OC_COMPARE|VV|P(39)|3,
326 OC_COMPARE|VV|P(39)|0, OC_COMPARE|VV|P(39)|1,
327 OC_COMPARE|VV|P(39)|2, OC_MATCH|Sx|P(45)|'!',
328 OC_MATCH|Sx|P(45)|'~', OC_LAND|Vx|P(55),
329 OC_LOR|Vx|P(59), OC_TERNARY|Vx|P(64)|'?',
330 OC_COLON|xx|P(67)|':',
333 OC_PGETLINE|SV|P(37),
334 OC_UNARY|xV|P(19)|'+', OC_UNARY|xV|P(19)|'-',
335 OC_UNARY|xV|P(19)|'!',
341 ST_IF, ST_DO, ST_FOR, OC_BREAK,
342 OC_CONTINUE, OC_DELETE|Vx, OC_PRINT,
343 OC_PRINTF, OC_NEXT, OC_NEXTFILE,
344 OC_RETURN|Vx, OC_EXIT|Nx,
348 OC_B|B_an|P(0x83), OC_B|B_co|P(0x41), OC_B|B_ls|P(0x83), OC_B|B_or|P(0x83),
349 OC_B|B_rs|P(0x83), OC_B|B_xo|P(0x83),
350 OC_FBLTIN|Sx|F_cl, OC_FBLTIN|Sx|F_sy, OC_FBLTIN|Sx|F_ff, OC_B|B_a2|P(0x83),
351 OC_FBLTIN|Nx|F_co, OC_FBLTIN|Nx|F_ex, OC_FBLTIN|Nx|F_in, OC_FBLTIN|Nx|F_lg,
352 OC_FBLTIN|F_rn, OC_FBLTIN|Nx|F_si, OC_FBLTIN|Nx|F_sq, OC_FBLTIN|Nx|F_sr,
353 OC_B|B_ge|P(0xd6), OC_B|B_gs|P(0xb6), OC_B|B_ix|P(0x9b), OC_FBLTIN|Sx|F_le,
354 OC_B|B_ma|P(0x89), OC_B|B_sp|P(0x8b), OC_SPRINTF, OC_B|B_su|P(0xb6),
355 OC_B|B_ss|P(0x8f), OC_FBLTIN|F_ti, OC_B|B_ti|P(0x0b),
356 OC_B|B_lo|P(0x49), OC_B|B_up|P(0x49),
363 /* internal variable names and their initial values */
364 /* an asterisk marks SPECIAL vars; $ is just the unnamed Field0 */
366 CONVFMT=0, OFMT, FS, OFS,
367 ORS, RS, RT, FILENAME,
368 SUBSEP, ARGIND, ARGC, ARGV,
371 ENVIRON, F0, _intvarcount_
374 static char * vNames =
375 "CONVFMT\0" "OFMT\0" "FS\0*" "OFS\0"
376 "ORS\0" "RS\0*" "RT\0" "FILENAME\0"
377 "SUBSEP\0" "ARGIND\0" "ARGC\0" "ARGV\0"
379 "NR\0" "NF\0*" "IGNORECASE\0*"
380 "ENVIRON\0" "$\0*" "\0";
382 static char * vValues =
383 "%.6g\0" "%.6g\0" " \0" " \0"
384 "\n\0" "\n\0" "\0" "\0"
388 /* hash size may grow to these values */
#define FIRST_PRIME 61 /* bucket count of a freshly created hash; no trailing ';' so it works inside expressions */
static const unsigned int PRIMES[] = { 251, 1021, 4093, 16381, 65521 };
/* number of entries in PRIMES[], derived from the array's own element size */
enum { NPRIMES = sizeof(PRIMES) / sizeof(PRIMES[0]) };
395 extern char **environ;
397 static var * V[_intvarcount_];
398 static chain beginseq, mainseq, endseq, *seq;
399 static int nextrec, nextfile;
400 static node *break_ptr, *continue_ptr;
402 static xhash *vhash, *ahash, *fdhash, *fnhash;
403 static char *programname;
405 static int is_f0_split;
408 static tsplitter fsplitter, rsplitter;
424 /* function prototypes */
425 static void handle_special(var *);
426 static node *parse_expr(uint32_t);
427 static void chain_group(void);
428 static var *evaluate(node *, var *);
429 static rstream *next_input_file(void);
430 static int fmt_num(char *, int, const char *, double, int);
431 static int awk_exit(int) ATTRIBUTE_NORETURN;
433 /* ---- error handling ---- */
435 static const char EMSG_INTERNAL_ERROR[] = "Internal error";
436 static const char EMSG_UNEXP_EOS[] = "Unexpected end of string";
437 static const char EMSG_UNEXP_TOKEN[] = "Unexpected token";
438 static const char EMSG_DIV_BY_ZERO[] = "Division by zero";
439 static const char EMSG_INV_FMT[] = "Invalid format specifier";
440 static const char EMSG_TOO_FEW_ARGS[] = "Too few arguments for builtin";
441 static const char EMSG_NOT_ARRAY[] = "Not an array";
442 static const char EMSG_POSSIBLE_ERROR[] = "Possible syntax error";
443 static const char EMSG_UNDEF_FUNC[] = "Call to undefined function";
444 #ifndef CONFIG_FEATURE_AWK_MATH
445 static const char EMSG_NO_MATH[] = "Math support is not compiled in";
/* Report a fatal error and terminate: the message is printed through
 * bb_error_msg_and_die() as "<programname>:<lineno>: <message>".
 * Declared ATTRIBUTE_NORETURN, so callers need no code after the call. */
448 static void syntax_error(const char * const message) ATTRIBUTE_NORETURN;
449 static void syntax_error(const char * const message)
451 bb_error_msg_and_die("%s:%i: %s", programname, lineno, message);
/* runtime errors are reported through exactly the same path */
454 #define runtime_error(x) syntax_error(x)
457 /* ---- hash stuff ---- */
/* Hash a NUL-terminated name. Each character c updates the accumulator to
 * c + (idx << 6) - idx, i.e. c + idx * 63. The value is not reduced here;
 * callers take it modulo the table's csize. */
459 static unsigned int hashidx(const char *name)
463 while (*name) idx = *name++ + (idx << 6) - idx;
467 /* create new hash: both the xhash header and its bucket array are obtained
 * from xzalloc(), and the table starts out with FIRST_PRIME buckets */
468 static xhash *hash_init(void)
472 newhash = xzalloc(sizeof(xhash));
473 newhash->csize = FIRST_PRIME;
474 newhash->items = xzalloc(newhash->csize * sizeof(hash_item *));
479 /* find item in hash, return ptr to data, NULL if not found.
 * Only the bucket selected by hashidx(name) % csize is examined; names are
 * compared with strcmp(), so the match is exact and case-sensitive. */
480 static void *hash_search(xhash *hash, const char *name)
484 hi = hash->items [ hashidx(name) % hash->csize ];
486 if (strcmp(hi->name, name) == 0)
493 /* grow hash if it becomes too big: move every item into a larger bucket
 * array whose size is the next entry of PRIMES[] */
494 static void hash_rebuild(xhash *hash)
496 unsigned int newsize, i, idx;
497 hash_item **newitems, *hi, *thi;
/* all sizes in PRIMES[] have been used up; no larger table is available */
499 if (hash->nprime == NPRIMES)
502 newsize = PRIMES[hash->nprime++];
503 newitems = xzalloc(newsize * sizeof(hash_item *));
/* walk every old bucket */
505 for (i=0; i<hash->csize; i++) {
/* re-hash against the new size and push the item at the head of its new chain */
510 idx = hashidx(thi->name) % newsize;
511 thi->next = newitems[idx];
/* switch the table over to the new bucket array */
517 hash->csize = newsize;
518 hash->items = newitems;
521 /* find item in hash, add it if necessary. Return ptr to data */
522 static void *hash_find(xhash *hash, const char *name)
528 hi = hash_search(hash, name);
/* count the new item; more than 10 items per bucket on average is the
 * growth threshold (presumably hash_rebuild() is called here - confirm) */
530 if (++hash->nel / hash->csize > 10)
/* allocate the item with room for the name, including its NUL */
533 l = strlen(name) + 1;
534 hi = xzalloc(sizeof(hash_item) + l);
535 memcpy(hi->name, name, l);
/* link the new item at the head of its bucket chain */
537 idx = hashidx(name) % hash->csize;
538 hi->next = hash->items[idx];
539 hash->items[idx] = hi;
/* Typed wrappers around hash_find(): look the name up in the given table,
 * adding the entry if necessary. Each expansion is fully parenthesized so
 * the cast is applied before any postfix operator used at the call site
 * (e.g. findvar(h, n)->type). */
#define findvar(hash, name) ((var *) hash_find((hash), (name)))
#define newvar(name) ((var *) hash_find(vhash, (name)))
#define newfile(name) ((rstream *) hash_find(fdhash, (name)))
#define newfunc(name) ((func *) hash_find(fnhash, (name)))
/* Handle removal of the item called name from hash. The visible code locates
 * the bucket for name, compares item names with strcmp(), and on a match
 * subtracts the name's length plus its NUL from hash->glen.
 * NOTE(review): the unlink/free steps are not visible in this excerpt. */
550 static void hash_remove(xhash *hash, const char *name)
552 hash_item *hi, **phi;
/* phi addresses the bucket slot itself, so the chain head can be rewritten */
554 phi = &(hash->items[ hashidx(name) % hash->csize ]);
557 if (strcmp(hi->name, name) == 0) {
558 hash->glen -= (strlen(name) + 1);
568 /* ------ some useful functions ------ */
/* Skip over blanks, tabs and backslash-newline continuations. Each
 * continuation also increments t.lineno. */
570 static void skip_spaces(char **s)
574 while(*p == ' ' || *p == '\t' ||
/* the comma expression steps past the backslash and counts the line */
575 (*p == '\\' && *(p+1) == '\n' && (++p, ++t.lineno))) {
581 static char *nextword(char **s)
590 static char nextchar(char **s)
596 if (c == '\\') c = bb_process_escape_sequence((const char**)s);
597 if (c == '\\' && *s == pps) c = *((*s)++);
601 static int ATTRIBUTE_ALWAYS_INLINE isalnum_(int c)
603 return (isalnum(c) || c == '_');
/* Open path with the given mode through xfopen(). The single-character
 * name "-" selects stdin instead of opening a file. */
static FILE *afopen(const char *path, const char *mode)
{
	if (path[0] == '-' && path[1] == '\0')
		return stdin;

	return xfopen(path, mode);
}
611 /* -------- working with variables (set/get/copy/etc) -------- */
613 static xhash *iamarray(var *v)
617 while (a->type & VF_CHILD)
620 if (! (a->type & VF_ARRAY)) {
622 a->x.array = hash_init();
627 static void clear_array(xhash *array)
632 for (i=0; i<array->csize; i++) {
633 hi = array->items[i];
637 free(thi->data.v.string);
640 array->items[i] = NULL;
642 array->glen = array->nel = 0;
645 /* clear a variable */
646 static var *clrvar(var *v)
/* strings flagged VF_FSTR are treated differently from ordinary ones
 * (presumably not freed, since they point into the field buffer - confirm) */
648 if (!(v->type & VF_FSTR))
/* drop every value-related flag; only the VF_DONTTOUCH bits survive */
651 v->type &= VF_DONTTOUCH;
657 /* assign string value to variable */
658 static var *setvar_p(var *v, char *value)
667 /* same as setvar_p but make a copy of string */
668 static var *setvar_s(var *v, const char *value)
670 return setvar_p(v, (value && *value) ? xstrdup(value) : NULL);
673 /* same as setvar_s but set USER flag */
674 static var *setvar_u(var *v, const char *value)
681 /* set array element to user string */
682 static void setari_u(var *a, int idx, const char *s)
/* static scratch buffer for the decimal key; 12 bytes hold any 32-bit int
 * ("-2147483648" plus NUL) - assumes int is no wider than 32 bits */
685 static char sidx[12];
/* the numeric index is turned into its decimal string, which is the hash key */
687 sprintf(sidx, "%d", idx);
/* iamarray() yields the hash behind a; findvar() adds the element if needed */
688 v = findvar(iamarray(a), sidx);
692 /* assign numeric value to variable */
693 static var *setvar_i(var *v, double value)
696 v->type |= VF_NUMBER;
/* Return the string value of v; never returns NULL (an unset string is
 * reported as ""). */
702 static char *getvar_s(var *v)
704 /* if v is numeric and has no cached string, convert it to string */
705 if ((v->type & (VF_NUMBER | VF_CACHED)) == VF_NUMBER) {
/* format with the current CONVFMT; TRUE asks fmt_num() to print integral
 * values as plain integers */
706 fmt_num(buf, MAXVARFMT, getvar_s(V[CONVFMT]), v->number, TRUE);
707 v->string = xstrdup(buf);
/* remember that the string form now matches the number */
708 v->type |= VF_CACHED;
710 return (v->string == NULL) ? "" : v->string;
713 static double getvar_i(var *v)
717 if ((v->type & (VF_NUMBER | VF_CACHED)) == 0) {
721 v->number = strtod(s, &s);
722 if (v->type & VF_USER) {
730 v->type |= VF_CACHED;
735 static var *copyvar(var *dest, const var *src)
739 dest->type |= (src->type & ~VF_DONTTOUCH);
740 dest->number = src->number;
742 dest->string = xstrdup(src->string);
744 handle_special(dest);
748 static var *incvar(var *v)
750 return setvar_i(v, getvar_i(v)+1.);
753 /* return true if v is number or numeric string */
754 static int is_numeric(var *v)
/* Flipping VF_DIRTY before masking makes the result nonzero when VF_NUMBER
 * is set, VF_USER is set, or VF_DIRTY is clear. The value returned is a
 * bit mask, not a normalized 0/1. */
757 return ((v->type ^ VF_DIRTY) & (VF_NUMBER | VF_USER | VF_DIRTY));
760 /* return 1 when value of v corresponds to true, 0 otherwise */
761 static int istrue(var *v)
/* numeric test: any nonzero number is true */
764 return (v->number == 0) ? 0 : 1;
/* string test: a non-NULL, non-empty string is true */
766 return (v->string && *(v->string)) ? 1 : 0;
769 /* temporary variables allocator. Last allocated should be first freed */
770 static var *nvalloc(int n)
778 if ((cb->pos - cb->nv) + n <= cb->size) break;
783 size = (n <= MINNVBLOCK) ? MINNVBLOCK : n;
784 cb = xmalloc(sizeof(nvblock) + size * sizeof(var));
789 if (pb) pb->next = cb;
795 while (v < cb->pos) {
804 static void nvfree(var *v)
808 if (v < cb->nv || v >= cb->pos)
809 runtime_error(EMSG_INTERNAL_ERROR);
811 for (p=v; p<cb->pos; p++) {
812 if ((p->type & (VF_ARRAY|VF_CHILD)) == VF_ARRAY) {
813 clear_array(iamarray(p));
814 free(p->x.array->items);
817 if (p->type & VF_WALK)
824 while (cb->prev && cb->pos == cb->nv) {
829 /* ------- awk program text parsing ------- */
831 /* Parse next token pointed by global pos, place results into global t.
832 * If the token isn't one of the expected classes, give up with a syntax error. Return the token class
834 static uint32_t next_token(uint32_t expected)
841 static int concat_inserted;
842 static uint32_t save_tclass, save_info;
843 static uint32_t ltclass = TC_OPTERM;
849 } else if (concat_inserted) {
851 concat_inserted = FALSE;
852 t.tclass = save_tclass;
863 while (*p != '\n' && *p != '\0') p++;
871 } else if (*p == '\"') {
875 if (*p == '\0' || *p == '\n')
876 syntax_error(EMSG_UNEXP_EOS);
877 *(s++) = nextchar(&p);
883 } else if ((expected & TC_REGEXP) && *p == '/') {
887 if (*p == '\0' || *p == '\n')
888 syntax_error(EMSG_UNEXP_EOS);
889 if ((*s++ = *p++) == '\\') {
891 *(s-1) = bb_process_escape_sequence((const char **)&p);
892 if (*pp == '\\') *s++ = '\\';
893 if (p == pp) *s++ = *p++;
900 } else if (*p == '.' || isdigit(*p)) {
902 t.number = strtod(p, &p);
904 syntax_error(EMSG_UNEXP_TOKEN);
908 /* search for something known */
918 /* if token class is expected, token
919 * matches and it's not a longer word,
920 * then this is what we are looking for
922 if ((tc & (expected | TC_WORD | TC_NEWLINE)) &&
923 *tl == *p && strncmp(p, tl, l) == 0 &&
924 !((tc & TC_WORD) && isalnum_(*(p + l)))) {
934 /* it's a name (var/array/function),
935 * otherwise it's something wrong
938 syntax_error(EMSG_UNEXP_TOKEN);
941 while(isalnum_(*(++p))) {
946 /* also consume whitespace between functionname and bracket */
947 if (! (expected & TC_VARIABLE)) skip_spaces(&p);
960 /* skipping newlines in some cases */
961 if ((ltclass & TC_NOTERM) && (tc & TC_NEWLINE))
964 /* insert concatenation operator when needed */
965 if ((ltclass&TC_CONCAT1) && (tc&TC_CONCAT2) && (expected&TC_BINOP)) {
966 concat_inserted = TRUE;
970 t.info = OC_CONCAT | SS | P(35);
977 /* Are we ready for this? */
978 if (! (ltclass & expected))
979 syntax_error((ltclass & (TC_NEWLINE | TC_EOF)) ?
980 EMSG_UNEXP_EOS : EMSG_UNEXP_TOKEN);
985 static void rollback_token(void) { t.rollback = TRUE; }
987 static node *new_node(uint32_t info)
991 n = xzalloc(sizeof(node));
/* Compile pattern s twice into the two-element array re: re[0] with
 * REG_EXTENDED only, re[1] additionally with REG_ICASE. The caller must
 * supply storage for two regex_t objects. */
997 static node *mk_re_node(char *s, node *n, regex_t *re)
1002 xregcomp(re, s, REG_EXTENDED);
1003 xregcomp(re+1, s, REG_EXTENDED | REG_ICASE);
1008 static node *condition(void)
1010 next_token(TC_SEQSTART);
1011 return parse_expr(TC_SEQTERM);
1014 /* parse expression terminated by given argument, return ptr
1015 * to built subtree. Terminator is eaten by parse_expr */
1016 static node *parse_expr(uint32_t iexp)
1025 sn.r.n = glptr = NULL;
1026 xtc = TC_OPERAND | TC_UOPPRE | TC_REGEXP | iexp;
1028 while (! ((tc = next_token(xtc)) & iexp)) {
1029 if (glptr && (t.info == (OC_COMPARE|VV|P(39)|2))) {
1030 /* input redirection (<) attached to glptr node */
1031 cn = glptr->l.n = new_node(OC_CONCAT|SS|P(37));
1033 xtc = TC_OPERAND | TC_UOPPRE;
1036 } else if (tc & (TC_BINOP | TC_UOPPOST)) {
1037 /* for binary and postfix-unary operators, jump back over
1038 * previous operators with higher priority */
1040 while ( ((t.info & PRIMASK) > (vn->a.n->info & PRIMASK2)) ||
1041 ((t.info == vn->info) && ((t.info & OPCLSMASK) == OC_COLON)) )
1043 if ((t.info & OPCLSMASK) == OC_TERNARY)
1045 cn = vn->a.n->r.n = new_node(t.info);
1047 if (tc & TC_BINOP) {
1049 xtc = TC_OPERAND | TC_UOPPRE | TC_REGEXP;
1050 if ((t.info & OPCLSMASK) == OC_PGETLINE) {
1052 next_token(TC_GETLINE);
1053 /* give maximum priority to this pipe */
1054 cn->info &= ~PRIMASK;
1055 xtc = TC_OPERAND | TC_UOPPRE | TC_BINOP | iexp;
1059 xtc = TC_OPERAND | TC_UOPPRE | TC_BINOP | iexp;
1064 /* for operands and prefix-unary operators, attach them
1067 cn = vn->r.n = new_node(t.info);
1069 xtc = TC_OPERAND | TC_UOPPRE | TC_REGEXP;
1070 if (tc & (TC_OPERAND | TC_REGEXP)) {
1071 xtc = TC_UOPPRE | TC_UOPPOST | TC_BINOP | TC_OPERAND | iexp;
1072 /* one should be very careful with switch on tclass -
1073 * only simple tclasses should be used! */
1078 if ((v = hash_search(ahash, t.string)) != NULL) {
1079 cn->info = OC_FNARG;
1080 cn->l.i = v->x.aidx;
1082 cn->l.v = newvar(t.string);
1084 if (tc & TC_ARRAY) {
1086 cn->r.n = parse_expr(TC_ARRTERM);
1093 v = cn->l.v = xzalloc(sizeof(var));
1095 setvar_i(v, t.number);
1097 setvar_s(v, t.string);
1101 mk_re_node(t.string, cn, xzalloc(sizeof(regex_t)*2));
1106 cn->r.f = newfunc(t.string);
1107 cn->l.n = condition();
1111 cn = vn->r.n = parse_expr(TC_SEQTERM);
1117 xtc = TC_OPERAND | TC_UOPPRE | TC_BINOP | iexp;
1121 cn->l.n = condition();
1130 /* add node to chain. Return ptr to alloc'd node */
1131 static node *chain_node(uint32_t info)
1136 seq->first = seq->last = new_node(0);
1138 if (seq->programname != programname) {
1139 seq->programname = programname;
1140 n = chain_node(OC_NEWSOURCE);
1141 n->l.s = xstrdup(programname);
1146 seq->last = n->a.n = new_node(OC_DONE);
1151 static void chain_expr(uint32_t info)
1155 n = chain_node(info);
1156 n->l.n = parse_expr(TC_OPTERM | TC_GRPTERM);
1157 if (t.tclass & TC_GRPTERM)
1161 static node *chain_loop(node *nn)
1163 node *n, *n2, *save_brk, *save_cont;
1165 save_brk = break_ptr;
1166 save_cont = continue_ptr;
1168 n = chain_node(OC_BR | Vx);
1169 continue_ptr = new_node(OC_EXEC);
1170 break_ptr = new_node(OC_EXEC);
1172 n2 = chain_node(OC_EXEC | Vx);
1175 continue_ptr->a.n = n2;
1176 break_ptr->a.n = n->r.n = seq->last;
1178 continue_ptr = save_cont;
1179 break_ptr = save_brk;
1184 /* parse group and attach it to chain */
1185 static void chain_group(void)
1191 c = next_token(TC_GRPSEQ);
1192 } while (c & TC_NEWLINE);
1194 if (c & TC_GRPSTART) {
1195 while(next_token(TC_GRPSEQ | TC_GRPTERM) != TC_GRPTERM) {
1196 if (t.tclass & TC_NEWLINE) continue;
1200 } else if (c & (TC_OPSEQ | TC_OPTERM)) {
1202 chain_expr(OC_EXEC | Vx);
1203 } else { /* TC_STATEMNT */
1204 switch (t.info & OPCLSMASK) {
1206 n = chain_node(OC_BR | Vx);
1207 n->l.n = condition();
1209 n2 = chain_node(OC_EXEC);
1211 if (next_token(TC_GRPSEQ | TC_GRPTERM | TC_ELSE)==TC_ELSE) {
1213 n2->a.n = seq->last;
1221 n = chain_loop(NULL);
1226 n2 = chain_node(OC_EXEC);
1227 n = chain_loop(NULL);
1229 next_token(TC_WHILE);
1230 n->l.n = condition();
1234 next_token(TC_SEQSTART);
1235 n2 = parse_expr(TC_SEMICOL | TC_SEQTERM);
1236 if (t.tclass & TC_SEQTERM) { /* for-in */
1237 if ((n2->info & OPCLSMASK) != OC_IN)
1238 syntax_error(EMSG_UNEXP_TOKEN);
1239 n = chain_node(OC_WALKINIT | VV);
1242 n = chain_loop(NULL);
1243 n->info = OC_WALKNEXT | Vx;
1245 } else { /* for(;;) */
1246 n = chain_node(OC_EXEC | Vx);
1248 n2 = parse_expr(TC_SEMICOL);
1249 n3 = parse_expr(TC_SEQTERM);
1259 n = chain_node(t.info);
1260 n->l.n = parse_expr(TC_OPTERM | TC_OUTRDR | TC_GRPTERM);
1261 if (t.tclass & TC_OUTRDR) {
1263 n->r.n = parse_expr(TC_OPTERM | TC_GRPTERM);
1265 if (t.tclass & TC_GRPTERM)
1270 n = chain_node(OC_EXEC);
1275 n = chain_node(OC_EXEC);
1276 n->a.n = continue_ptr;
1279 /* delete, next, nextfile, return, exit */
1287 static void parse_program(char *p)
1296 while((tclass = next_token(TC_EOF | TC_OPSEQ | TC_GRPSTART |
1297 TC_OPTERM | TC_BEGIN | TC_END | TC_FUNCDECL)) != TC_EOF) {
1299 if (tclass & TC_OPTERM)
1303 if (tclass & TC_BEGIN) {
1307 } else if (tclass & TC_END) {
1311 } else if (tclass & TC_FUNCDECL) {
1312 next_token(TC_FUNCTION);
1314 f = newfunc(t.string);
1315 f->body.first = NULL;
1317 while(next_token(TC_VARIABLE | TC_SEQTERM) & TC_VARIABLE) {
1318 v = findvar(ahash, t.string);
1319 v->x.aidx = (f->nargs)++;
1321 if (next_token(TC_COMMA | TC_SEQTERM) & TC_SEQTERM)
1328 } else if (tclass & TC_OPSEQ) {
1330 cn = chain_node(OC_TEST);
1331 cn->l.n = parse_expr(TC_OPTERM | TC_EOF | TC_GRPSTART);
1332 if (t.tclass & TC_GRPSTART) {
1336 chain_node(OC_PRINT);
1338 cn->r.n = mainseq.last;
1340 } else /* if (tclass & TC_GRPSTART) */ {
1348 /* -------- program execution part -------- */
1350 static node *mk_splitter(char *s, tsplitter *spl)
1358 if ((n->info & OPCLSMASK) == OC_REGEXP) {
1362 if (strlen(s) > 1) {
1363 mk_re_node(s, n, re);
1365 n->info = (uint32_t) *s;
1371 /* use node as a regular expression. Supplied with node ptr and regex_t
1372 * storage space. Return ptr to regex (if the result points to preg, it should
1373 * be regfree'd manually later)
1375 static regex_t *as_regex(node *op, regex_t *preg)
1380 if ((op->info & OPCLSMASK) == OC_REGEXP) {
1381 return icase ? op->r.ire : op->l.re;
1384 s = getvar_s(evaluate(op, v));
1385 xregcomp(preg, s, icase ? REG_EXTENDED | REG_ICASE : REG_EXTENDED);
1391 /* gradually increasing buffer */
1392 static void qrealloc(char **b, int n, int *size)
1394 if (! *b || n >= *size)
1395 *b = xrealloc(*b, *size = n + (n>>1) + 80);
1398 /* resize field storage space */
1399 static void fsrealloc(int size)
1401 static int maxfields = 0;
1404 if (size >= maxfields) {
1406 maxfields = size + 16;
1407 Fields = (var *)xrealloc(Fields, maxfields * sizeof(var));
1408 for (; i<maxfields; i++) {
1409 Fields[i].type = VF_SPECIAL;
1410 Fields[i].string = NULL;
1414 if (size < nfields) {
1415 for (i=size; i<nfields; i++) {
1422 static int awk_split(char *s, node *spl, char **slist)
1427 regmatch_t pmatch[2];
1429 /* in worst case, each char would be a separate field */
1430 *slist = s1 = xstrndup(s, strlen(s) * 2 + 3);
1432 c[0] = c[1] = (char)spl->info;
1434 if (*getvar_s(V[RS]) == '\0') c[2] = '\n';
1436 if ((spl->info & OPCLSMASK) == OC_REGEXP) { /* regex split */
1438 l = strcspn(s, c+2);
1439 if (regexec(icase ? spl->r.ire : spl->l.re, s, 1, pmatch, 0) == 0 &&
1440 pmatch[0].rm_so <= l) {
1441 l = pmatch[0].rm_so;
1442 if (pmatch[0].rm_eo == 0) { l++; pmatch[0].rm_eo++; }
1444 pmatch[0].rm_eo = l;
1445 if (*(s+l)) pmatch[0].rm_eo++;
1451 s += pmatch[0].rm_eo;
1454 } else if (c[0] == '\0') { /* null split */
1460 } else if (c[0] != ' ') { /* single-character split */
1462 c[0] = toupper(c[0]);
1463 c[1] = tolower(c[1]);
1466 while ((s1 = strpbrk(s1, c))) {
1470 } else { /* space split */
1472 s = skip_whitespace(s);
1475 while (*s && !isspace(*s))
1483 static void split_f0(void)
1485 static char *fstrings = NULL;
1495 n = awk_split(getvar_s(V[F0]), &fsplitter.n, &fstrings);
1498 for (i=0; i<n; i++) {
1499 Fields[i].string = nextword(&s);
1500 Fields[i].type |= (VF_FSTR | VF_USER | VF_DIRTY);
1503 /* set NF manually to avoid side effects */
1505 V[NF]->type = VF_NUMBER | VF_SPECIAL;
1506 V[NF]->number = nfields;
1509 /* perform additional actions when some internal variables changed */
1510 static void handle_special(var *v)
1514 int sl, l, len, i, bsize;
1516 if (! (v->type & VF_SPECIAL))
1520 n = (int)getvar_i(v);
1523 /* recalculate $0 */
1524 sep = getvar_s(V[OFS]);
1528 for (i=0; i<n; i++) {
1529 s = getvar_s(&Fields[i]);
1532 memcpy(b+len, sep, sl);
1535 qrealloc(&b, len+l+sl, &bsize);
1536 memcpy(b+len, s, l);
1539 if (b) b[len] = '\0';
1543 } else if (v == V[F0]) {
1544 is_f0_split = FALSE;
1546 } else if (v == V[FS]) {
1547 mk_splitter(getvar_s(v), &fsplitter);
1549 } else if (v == V[RS]) {
1550 mk_splitter(getvar_s(v), &rsplitter);
1552 } else if (v == V[IGNORECASE]) {
1556 n = getvar_i(V[NF]);
1557 setvar_i(V[NF], n > v-Fields ? n : v-Fields+1);
1558 /* right here v is invalid. Just to note... */
1562 /* step through func/builtin/etc arguments */
1563 static node *nextarg(node **pn)
1568 if (n && (n->info & OPCLSMASK) == OC_COMMA) {
1577 static void hashwalk_init(var *v, xhash *array)
1583 if (v->type & VF_WALK)
1587 w = v->x.walker = xzalloc(2 + 2*sizeof(char *) + array->glen);
1588 *w = *(w+1) = (char *)(w + 2);
1589 for (i=0; i<array->csize; i++) {
1590 hi = array->items[i];
1592 strcpy(*w, hi->name);
1599 static int hashwalk_next(var *v)
1607 setvar_s(v, nextword(w+1));
1611 /* evaluate node, return 1 when result is true, 0 otherwise.
 * The node is run through evaluate() with v as its second argument, and
 * the resulting variable is reduced to 0/1 by istrue(). */
1612 static int ptest(node *pattern)
1615 return istrue(evaluate(pattern, &v));
1618 /* read next record from stream rsm into a variable v */
1619 static int awk_getline(rstream *rsm, var *v)
1622 regmatch_t pmatch[2];
1623 int a, p, pp=0, size;
1624 int fd, so, eo, r, rp;
1627 /* we're using our own buffer since we need access to accumulating
1630 fd = fileno(rsm->F);
1635 c = (char) rsplitter.n.info;
1638 if (! m) qrealloc(&m, 256, &size);
1644 if ((rsplitter.n.info & OPCLSMASK) == OC_REGEXP) {
1645 if (regexec(icase ? rsplitter.n.r.ire : rsplitter.n.l.re,
1646 b, 1, pmatch, 0) == 0) {
1647 so = pmatch[0].rm_so;
1648 eo = pmatch[0].rm_eo;
1652 } else if (c != '\0') {
1653 s = strchr(b+pp, c);
1654 if (! s) s = memchr(b+pp, '\0', p - pp);
1661 while (b[rp] == '\n')
1663 s = strstr(b+rp, "\n\n");
1666 while (b[eo] == '\n') eo++;
1674 memmove(m, (const void *)(m+a), p+1);
1679 qrealloc(&m, a+p+128, &size);
1682 p += safe_read(fd, b+p, size-p-1);
1686 setvar_i(V[ERRNO], errno);
1695 c = b[so]; b[so] = '\0';
1699 c = b[eo]; b[eo] = '\0';
1700 setvar_s(V[RT], b+so);
/* Format number n into buffer b (at most size bytes, via snprintf) using
 * the printf-style conversion in format. With int_as_int set, a value that
 * survives a round trip through int is printed with "%d" regardless of
 * format. An unsupported conversion letter is a runtime error. */
1712 static int fmt_num(char *b, int size, const char *format, double n, int int_as_int)
1716 const char *s=format;
/* NOTE(review): (int)n is evaluated for every n; doubles outside the range
 * of int are not checked in the visible code */
1718 if (int_as_int && n == (int)n) {
1719 r = snprintf(b, size, "%d", (int)n);
/* leave c holding the last character of format, i.e. the conversion letter */
1721 do { c = *s; } while (*s && *++s);
/* integer conversions receive the value converted to int.
 * NOTE(review): for an empty format c is '\0', which strchr() finds at the
 * literal's terminator, so that case lands in this branch as well */
1722 if (strchr("diouxX", c)) {
1723 r = snprintf(b, size, format, (int)n);
/* floating-point conversions receive the double unchanged */
1724 } else if (strchr("eEfgG", c)) {
1725 r = snprintf(b, size, format, n);
1727 runtime_error(EMSG_INV_FMT);
1734 /* formatted output into an allocated buffer, return ptr to buffer */
1735 static char *awk_printf(node *n)
1738 char *fmt, *s, *s1, *f;
1739 int i, j, incr, bsize;
1744 fmt = f = xstrdup(getvar_s(evaluate(nextarg(&n), v)));
1749 while (*f && (*f != '%' || *(++f) == '%'))
1751 while (*f && !isalpha(*f))
1754 incr = (f - s) + MAXVARFMT;
1755 qrealloc(&b, incr+i, &bsize);
1756 c = *f; if (c != '\0') f++;
1757 c1 = *f ; *f = '\0';
1758 arg = evaluate(nextarg(&n), v);
1761 if (c == 'c' || !c) {
1762 i += sprintf(b+i, s,
1763 is_numeric(arg) ? (char)getvar_i(arg) : *getvar_s(arg));
1765 } else if (c == 's') {
1767 qrealloc(&b, incr+i+strlen(s1), &bsize);
1768 i += sprintf(b+i, s, s1);
1771 i += fmt_num(b+i, incr, s, getvar_i(arg), FALSE);
1775 /* if there was an error while sprintf, return value is negative */
1780 b = xrealloc(b, i+1);
1787 /* common substitution routine
1788 * replace (nm) substring of (src) that matches (rn) with (repl), store
1789 * the result into (dest), return the number of substitutions. If nm=0, replace
1790 * all matches. If src or dest is NULL, use $0. If ex=TRUE, enable
1791 * subexpression matching (\1-\9)
1793 static int awk_sub(node *rn, char *repl, int nm, var *src, var *dest, int ex) // regex substitution from src into dest
1797 int c, i, j, di, rl, so, eo, nbs, n, dssize;
1798 regmatch_t pmatch[10]; // slot 0 is the whole match, slots 1..9 the subexpressions (regexec below is given nmatch 10)
1801 re = as_regex(rn, &sreg);
1802 if (! src) src = V[F0]; // NULL source defaults to V[F0]
1803 if (! dest) dest = V[F0]; // NULL destination defaults to V[F0]
1808 while (regexec(re, sp, 10, pmatch, sp==getvar_s(src) ? 0:REG_NOTBOL) == 0) { // REG_NOTBOL is passed whenever sp is no longer the start of the source string
1809 so = pmatch[0].rm_so; // match start, as an offset from sp
1810 eo = pmatch[0].rm_eo; // match end, as an offset from sp
1812 qrealloc(&ds, di + eo + rl, &dssize);
1813 memcpy(ds + di, sp, eo); // copy the input from sp up to the end of the match into the output at di
1819 for (s = repl; *s; s++) { // walk the replacement text one character at a time
1825 if (c == '&' || (ex && c >= '0' && c <= '9')) { // & always, or a digit when ex is set, refers to matched text
1826 di -= ((nbs + 3) >> 1); // step the output index back; nbs is presumably the count of preceding backslashes - TODO confirm
1835 n = pmatch[j].rm_eo - pmatch[j].rm_so; // length of the text matched by slot j
1836 qrealloc(&ds, di + rl + n, &dssize);
1837 memcpy(ds + di, sp + pmatch[j].rm_so, n); // append the text of slot j to the output
1848 if (! (ds[di++] = *sp++)) break; // copy one input byte; leave the loop once the terminating NUL was copied
1852 qrealloc(&ds, di + strlen(sp), &dssize);
1853 strcpy(ds + di, sp); // append whatever input remains after the last match
1855 if (re == &sreg) regfree(re); // free only when as_regex handed back the local scratch regex sreg
1859 static var *exec_builtin(node *op, var *res) // run the builtin selected by op->info and store its value in res
1866 regmatch_t pmatch[2];
1868 static tsplitter tspl;
1877 isr = info = op->info;
1880 av[2] = av[3] = NULL;
1881 for (i=0 ; i<4 && op ; i++) { // collect at most 4 arguments, stopping early when the list ends
1882 an[i] = nextarg(&op);
1883 if (isr & 0x09000000) av[i] = evaluate(an[i], &tv[i]); // flag bits in isr decide whether this argument gets evaluated
1884 if (isr & 0x08000000) as[i] = getvar_s(av[i]); // ... and whether its string value is fetched too
1889 if (nargs < (info >> 30)) // info >> 30 serves as the minimum argument count
1890 runtime_error(EMSG_TOO_FEW_ARGS);
1892 switch (info & OPNMASK) {
1895 #ifdef CONFIG_FEATURE_AWK_MATH
1896 setvar_i(res, atan2(getvar_i(av[i]), getvar_i(av[1]))); // NOTE(review): av[i] reuses the argument-loop index; av[0] looks intended - confirm
1898 runtime_error(EMSG_NO_MATH);
1904 spl = (an[2]->info & OPCLSMASK) == OC_REGEXP ? // a regexp node as third argument is used as the splitter as is
1905 an[2] : mk_splitter(getvar_s(evaluate(an[2], &tv[2])), &tspl); // otherwise a splitter is built from its string value
1910 n = awk_split(as[0], spl, &s);
1912 clear_array(iamarray(av[1])); // empty the target array before refilling it
1913 for (i=1; i<=n; i++) // elements are stored under indexes 1..n
1914 setari_u(av[1], i, nextword(&s1));
1921 i = getvar_i(av[1]) - 1; // second argument minus one: 1-based position to 0-based offset
1922 if (i>l) i=l; if (i<0) i=0; // clamp the offset into [0, l]
1923 n = (nargs > 2) ? getvar_i(av[2]) : l-i; // length from the third argument when present, else l-i
1926 strncpy(s, as[0]+i, n);
1932 setvar_i(res, (long)getvar_i(av[0]) & (long)getvar_i(av[1])); // bitwise operations below work on the values converted to long
1936 setvar_i(res, ~(long)getvar_i(av[0]));
1940 setvar_i(res, (long)getvar_i(av[0]) << (long)getvar_i(av[1]));
1944 setvar_i(res, (long)getvar_i(av[0]) | (long)getvar_i(av[1]));
1948 setvar_i(res, (long)((unsigned long)getvar_i(av[0]) >> (unsigned long)getvar_i(av[1]))); // right shift is done on unsigned long operands
1952 setvar_i(res, (long)getvar_i(av[0]) ^ (long)getvar_i(av[1]));
1962 s1 = s = xstrdup(as[0]); // work on a private copy of the first argument
1964 *s1 = (*to_xxx)(*s1); // convert one character through the selected function (presumably inside a loop over the copy)
1973 l = strlen(as[0]) - ll;
1974 if (ll > 0 && l >= 0) {
1976 s = strstr(as[0], as[1]);
1977 if (s) n = (s - as[0]) + 1; // found: report the 1-based position
1979 /* this piece of code is terribly slow and
1980 * really should be rewritten
1982 for (i=0; i<=l; i++) {
1983 if (strncasecmp(as[0]+i, as[1], ll) == 0) {
1995 tt = getvar_i(av[1]);
1998 s = (nargs > 0) ? as[0] : "%a %b %d %H:%M:%S %Z %Y"; // built-in default format when no argument was given
1999 i = strftime(buf, MAXVARFMT, s, localtime(&tt));
2005 re = as_regex(an[1], &sreg);
2006 n = regexec(re, as[0], 1, pmatch, 0); // nmatch 1: only the whole-match offsets are recorded
2011 pmatch[0].rm_so = 0; // so = 0 and eo = -1 make RSTART 0 and RLENGTH -1 below
2012 pmatch[0].rm_eo = -1; // presumably the no-match case; the guarding test is not visible in this listing
2014 setvar_i(newvar("RSTART"), pmatch[0].rm_so);
2015 setvar_i(newvar("RLENGTH"), pmatch[0].rm_eo - pmatch[0].rm_so);
2016 setvar_i(res, pmatch[0].rm_so);
2017 if (re == &sreg) regfree(re); // free only when as_regex handed back the scratch regex sreg
2021 awk_sub(an[0], as[1], getvar_i(av[2]), av[3], res, TRUE); // ex = TRUE: subexpression references enabled; result text goes to res
2025 setvar_i(res, awk_sub(an[0], as[1], 0, av[2], av[2], FALSE)); // nm = 0; av[2] is both source and destination; res gets the returned count
2029 setvar_i(res, awk_sub(an[0], as[1], 1, av[2], av[2], FALSE)); // nm = 1; av[2] is both source and destination; res gets the returned count
2038 * Evaluate node - the heart of the program. Supplied with subtree
2039 * and place where to store result. returns ptr to result.
2041 #define XC(n) ((n) >> 8)
2043 static var *evaluate(node *op, var *res)
2045 /* This procedure is recursive so we should count every byte */
2046 static var *fnargs = NULL;
2047 static unsigned int seed = 1;
2048 static regex_t sreg;
2069 return setvar_s(res, NULL);
2076 opn = (short)(opinfo & OPNMASK); // operation number: the OPNMASK bits of opinfo
2077 lineno = op->lineno;
2079 /* execute inevitable things */
2081 if (opinfo & OF_RES1) X.v = L.v = evaluate(op1, v1); // left operand is evaluated into scratch slot v1
2082 if (opinfo & OF_RES2) R.v = evaluate(op->r.n, v1+1); // right operand is evaluated into scratch slot v1+1
2083 if (opinfo & OF_STR1) L.s = getvar_s(L.v);
2084 if (opinfo & OF_STR2) R.s = getvar_s(R.v);
2085 if (opinfo & OF_NUM1) L.d = getvar_i(L.v);
2087 switch (XC(opinfo & OPCLSMASK)) {
2089 /* -- iterative node type -- */
2093 if ((op1->info & OPCLSMASK) == OC_COMMA) {
2094 /* it's range pattern */
2095 if ((opinfo & OF_CHECKED) || ptest(op1->l.n)) { // already inside the range, or the start pattern matches now
2096 op->info |= OF_CHECKED; // remember in the node itself that the range is open
2097 if (ptest(op1->r.n))
2098 op->info &= ~OF_CHECKED; // end pattern matched: the range is closed again
2105 op = (ptest(op1)) ? op->a.n : op->r.n;
2109 /* just evaluate an expression, also used as unconditional jump */
2113 /* branch, used in if-else and various loops */
2115 op = istrue(L.v) ? op->a.n : op->r.n;
2118 /* initialize for-in loop */
2119 case XC( OC_WALKINIT ):
2120 hashwalk_init(L.v, iamarray(R.v));
2123 /* get next array item */
2124 case XC( OC_WALKNEXT ):
2125 op = hashwalk_next(L.v) ? op->a.n : op->r.n;
2128 case XC( OC_PRINT ):
2129 case XC( OC_PRINTF ):
2132 X.rsm = newfile(R.s);
2135 if((X.rsm->F = popen(R.s, "w")) == NULL) // output to a command: failure to start it is fatal
2136 bb_perror_msg_and_die("popen");
2139 X.rsm->F = xfopen(R.s, opn=='w' ? "w" : "a"); // opn selects between truncating and appending
2145 if ((opinfo & OPCLSMASK) == OC_PRINT) {
2147 fputs(getvar_s(V[F0]), X.F);
2150 L.v = evaluate(nextarg(&op1), v1);
2151 if (L.v->type & VF_NUMBER) {
2152 fmt_num(buf, MAXVARFMT, getvar_s(V[OFMT]), // numbers are formatted with OFMT,
2153 getvar_i(L.v), TRUE); // with integral values printed as plain integers (int_as_int on)
2156 fputs(getvar_s(L.v), X.F);
2159 if (op1) fputs(getvar_s(V[OFS]), X.F); // more arguments follow: emit OFS between them
2162 fputs(getvar_s(V[ORS]), X.F);
2164 } else { /* OC_PRINTF */
2165 L.s = awk_printf(op1);
2172 case XC( OC_DELETE ):
2173 X.info = op1->info & OPCLSMASK;
2174 if (X.info == OC_VAR) {
2176 } else if (X.info == OC_FNARG) {
2177 R.v = &fnargs[op1->l.i];
2179 runtime_error(EMSG_NOT_ARRAY);
2184 L.s = getvar_s(evaluate(op1->r.n, v1));
2185 hash_remove(iamarray(R.v), L.s); // remove the single element keyed by L.s
2187 clear_array(iamarray(R.v)); // remove every element of the array
2191 case XC( OC_NEWSOURCE ):
2192 programname = op->l.s;
2195 case XC( OC_RETURN ):
2199 case XC( OC_NEXTFILE ):
2210 /* -- recursive node type -- */
2218 case XC( OC_FNARG ):
2219 L.v = &fnargs[op->l.i];
2222 res = (op->r.n) ? findvar(iamarray(L.v), R.s) : L.v; // with a subscript node: look up the array element; otherwise the variable itself
2226 setvar_i(res, hash_search(iamarray(R.v), L.s) ? 1 : 0);
2229 case XC( OC_REGEXP ):
2231 L.s = getvar_s(V[F0]);
2234 case XC( OC_MATCH ):
2237 X.re = as_regex(op1, &sreg);
2238 R.i = regexec(X.re, L.s, 0, NULL, 0);
2239 if (X.re == &sreg) regfree(X.re); // free only when as_regex handed back the scratch regex sreg
2240 setvar_i(res, (R.i == 0 ? 1 : 0) ^ (opn == '!' ? 1 : 0)); // match result, inverted when opn is the negation operator
2244 /* if source is a temporary string, just relink it to dest */
2245 if (R.v == v1+1 && R.v->string) { // R.v == v1+1: the value lives in the scratch slot filled for OF_RES2
2246 res = setvar_p(L.v, R.v->string);
2249 res = copyvar(L.v, R.v);
2253 case XC( OC_TERNARY ):
2254 if ((op->r.n->info & OPCLSMASK) != OC_COLON)
2255 runtime_error(EMSG_POSSIBLE_ERROR);
2256 res = evaluate(istrue(L.v) ? op->r.n->l.n : op->r.n->r.n, res);
2260 if (! op->r.f->body.first)
2261 runtime_error(EMSG_UNDEF_FUNC);
2263 X.v = R.v = nvalloc(op->r.f->nargs+1); // allocate nargs+1 variables; X.v keeps the start, R.v walks through them
2265 L.v = evaluate(nextarg(&op1), v1);
2267 R.v->type |= VF_CHILD; // mark the slot as a function argument ...
2268 R.v->x.parent = L.v; // ... whose parent is the evaluated actual parameter
2269 if (++R.v - X.v >= op->r.f->nargs) // all declared argument slots have been used
2277 res = evaluate(op->r.f->body.first, res);
2284 case XC( OC_GETLINE ):
2285 case XC( OC_PGETLINE ):
2287 X.rsm = newfile(L.s);
2289 if ((opinfo & OPCLSMASK) == OC_PGETLINE) {
2290 X.rsm->F = popen(L.s, "r");
2291 X.rsm->is_pipe = TRUE;
2293 X.rsm->F = fopen(L.s, "r"); /* not xfopen! */
2297 if (! iF) iF = next_input_file();
2302 setvar_i(V[ERRNO], errno);
2310 L.i = awk_getline(X.rsm, R.v);
2320 /* simple builtins */
2321 case XC( OC_FBLTIN ):
2329 R.d = (double)rand() / (double)RAND_MAX;
2332 #ifdef CONFIG_FEATURE_AWK_MATH
2358 runtime_error(EMSG_NO_MATH);
2364 seed = op1 ? (unsigned int)L.d : (unsigned int)time(NULL); // seed comes from the argument when there is one, else from the current time
2374 L.s = getvar_s(V[F0]);
2380 R.d = (ENABLE_FEATURE_ALLOW_EXEC && L.s && *L.s) // command runs only if exec is enabled and the string is non-empty
2381 ? (system(L.s) >> 8) : 0; // result is the system() return value shifted right by 8, or 0 when nothing ran
2389 X.rsm = newfile(L.s);
2398 X.rsm = (rstream *)hash_search(fdhash, L.s);
2400 R.i = X.rsm->is_pipe ? pclose(X.rsm->F) : fclose(X.rsm->F); // pipes are closed with pclose, everything else with fclose
2401 free(X.rsm->buffer);
2402 hash_remove(fdhash, L.s);
2405 setvar_i(V[ERRNO], errno);
2412 case XC( OC_BUILTIN ):
2413 res = exec_builtin(op, res);
2416 case XC( OC_SPRINTF ):
2417 setvar_p(res, awk_printf(op1));
2420 case XC( OC_UNARY ):
2422 L.d = R.d = getvar_i(R.v);
2437 L.d = istrue(X.v) ? 0 : 1;
2448 case XC( OC_FIELD ):
2449 R.i = (int)getvar_i(R.v);
2457 res = &Fields[R.i-1]; // Fields is indexed from 0, so field number R.i lives at R.i-1
2461 /* concatenation (" ") and index joining (",") */
2462 case XC( OC_CONCAT ):
2463 case XC( OC_COMMA ):
2464 opn = strlen(L.s) + strlen(R.s) + 2; // opn is reused here as the byte count for the joined string
2467 if ((opinfo & OPCLSMASK) == OC_COMMA) {
2468 L.s = getvar_s(V[SUBSEP]);
2469 X.s = (char *)xrealloc(X.s, opn + strlen(L.s)); // index joining also needs room for SUBSEP
2477 setvar_i(res, istrue(L.v) ? ptest(op->r.n) : 0); // right side is tested only when the left side is true
2481 setvar_i(res, istrue(L.v) ? 1 : ptest(op->r.n)); // right side is tested only when the left side is false
2484 case XC( OC_BINARY ):
2485 case XC( OC_REPLACE ):
2486 R.d = getvar_i(R.v);
2498 if (R.d == 0) runtime_error(EMSG_DIV_BY_ZERO);
2502 #ifdef CONFIG_FEATURE_AWK_MATH
2503 L.d = pow(L.d, R.d);
2505 runtime_error(EMSG_NO_MATH);
2509 if (R.d == 0) runtime_error(EMSG_DIV_BY_ZERO);
2510 L.d -= (int)(L.d / R.d) * R.d; // remainder: subtract the int-truncated quotient times the divisor
2513 res = setvar_i(((opinfo&OPCLSMASK) == OC_BINARY) ? res : X.v, L.d); // OC_BINARY stores into res; otherwise the value goes into X.v
2516 case XC( OC_COMPARE ):
2517 if (is_numeric(L.v) && is_numeric(R.v)) {
2518 L.d = getvar_i(L.v) - getvar_i(R.v); // both numeric: compare by the sign of the difference
2520 L.s = getvar_s(L.v);
2521 R.s = getvar_s(R.v);
2522 L.d = icase ? strcasecmp(L.s, R.s) : strcmp(L.s, R.s); // string comparison, case-insensitive when icase is set
2524 switch (opn & 0xfe) {
2535 setvar_i(res, (opn & 0x1 ? R.i : !R.i) ? 1 : 0); // low bit of opn picks between R.i and its negation
2539 runtime_error(EMSG_POSSIBLE_ERROR);
2541 if ((opinfo & OPCLSMASK) <= SHIFT_TIL_THIS)
2543 if ((opinfo & OPCLSMASK) >= RECUR_FROM_THIS)
2553 /* -------- main & co. -------- */
2555 static int awk_exit(int r)
2564 evaluate(endseq.first, &tv); // run the endseq chain
2567 /* waiting for children */
2568 for (i=0; i<fdhash->csize; i++) { // visit every bucket of the fdhash table
2569 hi = fdhash->items[i];
2571 if (hi->data.rs.F && hi->data.rs.is_pipe) // only streams that are open and flagged as pipes
2572 pclose(hi->data.rs.F);
2580 /* if expr looks like "var=value", perform assignment and return 1,
2581 * otherwise return 0 */
2582 static int is_assignment(const char *expr)
2584 char *exprc, *s, *s0, *s1;
2586 exprc = xstrdup(expr); // private copy of expr; the code below writes into it through s1
2587 if (!isalnum_(*exprc) || (s = strchr(exprc, '=')) == NULL) { // rejected unless the first character passes isalnum_ and an = is present
2595 *(s1++) = nextchar(&s); // copy the value through nextchar, one output character at a time (presumably resolving escapes - TODO confirm)
2598 setvar_u(newvar(exprc), s0); // assign s0 to the variable named by exprc (presumably cut off at the = by a line not visible here)
2603 /* switch to next input file */
2604 static rstream *next_input_file(void)
2609 static int files_happen = FALSE;
2611 if (rsm.F) fclose(rsm.F); // close the stream still held in rsm, if any
2613 rsm.pos = rsm.adv = 0; // reset the pos and adv fields of rsm
2616 if (getvar_i(V[ARGIND])+1 >= getvar_i(V[ARGC])) { // no ARGV entries remain after the current ARGIND
2622 ind = getvar_s(incvar(V[ARGIND])); // advance ARGIND and use its string form as the ARGV key
2623 fname = getvar_s(findvar(iamarray(V[ARGV]), ind));
2624 if (fname && *fname && !is_assignment(fname)) // a non-empty entry that is_assignment did not accept is opened as a file
2625 F = afopen(fname, "r");
2629 files_happen = TRUE;
2630 setvar_s(V[FILENAME], fname);
2635 int awk_main(int argc, char **argv)
2638 char *opt_F, *opt_v, *opt_W;
2644 static int from_file = FALSE;
2646 FILE *F, *stdfiles[3];
2647 static char * stdnames = "/dev/stdin\0/dev/stdout\0/dev/stderr";
2649 /* allocate global buffer */
2650 buf = xmalloc(MAXVARFMT+1);
2652 vhash = hash_init();
2653 ahash = hash_init();
2654 fdhash = hash_init();
2655 fnhash = hash_init();
2657 /* initialize variables */
2658 for (i=0; *vNames; i++) { // one iteration per word of the vNames list
2659 V[i] = v = newvar(nextword(&vNames));
2660 if (*vValues != '\377') // a \377 byte in vValues means no string value is taken from it for this variable
2661 setvar_s(v, nextword(&vValues));
2665 if (*vNames == '*') { // a * at the current position in vNames marks the variable just created as special
2666 v->type |= VF_SPECIAL;
2671 handle_special(V[FS]);
2672 handle_special(V[RS]);
2674 stdfiles[0] = stdin;
2675 stdfiles[1] = stdout;
2676 stdfiles[2] = stderr;
2677 for (i=0; i<3; i++) { // register the three standard streams under the names listed in stdnames
2678 rsm = newfile(nextword(&stdnames));
2679 rsm->F = stdfiles[i];
2682 for (envp=environ; *envp; envp++) { // copy the process environment into the ENVIRON array
2684 s1 = strchr(s, '=');
2689 setvar_u(findvar(iamarray(V[ENVIRON]), s), s1);
2694 opt = getopt32(argc, argv, "F:v:f:W:", &opt_F, &opt_v, &programname, &opt_W);
2695 if (opt & 0x1) setvar_s(V[FS], opt_F); // -F
2696 if (opt & 0x2) if (!is_assignment(opt_v)) bb_show_usage(); // -v
2697 if (opt & 0x4) { // -f
2699 F = afopen(programname, "r");
2701 /* one byte is reserved for some trick in next_token */
2702 if (fseek(F, 0, SEEK_END) == 0) {
2704 s = xmalloc(flen+4);
2705 fseek(F, 0, SEEK_SET);
2706 i = 1 + fread(s+1, 1, flen, F); // data is stored from s+1 on; i is 1 plus the number of bytes read
2708 for (i=j=1; j>0; i+=j) { // read in chunks until fread returns 0
2709 s = (char *)xrealloc(s, i+4096);
2710 j = fread(s+i, 1, 4094, F);
2718 if (opt & 0x8) // -W
2719 bb_error_msg("warning: unrecognized option '-W %s' ignored", opt_W);
2724 programname = "cmd. line";
2725 parse_program(argv[optind++]);
2729 /* fill in ARGV array */
2730 setvar_i(V[ARGC], argc - optind + 1); // remaining arguments plus one for ARGV[0]
2731 setari_u(V[ARGV], 0, "awk");
2732 for(i=optind; i < argc; i++)
2733 setari_u(V[ARGV], i+1-optind, argv[i]); // first remaining argument lands at index 1
2735 evaluate(beginseq.first, &tv);
2736 if (! mainseq.first && ! endseq.first) // neither mainseq nor endseq has any nodes: exit right after beginseq
2737 awk_exit(EXIT_SUCCESS);
2739 /* input file could already be opened in BEGIN block */
2740 if (! iF) iF = next_input_file();
2742 /* passing through input files */
2746 setvar_i(V[FNR], 0);
2748 while ((c = awk_getline(iF, V[F0])) > 0) { // each successful read puts the record into V[F0]
2753 evaluate(mainseq.first, &tv);
2760 runtime_error(strerror(errno)); // presumably taken when awk_getline reported a failure (the guarding test is not visible here)
2762 iF = next_input_file();
2766 awk_exit(EXIT_SUCCESS);