1 /* vi: set sw=4 ts=4: */
3 * awk implementation for busybox
5 * Copyright (C) 2002 by Dmitry Zakharov <dmit@crp.bank.gov.ua>
7 * Licensed under the GPL v2 or later, see the file LICENSE in this tarball.
14 /* This is a NOEXEC applet. Be very careful! */
21 #define VF_NUMBER 0x0001 /* 1 = primary type is number */
22 #define VF_ARRAY 0x0002 /* 1 = it's an array */
24 #define VF_CACHED 0x0100 /* 1 = num/str value has cached str/num eq */
25 #define VF_USER 0x0200 /* 1 = user input (may be numeric string) */
26 #define VF_SPECIAL 0x0400 /* 1 = requires extra handling when changed */
27 #define VF_WALK 0x0800 /* 1 = variable has alloc'd x.walker list */
28 #define VF_FSTR 0x1000 /* 1 = var::string points to fstring buffer */
29 #define VF_CHILD 0x2000 /* 1 = function arg; x.parent points to source */
30 #define VF_DIRTY 0x4000 /* 1 = variable was set explicitly */
32 /* these flags are static, don't change them when value is changed */
33 #define VF_DONTTOUCH (VF_ARRAY | VF_SPECIAL | VF_WALK | VF_CHILD | VF_DIRTY)
36 typedef struct var_s {
37 unsigned short type; /* flags */
41 int aidx; /* func arg idx (for compilation stage) */
42 struct xhash_s *array; /* array ptr */
43 struct var_s *parent; /* for func args, ptr to actual parameter */
44 char **walker; /* list of array elements (for..in) */
48 /* Node chain (pattern-action chain, BEGIN, END, function bodies) */
49 typedef struct chain_s {
52 const char *programname;
56 typedef struct func_s {
62 typedef struct rstream_s {
68 unsigned short is_pipe;
71 typedef struct hash_item_s {
73 struct var_s v; /* variable/array hash */
74 struct rstream_s rs; /* redirect streams hash */
75 struct func_s f; /* functions hash */
77 struct hash_item_s *next; /* next in chain */
78 char name[1]; /* really it's longer */
81 typedef struct xhash_s {
82 unsigned nel; /* num of elements */
83 unsigned csize; /* current hash size */
84 unsigned nprime; /* next hash size in PRIMES[] */
85 unsigned glen; /* summary length of item names */
86 struct hash_item_s **items;
90 typedef struct node_s {
111 /* Block of temporary variables */
112 typedef struct nvblock_s {
115 struct nvblock_s *prev;
116 struct nvblock_s *next;
120 typedef struct tsplitter_s {
125 /* simple token classes */
126 /* Order and hex values are very important!!! See next_token() */
127 #define TC_SEQSTART 1 /* ( */
128 #define TC_SEQTERM (1 << 1) /* ) */
129 #define TC_REGEXP (1 << 2) /* /.../ */
130 #define TC_OUTRDR (1 << 3) /* | > >> */
131 #define TC_UOPPOST (1 << 4) /* unary postfix operator */
132 #define TC_UOPPRE1 (1 << 5) /* unary prefix operator */
133 #define TC_BINOPX (1 << 6) /* two-opnd operator */
134 #define TC_IN (1 << 7)
135 #define TC_COMMA (1 << 8)
136 #define TC_PIPE (1 << 9) /* input redirection pipe */
137 #define TC_UOPPRE2 (1 << 10) /* unary prefix operator */
138 #define TC_ARRTERM (1 << 11) /* ] */
139 #define TC_GRPSTART (1 << 12) /* { */
140 #define TC_GRPTERM (1 << 13) /* } */
141 #define TC_SEMICOL (1 << 14)
142 #define TC_NEWLINE (1 << 15)
143 #define TC_STATX (1 << 16) /* ctl statement (for, next...) */
144 #define TC_WHILE (1 << 17)
145 #define TC_ELSE (1 << 18)
146 #define TC_BUILTIN (1 << 19)
147 #define TC_GETLINE (1 << 20)
148 #define TC_FUNCDECL (1 << 21) /* `function' `func' */
149 #define TC_BEGIN (1 << 22)
150 #define TC_END (1 << 23)
151 #define TC_EOF (1 << 24)
152 #define TC_VARIABLE (1 << 25)
153 #define TC_ARRAY (1 << 26)
154 #define TC_FUNCTION (1 << 27)
155 #define TC_STRING (1 << 28)
156 #define TC_NUMBER (1 << 29)
158 #define TC_UOPPRE (TC_UOPPRE1 | TC_UOPPRE2)
160 /* combined token classes */
161 #define TC_BINOP (TC_BINOPX | TC_COMMA | TC_PIPE | TC_IN)
162 #define TC_UNARYOP (TC_UOPPRE | TC_UOPPOST)
163 #define TC_OPERAND (TC_VARIABLE | TC_ARRAY | TC_FUNCTION | \
164 TC_BUILTIN | TC_GETLINE | TC_SEQSTART | TC_STRING | TC_NUMBER)
166 #define TC_STATEMNT (TC_STATX | TC_WHILE)
167 #define TC_OPTERM (TC_SEMICOL | TC_NEWLINE)
169 /* word tokens, cannot mean something else if not expected */
170 #define TC_WORD (TC_IN | TC_STATEMNT | TC_ELSE | TC_BUILTIN | \
171 TC_GETLINE | TC_FUNCDECL | TC_BEGIN | TC_END)
173 /* discard newlines after these */
174 #define TC_NOTERM (TC_COMMA | TC_GRPSTART | TC_GRPTERM | \
175 TC_BINOP | TC_OPTERM)
177 /* what can expression begin with */
178 #define TC_OPSEQ (TC_OPERAND | TC_UOPPRE | TC_REGEXP)
179 /* what can group begin with */
180 #define TC_GRPSEQ (TC_OPSEQ | TC_OPTERM | TC_STATEMNT | TC_GRPSTART)
182 /* if previous token class is CONCAT1 and next is CONCAT2, concatenation */
183 /* operator is inserted between them */
184 #define TC_CONCAT1 (TC_VARIABLE | TC_ARRTERM | TC_SEQTERM | \
185 TC_STRING | TC_NUMBER | TC_UOPPOST)
186 #define TC_CONCAT2 (TC_OPERAND | TC_UOPPRE)
188 #define OF_RES1 0x010000
189 #define OF_RES2 0x020000
190 #define OF_STR1 0x040000
191 #define OF_STR2 0x080000
192 #define OF_NUM1 0x100000
193 #define OF_CHECKED 0x200000
195 /* combined operator flags */
198 #define xS (OF_RES2 | OF_STR2)
200 #define VV (OF_RES1 | OF_RES2)
201 #define Nx (OF_RES1 | OF_NUM1)
202 #define NV (OF_RES1 | OF_NUM1 | OF_RES2)
203 #define Sx (OF_RES1 | OF_STR1)
204 #define SV (OF_RES1 | OF_STR1 | OF_RES2)
205 #define SS (OF_RES1 | OF_STR1 | OF_RES2 | OF_STR2)
207 #define OPCLSMASK 0xFF00
208 #define OPNMASK 0x007F
210 /* operator priority is the highest byte (even: r->l, odd: l->r grouping)
211 * For builtins it has different meaning: n n s3 s2 s1 v3 v2 v1,
212 * n - min. number of args, vN - resolve Nth arg to var, sN - resolve to string
/* Place x in the top byte of a 32-bit info word. The argument is
 * parenthesized so that compound expressions shift as a whole, and the
 * cast keeps the shift unsigned: values >= 0x80 (as used by the builtin
 * entries) would otherwise be shifted into the sign bit of int, which is
 * undefined behavior. */
#define P(x) ((uint32_t)(x) << 24)
215 #define PRIMASK 0x7F000000
216 #define PRIMASK2 0x7E000000
218 /* Operation classes */
220 #define SHIFT_TIL_THIS 0x0600
221 #define RECUR_FROM_THIS 0x1000
224 OC_DELETE=0x0100, OC_EXEC=0x0200, OC_NEWSOURCE=0x0300,
225 OC_PRINT=0x0400, OC_PRINTF=0x0500, OC_WALKINIT=0x0600,
227 OC_BR=0x0700, OC_BREAK=0x0800, OC_CONTINUE=0x0900,
228 OC_EXIT=0x0a00, OC_NEXT=0x0b00, OC_NEXTFILE=0x0c00,
229 OC_TEST=0x0d00, OC_WALKNEXT=0x0e00,
231 OC_BINARY=0x1000, OC_BUILTIN=0x1100, OC_COLON=0x1200,
232 OC_COMMA=0x1300, OC_COMPARE=0x1400, OC_CONCAT=0x1500,
233 OC_FBLTIN=0x1600, OC_FIELD=0x1700, OC_FNARG=0x1800,
234 OC_FUNC=0x1900, OC_GETLINE=0x1a00, OC_IN=0x1b00,
235 OC_LAND=0x1c00, OC_LOR=0x1d00, OC_MATCH=0x1e00,
236 OC_MOVE=0x1f00, OC_PGETLINE=0x2000, OC_REGEXP=0x2100,
237 OC_REPLACE=0x2200, OC_RETURN=0x2300, OC_SPRINTF=0x2400,
238 OC_TERNARY=0x2500, OC_UNARY=0x2600, OC_VAR=0x2700,
241 ST_IF=0x3000, ST_DO=0x3100, ST_FOR=0x3200,
245 /* simple builtins */
247 F_in=0, F_rn, F_co, F_ex, F_lg, F_si, F_sq, F_sr,
248 F_ti, F_le, F_sy, F_ff, F_cl
253 B_a2=0, B_ix, B_ma, B_sp, B_ss, B_ti, B_lo, B_up,
255 B_an, B_co, B_ls, B_or, B_rs, B_xo,
258 /* tokens and their corresponding info values */
260 #define NTC "\377" /* switch to next token class (tc<<1) */
263 #define OC_B OC_BUILTIN
265 static const char tokenlist[] =
268 "\1/" NTC /* REGEXP */
269 "\2>>" "\1>" "\1|" NTC /* OUTRDR */
270 "\2++" "\2--" NTC /* UOPPOST */
271 "\2++" "\2--" "\1$" NTC /* UOPPRE1 */
272 "\2==" "\1=" "\2+=" "\2-=" /* BINOPX */
273 "\2*=" "\2/=" "\2%=" "\2^="
274 "\1+" "\1-" "\3**=" "\2**"
275 "\1/" "\1%" "\1^" "\1*"
276 "\2!=" "\2>=" "\2<=" "\1>"
277 "\1<" "\2!~" "\1~" "\2&&"
278 "\2||" "\1?" "\1:" NTC
282 "\1+" "\1-" "\1!" NTC /* UOPPRE2 */
288 "\2if" "\2do" "\3for" "\5break" /* STATX */
289 "\10continue" "\6delete" "\5print"
290 "\6printf" "\4next" "\10nextfile"
291 "\6return" "\4exit" NTC
295 "\3and" "\5compl" "\6lshift" "\2or"
297 "\5close" "\6system" "\6fflush" "\5atan2" /* BUILTIN */
298 "\3cos" "\3exp" "\3int" "\3log"
299 "\4rand" "\3sin" "\4sqrt" "\5srand"
300 "\6gensub" "\4gsub" "\5index" "\6length"
301 "\5match" "\5split" "\7sprintf" "\3sub"
302 "\6substr" "\7systime" "\10strftime"
303 "\7tolower" "\7toupper" NTC
305 "\4func" "\10function" NTC
310 static const uint32_t tokeninfo[] = {
314 xS|'a', xS|'w', xS|'|',
315 OC_UNARY|xV|P(9)|'p', OC_UNARY|xV|P(9)|'m',
316 OC_UNARY|xV|P(9)|'P', OC_UNARY|xV|P(9)|'M',
318 OC_COMPARE|VV|P(39)|5, OC_MOVE|VV|P(74),
319 OC_REPLACE|NV|P(74)|'+', OC_REPLACE|NV|P(74)|'-',
320 OC_REPLACE|NV|P(74)|'*', OC_REPLACE|NV|P(74)|'/',
321 OC_REPLACE|NV|P(74)|'%', OC_REPLACE|NV|P(74)|'&',
322 OC_BINARY|NV|P(29)|'+', OC_BINARY|NV|P(29)|'-',
323 OC_REPLACE|NV|P(74)|'&', OC_BINARY|NV|P(15)|'&',
324 OC_BINARY|NV|P(25)|'/', OC_BINARY|NV|P(25)|'%',
325 OC_BINARY|NV|P(15)|'&', OC_BINARY|NV|P(25)|'*',
326 OC_COMPARE|VV|P(39)|4, OC_COMPARE|VV|P(39)|3,
327 OC_COMPARE|VV|P(39)|0, OC_COMPARE|VV|P(39)|1,
328 OC_COMPARE|VV|P(39)|2, OC_MATCH|Sx|P(45)|'!',
329 OC_MATCH|Sx|P(45)|'~', OC_LAND|Vx|P(55),
330 OC_LOR|Vx|P(59), OC_TERNARY|Vx|P(64)|'?',
331 OC_COLON|xx|P(67)|':',
334 OC_PGETLINE|SV|P(37),
335 OC_UNARY|xV|P(19)|'+', OC_UNARY|xV|P(19)|'-',
336 OC_UNARY|xV|P(19)|'!',
342 ST_IF, ST_DO, ST_FOR, OC_BREAK,
343 OC_CONTINUE, OC_DELETE|Vx, OC_PRINT,
344 OC_PRINTF, OC_NEXT, OC_NEXTFILE,
345 OC_RETURN|Vx, OC_EXIT|Nx,
349 OC_B|B_an|P(0x83), OC_B|B_co|P(0x41), OC_B|B_ls|P(0x83), OC_B|B_or|P(0x83),
350 OC_B|B_rs|P(0x83), OC_B|B_xo|P(0x83),
351 OC_FBLTIN|Sx|F_cl, OC_FBLTIN|Sx|F_sy, OC_FBLTIN|Sx|F_ff, OC_B|B_a2|P(0x83),
352 OC_FBLTIN|Nx|F_co, OC_FBLTIN|Nx|F_ex, OC_FBLTIN|Nx|F_in, OC_FBLTIN|Nx|F_lg,
353 OC_FBLTIN|F_rn, OC_FBLTIN|Nx|F_si, OC_FBLTIN|Nx|F_sq, OC_FBLTIN|Nx|F_sr,
354 OC_B|B_ge|P(0xd6), OC_B|B_gs|P(0xb6), OC_B|B_ix|P(0x9b), OC_FBLTIN|Sx|F_le,
355 OC_B|B_ma|P(0x89), OC_B|B_sp|P(0x8b), OC_SPRINTF, OC_B|B_su|P(0xb6),
356 OC_B|B_ss|P(0x8f), OC_FBLTIN|F_ti, OC_B|B_ti|P(0x0b),
357 OC_B|B_lo|P(0x49), OC_B|B_up|P(0x49),
364 /* internal variable names and their initial values */
365 /* an asterisk marks SPECIAL vars; $ is just the unnamed Field0 */
367 CONVFMT=0, OFMT, FS, OFS,
368 ORS, RS, RT, FILENAME,
369 SUBSEP, ARGIND, ARGC, ARGV,
372 ENVIRON, F0, _intvarcount_
375 static const char vNames[] =
376 "CONVFMT\0" "OFMT\0" "FS\0*" "OFS\0"
377 "ORS\0" "RS\0*" "RT\0" "FILENAME\0"
378 "SUBSEP\0" "ARGIND\0" "ARGC\0" "ARGV\0"
380 "NR\0" "NF\0*" "IGNORECASE\0*"
381 "ENVIRON\0" "$\0*" "\0";
383 static const char vValues[] =
384 "%.6g\0" "%.6g\0" " \0" " \0"
385 "\n\0" "\n\0" "\0" "\0"
389 /* hash size may grow to these values */
/* Initial bucket count assigned to csize when a hash is created.
 * No trailing semicolon: the macro must expand to a plain expression so
 * it can be used inside larger expressions, not only as a full statement. */
#define FIRST_PRIME 61
/* Successive bucket counts a hash may be rebuilt with. */
static const unsigned PRIMES[] = { 251, 1021, 4093, 16381, 65521 };
/* Number of entries in PRIMES[]; computed from the array's own element
 * size so it stays correct if the element type is ever changed. */
enum { NPRIMES = sizeof(PRIMES) / sizeof(PRIMES[0]) };
396 extern char **environ;
398 static var * V[_intvarcount_];
399 static chain beginseq, mainseq, endseq, *seq;
400 static int nextrec, nextfile;
401 static node *break_ptr, *continue_ptr;
403 static xhash *vhash, *ahash, *fdhash, *fnhash;
404 static const char *programname;
406 static int is_f0_split;
409 static tsplitter fsplitter, rsplitter;
424 /* It used to have an even better name: 't'. Whoever knows what it is, please rename! */
426 /* function prototypes */
427 static void handle_special(var *);
428 static node *parse_expr(uint32_t);
429 static void chain_group(void);
430 static var *evaluate(node *, var *);
431 static rstream *next_input_file(void);
432 static int fmt_num(char *, int, const char *, double, int);
433 static int awk_exit(int) ATTRIBUTE_NORETURN;
435 /* ---- error handling ---- */
437 static const char EMSG_INTERNAL_ERROR[] = "Internal error";
438 static const char EMSG_UNEXP_EOS[] = "Unexpected end of string";
439 static const char EMSG_UNEXP_TOKEN[] = "Unexpected token";
440 static const char EMSG_DIV_BY_ZERO[] = "Division by zero";
441 static const char EMSG_INV_FMT[] = "Invalid format specifier";
442 static const char EMSG_TOO_FEW_ARGS[] = "Too few arguments for builtin";
443 static const char EMSG_NOT_ARRAY[] = "Not an array";
444 static const char EMSG_POSSIBLE_ERROR[] = "Possible syntax error";
445 static const char EMSG_UNDEF_FUNC[] = "Call to undefined function";
446 #if !ENABLE_FEATURE_AWK_MATH
447 static const char EMSG_NO_MATH[] = "Math support is not compiled in";
450 static void zero_out_var(var * vp)
452 memset(vp, 0, sizeof(*vp));
455 static void syntax_error(const char * const message) ATTRIBUTE_NORETURN;
456 static void syntax_error(const char * const message)
458 bb_error_msg_and_die("%s:%i: %s", programname, lineno, message);
461 #define runtime_error(x) syntax_error(x)
464 /* ---- hash stuff ---- */
466 static unsigned hashidx(const char *name)
470 while (*name) idx = *name++ + (idx << 6) - idx;
474 /* create new hash */
475 static xhash *hash_init(void)
479 newhash = xzalloc(sizeof(xhash));
480 newhash->csize = FIRST_PRIME;
481 newhash->items = xzalloc(newhash->csize * sizeof(hash_item *));
486 /* find item in hash, return ptr to data, NULL if not found */
487 static void *hash_search(xhash *hash, const char *name)
491 hi = hash->items [ hashidx(name) % hash->csize ];
493 if (strcmp(hi->name, name) == 0)
500 /* grow hash if it becomes too big */
501 static void hash_rebuild(xhash *hash)
503 unsigned newsize, i, idx;
504 hash_item **newitems, *hi, *thi;
506 if (hash->nprime == NPRIMES)
509 newsize = PRIMES[hash->nprime++];
510 newitems = xzalloc(newsize * sizeof(hash_item *));
512 for (i=0; i<hash->csize; i++) {
517 idx = hashidx(thi->name) % newsize;
518 thi->next = newitems[idx];
524 hash->csize = newsize;
525 hash->items = newitems;
528 /* find item in hash, add it if necessary. Return ptr to data */
529 static void *hash_find(xhash *hash, const char *name)
535 hi = hash_search(hash, name);
537 if (++hash->nel / hash->csize > 10)
540 l = strlen(name) + 1;
541 hi = xzalloc(sizeof(hash_item) + l);
542 memcpy(hi->name, name, l);
544 idx = hashidx(name) % hash->csize;
545 hi->next = hash->items[idx];
546 hash->items[idx] = hi;
552 #define findvar(hash, name) ((var*) hash_find((hash) , (name)))
553 #define newvar(name) ((var*) hash_find(vhash , (name)))
554 #define newfile(name) ((rstream*)hash_find(fdhash ,(name)))
555 #define newfunc(name) ((func*) hash_find(fnhash , (name)))
557 static void hash_remove(xhash *hash, const char *name)
559 hash_item *hi, **phi;
561 phi = &(hash->items[ hashidx(name) % hash->csize ]);
564 if (strcmp(hi->name, name) == 0) {
565 hash->glen -= (strlen(name) + 1);
575 /* ------ some useful functions ------ */
577 static void skip_spaces(char **s)
582 if (*p == '\\' && p[1] == '\n') {
585 } else if (*p != ' ' && *p != '\t') {
593 static char *nextword(char **s)
597 while (*(*s)++) /* */;
602 static char nextchar(char **s)
608 if (c == '\\') c = bb_process_escape_sequence((const char**)s);
609 if (c == '\\' && *s == pps) c = *((*s)++);
613 static int ATTRIBUTE_ALWAYS_INLINE isalnum_(int c)
615 return (isalnum(c) || c == '_');
618 static FILE *afopen(const char *path, const char *mode)
620 return (*path == '-' && *(path+1) == '\0') ? stdin : xfopen(path, mode);
623 /* -------- working with variables (set/get/copy/etc) -------- */
625 static xhash *iamarray(var *v)
629 while (a->type & VF_CHILD)
632 if (!(a->type & VF_ARRAY)) {
634 a->x.array = hash_init();
639 static void clear_array(xhash *array)
644 for (i = 0; i < array->csize; i++) {
645 hi = array->items[i];
649 free(thi->data.v.string);
652 array->items[i] = NULL;
654 array->glen = array->nel = 0;
657 /* clear a variable */
658 static var *clrvar(var *v)
660 if (!(v->type & VF_FSTR))
663 v->type &= VF_DONTTOUCH;
669 /* assign string value to variable */
670 static var *setvar_p(var *v, char *value)
679 /* same as setvar_p but make a copy of string */
680 static var *setvar_s(var *v, const char *value)
682 return setvar_p(v, (value && *value) ? xstrdup(value) : NULL);
685 /* same as setvar_s but set USER flag */
686 static var *setvar_u(var *v, const char *value)
693 /* set array element to user string */
694 static void setari_u(var *a, int idx, const char *s)
697 static char sidx[12];
699 sprintf(sidx, "%d", idx);
700 v = findvar(iamarray(a), sidx);
704 /* assign numeric value to variable */
705 static var *setvar_i(var *v, double value)
708 v->type |= VF_NUMBER;
714 static const char *getvar_s(var *v)
716 /* if v is numeric and has no cached string, convert it to string */
717 if ((v->type & (VF_NUMBER | VF_CACHED)) == VF_NUMBER) {
718 fmt_num(buf, MAXVARFMT, getvar_s(V[CONVFMT]), v->number, TRUE);
719 v->string = xstrdup(buf);
720 v->type |= VF_CACHED;
722 return (v->string == NULL) ? "" : v->string;
725 static double getvar_i(var *v)
729 if ((v->type & (VF_NUMBER | VF_CACHED)) == 0) {
733 v->number = strtod(s, &s);
734 if (v->type & VF_USER) {
742 v->type |= VF_CACHED;
747 static var *copyvar(var *dest, const var *src)
751 dest->type |= (src->type & ~(VF_DONTTOUCH | VF_FSTR));
752 dest->number = src->number;
754 dest->string = xstrdup(src->string);
756 handle_special(dest);
760 static var *incvar(var *v)
762 return setvar_i(v, getvar_i(v)+1.);
765 /* return true if v is number or numeric string */
766 static int is_numeric(var *v)
769 return ((v->type ^ VF_DIRTY) & (VF_NUMBER | VF_USER | VF_DIRTY));
772 /* return 1 when value of v corresponds to true, 0 otherwise */
773 static int istrue(var *v)
776 return (v->number == 0) ? 0 : 1;
778 return (v->string && *(v->string)) ? 1 : 0;
781 /* temporary variables allocator. Last allocated should be first freed */
782 static var *nvalloc(int n)
790 if ((cb->pos - cb->nv) + n <= cb->size) break;
795 size = (n <= MINNVBLOCK) ? MINNVBLOCK : n;
796 cb = xmalloc(sizeof(nvblock) + size * sizeof(var));
801 if (pb) pb->next = cb;
807 while (v < cb->pos) {
816 static void nvfree(var *v)
820 if (v < cb->nv || v >= cb->pos)
821 runtime_error(EMSG_INTERNAL_ERROR);
823 for (p=v; p<cb->pos; p++) {
824 if ((p->type & (VF_ARRAY|VF_CHILD)) == VF_ARRAY) {
825 clear_array(iamarray(p));
826 free(p->x.array->items);
829 if (p->type & VF_WALK)
836 while (cb->prev && cb->pos == cb->nv) {
841 /* ------- awk program text parsing ------- */
843 /* Parse the next token pointed to by global pos, place results into global ttt.
844 * If the token isn't expected, give up with a syntax error. Return the token class
846 static uint32_t next_token(uint32_t expected)
848 static int concat_inserted;
849 static uint32_t save_tclass, save_info;
850 static uint32_t ltclass = TC_OPTERM;
859 ttt.rollback = FALSE;
861 } else if (concat_inserted) {
862 concat_inserted = FALSE;
863 ttt.tclass = save_tclass;
864 ttt.info = save_info;
872 while (*p != '\n' && *p != '\0') p++;
880 } else if (*p == '\"') {
882 ttt.string = s = ++p;
884 if (*p == '\0' || *p == '\n')
885 syntax_error(EMSG_UNEXP_EOS);
886 *(s++) = nextchar(&p);
892 } else if ((expected & TC_REGEXP) && *p == '/') {
894 ttt.string = s = ++p;
896 if (*p == '\0' || *p == '\n')
897 syntax_error(EMSG_UNEXP_EOS);
898 if ((*s++ = *p++) == '\\') {
900 *(s-1) = bb_process_escape_sequence((const char **)&p);
901 if (*pp == '\\') *s++ = '\\';
902 if (p == pp) *s++ = *p++;
909 } else if (*p == '.' || isdigit(*p)) {
911 ttt.number = strtod(p, &p);
913 syntax_error(EMSG_UNEXP_TOKEN);
917 /* search for something known */
927 /* if token class is expected, token
928 * matches and it's not a longer word,
929 * then this is what we are looking for
931 if ((tc & (expected | TC_WORD | TC_NEWLINE)) &&
932 *tl == *p && strncmp(p, tl, l) == 0 &&
933 !((tc & TC_WORD) && isalnum_(*(p + l)))) {
943 /* it's a name (var/array/function),
944 * otherwise it's something wrong
947 syntax_error(EMSG_UNEXP_TOKEN);
950 while (isalnum_(*(++p))) {
955 /* also consume whitespace between functionname and bracket */
956 if (!(expected & TC_VARIABLE)) skip_spaces(&p);
969 /* skipping newlines in some cases */
970 if ((ltclass & TC_NOTERM) && (tc & TC_NEWLINE))
973 /* insert concatenation operator when needed */
974 if ((ltclass&TC_CONCAT1) && (tc&TC_CONCAT2) && (expected&TC_BINOP)) {
975 concat_inserted = TRUE;
977 save_info = ttt.info;
979 ttt.info = OC_CONCAT | SS | P(35);
984 ltclass = ttt.tclass;
986 /* Are we ready for this? */
987 if (! (ltclass & expected))
988 syntax_error((ltclass & (TC_NEWLINE | TC_EOF)) ?
989 EMSG_UNEXP_EOS : EMSG_UNEXP_TOKEN);
994 static void rollback_token(void)
999 static node *new_node(uint32_t info)
1003 n = xzalloc(sizeof(node));
1009 static node *mk_re_node(const char *s, node *n, regex_t *re)
1011 n->info = OC_REGEXP;
1014 xregcomp(re, s, REG_EXTENDED);
1015 xregcomp(re+1, s, REG_EXTENDED | REG_ICASE);
1020 static node *condition(void)
1022 next_token(TC_SEQSTART);
1023 return parse_expr(TC_SEQTERM);
1026 /* parse expression terminated by given argument, return ptr
1027 * to built subtree. Terminator is eaten by parse_expr */
1028 static node *parse_expr(uint32_t iexp)
1037 sn.r.n = glptr = NULL;
1038 xtc = TC_OPERAND | TC_UOPPRE | TC_REGEXP | iexp;
1040 while (!((tc = next_token(xtc)) & iexp)) {
1041 if (glptr && (ttt.info == (OC_COMPARE|VV|P(39)|2))) {
1042 /* input redirection (<) attached to glptr node */
1043 cn = glptr->l.n = new_node(OC_CONCAT|SS|P(37));
1045 xtc = TC_OPERAND | TC_UOPPRE;
1048 } else if (tc & (TC_BINOP | TC_UOPPOST)) {
1049 /* for binary and postfix-unary operators, jump back over
1050 * previous operators with higher priority */
1052 while ( ((ttt.info & PRIMASK) > (vn->a.n->info & PRIMASK2)) ||
1053 ((ttt.info == vn->info) && ((ttt.info & OPCLSMASK) == OC_COLON)) )
1055 if ((ttt.info & OPCLSMASK) == OC_TERNARY)
1057 cn = vn->a.n->r.n = new_node(ttt.info);
1059 if (tc & TC_BINOP) {
1061 xtc = TC_OPERAND | TC_UOPPRE | TC_REGEXP;
1062 if ((ttt.info & OPCLSMASK) == OC_PGETLINE) {
1064 next_token(TC_GETLINE);
1065 /* give maximum priority to this pipe */
1066 cn->info &= ~PRIMASK;
1067 xtc = TC_OPERAND | TC_UOPPRE | TC_BINOP | iexp;
1071 xtc = TC_OPERAND | TC_UOPPRE | TC_BINOP | iexp;
1076 /* for operands and prefix-unary operators, attach them
1079 cn = vn->r.n = new_node(ttt.info);
1081 xtc = TC_OPERAND | TC_UOPPRE | TC_REGEXP;
1082 if (tc & (TC_OPERAND | TC_REGEXP)) {
1083 xtc = TC_UOPPRE | TC_UOPPOST | TC_BINOP | TC_OPERAND | iexp;
1084 /* one should be very careful with switch on tclass -
1085 * only simple tclasses should be used! */
1090 if ((v = hash_search(ahash, ttt.string)) != NULL) {
1091 cn->info = OC_FNARG;
1092 cn->l.i = v->x.aidx;
1094 cn->l.v = newvar(ttt.string);
1096 if (tc & TC_ARRAY) {
1098 cn->r.n = parse_expr(TC_ARRTERM);
1105 v = cn->l.v = xzalloc(sizeof(var));
1107 setvar_i(v, ttt.number);
1109 setvar_s(v, ttt.string);
1113 mk_re_node(ttt.string, cn, xzalloc(sizeof(regex_t)*2));
1118 cn->r.f = newfunc(ttt.string);
1119 cn->l.n = condition();
1123 cn = vn->r.n = parse_expr(TC_SEQTERM);
1129 xtc = TC_OPERAND | TC_UOPPRE | TC_BINOP | iexp;
1133 cn->l.n = condition();
1142 /* add node to chain. Return ptr to alloc'd node */
1143 static node *chain_node(uint32_t info)
1148 seq->first = seq->last = new_node(0);
1150 if (seq->programname != programname) {
1151 seq->programname = programname;
1152 n = chain_node(OC_NEWSOURCE);
1153 n->l.s = xstrdup(programname);
1158 seq->last = n->a.n = new_node(OC_DONE);
1163 static void chain_expr(uint32_t info)
1167 n = chain_node(info);
1168 n->l.n = parse_expr(TC_OPTERM | TC_GRPTERM);
1169 if (ttt.tclass & TC_GRPTERM)
1173 static node *chain_loop(node *nn)
1175 node *n, *n2, *save_brk, *save_cont;
1177 save_brk = break_ptr;
1178 save_cont = continue_ptr;
1180 n = chain_node(OC_BR | Vx);
1181 continue_ptr = new_node(OC_EXEC);
1182 break_ptr = new_node(OC_EXEC);
1184 n2 = chain_node(OC_EXEC | Vx);
1187 continue_ptr->a.n = n2;
1188 break_ptr->a.n = n->r.n = seq->last;
1190 continue_ptr = save_cont;
1191 break_ptr = save_brk;
1196 /* parse group and attach it to chain */
1197 static void chain_group(void)
1203 c = next_token(TC_GRPSEQ);
1204 } while (c & TC_NEWLINE);
1206 if (c & TC_GRPSTART) {
1207 while (next_token(TC_GRPSEQ | TC_GRPTERM) != TC_GRPTERM) {
1208 if (ttt.tclass & TC_NEWLINE) continue;
1212 } else if (c & (TC_OPSEQ | TC_OPTERM)) {
1214 chain_expr(OC_EXEC | Vx);
1215 } else { /* TC_STATEMNT */
1216 switch (ttt.info & OPCLSMASK) {
1218 n = chain_node(OC_BR | Vx);
1219 n->l.n = condition();
1221 n2 = chain_node(OC_EXEC);
1223 if (next_token(TC_GRPSEQ | TC_GRPTERM | TC_ELSE)==TC_ELSE) {
1225 n2->a.n = seq->last;
1233 n = chain_loop(NULL);
1238 n2 = chain_node(OC_EXEC);
1239 n = chain_loop(NULL);
1241 next_token(TC_WHILE);
1242 n->l.n = condition();
1246 next_token(TC_SEQSTART);
1247 n2 = parse_expr(TC_SEMICOL | TC_SEQTERM);
1248 if (ttt.tclass & TC_SEQTERM) { /* for-in */
1249 if ((n2->info & OPCLSMASK) != OC_IN)
1250 syntax_error(EMSG_UNEXP_TOKEN);
1251 n = chain_node(OC_WALKINIT | VV);
1254 n = chain_loop(NULL);
1255 n->info = OC_WALKNEXT | Vx;
1257 } else { /* for (;;) */
1258 n = chain_node(OC_EXEC | Vx);
1260 n2 = parse_expr(TC_SEMICOL);
1261 n3 = parse_expr(TC_SEQTERM);
1271 n = chain_node(ttt.info);
1272 n->l.n = parse_expr(TC_OPTERM | TC_OUTRDR | TC_GRPTERM);
1273 if (ttt.tclass & TC_OUTRDR) {
1274 n->info |= ttt.info;
1275 n->r.n = parse_expr(TC_OPTERM | TC_GRPTERM);
1277 if (ttt.tclass & TC_GRPTERM)
1282 n = chain_node(OC_EXEC);
1287 n = chain_node(OC_EXEC);
1288 n->a.n = continue_ptr;
1291 /* delete, next, nextfile, return, exit */
1293 chain_expr(ttt.info);
1298 static void parse_program(char *p)
1307 while ((tclass = next_token(TC_EOF | TC_OPSEQ | TC_GRPSTART |
1308 TC_OPTERM | TC_BEGIN | TC_END | TC_FUNCDECL)) != TC_EOF) {
1310 if (tclass & TC_OPTERM)
1314 if (tclass & TC_BEGIN) {
1318 } else if (tclass & TC_END) {
1322 } else if (tclass & TC_FUNCDECL) {
1323 next_token(TC_FUNCTION);
1325 f = newfunc(ttt.string);
1326 f->body.first = NULL;
1328 while (next_token(TC_VARIABLE | TC_SEQTERM) & TC_VARIABLE) {
1329 v = findvar(ahash, ttt.string);
1330 v->x.aidx = (f->nargs)++;
1332 if (next_token(TC_COMMA | TC_SEQTERM) & TC_SEQTERM)
1339 } else if (tclass & TC_OPSEQ) {
1341 cn = chain_node(OC_TEST);
1342 cn->l.n = parse_expr(TC_OPTERM | TC_EOF | TC_GRPSTART);
1343 if (ttt.tclass & TC_GRPSTART) {
1347 chain_node(OC_PRINT);
1349 cn->r.n = mainseq.last;
1351 } else /* if (tclass & TC_GRPSTART) */ {
1359 /* -------- program execution part -------- */
1361 static node *mk_splitter(const char *s, tsplitter *spl)
1369 if ((n->info & OPCLSMASK) == OC_REGEXP) {
1373 if (strlen(s) > 1) {
1374 mk_re_node(s, n, re);
1376 n->info = (uint32_t) *s;
1382 /* use node as a regular expression. Supplied with node ptr and regex_t
1383 * storage space. Return ptr to regex (if result points to preg, it should
1384 * be later regfree'd manually
1386 static regex_t *as_regex(node *op, regex_t *preg)
1391 if ((op->info & OPCLSMASK) == OC_REGEXP) {
1392 return icase ? op->r.ire : op->l.re;
1395 s = getvar_s(evaluate(op, v));
1396 xregcomp(preg, s, icase ? REG_EXTENDED | REG_ICASE : REG_EXTENDED);
1402 /* gradually increasing buffer */
1403 static void qrealloc(char **b, int n, int *size)
1405 if (!*b || n >= *size)
1406 *b = xrealloc(*b, *size = n + (n>>1) + 80);
1409 /* resize field storage space */
1410 static void fsrealloc(int size)
1412 static int maxfields; /* = 0;*/
1415 if (size >= maxfields) {
1417 maxfields = size + 16;
1418 Fields = xrealloc(Fields, maxfields * sizeof(var));
1419 for (; i < maxfields; i++) {
1420 Fields[i].type = VF_SPECIAL;
1421 Fields[i].string = NULL;
1425 if (size < nfields) {
1426 for (i = size; i < nfields; i++) {
1433 static int awk_split(const char *s, node *spl, char **slist)
1438 regmatch_t pmatch[2];
1440 /* in worst case, each char would be a separate field */
1441 *slist = s1 = xzalloc(strlen(s) * 2 + 3);
1444 c[0] = c[1] = (char)spl->info;
1446 if (*getvar_s(V[RS]) == '\0') c[2] = '\n';
1448 if ((spl->info & OPCLSMASK) == OC_REGEXP) { /* regex split */
1450 l = strcspn(s, c+2);
1451 if (regexec(icase ? spl->r.ire : spl->l.re, s, 1, pmatch, 0) == 0
1452 && pmatch[0].rm_so <= l
1454 l = pmatch[0].rm_so;
1455 if (pmatch[0].rm_eo == 0) { l++; pmatch[0].rm_eo++; }
1457 pmatch[0].rm_eo = l;
1458 if (s[l]) pmatch[0].rm_eo++;
1464 s += pmatch[0].rm_eo;
1467 } else if (c[0] == '\0') { /* null split */
1473 } else if (c[0] != ' ') { /* single-character split */
1475 c[0] = toupper(c[0]);
1476 c[1] = tolower(c[1]);
1479 while ((s1 = strpbrk(s1, c))) {
1483 } else { /* space split */
1485 s = skip_whitespace(s);
1488 while (*s && !isspace(*s))
1496 static void split_f0(void)
1498 static char *fstrings = NULL;
1508 n = awk_split(getvar_s(V[F0]), &fsplitter.n, &fstrings);
1511 for (i = 0; i < n; i++) {
1512 Fields[i].string = nextword(&s);
1513 Fields[i].type |= (VF_FSTR | VF_USER | VF_DIRTY);
1516 /* set NF manually to avoid side effects */
1518 V[NF]->type = VF_NUMBER | VF_SPECIAL;
1519 V[NF]->number = nfields;
1522 /* perform additional actions when some internal variables changed */
1523 static void handle_special(var *v)
1527 const char *sep, *s;
1528 int sl, l, len, i, bsize;
1530 if (!(v->type & VF_SPECIAL))
1534 n = (int)getvar_i(v);
1537 /* recalculate $0 */
1538 sep = getvar_s(V[OFS]);
1542 for (i=0; i<n; i++) {
1543 s = getvar_s(&Fields[i]);
1546 memcpy(b+len, sep, sl);
1549 qrealloc(&b, len+l+sl, &bsize);
1550 memcpy(b+len, s, l);
1558 } else if (v == V[F0]) {
1559 is_f0_split = FALSE;
1561 } else if (v == V[FS]) {
1562 mk_splitter(getvar_s(v), &fsplitter);
1564 } else if (v == V[RS]) {
1565 mk_splitter(getvar_s(v), &rsplitter);
1567 } else if (v == V[IGNORECASE]) {
1571 n = getvar_i(V[NF]);
1572 setvar_i(V[NF], n > v-Fields ? n : v-Fields+1);
1573 /* right here v is invalid. Just to note... */
1577 /* step through func/builtin/etc arguments */
1578 static node *nextarg(node **pn)
1583 if (n && (n->info & OPCLSMASK) == OC_COMMA) {
1592 static void hashwalk_init(var *v, xhash *array)
1598 if (v->type & VF_WALK)
1602 w = v->x.walker = xzalloc(2 + 2*sizeof(char *) + array->glen);
1603 *w = *(w+1) = (char *)(w + 2);
1604 for (i=0; i<array->csize; i++) {
1605 hi = array->items[i];
1607 strcpy(*w, hi->name);
1614 static int hashwalk_next(var *v)
1622 setvar_s(v, nextword(w+1));
1626 /* evaluate node, return 1 when result is true, 0 otherwise */
1627 static int ptest(node *pattern)
1629 static var v; /* static: to save stack space? */
1631 return istrue(evaluate(pattern, &v));
1634 /* read next record from stream rsm into a variable v */
1635 static int awk_getline(rstream *rsm, var *v)
1638 regmatch_t pmatch[2];
1639 int a, p, pp=0, size;
1640 int fd, so, eo, r, rp;
1643 /* we're using our own buffer since we need access to accumulating
1646 fd = fileno(rsm->F);
1651 c = (char) rsplitter.n.info;
1654 if (! m) qrealloc(&m, 256, &size);
1660 if ((rsplitter.n.info & OPCLSMASK) == OC_REGEXP) {
1661 if (regexec(icase ? rsplitter.n.r.ire : rsplitter.n.l.re,
1662 b, 1, pmatch, 0) == 0) {
1663 so = pmatch[0].rm_so;
1664 eo = pmatch[0].rm_eo;
1668 } else if (c != '\0') {
1669 s = strchr(b+pp, c);
1670 if (! s) s = memchr(b+pp, '\0', p - pp);
1677 while (b[rp] == '\n')
1679 s = strstr(b+rp, "\n\n");
1682 while (b[eo] == '\n') eo++;
1690 memmove(m, (const void *)(m+a), p+1);
1695 qrealloc(&m, a+p+128, &size);
1698 p += safe_read(fd, b+p, size-p-1);
1702 setvar_i(V[ERRNO], errno);
1711 c = b[so]; b[so] = '\0';
1715 c = b[eo]; b[eo] = '\0';
1716 setvar_s(V[RT], b+so);
1728 static int fmt_num(char *b, int size, const char *format, double n, int int_as_int)
1732 const char *s = format;
1734 if (int_as_int && n == (int)n) {
1735 r = snprintf(b, size, "%d", (int)n);
1737 do { c = *s; } while (c && *++s);
1738 if (strchr("diouxX", c)) {
1739 r = snprintf(b, size, format, (int)n);
1740 } else if (strchr("eEfgG", c)) {
1741 r = snprintf(b, size, format, n);
1743 runtime_error(EMSG_INV_FMT);
1750 /* formatted output into an allocated buffer, return ptr to buffer */
1751 static char *awk_printf(node *n)
1756 int i, j, incr, bsize;
1761 fmt = f = xstrdup(getvar_s(evaluate(nextarg(&n), v)));
1766 while (*f && (*f != '%' || *(++f) == '%'))
1768 while (*f && !isalpha(*f)) {
1770 syntax_error("%*x formats are not supported");
1774 incr = (f - s) + MAXVARFMT;
1775 qrealloc(&b, incr + i, &bsize);
1780 arg = evaluate(nextarg(&n), v);
1783 if (c == 'c' || !c) {
1784 i += sprintf(b+i, s, is_numeric(arg) ?
1785 (char)getvar_i(arg) : *getvar_s(arg));
1787 } else if (c == 's') {
1789 qrealloc(&b, incr+i+strlen(s1), &bsize);
1790 i += sprintf(b+i, s, s1);
1793 i += fmt_num(b+i, incr, s, getvar_i(arg), FALSE);
1797 /* if there was an error while sprintf, return value is negative */
1801 b = xrealloc(b, i + 1);
1808 /* common substitution routine
1809 * replace (nm) substring of (src) that matches (rn) with (repl), store
1810 * result into (dest), return number of substitutions. If nm=0, replace
1811 * all matches. If src or dest is NULL, use $0. If ex=TRUE, enable
1812 * subexpression matching (\1-\9)
// rn   - node that as_regex() turns into the pattern to search for
// repl - replacement text; '&' (and, when ex is set, a digit) in it is
//        handled specially, see the replacement loop below
// nm   - passed by the callers in this file as 0, 1 or a user-supplied count
// src  - variable to read from; NULL means V[F0]
// dest - variable to store into; NULL means V[F0]
// ex   - nonzero enables the digit back-references
1814 static int awk_sub(node *rn, const char *repl, int nm, var *src, var *dest, int ex)
1819 int c, i, j, di, rl, so, eo, nbs, n, dssize;
// slot 0 is the whole match, slots 1-9 are the subexpressions
1820 regmatch_t pmatch[10];
// as_regex() may compile into the caller-supplied sreg; see regfree() below
1823 re = as_regex(rn, &sreg);
1824 if (! src) src = V[F0];
1825 if (! dest) dest = V[F0];
// sp walks through the source string; once it has moved past the start,
// REG_NOTBOL keeps '^' from matching in the middle of the string
1830 while (regexec(re, sp, 10, pmatch, sp==getvar_s(src) ? 0:REG_NOTBOL) == 0) {
1831 so = pmatch[0].rm_so;
1832 eo = pmatch[0].rm_eo;
// copy everything up to the end of this match into the output (ds, offset
// di), reserving rl extra bytes
1834 qrealloc(&ds, di + eo + rl, &dssize);
1835 memcpy(ds + di, sp, eo);
// walk the replacement text character by character
1841 for (s = repl; *s; s++) {
// '&' or, with ex set, a digit: insert matched text instead of the character
1847 if (c == '&' || (ex && c >= '0' && c <= '9')) {
// nbs presumably counts the backslashes seen just before c; this rewinds
// the output over part of them - TODO confirm against the full source
1848 di -= ((nbs + 3) >> 1);
// append the text of match slot j
1857 n = pmatch[j].rm_eo - pmatch[j].rm_so;
1858 qrealloc(&ds, di + rl + n, &dssize);
1859 memcpy(ds + di, sp + pmatch[j].rm_so, n);
// copy one source character and step past it; stop at the terminating NUL
1870 if (! (ds[di++] = *sp++)) break;
// append whatever is left of the source after the last match
1874 qrealloc(&ds, di + strlen(sp), &dssize);
1875 strcpy(ds + di, sp);
// release the regex only when it lives in our own sreg
1877 if (re == &sreg) regfree(re);
// Execute the builtin function call described by op and store its value
// in res.
//
// Up to four arguments are collected first: an[] holds the argument nodes,
// av[] the evaluated values and as[] their string forms. Bits of op->info
// (copied into isr) decide which arguments get evaluated and converted.
1881 static var *exec_builtin(node *op, var *res)
1888 regmatch_t pmatch[2];
1890 static tsplitter tspl;
1899 isr = info = op->info;
// the optional 3rd and 4th values default to NULL
1902 av[2] = av[3] = NULL;
// on exit i is the number of arguments consumed, NOT a valid argument index
1903 for (i=0 ; i<4 && op ; i++) {
1904 an[i] = nextarg(&op);
1905 if (isr & 0x09000000) av[i] = evaluate(an[i], &tv[i]);
1906 if (isr & 0x08000000) as[i] = getvar_s(av[i]);
// the top two bits of info hold the minimum argument count
1911 if (nargs < (info >> 30))
1912 runtime_error(EMSG_TOO_FEW_ARGS);
1914 switch (info & OPNMASK) {
1917 #if ENABLE_FEATURE_AWK_MATH
// atan2(y, x): y is the FIRST argument. This used to read av[i], but i is
// the loop's exit index here, so a two-argument call fetched av[2] (NULL).
1918 setvar_i(res, atan2(getvar_i(av[0]), getvar_i(av[1])));
1920 runtime_error(EMSG_NO_MATH);
// separator argument: a regexp node is used directly, anything else is
// evaluated to a string and converted with mk_splitter()
1926 spl = (an[2]->info & OPCLSMASK) == OC_REGEXP ?
1927 an[2] : mk_splitter(getvar_s(evaluate(an[2], &tv[2])), &tspl);
1932 n = awk_split(as[0], spl, &s);
// store the n pieces in the target array under indices 1..n
1934 clear_array(iamarray(av[1]));
1935 for (i=1; i<=n; i++)
1936 setari_u(av[1], i, nextword(&s1));
// convert the 1-based start position to a 0-based offset clamped to [0, l]
1943 i = getvar_i(av[1]) - 1;
1944 if (i>l) i=l; if (i<0) i=0;
// without a length argument take everything from i up to l
1945 n = (nargs > 2) ? getvar_i(av[2]) : l-i;
1948 strncpy(s, as[0]+i, n);
// bitwise builtins: operands are converted to long first
1954 setvar_i(res, (long)getvar_i(av[0]) & (long)getvar_i(av[1]));
1958 setvar_i(res, ~(long)getvar_i(av[0]));
1962 setvar_i(res, (long)getvar_i(av[0]) << (long)getvar_i(av[1]));
1966 setvar_i(res, (long)getvar_i(av[0]) | (long)getvar_i(av[1]));
// the right shift is done on unsigned long
1970 setvar_i(res, (long)((unsigned long)getvar_i(av[0]) >> (unsigned long)getvar_i(av[1])));
1974 setvar_i(res, (long)getvar_i(av[0]) ^ (long)getvar_i(av[1]));
// case conversion runs in place on a private copy of the argument
1984 s1 = s = xstrdup(as[0]);
1986 *s1 = (*to_xxx)(*s1);
// l = number of start offsets at which a needle of length ll can still fit
1995 l = strlen(as[0]) - ll;
1996 if (ll > 0 && l >= 0) {
// strstr() search; the result is the 1-based position of the hit
1998 s = strstr(as[0], as[1]);
1999 if (s) n = (s - as[0]) + 1;
2001 /* this piece of code is terribly slow and
2002 * really should be rewritten
// case-insensitive search: try every start offset in turn
2004 for (i=0; i<=l; i++) {
2005 if (strncasecmp(as[0]+i, as[1], ll) == 0) {
2017 tt = getvar_i(av[1]);
2020 //s = (nargs > 0) ? as[0] : "%a %b %d %H:%M:%S %Z %Y";
// with no arguments a fixed default date format is used
2021 i = strftime(buf, MAXVARFMT,
2022 ((nargs > 0) ? as[0] : "%a %b %d %H:%M:%S %Z %Y"),
// regex match of the first argument against the pattern in the second
2029 re = as_regex(an[1], &sreg);
2030 n = regexec(re, as[0], 1, pmatch, 0);
// offsets giving RSTART 0 and RLENGTH -1 (presumably the no-match case;
// the guarding condition is not visible here)
2035 pmatch[0].rm_so = 0;
2036 pmatch[0].rm_eo = -1;
2038 setvar_i(newvar("RSTART"), pmatch[0].rm_so);
2039 setvar_i(newvar("RLENGTH"), pmatch[0].rm_eo - pmatch[0].rm_so);
2040 setvar_i(res, pmatch[0].rm_so);
2041 if (re == &sreg) regfree(re);
// substitution with ex=TRUE: count from the 3rd argument, source is the
// 4th, and the new string is stored in res
2045 awk_sub(an[0], as[1], getvar_i(av[2]), av[3], res, TRUE);
// in-place substitutions on the 3rd argument, called with nm=0 and nm=1;
// res receives the substitution count
2049 setvar_i(res, awk_sub(an[0], as[1], 0, av[2], av[2], FALSE));
2053 setvar_i(res, awk_sub(an[0], as[1], 1, av[2], av[2], FALSE));
2062 * Evaluate node - the heart of the program. Supplied with subtree
2063 * and place where to store result. returns ptr to result.
// XC() drops the low 8 bits of an opclass value; it is applied both to the
// switch selector and to every case label in evaluate().
2065 #define XC(n) ((n) >> 8)
// Evaluate the node op, store the value in res and return a pointer to the
// result variable. The function calls itself for operands and sub-trees.
2067 static var *evaluate(node *op, var *res)
2069 /* This procedure is recursive so we should count every byte */
2070 static var *fnargs = NULL;
2071 static unsigned seed = 1;
2072 static regex_t sreg;
2094 return setvar_s(res, NULL);
/* opn: the operator-specific bits of the node's info word */
2100 opn = (short)(opinfo & OPNMASK);
/* publish the line of the node being executed */
2101 lineno = op->lineno;
2103 /* execute inevitable things */
/* flags in opinfo request operand work up front: OF_RES1/OF_RES2 evaluate
 * the left/right operand into the temporaries v1 and v1+1, OF_STR1/OF_STR2
 * fetch their string forms, OF_NUM1 the numeric form of the left one */
2105 if (opinfo & OF_RES1) X.v = L.v = evaluate(op1, v1);
2106 if (opinfo & OF_RES2) R.v = evaluate(op->r.n, v1+1);
2107 if (opinfo & OF_STR1) L.s = getvar_s(L.v);
2108 if (opinfo & OF_STR2) R.s = getvar_s(R.v);
2109 if (opinfo & OF_NUM1) L.d = getvar_i(L.v);
2111 switch (XC(opinfo & OPCLSMASK)) {
2113 /* -- iterative node type -- */
2117 if ((op1->info & OPCLSMASK) == OC_COMMA) {
2118 /* it's range pattern */
/* OF_CHECKED in op->info remembers that the range is open: it is set once
 * the start pattern has matched and cleared when the end pattern matches */
2119 if ((opinfo & OF_CHECKED) || ptest(op1->l.n)) {
2120 op->info |= OF_CHECKED;
2121 if (ptest(op1->r.n))
2122 op->info &= ~OF_CHECKED;
/* plain pattern: continue at a.n when it matches, at r.n otherwise */
2129 op = (ptest(op1)) ? op->a.n : op->r.n;
2133 /* just evaluate an expression, also used as unconditional jump */
2137 /* branch, used in if-else and various loops */
2139 op = istrue(L.v) ? op->a.n : op->r.n;
2142 /* initialize for-in loop */
2143 case XC( OC_WALKINIT ):
2144 hashwalk_init(L.v, iamarray(R.v));
2147 /* get next array item */
2148 case XC( OC_WALKNEXT ):
2149 op = hashwalk_next(L.v) ? op->a.n : op->r.n;
2152 case XC( OC_PRINT ):
2153 case XC( OC_PRINTF ):
/* redirected output: R.s names the target, which is looked up (or
 * created) with newfile() and then opened as a pipe or as a file */
2156 X.rsm = newfile(R.s);
2159 X.rsm->F = popen(R.s, "w");
2160 if (X.rsm->F == NULL)
2161 bb_perror_msg_and_die("popen");
/* opn 'w' truncates the file, any other value appends */
2164 X.rsm->F = xfopen(R.s, opn=='w' ? "w" : "a");
2170 if ((opinfo & OPCLSMASK) == OC_PRINT) {
/* print the whole record (V[F0]) */
2172 fputs(getvar_s(V[F0]), X.F);
/* otherwise print each argument in turn; values whose primary type is
 * number are formatted through OFMT, with integral values shown as
 * integers (int_as_int = TRUE) */
2175 L.v = evaluate(nextarg(&op1), v1);
2176 if (L.v->type & VF_NUMBER) {
2177 fmt_num(buf, MAXVARFMT, getvar_s(V[OFMT]),
2178 getvar_i(L.v), TRUE);
2181 fputs(getvar_s(L.v), X.F);
/* OFS between arguments, ORS at the end */
2184 if (op1) fputs(getvar_s(V[OFS]), X.F);
2187 fputs(getvar_s(V[ORS]), X.F);
2189 } else { /* OC_PRINTF */
2190 L.s = awk_printf(op1);
2197 case XC( OC_DELETE ):
/* the operand must be a variable or a function argument */
2198 X.info = op1->info & OPCLSMASK;
2199 if (X.info == OC_VAR) {
2201 } else if (X.info == OC_FNARG) {
2202 R.v = &fnargs[op1->l.i];
2204 runtime_error(EMSG_NOT_ARRAY);
/* remove the single element named by the evaluated subscript ... */
2209 L.s = getvar_s(evaluate(op1->r.n, v1));
2210 hash_remove(iamarray(R.v), L.s);
/* ... or empty the whole array */
2212 clear_array(iamarray(R.v));
2216 case XC( OC_NEWSOURCE ):
2217 programname = op->l.s;
2220 case XC( OC_RETURN ):
2224 case XC( OC_NEXTFILE ):
2235 /* -- recursive node type -- */
2243 case XC( OC_FNARG ):
/* function arguments are addressed by index into fnargs */
2244 L.v = &fnargs[op->l.i];
/* with a subscript node present, look the element up in the array;
 * otherwise the variable itself is the result */
2246 res = op->r.n ? findvar(iamarray(L.v), R.s) : L.v;
/* membership test: 1 if the key L.s exists in array R.v, else 0 */
2250 setvar_i(res, hash_search(iamarray(R.v), L.s) ? 1 : 0);
2253 case XC( OC_REGEXP ):
/* a bare regexp is matched against the current record */
2255 L.s = getvar_s(V[F0]);
2258 case XC( OC_MATCH ):
2261 X.re = as_regex(op1, &sreg);
2262 R.i = regexec(X.re, L.s, 0, NULL, 0);
2263 if (X.re == &sreg) regfree(X.re);
/* regexec() returns 0 on a match; opn '!' inverts the result */
2264 setvar_i(res, (R.i == 0 ? 1 : 0) ^ (opn == '!' ? 1 : 0));
2268 /* if source is a temporary string, just relink it to dest */
2269 if (R.v == v1+1 && R.v->string) {
2270 res = setvar_p(L.v, R.v->string);
2273 res = copyvar(L.v, R.v);
2277 case XC( OC_TERNARY ):
/* the right child must be the ':' node carrying both alternatives */
2278 if ((op->r.n->info & OPCLSMASK) != OC_COLON)
2279 runtime_error(EMSG_POSSIBLE_ERROR);
2280 res = evaluate(istrue(L.v) ? op->r.n->l.n : op->r.n->r.n, res);
/* user function call: the function must have a body */
2284 if (! op->r.f->body.first)
2285 runtime_error(EMSG_UNDEF_FUNC);
/* allocate nargs+1 variables for the argument frame; X.v keeps its start */
2287 X.v = R.v = nvalloc(op->r.f->nargs+1);
2289 L.v = evaluate(nextarg(&op1), v1);
/* mark the slot as a function argument and link it to the actual parameter */
2291 R.v->type |= VF_CHILD;
2292 R.v->x.parent = L.v;
/* stop once every declared parameter has been filled */
2293 if (++R.v - X.v >= op->r.f->nargs)
2301 res = evaluate(op->r.f->body.first, res);
2308 case XC( OC_GETLINE ):
2309 case XC( OC_PGETLINE ):
2311 X.rsm = newfile(L.s);
2313 if ((opinfo & OPCLSMASK) == OC_PGETLINE) {
2314 X.rsm->F = popen(L.s, "r");
2315 X.rsm->is_pipe = TRUE;
2317 X.rsm->F = fopen(L.s, "r"); /* not xfopen! */
/* no explicit source: read from the current input file, opening the next
 * one if none is open yet */
2321 if (! iF) iF = next_input_file();
2326 setvar_i(V[ERRNO], errno);
2334 L.i = awk_getline(X.rsm, R.v);
2344 /* simple builtins */
2345 case XC( OC_FBLTIN ):
/* random value in [0, 1] */
2353 R.d = (double)rand() / (double)RAND_MAX;
2355 #if ENABLE_FEATURE_AWK_MATH
2381 runtime_error(EMSG_NO_MATH);
/* new seed: the argument if one was given, else the current time */
2386 seed = op1 ? (unsigned)L.d : (unsigned)time(NULL);
2396 L.s = getvar_s(V[F0]);
/* system() runs only when ENABLE_FEATURE_ALLOW_EXEC is set and the command
 * is non-empty; the result is system()'s return value shifted right by 8
 * (the exit code in the usual wait-status encoding) */
2402 R.d = (ENABLE_FEATURE_ALLOW_EXEC && L.s && *L.s)
2403 ? (system(L.s) >> 8) : 0;
2411 X.rsm = newfile(L.s);
/* look up the stream by name, close it the way it was opened, then free
 * its buffer and drop its entry from fdhash */
2420 X.rsm = (rstream *)hash_search(fdhash, L.s);
2422 R.i = X.rsm->is_pipe ? pclose(X.rsm->F) : fclose(X.rsm->F);
2423 free(X.rsm->buffer);
2424 hash_remove(fdhash, L.s);
2427 setvar_i(V[ERRNO], errno);
2434 case XC( OC_BUILTIN ):
2435 res = exec_builtin(op, res);
2438 case XC( OC_SPRINTF ):
2439 setvar_p(res, awk_printf(op1));
2442 case XC( OC_UNARY ):
2444 L.d = R.d = getvar_i(R.v);
/* logical not */
2459 L.d = istrue(X.v) ? 0 : 1;
2470 case XC( OC_FIELD ):
2471 R.i = (int)getvar_i(R.v);
/* field numbers are 1-based, Fields[] is 0-based */
2479 res = &Fields[R.i-1];
2483 /* concatenation (" ") and index joining (",") */
2484 case XC( OC_CONCAT ):
2485 case XC( OC_COMMA ):
/* opn is reused here as the size needed for both strings */
2486 opn = strlen(L.s) + strlen(R.s) + 2;
/* index joining additionally needs room for SUBSEP */
2489 if ((opinfo & OPCLSMASK) == OC_COMMA) {
2490 L.s = getvar_s(V[SUBSEP]);
2491 X.s = xrealloc(X.s, opn + strlen(L.s));
/* short-circuit logic: the right operand is tested only when the left one
 * does not already decide the result */
2499 setvar_i(res, istrue(L.v) ? ptest(op->r.n) : 0);
2503 setvar_i(res, istrue(L.v) ? 1 : ptest(op->r.n));
2506 case XC( OC_BINARY ):
2507 case XC( OC_REPLACE ):
2508 R.d = getvar_i(R.v);
2520 if (R.d == 0) runtime_error(EMSG_DIV_BY_ZERO);
2524 #if ENABLE_FEATURE_AWK_MATH
2525 L.d = pow(L.d, R.d);
2527 runtime_error(EMSG_NO_MATH);
2531 if (R.d == 0) runtime_error(EMSG_DIV_BY_ZERO);
/* remainder via truncation of the quotient.
 * NOTE(review): the (int) cast is undefined when L.d / R.d does not fit
 * in an int - confirm whether fmod() would be appropriate here. */
2532 L.d -= (int)(L.d / R.d) * R.d;
/* OC_BINARY stores into res, OC_REPLACE stores into the variable in X.v */
2535 res = setvar_i(((opinfo&OPCLSMASK) == OC_BINARY) ? res : X.v, L.d);
2538 case XC( OC_COMPARE ):
/* numeric comparison only when both operands are numeric; otherwise the
 * string forms are compared, ignoring case when icase is set */
2539 if (is_numeric(L.v) && is_numeric(R.v)) {
2540 L.d = getvar_i(L.v) - getvar_i(R.v);
2542 L.s = getvar_s(L.v);
2543 R.s = getvar_s(R.v);
2544 L.d = icase ? strcasecmp(L.s, R.s) : strcmp(L.s, R.s);
/* bit 0 of opn selects whether the test result is used as is or negated */
2546 switch (opn & 0xfe) {
2557 setvar_i(res, (opn & 0x1 ? R.i : !R.i) ? 1 : 0);
2561 runtime_error(EMSG_POSSIBLE_ERROR);
2563 if ((opinfo & OPCLSMASK) <= SHIFT_TIL_THIS)
2565 if ((opinfo & OPCLSMASK) >= RECUR_FROM_THIS)
2575 /* -------- main & co. -------- */
/*
 * Shutdown path: evaluates the END sequence and then closes every stream
 * in fdhash that was opened as a pipe. r is the requested exit status
 * (its use is outside the lines visible here).
 */
2577 static int awk_exit(int r)
2588 evaluate(endseq.first, &tv);
2591 /* waiting for children */
/* visit every bucket of the stream hash */
2592 for (i = 0; i < fdhash->csize; i++) {
2593 hi = fdhash->items[i];
/* only open pipe streams need pclose(), which waits for the child */
2595 if (hi->data.rs.F && hi->data.rs.is_pipe)
2596 pclose(hi->data.rs.F);
2604 /* if expr looks like "var=value", perform assignment and return 1,
2605 * otherwise return 0 */
2606 static int is_assignment(const char *expr)
2608 char *exprc, *s, *s0, *s1;
/* work on a private, writable copy of expr */
2610 exprc = xstrdup(expr);
/* not an assignment unless the first character passes isalnum_() and
 * there is an '=' somewhere in the string */
2611 if (!isalnum_(*exprc) || (s = strchr(exprc, '=')) == NULL) {
/* rewrite the value in place, one nextchar() result at a time */
2619 *(s1++) = nextchar(&s);
/* exprc now holds the variable name, s0 the processed value */
2622 setvar_u(newvar(exprc), s0);
2627 /* switch to next input file */
/*
 * Closes the current input stream, resets the shared read state and
 * advances ARGIND through ARGV until an entry can be opened. Entries that
 * are empty or that is_assignment() accepts as "var=value" are not opened
 * as files.
 */
2628 static rstream *next_input_file(void)
2632 const char *fname, *ind;
/* set once an ARGV entry has been used as an input file */
2633 static int files_happen = FALSE;
2635 if (rsm.F) fclose(rsm.F);
/* reset the read position of the shared stream state */
2637 rsm.pos = rsm.adv = 0;
/* no ARGV entries left to try */
2640 if (getvar_i(V[ARGIND])+1 >= getvar_i(V[ARGC])) {
/* step ARGIND and fetch ARGV[ARGIND] */
2646 ind = getvar_s(incvar(V[ARGIND]));
2647 fname = getvar_s(findvar(iamarray(V[ARGV]), ind));
/* is_assignment() performs the assignment as a side effect when it matches */
2648 if (fname && *fname && !is_assignment(fname))
2649 F = afopen(fname, "r");
2653 files_happen = TRUE;
2654 setvar_s(V[FILENAME], fname);
/*
 * Applet entry point: sets up the interpreter state, handles the
 * -F/-v/-f/-W options, parses the program, fills in ARGC/ARGV, runs the
 * BEGIN sequence, then feeds every input record to the main sequence and
 * finishes through awk_exit().
 */
2659 int awk_main(int argc, char **argv);
2660 int awk_main(int argc, char **argv)
2663 char *opt_F, *opt_W;
2664 llist_t *opt_v = NULL;
/* the name/value tables are walked with nextword(), which needs non-const
 * pointers */
2669 char *vnames = (char *)vNames; /* cheat */
2670 char *vvalues = (char *)vValues;
2672 /* Undo busybox.c, or else strtod may eat ','! This breaks parsing:
2673 * $1,$2 == '$1,' '$2', NOT '$1' ',' '$2' */
2674 if (ENABLE_LOCALE_SUPPORT)
2675 setlocale(LC_NUMERIC, "C");
2679 /* allocate global buffer */
2680 buf = xmalloc(MAXVARFMT + 1);
/* one hash table each for variables (vhash), ahash, redirect streams
 * (fdhash) and functions (fnhash) */
2682 vhash = hash_init();
2683 ahash = hash_init();
2684 fdhash = hash_init();
2685 fnhash = hash_init();
2687 /* initialize variables */
2688 for (i = 0; *vnames; i++) {
2689 V[i] = v = newvar(nextword(&vnames));
/* a '\377' in the value table means "no initial string value" */
2690 if (*vvalues != '\377')
2691 setvar_s(v, nextword(&vvalues));
/* a '*' at the current position of the name table flags the variable just
 * created as VF_SPECIAL */
2695 if (*vnames == '*') {
2696 v->type |= VF_SPECIAL;
/* apply the special handling for the initial FS and RS values */
2701 handle_special(V[FS]);
2702 handle_special(V[RS]);
/* pre-register the three standard streams under their /dev names */
2704 newfile("/dev/stdin")->F = stdin;
2705 newfile("/dev/stdout")->F = stdout;
2706 newfile("/dev/stderr")->F = stderr;
2708 /* Huh, people report that sometimes environ is NULL. Oh well. */
/* copy the environment into the ENVIRON array: each entry is duplicated
 * and located at its '=' so that name and value can be stored separately */
2709 if (environ) for (envp = environ; *envp; envp++) {
2710 char *s = xstrdup(*envp);
2711 char *s1 = strchr(s, '=');
2714 setvar_u(findvar(iamarray(V[ENVIRON]), s), s1);
/* "v::" makes -v accumulate into the list opt_v */
2718 opt_complementary = "v::";
2719 opt = getopt32(argc, argv, "F:v:f:W:", &opt_F, &opt_v, &programname, &opt_W);
2722 if (opt & 0x1) setvar_s(V[FS], opt_F); // -F
2723 while (opt_v) { /* -v */
/* every -v argument must have the form var=value */
2724 if (!is_assignment(llist_pop(&opt_v)))
2727 if (opt & 0x4) { // -f
/* NOTE(review): self-initialisation only silences a compiler warning;
 * s is assigned on both read paths below */
2728 char *s = s; /* die, gcc, die */
2729 FILE *from_file = afopen(programname, "r");
2730 /* one byte is reserved for some trick in next_token */
/* seekable file: size it with ftell() and read it in one go.
 * NOTE(review): the ftell() and fread() results are used unchecked in the
 * lines visible here. */
2731 if (fseek(from_file, 0, SEEK_END) == 0) {
2732 flen = ftell(from_file);
2733 s = xmalloc(flen + 4);
2734 fseek(from_file, 0, SEEK_SET);
2735 i = 1 + fread(s + 1, 1, flen, from_file);
/* not seekable: grow the buffer and read in chunks of up to 4094 bytes
 * until fread() returns 0 */
2737 for (i = j = 1; j > 0; i += j) {
2738 s = xrealloc(s, i + 4096);
2739 j = fread(s + i, 1, 4094, from_file);
/* the program text starts after the reserved first byte */
2744 parse_program(s + 1);
2746 } else { // no -f: take program from 1st parameter
2749 programname = "cmd. line";
2750 parse_program(*argv++);
2753 if (opt & 0x8) // -W
2754 bb_error_msg("warning: unrecognized option '-W %s' ignored", opt_W);
2756 /* fill in ARGV array */
/* ARGV[0] is always "awk"; the remaining arguments follow from index 1 */
2757 setvar_i(V[ARGC], argc + 1);
2758 setari_u(V[ARGV], 0, "awk");
2761 setari_u(V[ARGV], ++i, *argv++);
/* run the BEGIN sequence; with neither main rules nor an END sequence
 * there is nothing left to do */
2763 evaluate(beginseq.first, &tv);
2764 if (!mainseq.first && !endseq.first)
2765 awk_exit(EXIT_SUCCESS);
2767 /* input file could already be opened in BEGIN block */
2768 if (!iF) iF = next_input_file();
2770 /* passing through input files */
/* FNR restarts at 0 for every input file */
2773 setvar_i(V[FNR], 0);
/* awk_getline() returns a positive value for each record read */
2775 while ((i = awk_getline(iF, V[F0])) > 0) {
2779 evaluate(mainseq.first, &tv);
/* report a read error using the message for errno */
2786 runtime_error(strerror(errno));
2788 iF = next_input_file();
2791 awk_exit(EXIT_SUCCESS);