2 * awkgram.y --- yacc/bison parser
6 * Copyright (C) 1986, 1988, 1989, 1991-2012 the Free Software Foundation, Inc.
8 * This file is part of GAWK, the GNU implementation of the
9 * AWK Programming Language.
11 * GAWK is free software; you can redistribute it and/or modify
12 * it under the terms of the GNU General Public License as published by
13 * the Free Software Foundation; either version 3 of the License, or
14 * (at your option) any later version.
16 * GAWK is distributed in the hope that it will be useful,
17 * but WITHOUT ANY WARRANTY; without even the implied warranty of
18 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
19 * GNU General Public License for more details.
21 * You should have received a copy of the GNU General Public License
22 * along with this program; if not, write to the Free Software
23 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
33 #if defined(__STDC__) && __STDC__ < 1 /* VMS weirdness, maybe elsewhere */
37 static void yyerror(const char *m, ...) ATTRIBUTE_PRINTF_1;
38 static void error_ln(int line, const char *m, ...) ATTRIBUTE_PRINTF_2;
39 static void lintwarn_ln(int line, const char *m, ...) ATTRIBUTE_PRINTF_2;
40 static void warning_ln(int line, const char *m, ...) ATTRIBUTE_PRINTF_2;
41 static char *get_src_buf(void);
42 static int yylex(void);
44 static INSTRUCTION *snode(INSTRUCTION *subn, INSTRUCTION *op);
45 static int func_install(INSTRUCTION *fp, INSTRUCTION *def);
46 static void pop_params(NODE *params);
47 static NODE *make_param(char *pname);
48 static NODE *mk_rexp(INSTRUCTION *exp);
49 static void append_param(char *pname);
50 static int dup_parms(INSTRUCTION *fp, NODE *func);
51 static void param_sanity(INSTRUCTION *arglist);
52 static int parms_shadow(INSTRUCTION *pc, int *shadow);
53 static int isnoeffect(OPCODE type);
54 static INSTRUCTION *make_assignable(INSTRUCTION *ip);
55 static void dumpintlstr(const char *str, size_t len);
56 static void dumpintlstr2(const char *str1, size_t len1, const char *str2, size_t len2);
57 static int isarray(NODE *n);
58 static int include_source(INSTRUCTION *file);
59 static void next_sourcefile(void);
60 static char *tokexpand(void);
62 #define instruction(t) bcalloc(t, 1, 0)
64 static INSTRUCTION *mk_program(void);
65 static INSTRUCTION *append_rule(INSTRUCTION *pattern, INSTRUCTION *action);
66 static INSTRUCTION *mk_condition(INSTRUCTION *cond, INSTRUCTION *ifp, INSTRUCTION *true_branch,
67 INSTRUCTION *elsep, INSTRUCTION *false_branch);
68 static INSTRUCTION *mk_expression_list(INSTRUCTION *list, INSTRUCTION *s1);
69 static INSTRUCTION *mk_for_loop(INSTRUCTION *forp, INSTRUCTION *init, INSTRUCTION *cond,
70 INSTRUCTION *incr, INSTRUCTION *body);
71 static void fix_break_continue(INSTRUCTION *list, INSTRUCTION *b_target, INSTRUCTION *c_target);
72 static INSTRUCTION *mk_binary(INSTRUCTION *s1, INSTRUCTION *s2, INSTRUCTION *op);
73 static INSTRUCTION *mk_boolean(INSTRUCTION *left, INSTRUCTION *right, INSTRUCTION *op);
74 static INSTRUCTION *mk_assignment(INSTRUCTION *lhs, INSTRUCTION *rhs, INSTRUCTION *op);
75 static INSTRUCTION *mk_getline(INSTRUCTION *op, INSTRUCTION *opt_var, INSTRUCTION *redir, int redirtype);
76 static NODE *make_regnode(int type, NODE *exp);
77 static int count_expressions(INSTRUCTION **list, int isarg);
78 static INSTRUCTION *optimize_assignment(INSTRUCTION *exp);
79 static void add_lint(INSTRUCTION *list, LINTTYPE linttype);
81 enum defref { FUNC_DEFINE, FUNC_USE };
82 static void func_use(const char *name, enum defref how);
83 static void check_funcs(void);
84 static void free_bcpool(INSTRUCTION *pl);
86 static ssize_t read_one_line(int fd, void *buffer, size_t count);
87 static int one_line_close(int fd);
89 static void (*install_func)(char *) = NULL;
91 static int want_source = FALSE;
92 static int want_regexp; /* lexical scanning kludge */
93 static int can_return; /* parsing kludge */
96 const char *const ruletab[] = {
105 static int in_print = FALSE; /* lexical scanning kludge for print */
106 static int in_parens = 0; /* lexical scanning kludge for print */
107 static int sub_counter = 0; /* array dimension counter for use in delete */
108 static char *lexptr = NULL; /* pointer to next char during parsing */
110 static char *lexptr_begin; /* keep track of where we were for error msgs */
111 static char *lexeme; /* beginning of lexeme for debugging */
112 static int lexeof; /* seen EOF for current source? */
113 static char *thisline = NULL;
114 static int in_braces = 0; /* count braces for firstline, lastline in an 'action' */
115 static int lastline = 0;
116 static int firstline = 0;
117 static SRCFILE *sourcefile = NULL; /* current program source */
118 static int lasttok = 0;
119 static int eof_warned = FALSE; /* GLOBAL: want warning for each file */
120 static int break_allowed; /* kludge for break */
121 static int continue_allowed; /* kludge for continue */
124 #define END_FILE -1000
125 #define END_SRC -2000
127 #define YYDEBUG_LEXER_TEXT (lexeme)
128 static int param_counter;
129 static NODE *func_params; /* list of parameters for the current function */
130 static char *tokstart = NULL;
131 static char *tok = NULL;
133 static int errcount = 0;
135 static NODE *symbol_list;
136 extern void destroy_symbol(char *name);
138 static long func_count; /* total number of functions */
140 #define HASHSIZE 1021 /* this constant only used here */
141 NODE *variables[HASHSIZE];
142 static int var_count; /* total number of global variables */
145 extern int sourceline;
146 extern SRCFILE *srcfiles;
147 extern INSTRUCTION *rule_list;
150 static INSTRUCTION *rule_block[sizeof(ruletab)];
152 static INSTRUCTION *ip_rec;
153 static INSTRUCTION *ip_newfile;
154 static INSTRUCTION *ip_atexit = NULL;
155 static INSTRUCTION *ip_end;
156 static INSTRUCTION *ip_endfile;
157 static INSTRUCTION *ip_beginfile;
159 static inline INSTRUCTION *list_create(INSTRUCTION *x);
160 static inline INSTRUCTION *list_append(INSTRUCTION *l, INSTRUCTION *x);
161 static inline INSTRUCTION *list_prepend(INSTRUCTION *l, INSTRUCTION *x);
162 static inline INSTRUCTION *list_merge(INSTRUCTION *l1, INSTRUCTION *l2);
164 extern double fmod(double x, double y);
166 * This string cannot occur as a real awk identifier.
167 * Use it as a special token to make function parsing
168 * uniform, but if it's seen, don't install the function.
170 * function split(x) { return x }
171 * function x(a) { return a }
172 * should only produce one error message, and not core dump.
174 static char builtin_func[] = "@builtin";
176 #define YYSTYPE INSTRUCTION *
179 %token FUNC_CALL NAME REGEXP FILENAME
180 %token YNUMBER YSTRING
181 %token RELOP IO_OUT IO_IN
182 %token ASSIGNOP ASSIGN MATCHOP CONCAT_OP
184 %token LEX_BEGIN LEX_END LEX_IF LEX_ELSE LEX_RETURN LEX_DELETE
185 %token LEX_SWITCH LEX_CASE LEX_DEFAULT LEX_WHILE LEX_DO LEX_FOR LEX_BREAK LEX_CONTINUE
186 %token LEX_PRINT LEX_PRINTF LEX_NEXT LEX_EXIT LEX_FUNCTION
187 %token LEX_BEGINFILE LEX_ENDFILE
188 %token LEX_GETLINE LEX_NEXTFILE
190 %token LEX_AND LEX_OR INCREMENT DECREMENT
191 %token LEX_BUILTIN LEX_LENGTH
193 %token LEX_INCLUDE LEX_EVAL
196 /* Lowest to highest */
197 %right ASSIGNOP ASSIGN SLASH_BEFORE_EQUAL
203 %left FUNC_CALL LEX_BUILTIN LEX_LENGTH
206 %nonassoc RELOP '<' '>' IO_IN IO_OUT
208 %left YSTRING YNUMBER
213 %left INCREMENT DECREMENT
234 * If errors, give up, don't produce an infinite
235 * stream of syntax error messages.
244 (void) append_rule($1, $2);
246 | pattern statement_term
249 msg(_("%s blocks must have an action part"), ruletab[rule]);
251 } else if ($1 == NULL) {
252 msg(_("each rule must have a pattern or an action part"));
254 } else /* pattern rule with non-empty pattern */
255 (void) append_rule($1, NULL);
257 | function_prologue action
260 if ($1 && func_install($1, $2) < 0)
265 | '@' LEX_INCLUDE source statement_term
275 if (include_source($1) < 0)
289 { $$ = NULL; rule = Rule; }
291 { $$ = $1; rule = Rule; }
292 | exp ',' opt_nls exp
296 add_lint($1, LINT_assign_in_cond);
297 add_lint($4, LINT_assign_in_cond);
299 tp = instruction(Op_no_op);
300 list_prepend($1, bcalloc(Op_line_range, !!do_profiling + 1, 0));
301 $1->nexti->triggered = FALSE;
302 $1->nexti->target_jmp = $4->nexti;
304 list_append($1, instruction(Op_cond_pair));
305 $1->lasti->line_range = $1->nexti;
306 $1->lasti->target_jmp = tp;
308 list_append($4, instruction(Op_cond_pair));
309 $4->lasti->line_range = $1->nexti;
310 $4->lasti->target_jmp = tp;
312 ($1->nexti + 1)->condpair_left = $1->lasti;
313 ($1->nexti + 1)->condpair_right = $4->lasti;
315 $$ = list_append(list_merge($1, $4), tp);
320 static int begin_seen = 0;
321 if (do_lint_old && ++begin_seen == 2)
322 warning_ln($1->source_line,
323 _("old awk does not support multiple `BEGIN' or `END' rules"));
325 $1->in_rule = rule = BEGIN;
326 $1->source_file = source;
331 static int end_seen = 0;
332 if (do_lint_old && ++end_seen == 2)
333 warning_ln($1->source_line,
334 _("old awk does not support multiple `BEGIN' or `END' rules"));
336 $1->in_rule = rule = END;
337 $1->source_file = source;
342 $1->in_rule = rule = BEGINFILE;
343 $1->source_file = source;
348 $1->in_rule = rule = ENDFILE;
349 $1->source_file = source;
355 : l_brace statements r_brace opt_semi opt_nls
358 $$ = list_create(instruction(Op_no_op));
371 yyerror(_("`%s' is a built-in function, it cannot be redefined"),
373 $1->opcode = Op_symbol; /* Op_symbol instead of Op_token so that
374 * free_bc_internal does not try to free it
376 $1->lextok = builtin_func;
395 func_name '(' opt_param_list r_paren opt_nls
399 $1->source_file = source;
400 t = make_param($3->lextok);
404 t->rnode = func_params;
408 /* check for duplicate parameter names */
409 if (dup_parms($1, t))
416 * In this rule, want_regexp tells yylex that the next thing
417 * is a regexp so it should read up to the closing slash.
421 REGEXP /* The terminating '/' is consumed by yylex(). */
431 lintwarn_ln($3->source_line,
432 _("regexp constant `//' looks like a C++ comment, but is not"));
433 else if ((re)[0] == '*' && (re)[len-1] == '*')
434 /* possible C comment */
435 lintwarn_ln($3->source_line,
436 _("regexp constant `/%s/' looks like a C comment, but is not"), re);
439 exp = make_str_node(re, len, ALREADY_MALLOCED);
440 n = make_regnode(Node_regex, exp);
446 $$->opcode = Op_match_rec;
460 | statements statement
465 add_lint($2, LINT_no_effect);
469 $$ = list_merge($1, $2);
485 | l_brace statements r_brace
490 $$ = list_prepend($1, instruction(Op_exec_count));
494 | LEX_SWITCH '(' exp r_paren opt_nls l_brace case_statements opt_nls r_brace
496 INSTRUCTION *dflt, *curr = NULL, *cexp, *cstmt;
497 INSTRUCTION *ip, *nextc, *tbreak;
498 const char **case_values = NULL;
503 tbreak = instruction(Op_no_op);
504 cstmt = list_create(tbreak);
505 cexp = list_create(instruction(Op_pop));
506 dflt = instruction(Op_jmp);
507 dflt->target_jmp = tbreak; /* if no case match and no explicit default */
511 bcfree($7); /* Op_list */
515 for(; curr != NULL; curr = nextc) {
516 INSTRUCTION *caseexp = curr->case_exp;
517 INSTRUCTION *casestmt = curr->case_stmt;
520 if (curr->opcode == Op_K_case) {
521 if (caseexp->opcode == Op_push_i) {
522 /* a constant scalar */
524 caseval = force_string(caseexp->memory)->stptr;
525 for (i = 0; i < case_count; i++) {
526 if (strcmp(caseval, case_values[i]) == 0)
527 error_ln(curr->source_line,
528 _("duplicate case values in switch body: %s"), caseval);
531 if (case_values == NULL)
532 emalloc(case_values, const char **, sizeof(char *) * maxcount, "statement");
533 else if (case_count >= maxcount) {
535 erealloc(case_values, const char **, sizeof(char*) * maxcount, "statement");
537 case_values[case_count++] = caseval;
539 /* match a constant regex against switch expression. */
540 (curr + 1)->match_exp = TRUE;
542 curr->stmt_start = casestmt->nexti;
543 curr->stmt_end = casestmt->lasti;
544 (void) list_prepend(cexp, curr);
545 (void) list_prepend(cexp, caseexp);
547 if (dflt->target_jmp != tbreak)
548 error_ln(curr->source_line,
549 _("duplicate `default' detected in switch body"));
551 dflt->target_jmp = casestmt->nexti;
554 curr->stmt_start = casestmt->nexti;
555 curr->stmt_end = casestmt->lasti;
556 (void) list_prepend(cexp, curr);
561 cstmt = list_merge(casestmt, cstmt);
564 if (case_values != NULL)
569 (void) list_prepend(ip, $1);
570 (void) list_prepend(ip, instruction(Op_exec_count));
571 $1->target_break = tbreak;
572 ($1 + 1)->switch_start = cexp->nexti;
573 ($1 + 1)->switch_end = cexp->lasti;
577 (void) list_append(cexp, dflt);
578 (void) list_merge(ip, cexp);
579 $$ = list_merge(ip, cstmt);
582 fix_break_continue(ip, tbreak, NULL);
584 | LEX_WHILE '(' exp r_paren opt_nls statement
599 INSTRUCTION *ip, *tbreak, *tcont;
601 tbreak = instruction(Op_no_op);
602 add_lint($3, LINT_assign_in_cond);
604 ip = list_append($3, instruction(Op_jmp_false));
605 ip->lasti->target_jmp = tbreak;
608 (void) list_append(ip, instruction(Op_exec_count));
609 $1->target_break = tbreak;
610 $1->target_continue = tcont;
611 ($1 + 1)->while_body = ip->lasti;
612 (void) list_prepend(ip, $1);
617 (void) list_merge(ip, $6);
618 (void) list_append(ip, instruction(Op_jmp));
619 ip->lasti->target_jmp = tcont;
620 $$ = list_append(ip, tbreak);
624 fix_break_continue(ip, tbreak, tcont);
626 | LEX_DO opt_nls statement LEX_WHILE '(' exp r_paren opt_nls
640 INSTRUCTION *ip, *tbreak, *tcont;
642 tbreak = instruction(Op_no_op);
644 add_lint($6, LINT_assign_in_cond);
646 ip = list_merge($3, $6);
648 ip = list_prepend($6, instruction(Op_no_op));
650 (void) list_prepend(ip, instruction(Op_exec_count));
651 (void) list_append(ip, instruction(Op_jmp_true));
652 ip->lasti->target_jmp = ip->nexti;
653 $$ = list_append(ip, tbreak);
657 fix_break_continue(ip, tbreak, tcont);
660 $1->target_break = tbreak;
661 $1->target_continue = tcont;
662 ($1 + 1)->doloop_cond = tcont;
663 $$ = list_prepend(ip, $1);
666 $1 and $4 are NULLs */
668 | LEX_FOR '(' NAME LEX_IN simple_variable r_paren opt_nls statement
671 char *var_name = $3->lextok;
674 && $8->lasti->opcode == Op_K_delete
675 && $8->lasti->expr_count == 1
676 && $8->nexti->opcode == Op_push
677 && ($8->nexti->memory->type != Node_var || !($8->nexti->memory->var_update))
678 && strcmp($8->nexti->memory->vname, var_name) == 0
681 /* Efficiency hack. Recognize the special case of
686 * and treat it as if it were
690 * Check that the body is a `delete a[i]' statement,
691 * and that both the loop var and array names match.
695 ip = $8->nexti->nexti;
696 if ($5->nexti->opcode == Op_push && $5->lasti == $5->nexti)
697 arr = $5->nexti->memory;
699 && ip->opcode == Op_no_op
700 && ip->nexti->opcode == Op_push_array
701 && strcmp(ip->nexti->memory->vname, arr->vname) == 0
702 && ip->nexti->nexti == $8->lasti
704 (void) make_assignable($8->nexti);
705 $8->lasti->opcode = Op_K_delete_loop;
706 $8->lasti->expr_count = 0;
717 INSTRUCTION *tbreak, *tcont;
719 /* [ Op_push_array a ]
720 * [ Op_arrayfor_init | ib ]
721 * ic:[ Op_arrayfor_incr | ib ]
722 * [ Op_var_assign if any ]
727 * ib:[Op_arrayfor_final ]
731 ip->nexti->opcode = Op_push_array;
733 tbreak = instruction(Op_arrayfor_final);
734 $4->opcode = Op_arrayfor_incr;
735 $4->array_var = variable(var_name, Node_var);
736 $4->target_jmp = tbreak;
738 $3->opcode = Op_arrayfor_init;
739 $3->target_jmp = tbreak;
740 (void) list_append(ip, $3);
743 $1->opcode = Op_K_arrayfor;
744 $1->target_continue = tcont;
745 $1->target_break = tbreak;
746 (void) list_append(ip, $1);
750 /* add update_FOO instruction if necessary */
751 if ($4->array_var->type == Node_var && $4->array_var->var_update) {
752 (void) list_append(ip, instruction(Op_var_update));
753 ip->lasti->update_var = $4->array_var->var_update;
755 (void) list_append(ip, $4);
757 /* add set_FOO instruction if necessary */
758 if ($4->array_var->type == Node_var && $4->array_var->var_assign) {
759 (void) list_append(ip, instruction(Op_var_assign));
760 ip->lasti->assign_var = $4->array_var->var_assign;
764 (void) list_append(ip, instruction(Op_exec_count));
765 ($1 + 1)->forloop_cond = $4;
766 ($1 + 1)->forloop_body = ip->lasti;
770 (void) list_merge(ip, $8);
772 (void) list_append(ip, instruction(Op_jmp));
773 ip->lasti->target_jmp = $4;
774 $$ = list_append(ip, tbreak);
775 fix_break_continue(ip, tbreak, tcont);
781 | LEX_FOR '(' opt_simple_stmt semi opt_nls exp semi opt_nls opt_simple_stmt r_paren opt_nls statement
783 $$ = mk_for_loop($1, $3, $6, $9, $12);
788 | LEX_FOR '(' opt_simple_stmt semi opt_nls semi opt_nls opt_simple_stmt r_paren opt_nls statement
790 $$ = mk_for_loop($1, $3, (INSTRUCTION *) NULL, $8, $11);
798 $$ = list_prepend($1, instruction(Op_exec_count));
805 : LEX_BREAK statement_term
808 error_ln($1->source_line,
809 _("`break' is not allowed outside a loop or switch"));
810 $1->target_jmp = NULL;
811 $$ = list_create($1);
814 | LEX_CONTINUE statement_term
816 if (! continue_allowed)
817 error_ln($1->source_line,
818 _("`continue' is not allowed outside a loop"));
819 $1->target_jmp = NULL;
820 $$ = list_create($1);
823 | LEX_NEXT statement_term
825 /* if inside function (rule = 0), resolve context at run-time */
826 if (rule && rule != Rule)
827 error_ln($1->source_line,
828 _("`next' used in %s action"), ruletab[rule]);
829 $1->target_jmp = ip_rec;
830 $$ = list_create($1);
832 | LEX_NEXTFILE statement_term
835 error_ln($1->source_line,
836 _("`nextfile' is a gawk extension"));
838 /* if inside function (rule = 0), resolve context at run-time */
839 if (rule == BEGIN || rule == END || rule == ENDFILE)
840 error_ln($1->source_line,
841 _("`nextfile' used in %s action"), ruletab[rule]);
843 $1->target_newfile = ip_newfile;
844 $1->target_endfile = ip_endfile;
845 $$ = list_create($1);
847 | LEX_EXIT opt_exp statement_term
849 /* Initialize the two possible jump targets, the actual target
850 * is resolved at run-time.
852 $1->target_end = ip_end; /* first instruction in end_block */
853 $1->target_atexit = ip_atexit; /* cleanup and go home */
856 $$ = list_create($1);
857 (void) list_prepend($$, instruction(Op_push_i));
858 $$->nexti->memory = Nnull_string;
860 $$ = list_append($2, $1);
865 yyerror(_("`return' used outside function context"));
866 } opt_exp statement_term {
868 $$ = list_create($1);
869 (void) list_prepend($$, instruction(Op_push_i));
870 $$->nexti->memory = Nnull_string;
872 $$ = list_append($3, $1);
874 | simple_stmt statement_term
878 * A simple_stmt exists to satisfy a constraint in the POSIX
879 * grammar allowing them to occur as the 1st and 3rd parts
880 * in a `for (...;...;...)' loop. This is a historical oddity
881 * inherited from Unix awk, not at all documented in the AK&W
882 * awk book. We support it, as this was reported as a bug.
883 * We don't bother to document it though. So there.
886 : print { in_print = TRUE; in_parens = 0; } print_expression_list output_redir
889 * Optimization: plain `print' has no expression list, so $3 is null.
890 * If $3 is NULL or is a bytecode list for $0 use Op_K_print_rec,
891 * which is faster for these two cases.
894 if ($1->opcode == Op_K_print &&
896 || ($3->lasti->opcode == Op_field_spec
897 && $3->nexti->nexti->nexti == $3->lasti
898 && $3->nexti->nexti->opcode == Op_push_i
899 && $3->nexti->nexti->memory->type == Node_val
900 && $3->nexti->nexti->memory->numbr == 0.0)
903 static short warned = FALSE;
910 * [Op_K_print_rec | NULL | redir_type | expr_count]
914 bcfree($3->lasti); /* Op_field_spec */
915 $3->nexti->nexti->memory->flags &= ~PERM;
916 $3->nexti->nexti->memory->flags |= MALLOC;
917 unref($3->nexti->nexti->memory); /* Node_val */
918 bcfree($3->nexti->nexti); /* Op_push_i */
919 bcfree($3->nexti); /* Op_list */
920 bcfree($3); /* Op_list */
922 if (do_lint && (rule == BEGIN || rule == END) && ! warned) {
924 lintwarn_ln($1->source_line,
925 _("plain `print' in BEGIN or END rule should probably be `print \"\"'"));
930 $1->opcode = Op_K_print_rec;
931 if ($4 == NULL) { /* no redircetion */
933 $$ = list_create($1);
937 $1->redir_type = ip->redir_type;
938 $4->nexti = ip->nexti;
940 $$ = list_append($4, $1);
947 * [ expression_list ]
949 * [$1 | NULL | redir_type | expr_count]
953 if ($4 == NULL) { /* no redirection */
954 if ($3 == NULL) { /* printf without arg */
957 $$ = list_create($1);
960 $1->expr_count = count_expressions(&t, FALSE);
962 $$ = list_append(t, $1);
967 $1->redir_type = ip->redir_type;
968 $4->nexti = ip->nexti;
972 $$ = list_append($4, $1);
975 $1->expr_count = count_expressions(&t, FALSE);
976 $$ = list_append(list_merge($4, t), $1);
982 | LEX_DELETE NAME { sub_counter = 0; } delete_subscript_list
984 char *arr = $2->lextok;
986 $2->opcode = Op_push_array;
987 $2->memory = variable(arr, Node_var_new);
990 static short warned = FALSE;
992 if (do_lint && ! warned) {
994 lintwarn_ln($1->source_line,
995 _("`delete array' is a gawk extension"));
998 error_ln($1->source_line,
999 _("`delete array' is a gawk extension"));
1001 $$ = list_append(list_create($2), $1);
1003 $1->expr_count = sub_counter;
1004 $$ = list_append(list_append($4, $2), $1);
1007 | LEX_DELETE '(' NAME ')'
1009 * this is for tawk compatibility. maybe the warnings
1010 * should always be done.
1013 static short warned = FALSE;
1014 char *arr = $3->lextok;
1016 if (do_lint && ! warned) {
1018 lintwarn_ln($1->source_line,
1019 _("`delete(array)' is a non-portable tawk extension"));
1021 if (do_traditional) {
1022 error_ln($1->source_line,
1023 _("`delete array' is a gawk extension"));
1025 $3->memory = variable(arr, Node_var_new);
1026 $3->opcode = Op_push_array;
1028 $$ = list_append(list_create($3), $1);
1031 { $$ = optimize_assignment($1); }
1044 | case_statements case_statement
1047 $$ = list_create($2);
1049 $$ = list_prepend($1, $2);
1051 | case_statements error
1056 : LEX_CASE case_value colon opt_nls statements
1058 INSTRUCTION *casestmt = $5;
1060 casestmt = list_create(instruction(Op_no_op));
1062 (void) list_prepend(casestmt, instruction(Op_exec_count));
1064 $1->case_stmt = casestmt;
1068 | LEX_DEFAULT colon opt_nls statements
1070 INSTRUCTION *casestmt = $4;
1072 casestmt = list_create(instruction(Op_no_op));
1074 (void) list_prepend(casestmt, instruction(Op_exec_count));
1076 $1->case_stmt = casestmt;
1084 | '-' YNUMBER %prec UNARY
1086 $2->memory->numbr = -(force_number($2->memory));
1090 | '+' YNUMBER %prec UNARY
1099 $1->opcode = Op_push_re;
1112 * Note: ``print(x)'' is already parsed by the first rule,
1113 * so there is no good in covering it by the second one too.
1115 print_expression_list
1116 : opt_expression_list
1117 | '(' expression_list r_paren
1130 | IO_OUT { in_print = FALSE; in_parens = 0; } common_exp
1132 if ($1->redir_type == redirect_twoway
1133 && $3->lasti->opcode == Op_K_getline_redir
1134 && $3->lasti->redir_type == redirect_twoway)
1135 yyerror(_("multistage two-way pipelines don't work"));
1136 $$ = list_prepend($3, $1);
1141 : LEX_IF '(' exp r_paren opt_nls statement
1143 $$ = mk_condition($3, $1, $6, NULL, NULL);
1145 | LEX_IF '(' exp r_paren opt_nls statement
1146 LEX_ELSE opt_nls statement
1148 $$ = mk_condition($3, $1, $6, $7, $9);
1180 append_param($1->lextok);
1184 | param_list comma NAME
1186 append_param($3->lextok);
1192 { /* func_params = NULL; */ }
1194 { /* func_params = NULL; */ }
1195 | param_list comma error
1196 { /* func_params = NULL; */ }
1199 /* optional expression, as in for loop */
1216 { $$ = mk_expression_list(NULL, $1); }
1217 | expression_list comma exp
1219 $$ = mk_expression_list($1, $3);
1224 | expression_list error
1226 | expression_list error exp
1228 | expression_list comma error
1232 /* Expressions, not including the comma operator. */
1234 : variable assign_operator exp %prec ASSIGNOP
1236 if (do_lint && $3->lasti->opcode == Op_match_rec)
1237 lintwarn_ln($2->source_line,
1238 _("regular expression on right of assignment"));
1239 $$ = mk_assignment($1, $3, $2);
1242 { $$ = mk_boolean($1, $3, $2); }
1244 { $$ = mk_boolean($1, $3, $2); }
1247 if ($1->lasti->opcode == Op_match_rec)
1248 warning_ln($2->source_line,
1249 _("regular expression on left of `~' or `!~' operator"));
1251 if ($3->lasti == $3->nexti && $3->nexti->opcode == Op_match_rec) {
1252 $2->memory = $3->nexti->memory;
1253 bcfree($3->nexti); /* Op_match_rec */
1254 bcfree($3); /* Op_list */
1255 $$ = list_append($1, $2);
1257 $2->memory = make_regnode(Node_dynregex, NULL);
1258 $$ = list_append(list_merge($1, $3), $2);
1261 | exp LEX_IN simple_variable
1264 warning_ln($2->source_line,
1265 _("old awk does not support the keyword `in' except after `for'"));
1266 $3->nexti->opcode = Op_push_array;
1267 $2->opcode = Op_in_array;
1269 $$ = list_append(list_merge($1, $3), $2);
1271 | exp a_relop exp %prec RELOP
1273 if (do_lint && $3->lasti->opcode == Op_match_rec)
1274 lintwarn_ln($2->source_line,
1275 _("regular expression on right of comparison"));
1276 $$ = list_append(list_merge($1, $3), $2);
1278 | exp '?' exp ':' exp
1279 { $$ = mk_condition($1, $2, $3, $4, $5); }
1289 | SLASH_BEFORE_EQUAL ASSIGN /* `/=' */
1291 $2->opcode = Op_assign_quotient;
1315 | common_exp simp_exp %prec CONCAT_OP
1318 int is_simple_var = FALSE;
1320 if ($1->lasti->opcode == Op_concat) {
1321 /* multiple (> 2) adjacent strings optimization */
1322 is_simple_var = ($1->lasti->concat_flag & CSVAR);
1323 count = $1->lasti->expr_count + 1;
1324 $1->lasti->opcode = Op_no_op;
1326 is_simple_var = ($1->nexti->opcode == Op_push
1327 && $1->lasti == $1->nexti); /* first exp. is a simple
1328 * variable?; kludge for use
1329 * in Op_assign_concat.
1334 && $1->nexti == $1->lasti && $1->nexti->opcode == Op_push_i
1335 && $2->nexti == $2->lasti && $2->nexti->opcode == Op_push_i
1337 NODE *n1 = $1->nexti->memory;
1338 NODE *n2 = $2->nexti->memory;
1341 (void) force_string(n1);
1342 (void) force_string(n2);
1343 nlen = n1->stlen + n2->stlen;
1344 erealloc(n1->stptr, char *, nlen + 2, "constant fold");
1345 memcpy(n1->stptr + n1->stlen, n2->stptr, n2->stlen);
1347 n1->stptr[nlen] = '\0';
1348 n1->flags &= ~(NUMCUR|NUMBER);
1349 n1->flags |= (STRING|STRCUR);
1352 n2->flags |= MALLOC;
1358 $$ = list_append(list_merge($1, $2), instruction(Op_concat));
1359 $$->lasti->concat_flag = (is_simple_var ? CSVAR : 0);
1360 $$->lasti->expr_count = count;
1361 if (count > max_args)
1369 /* Binary operators in order of decreasing precedence. */
1370 | simp_exp '^' simp_exp
1371 { $$ = mk_binary($1, $3, $2); }
1372 | simp_exp '*' simp_exp
1373 { $$ = mk_binary($1, $3, $2); }
1374 | simp_exp '/' simp_exp
1375 { $$ = mk_binary($1, $3, $2); }
1376 | simp_exp '%' simp_exp
1377 { $$ = mk_binary($1, $3, $2); }
1378 | simp_exp '+' simp_exp
1379 { $$ = mk_binary($1, $3, $2); }
1380 | simp_exp '-' simp_exp
1381 { $$ = mk_binary($1, $3, $2); }
1382 | LEX_GETLINE opt_variable input_redir
1385 * In BEGINFILE/ENDFILE, allow `getline var < file'
1388 if (rule == BEGINFILE || rule == ENDFILE) {
1389 if ($2 != NULL && $3 != NULL)
1393 error_ln($1->source_line,
1394 _("`getline var' invalid inside `%s' rule"), ruletab[rule]);
1396 error_ln($1->source_line,
1397 _("`getline' invalid inside `%s' rule"), ruletab[rule]);
1400 if (do_lint && rule == END && $3 == NULL)
1401 lintwarn_ln($1->source_line,
1402 _("non-redirected `getline' undefined inside END action"));
1403 $$ = mk_getline($1, $2, $3, redirect_input);
1405 | variable INCREMENT
1407 $2->opcode = Op_postincrement;
1408 $$ = mk_assignment($1, NULL, $2);
1410 | variable DECREMENT
1412 $2->opcode = Op_postdecrement;
1413 $$ = mk_assignment($1, NULL, $2);
1415 | '(' expression_list r_paren LEX_IN simple_variable
1418 warning_ln($4->source_line,
1419 _("old awk does not support the keyword `in' except after `for'"));
1420 warning_ln($4->source_line,
1421 _("old awk does not support multidimensional arrays"));
1423 $5->nexti->opcode = Op_push_array;
1424 $4->opcode = Op_in_array;
1425 if ($2 == NULL) { /* error */
1428 $$ = list_merge($5, $4);
1430 INSTRUCTION *t = $2;
1431 $4->expr_count = count_expressions(&t, FALSE);
1432 $$ = list_append(list_merge(t, $5), $4);
1437 /* Expressions containing "| getline" lose the ability to be on the
1438 right-hand side of a concatenation. */
1440 : common_exp IO_IN LEX_GETLINE opt_variable
1442 $$ = mk_getline($3, $4, $1, $2->redir_type);
1445 /* Binary operators in order of decreasing precedence. */
1446 | simp_exp_nc '^' simp_exp
1447 { $$ = mk_binary($1, $3, $2); }
1448 | simp_exp_nc '*' simp_exp
1449 { $$ = mk_binary($1, $3, $2); }
1450 | simp_exp_nc '/' simp_exp
1451 { $$ = mk_binary($1, $3, $2); }
1452 | simp_exp_nc '%' simp_exp
1453 { $$ = mk_binary($1, $3, $2); }
1454 | simp_exp_nc '+' simp_exp
1455 { $$ = mk_binary($1, $3, $2); }
1456 | simp_exp_nc '-' simp_exp
1457 { $$ = mk_binary($1, $3, $2); }
1463 $$ = list_create($1);
1465 | '!' simp_exp %prec UNARY
1467 if ($2->opcode == Op_match_rec) {
1468 $2->opcode = Op_nomatch;
1469 $1->opcode = Op_push_i;
1470 $1->memory = mk_number(0.0, (PERM|NUMCUR|NUMBER));
1471 $$ = list_append(list_append(list_create($1),
1472 instruction(Op_field_spec)), $2);
1474 if (do_optimize > 1 && $2->nexti == $2->lasti
1475 && $2->nexti->opcode == Op_push_i
1477 NODE *n = $2->nexti->memory;
1478 if ((n->flags & (STRCUR|STRING)) != 0) {
1479 n->numbr = (AWKNUM) (n->stlen == 0);
1480 n->flags &= ~(STRCUR|STRING);
1481 n->flags |= (NUMCUR|NUMBER);
1486 n->numbr = (AWKNUM) (n->numbr == 0.0);
1490 $1->opcode = Op_not;
1491 add_lint($2, LINT_assign_in_cond);
1492 $$ = list_append($2, $1);
1498 | LEX_BUILTIN '(' opt_expression_list r_paren
1504 | LEX_LENGTH '(' opt_expression_list r_paren
1512 static short warned1 = FALSE;
1514 if (do_lint && ! warned1) {
1516 lintwarn_ln($1->source_line,
1517 _("call of `length' without parentheses is not portable"));
1519 $$ = snode(NULL, $1);
1525 | INCREMENT variable
1527 $1->opcode = Op_preincrement;
1528 $$ = mk_assignment($2, NULL, $1);
1530 | DECREMENT variable
1532 $1->opcode = Op_predecrement;
1533 $$ = mk_assignment($2, NULL, $1);
1537 $$ = list_create($1);
1541 $$ = list_create($1);
1543 | '-' simp_exp %prec UNARY
1545 if ($2->lasti->opcode == Op_push_i
1546 && ($2->lasti->memory->flags & (STRCUR|STRING)) == 0) {
1547 $2->lasti->memory->numbr = -(force_number($2->lasti->memory));
1551 $1->opcode = Op_unary_minus;
1552 $$ = list_append($2, $1);
1555 | '+' simp_exp %prec UNARY
1559 * POSIX semantics: force a conversion to numeric type
1561 $1->opcode = Op_plus_i;
1562 $1->memory = mk_number((AWKNUM) 0.0, (PERM|NUMCUR|NUMBER));
1563 $$ = list_append($2, $1);
1570 func_use($1->lasti->func_name, FUNC_USE);
1573 | '@' direct_func_call
1575 /* indirect function call */
1579 static short warned = FALSE;
1580 const char *msg = _("indirect function calls are a gawk extension");
1582 if (do_traditional || do_posix)
1584 else if (do_lint && ! warned) {
1586 lintwarn("%s", msg);
1590 f->opcode = Op_indirect_func_call;
1591 name = estrdup(f->func_name, strlen(f->func_name));
1592 if (is_std_var(name))
1593 yyerror(_("can not use special variable `%s' for indirect function call"), name);
1594 indirect_var = variable(name, Node_var_new);
1595 t = instruction(Op_push);
1596 t->memory = indirect_var;
1598 /* prepend indirect var instead of appending to arguments (opt_expression_list),
1599 * and pop it off in setup_frame (eval.c) (left to right evaluation order); Test case:
1604 $$ = list_prepend($2, t);
1609 : FUNC_CALL '(' opt_expression_list r_paren
1612 $1->opcode = Op_func_call;
1613 $1->func_body = NULL;
1614 if ($3 == NULL) { /* no argument or error */
1615 ($1 + 1)->expr_count = 0;
1616 $$ = list_create($1);
1618 INSTRUCTION *t = $3;
1619 ($1 + 1)->expr_count = count_expressions(&t, TRUE);
1620 $$ = list_append(t, $1);
1632 delete_subscript_list
1635 | delete_subscript SUBSCRIPT
1642 | delete_subscript delete_exp_list
1644 $$ = list_merge($1, $2);
1649 : bracketed_exp_list
1651 INSTRUCTION *ip = $1->lasti;
1652 int count = ip->sub_count; /* # of SUBSEP-seperated expressions */
1654 /* change Op_subscript or Op_sub_array to Op_concat */
1655 ip->opcode = Op_concat;
1656 ip->concat_flag = CSUBSEP;
1657 ip->expr_count = count;
1659 ip->opcode = Op_no_op;
1660 sub_counter++; /* count # of dimensions */
1666 : '[' expression_list ']'
1668 INSTRUCTION *t = $2;
1670 error_ln($3->source_line,
1671 _("invalid subscript expression"));
1672 /* install Null string as subscript. */
1673 t = list_create(instruction(Op_push_i));
1674 t->nexti->memory = Nnull_string;
1677 $3->sub_count = count_expressions(&t, FALSE);
1678 $$ = list_append(t, $3);
1683 : bracketed_exp_list
1685 | subscript bracketed_exp_list
1687 $$ = list_merge($1, $2);
1692 : subscript SUBSCRIPT
1699 char *var_name = $1->lextok;
1701 $1->opcode = Op_push;
1702 $1->memory = variable(var_name, Node_var_new);
1703 $$ = list_create($1);
1705 | NAME subscript_list
1709 char *arr = $1->lextok;
1710 if ((n = lookup(arr)) != NULL && ! isarray(n))
1711 yyerror(_("use of non-array as array"));
1712 $1->memory = variable(arr, Node_var_new);
1713 $1->opcode = Op_push_array;
1714 $$ = list_prepend($2, $1);
1721 INSTRUCTION *ip = $1->nexti;
1722 if (ip->opcode == Op_push
1723 && ip->memory->type == Node_var
1724 && ip->memory->var_update
1726 $$ = list_prepend($1, instruction(Op_var_update));
1727 $$->nexti->update_var = ip->memory->var_update;
1731 | '$' non_post_simp_exp opt_incdec
1733 $$ = list_append($2, $1);
1735 mk_assignment($2, NULL, $3);
1742 $1->opcode = Op_postincrement;
1746 $1->opcode = Op_postdecrement;
1748 | /* empty */ { $$ = NULL; }
1756 : '}' opt_nls { yyerrok; }
1773 : ':' { $$ = $1; yyerrok; }
1777 : ',' opt_nls { yyerrok; }
1782 const char *operator; /* text to match */
1783 OPCODE value; /* type */
1784 int class; /* lexical class */
1785 unsigned flags; /* # of args. allowed and compatability */
1786 # define ARGS 0xFF /* 0, 1, 2, 3 args allowed (any combination */
1787 # define A(n) (1<<(n))
1788 # define VERSION_MASK 0xFF00 /* old awk is zero */
1789 # define NOT_OLD 0x0100 /* feature not in old awk */
1790 # define NOT_POSIX 0x0200 /* feature not in POSIX */
1791 # define GAWKX 0x0400 /* gawk extension */
1792 # define RESX 0x0800 /* Bell Labs Research extension */
1793 # define BREAK 0x1000 /* break allowed inside */
1794 # define CONTINUE 0x2000 /* continue allowed inside */
1796 NODE *(*ptr)(int); /* function that implements this keyword */
1799 #if 'a' == 0x81 /* it's EBCDIC */
1800 /* tokcompare --- lexicographically compare token names for sorting */
1803 tokcompare(const void *l, const void *r)
1805 struct token *lhs, *rhs;
1807 lhs = (struct token *) l;
1808 rhs = (struct token *) r;
1810 return strcmp(lhs->operator, rhs->operator);
1815 * Tokentab is sorted ASCII ascending order, so it can be binary searched.
1816 * See check_special(), which sorts the table on EBCDIC systems.
1817 * Function pointers come from declarations in awk.h.
1820 static const struct token tokentab[] = {
1821 {"BEGIN", Op_rule, LEX_BEGIN, 0, 0},
1822 {"BEGINFILE", Op_rule, LEX_BEGINFILE, GAWKX, 0},
1823 {"END", Op_rule, LEX_END, 0, 0},
1824 {"ENDFILE", Op_rule, LEX_ENDFILE, GAWKX, 0},
1826 {"adump", Op_builtin, LEX_BUILTIN, GAWKX|A(1), do_adump},
1828 {"and", Op_builtin, LEX_BUILTIN, GAWKX|A(2), do_and},
1829 {"asort", Op_builtin, LEX_BUILTIN, GAWKX|A(1)|A(2)|A(3), do_asort},
1830 {"asorti", Op_builtin, LEX_BUILTIN, GAWKX|A(1)|A(2)|A(3), do_asorti},
1831 {"atan2", Op_builtin, LEX_BUILTIN, NOT_OLD|A(2), do_atan2},
1832 {"bindtextdomain", Op_builtin, LEX_BUILTIN, GAWKX|A(1)|A(2), do_bindtextdomain},
1833 {"break", Op_K_break, LEX_BREAK, 0, 0},
1834 {"case", Op_K_case, LEX_CASE, GAWKX, 0},
1835 {"close", Op_builtin, LEX_BUILTIN, NOT_OLD|A(1)|A(2), do_close},
1836 {"compl", Op_builtin, LEX_BUILTIN, GAWKX|A(1), do_compl},
1837 {"continue", Op_K_continue, LEX_CONTINUE, 0, 0},
1838 {"cos", Op_builtin, LEX_BUILTIN, NOT_OLD|A(1), do_cos},
1839 {"dcgettext", Op_builtin, LEX_BUILTIN, GAWKX|A(1)|A(2)|A(3), do_dcgettext},
1840 {"dcngettext", Op_builtin, LEX_BUILTIN, GAWKX|A(1)|A(2)|A(3)|A(4)|A(5), do_dcngettext},
1841 {"default", Op_K_default, LEX_DEFAULT, GAWKX, 0},
1842 {"delete", Op_K_delete, LEX_DELETE, NOT_OLD, 0},
1843 {"do", Op_K_do, LEX_DO, NOT_OLD|BREAK|CONTINUE, 0},
1844 {"else", Op_K_else, LEX_ELSE, 0, 0},
1845 {"eval", Op_symbol, LEX_EVAL, 0, 0},
1846 {"exit", Op_K_exit, LEX_EXIT, 0, 0},
1847 {"exp", Op_builtin, LEX_BUILTIN, A(1), do_exp},
1848 {"extension", Op_builtin, LEX_BUILTIN, GAWKX|A(2), do_ext},
1849 {"fflush", Op_builtin, LEX_BUILTIN, RESX|A(0)|A(1), do_fflush},
1850 {"for", Op_K_for, LEX_FOR, BREAK|CONTINUE, 0},
1851 {"func", Op_func, LEX_FUNCTION, NOT_POSIX|NOT_OLD, 0},
1852 {"function",Op_func, LEX_FUNCTION, NOT_OLD, 0},
1853 {"gensub", Op_sub_builtin, LEX_BUILTIN, GAWKX|A(3)|A(4), 0},
1854 {"getline", Op_K_getline_redir, LEX_GETLINE, NOT_OLD, 0},
1855 {"gsub", Op_sub_builtin, LEX_BUILTIN, NOT_OLD|A(2)|A(3), 0},
1856 {"if", Op_K_if, LEX_IF, 0, 0},
1857 {"in", Op_symbol, LEX_IN, 0, 0},
1858 {"include", Op_symbol, LEX_INCLUDE, GAWKX, 0},
1859 {"index", Op_builtin, LEX_BUILTIN, A(2), do_index},
1860 {"int", Op_builtin, LEX_BUILTIN, A(1), do_int},
1861 {"isarray", Op_builtin, LEX_BUILTIN, GAWKX|A(1), do_isarray},
1862 {"length", Op_builtin, LEX_LENGTH, A(0)|A(1), do_length},
1863 {"log", Op_builtin, LEX_BUILTIN, A(1), do_log},
1864 {"lshift", Op_builtin, LEX_BUILTIN, GAWKX|A(2), do_lshift},
1865 {"match", Op_builtin, LEX_BUILTIN, NOT_OLD|A(2)|A(3), do_match},
1866 {"mktime", Op_builtin, LEX_BUILTIN, GAWKX|A(1), do_mktime},
1867 {"next", Op_K_next, LEX_NEXT, 0, 0},
1868 {"nextfile", Op_K_nextfile, LEX_NEXTFILE, GAWKX, 0},
1869 {"or", Op_builtin, LEX_BUILTIN, GAWKX|A(2), do_or},
1870 {"patsplit", Op_builtin, LEX_BUILTIN, GAWKX|A(2)|A(3)|A(4), do_patsplit},
1871 {"print", Op_K_print, LEX_PRINT, 0, 0},
1872 {"printf", Op_K_printf, LEX_PRINTF, 0, 0},
1873 {"rand", Op_builtin, LEX_BUILTIN, NOT_OLD|A(0), do_rand},
1874 {"return", Op_K_return, LEX_RETURN, NOT_OLD, 0},
1875 {"rshift", Op_builtin, LEX_BUILTIN, GAWKX|A(2), do_rshift},
1876 {"sin", Op_builtin, LEX_BUILTIN, NOT_OLD|A(1), do_sin},
1877 {"split", Op_builtin, LEX_BUILTIN, A(2)|A(3)|A(4), do_split},
1878 {"sprintf", Op_builtin, LEX_BUILTIN, 0, do_sprintf},
1879 {"sqrt", Op_builtin, LEX_BUILTIN, A(1), do_sqrt},
1880 {"srand", Op_builtin, LEX_BUILTIN, NOT_OLD|A(0)|A(1), do_srand},
1881 #if defined(GAWKDEBUG) || defined(ARRAYDEBUG) /* || ... */
1882 {"stopme", Op_builtin, LEX_BUILTIN, GAWKX|A(0), stopme},
1884 {"strftime", Op_builtin, LEX_BUILTIN, GAWKX|A(0)|A(1)|A(2)|A(3), do_strftime},
1885 {"strtonum", Op_builtin, LEX_BUILTIN, GAWKX|A(1), do_strtonum},
1886 {"sub", Op_sub_builtin, LEX_BUILTIN, NOT_OLD|A(2)|A(3), 0},
1887 {"substr", Op_builtin, LEX_BUILTIN, A(2)|A(3), do_substr},
1888 {"switch", Op_K_switch, LEX_SWITCH, GAWKX|BREAK, 0},
1889 {"system", Op_builtin, LEX_BUILTIN, NOT_OLD|A(1), do_system},
1890 {"systime", Op_builtin, LEX_BUILTIN, GAWKX|A(0), do_systime},
1891 {"tolower", Op_builtin, LEX_BUILTIN, NOT_OLD|A(1), do_tolower},
1892 {"toupper", Op_builtin, LEX_BUILTIN, NOT_OLD|A(1), do_toupper},
1893 {"while", Op_K_while, LEX_WHILE, BREAK|CONTINUE, 0},
1894 {"xor", Op_builtin, LEX_BUILTIN, GAWKX|A(2), do_xor},
1898 /* Variable containing the current shift state. */
1899 static mbstate_t cur_mbstate;
1900 /* Ring buffer containing current characters. */
1901 #define MAX_CHAR_IN_RING_BUFFER 8
1902 #define RING_BUFFER_SIZE (MAX_CHAR_IN_RING_BUFFER * MB_LEN_MAX)
1903 static char cur_char_ring[RING_BUFFER_SIZE];
1904 /* Index for ring buffers. */
1905 static int cur_ring_idx;
1906 /* This macro means that last nextc() return a singlebyte character
1907 or 1st byte of a multibyte character. */
1908 #define nextc_is_1stbyte (cur_char_ring[cur_ring_idx] == 1)
1909 #else /* MBS_SUPPORT */
1911 #define nextc_is_1stbyte 1
1912 #endif /* MBS_SUPPORT */
1914 /* getfname --- return name of a builtin function (for pretty printing) */
1917 getfname(NODE *(*fptr)(int))
1921 j = sizeof(tokentab) / sizeof(tokentab[0]);
1922 /* linear search, no other way to do it */
1923 for (i = 0; i < j; i++)
1924 if (tokentab[i].ptr == fptr)
1925 return tokentab[i].operator;
1930 /* print_included_from --- print `Included from ..' file names and locations */
1933 print_included_from()
1938 /* suppress current file name, line # from `.. included from ..' msgs */
1939 saveline = sourceline;
1942 for (s = sourcefile; s != NULL && s->stype == SRC_INC; ) {
1944 if (s == NULL || s->fd <= INVALID_HANDLE)
1948 /* if last token is NEWLINE, line number is off by 1. */
1949 if (s->lasttok == NEWLINE)
1952 s->prev == sourcefile ? "In file included from"
1954 (s->stype == SRC_INC ||
1955 s->stype == SRC_FILE) ? s->src : "cmd. line",
1957 s->stype == SRC_INC ? ',' : ':'
1960 sourceline = saveline;
1963 /* warning_ln --- print a warning message with location */
1966 warning_ln(int line, const char *mesg, ...)
1971 saveline = sourceline;
1973 print_included_from();
1974 va_start(args, mesg);
1975 err(_("warning: "), mesg, args);
1977 sourceline = saveline;
1980 /* lintwarn_ln --- print a lint warning and location */
1983 lintwarn_ln(int line, const char *mesg, ...)
1988 saveline = sourceline;
1990 print_included_from();
1991 va_start(args, mesg);
1992 if (lintfunc == r_fatal)
1993 err(_("fatal: "), mesg, args);
1995 err(_("warning: "), mesg, args);
1997 sourceline = saveline;
1998 if (lintfunc == r_fatal)
1999 gawk_exit(EXIT_FATAL);
2002 /* error_ln --- print an error message and location */
2005 error_ln(int line, const char *m, ...)
2010 saveline = sourceline;
2012 print_included_from();
2015 err("error: ", m, args);
2017 sourceline = saveline;
2020 /* yyerror --- print a syntax error message, show where */
2023 yyerror(const char *m, ...)
2026 const char *mesg = NULL;
2031 static char end_of_file_line[] = "(END OF FILE)";
2034 print_included_from();
2037 /* Find the current line in the input file */
2038 if (lexptr && lexeme) {
2039 if (thisline == NULL) {
2043 mesg = _("unexpected newline or end of string");
2045 for (; cp != lexptr_begin && *cp != '\n'; --cp)
2051 /* NL isn't guaranteed */
2053 while (bp < lexend && *bp && *bp != '\n')
2056 thisline = end_of_file_line;
2057 bp = thisline + strlen(thisline);
2061 * Saving and restoring *bp keeps valgrind happy,
2062 * since the guts of glibc uses strlen, even though
2063 * we're passing an explict precision. Sigh.
2065 * 8/2003: We may not need this anymore.
2070 msg("%.*s", (int) (bp - thisline), thisline);
2077 count = (bp - thisline) + strlen(mesg) + 2 + 1;
2078 emalloc(buf, char *, count, "yyerror");
2082 if (lexptr != NULL) {
2084 while (scan < lexeme)
2085 if (*scan++ == '\t')
2098 /* mk_program --- create a single list of instructions */
2100 static INSTRUCTION *
2103 INSTRUCTION *cp, *tmp;
2105 #define begin_block rule_block[BEGIN]
2106 #define end_block rule_block[END]
2107 #define prog_block rule_block[Rule]
2108 #define beginfile_block rule_block[BEGINFILE]
2109 #define endfile_block rule_block[ENDFILE]
2111 if (end_block == NULL)
2112 end_block = list_create(ip_end);
2114 (void) list_prepend(end_block, ip_end);
2116 if (! in_main_context()) {
2117 if (begin_block != NULL && prog_block != NULL)
2118 cp = list_merge(begin_block, prog_block);
2120 cp = (begin_block != NULL) ? begin_block : prog_block;
2123 (void) list_merge(cp, end_block);
2127 (void) list_append(cp, instruction(Op_stop));
2131 if (endfile_block == NULL)
2132 endfile_block = list_create(ip_endfile);
2134 ip_rec->has_endfile = TRUE;
2135 (void) list_prepend(endfile_block, ip_endfile);
2138 if (beginfile_block == NULL)
2139 beginfile_block = list_create(ip_beginfile);
2141 (void) list_prepend(beginfile_block, ip_beginfile);
2143 if (prog_block == NULL) {
2144 if (end_block->nexti == end_block->lasti
2145 && beginfile_block->nexti == beginfile_block->lasti
2146 && endfile_block->nexti == endfile_block->lasti
2148 /* no pattern-action and (real) end, beginfile or endfile blocks */
2151 ip_rec = ip_newfile = NULL;
2153 list_append(beginfile_block, instruction(Op_after_beginfile));
2154 (void) list_append(endfile_block, instruction(Op_after_endfile));
2156 if (begin_block == NULL) /* no program at all */
2159 cp = list_merge(begin_block, end_block);
2160 (void) list_append(cp, ip_atexit);
2161 (void) list_append(cp, instruction(Op_stop));
2163 /* append beginfile_block and endfile_block for sole use
2164 * in getline without redirection (Op_K_getline).
2167 (void) list_merge(cp, beginfile_block);
2168 (void) list_merge(cp, endfile_block);
2173 /* install a do-nothing prog block */
2174 prog_block = list_create(instruction(Op_no_op));
2178 (void) list_append(endfile_block, instruction(Op_after_endfile));
2179 (void) list_prepend(prog_block, ip_rec);
2180 (void) list_append(prog_block, instruction(Op_jmp));
2181 prog_block->lasti->target_jmp = ip_rec;
2183 list_append(beginfile_block, instruction(Op_after_beginfile));
2185 cp = list_merge(beginfile_block, prog_block);
2186 (void) list_prepend(cp, ip_newfile);
2187 (void) list_merge(cp, endfile_block);
2188 (void) list_merge(cp, end_block);
2189 if (begin_block != NULL)
2190 cp = list_merge(begin_block, cp);
2192 (void) list_append(cp, ip_atexit);
2193 (void) list_append(cp, instruction(Op_stop));
2196 /* delete the Op_list, not needed */
2204 #undef beginfile_block
2205 #undef endfile_block
2208 /* parse_program --- read in the program and convert into a list of instructions */
2211 parse_program(INSTRUCTION **pcode)
2215 /* pre-create non-local jump targets
2216 * ip_end (Op_no_op) -- used as jump target for `exit'
2217 * outside an END block.
2219 ip_end = instruction(Op_no_op);
2221 if (! in_main_context())
2222 ip_newfile = ip_rec = ip_atexit = ip_beginfile = ip_endfile = NULL;
2224 ip_endfile = instruction(Op_no_op);
2225 ip_beginfile = instruction(Op_no_op);
2226 ip_rec = instruction(Op_get_record); /* target for `next', also ip_newfile */
2227 ip_newfile = bcalloc(Op_newfile, 2, 0); /* target for `nextfile' */
2228 ip_newfile->target_jmp = ip_end;
2229 ip_newfile->target_endfile = ip_endfile;
2230 (ip_newfile + 1)->target_get_record = ip_rec;
2231 ip_rec->target_newfile = ip_newfile;
2232 ip_atexit = instruction(Op_atexit); /* target for `exit' in END block */
2235 sourcefile = srcfiles->next;
2239 memset(rule_block, 0, sizeof(ruletab) * sizeof(INSTRUCTION *));
2241 tok = tokstart != NULL ? tokstart : tokexpand();
2244 *pcode = mk_program();
2246 /* avoid false source indications */
2249 if (ret == 0) /* avoid spurious warning if parser aborted with YYABORT */
2252 return (ret || errcount);
2255 /* do_add_srcfile --- add one item to srcfiles */
2258 do_add_srcfile(int stype, char *src, char *path, SRCFILE *thisfile)
2262 emalloc(s, SRCFILE *, sizeof(SRCFILE), "do_add_srcfile");
2263 memset(s, 0, sizeof(SRCFILE));
2264 s->src = estrdup(src, strlen(src));
2267 s->fd = INVALID_HANDLE;
2269 s->prev = thisfile->prev;
2270 thisfile->prev->next = s;
2275 /* add_srcfile --- add one item to srcfiles after checking if
2276 * a source file exists and not already in list.
2280 add_srcfile(int stype, char *src, SRCFILE *thisfile, int *already_included, int *errcode)
2287 if (already_included)
2288 *already_included = FALSE;
2291 if (stype == SRC_CMDLINE || stype == SRC_STDIN)
2292 return do_add_srcfile(stype, src, NULL, thisfile);
2294 path = find_source(src, &sbuf, &errno_val);
2297 *errcode = errno_val;
2300 fatal(_("can't open source file `%s' for reading (%s)"),
2301 src, errno_val ? strerror(errno_val) : _("reason unknown"));
2304 for (s = srcfiles->next; s != srcfiles; s = s->next) {
2305 if ((s->stype == SRC_FILE || s->stype == SRC_INC)
2306 && files_are_same(path, s)
2309 int line = sourceline;
2310 /* Kludge: the line number may be off for `@include file'.
2311 * Since, this function is also used for '-f file' in main.c,
2312 * sourceline > 1 check ensures that the call is at
2315 if (sourceline > 1 && lasttok == NEWLINE)
2317 lintwarn_ln(line, _("already included source file `%s'"), src);
2320 if (already_included)
2321 *already_included = TRUE;
2326 s = do_add_srcfile(stype, src, path, thisfile);
2328 s->mtime = sbuf.st_mtime;
2332 /* include_source --- read program from source included using `@include' */
2335 include_source(INSTRUCTION *file)
2338 char *src = file->lextok;
2340 int already_included;
2342 if (do_traditional || do_posix) {
2343 error_ln(file->source_line, _("@include is a gawk extension"));
2347 if (strlen(src) == 0) {
2349 lintwarn_ln(file->source_line, _("empty filename after @include"));
2353 s = add_srcfile(SRC_INC, src, sourcefile, &already_included, &errcode);
2355 if (already_included)
2357 error_ln(file->source_line,
2358 _("can't open source file `%s' for reading (%s)"),
2359 src, errcode ? strerror(errcode) : _("reason unknown"));
2363 /* save scanner state for the current sourcefile */
2364 sourcefile->srclines = sourceline;
2365 sourcefile->lexptr = lexptr;
2366 sourcefile->lexend = lexend;
2367 sourcefile->lexptr_begin = lexptr_begin;
2368 sourcefile->lexeme = lexeme;
2369 sourcefile->lasttok = lasttok;
2371 /* included file becomes the current source */
2382 /* next_sourcefile --- read program from the next source in srcfiles */
2387 static int (*closefunc)(int fd) = NULL;
2389 if (closefunc == NULL) {
2390 char *cp = getenv("AWKREADFUNC");
2392 /* If necessary, one day, test value for different functions. */
2396 closefunc = one_line_close;
2400 * This won't be true if there's an invalid character in
2401 * the source file or source string (e.g., user typo).
2402 * Previous versions of gawk did not core dump in such a
2405 * assert(lexeof == TRUE);
2409 sourcefile->srclines = sourceline; /* total no of lines in current file */
2410 if (sourcefile->fd > INVALID_HANDLE) {
2411 if (sourcefile->fd != fileno(stdin)) /* safety */
2412 (*closefunc)(sourcefile->fd);
2413 sourcefile->fd = INVALID_HANDLE;
2415 if (sourcefile->buf != NULL) {
2416 efree(sourcefile->buf);
2417 sourcefile->buf = NULL;
2418 sourcefile->lexptr_begin = NULL;
2421 sourcefile = sourcefile->next;
2422 if (sourcefile == srcfiles)
2425 if (sourcefile->lexptr_begin != NULL) {
2426 /* resume reading from already opened file (postponed to process '@include') */
2427 lexptr = sourcefile->lexptr;
2428 lexend = sourcefile->lexend;
2429 lasttok = sourcefile->lasttok;
2430 lexptr_begin = sourcefile->lexptr_begin;
2431 lexeme = sourcefile->lexeme;
2432 sourceline = sourcefile->srclines;
2433 source = sourcefile->src;
2442 /* get_src_buf --- read the next buffer of source program */
2454 * No argument prototype on readfunc on purpose,
2455 * avoids problems with some ancient systems where
2456 * the types of arguments to read() aren't up to date.
2458 static ssize_t (*readfunc)() = 0;
2460 if (readfunc == NULL) {
2461 char *cp = getenv("AWKREADFUNC");
2463 /* If necessary, one day, test value for different functions. */
2466 * cast is to remove warnings on systems with
2467 * different return types for read.
2469 readfunc = ( ssize_t(*)() ) read;
2471 readfunc = read_one_line;
2475 if (sourcefile == srcfiles)
2478 if (sourcefile->stype == SRC_CMDLINE) {
2479 if (sourcefile->bufsize == 0) {
2480 sourcefile->bufsize = strlen(sourcefile->src);
2481 lexptr = lexptr_begin = lexeme = sourcefile->src;
2482 lexend = lexptr + sourcefile->bufsize;
2484 if (sourcefile->bufsize == 0) {
2486 * Yet Another Special case:
2487 * gawk '' /path/name
2490 static short warned = FALSE;
2492 if (do_lint && ! warned) {
2494 lintwarn(_("empty program text on command line"));
2498 } else if (sourcefile->buf == NULL && *(lexptr-1) != '\n') {
2500 * The following goop is to ensure that the source
2501 * ends with a newline and that the entire current
2502 * line is available for error messages.
2507 offset = lexptr - lexeme;
2508 for (scan = lexeme; scan > lexptr_begin; scan--)
2509 if (*scan == '\n') {
2513 savelen = lexptr - scan;
2514 emalloc(buf, char *, savelen + 1, "get_src_buf");
2515 memcpy(buf, scan, savelen);
2517 lexptr = buf + savelen;
2519 lexeme = lexptr - offset;
2521 lexend = lexptr + 1;
2522 sourcefile->buf = buf;
2528 if (sourcefile->fd <= INVALID_HANDLE) {
2532 source = sourcefile->src;
2535 fd = srcopen(sourcefile);
2536 if (fd <= INVALID_HANDLE) {
2539 /* suppress file name and line no. in error mesg */
2542 error(_("can't open source file `%s' for reading (%s)"),
2543 in, strerror(errno));
2546 return sourcefile->src;
2549 sourcefile->fd = fd;
2550 l = optimal_bufsize(fd, &sbuf);
2552 * Make sure that something silly like
2553 * AWKBUFSIZE=8 make check
2556 #define A_DECENT_BUFFER_SIZE 128
2557 if (l < A_DECENT_BUFFER_SIZE)
2558 l = A_DECENT_BUFFER_SIZE;
2559 #undef A_DECENT_BUFFER_SIZE
2560 sourcefile->bufsize = l;
2562 emalloc(sourcefile->buf, char *, sourcefile->bufsize, "get_src_buf");
2563 lexptr = lexptr_begin = lexeme = sourcefile->buf;
2569 * Here, we retain the current source line in the beginning of the buffer.
2572 for (scan = lexeme; scan > lexptr_begin; scan--)
2573 if (*scan == '\n') {
2578 savelen = lexptr - scan;
2579 offset = lexptr - lexeme;
2583 * Need to make sure we have room left for reading new text;
2584 * grow the buffer (by doubling, an arbitrary choice), if the retained line
2585 * takes up more than a certain percentage (50%, again an arbitrary figure)
2586 * of the available space.
2589 if (savelen > sourcefile->bufsize / 2) { /* long line or token */
2590 sourcefile->bufsize *= 2;
2591 erealloc(sourcefile->buf, char *, sourcefile->bufsize, "get_src_buf");
2592 scan = sourcefile->buf + (scan - lexptr_begin);
2593 lexptr_begin = sourcefile->buf;
2596 thisline = lexptr_begin;
2597 memmove(thisline, scan, savelen);
2598 lexptr = thisline + savelen;
2599 lexeme = lexptr - offset;
2602 lexptr = lexeme = lexptr_begin;
2607 n = (*readfunc)(sourcefile->fd, lexptr, sourcefile->bufsize - savelen);
2609 error(_("can't read sourcefile `%s' (%s)"),
2610 source, strerror(errno));
2614 lexend = lexptr + n;
2616 static short warned = FALSE;
2617 if (do_lint && newfile && ! warned){
2620 lintwarn(_("source file `%s' is empty"), source);
2625 return sourcefile->buf;
2628 /* tokadd --- add a character to the token buffer */
2630 #define tokadd(x) (*tok++ = (x), tok == tokend ? tokexpand() : tok)
2632 /* tokexpand --- grow the token buffer */
2640 if (tokstart != NULL) {
2641 tokoffset = tok - tokstart;
2643 erealloc(tokstart, char *, toksize, "tokexpand");
2644 tok = tokstart + tokoffset;
2647 emalloc(tokstart, char *, toksize, "tokexpand");
2650 tokend = tokstart + toksize;
2654 /* nextc --- get the next input character */
2661 if (gawk_mb_cur_max > 1) {
2665 if (lexptr == NULL || lexptr >= lexend) {
2671 /* Update the buffer index. */
2672 cur_ring_idx = (cur_ring_idx == RING_BUFFER_SIZE - 1)? 0 :
2675 /* Did we already check the current character? */
2676 if (cur_char_ring[cur_ring_idx] == 0) {
2677 /* No, we need to check the next character on the buffer. */
2678 int idx, work_ring_idx = cur_ring_idx;
2679 mbstate_t tmp_state;
2682 for (idx = 0 ; lexptr + idx < lexend ; idx++) {
2683 tmp_state = cur_mbstate;
2684 mbclen = mbrlen(lexptr, idx + 1, &tmp_state);
2686 if (mbclen == 1 || mbclen == (size_t)-1 || mbclen == 0) {
2687 /* It is a singlebyte character, non-complete multibyte
2688 character or EOF. We treat it as a singlebyte
2690 cur_char_ring[work_ring_idx] = 1;
2692 } else if (mbclen == (size_t)-2) {
2693 /* It is not a complete multibyte character. */
2694 cur_char_ring[work_ring_idx] = idx + 1;
2697 cur_char_ring[work_ring_idx] = mbclen;
2700 work_ring_idx = (work_ring_idx == RING_BUFFER_SIZE - 1)?
2701 0 : work_ring_idx + 1;
2703 cur_mbstate = tmp_state;
2705 /* Put a mark on the position on which we write next character. */
2706 work_ring_idx = (work_ring_idx == RING_BUFFER_SIZE - 1)?
2707 0 : work_ring_idx + 1;
2708 cur_char_ring[work_ring_idx] = 0;
2711 return (int) (unsigned char) *lexptr++;
2716 if (lexptr && lexptr < lexend)
2717 return ((int) (unsigned char) *lexptr++);
2718 } while (get_src_buf());
2723 #else /* MBS_SUPPORT */
2731 if (lexptr && lexptr < lexend)
2732 return ((int) (unsigned char) *lexptr++);
2733 } while (get_src_buf());
2737 #endif /* MBS_SUPPORT */
2739 /* pushback --- push a character back on the input */
2745 if (gawk_mb_cur_max > 1)
2746 cur_ring_idx = (cur_ring_idx == 0)? RING_BUFFER_SIZE - 1 :
2749 (! lexeof && lexptr && lexptr > lexptr_begin ? lexptr-- : lexptr);
2753 /* allow_newline --- allow newline after &&, ||, ? and : */
2762 if (c == END_FILE) {
2767 while ((c = nextc()) != '\n' && c != END_FILE)
2769 if (c == END_FILE) {
2783 /* newline_eof --- return newline or EOF as needed and adjust variables */
2786 * This routine used to be a macro, however GCC 4.6.2 warned about
2787 * the result of a computation not being used. Converting to a function
2788 * removes the warnings.
2791 static int newline_eof()
2793 /* NB: a newline at end does not start a source line. */
2794 if (lasttok != NEWLINE) {
2796 if (do_lint && ! eof_warned) {
2797 lintwarn(_("source file does not end in newline"));
2809 /* yylex --- Read the input and turn it into tokens. */
2815 int seen_e = FALSE; /* These are for numbers */
2816 int seen_point = FALSE;
2817 int esc_seen; /* for literal strings */
2819 static int did_newline = FALSE;
2822 int intlstr = FALSE;
2824 #define GET_INSTRUCTION(op) bcalloc(op, 1, sourceline)
2826 #define NEWLINE_EOF newline_eof()
2828 yylval = (INSTRUCTION *) NULL;
2829 if (lasttok == SUBSCRIPT) {
2834 if (lasttok == LEX_EOF) /* error earlier in current source, must give up !! */
2841 return lasttok = NEWLINE_EOF;
2846 * added for OS/2's extproc feature of cmd.exe
2847 * (like #! in BSD sh)
2849 if (strncasecmp(lexptr, "extproc ", 8) == 0) {
2850 while (*lexptr && *lexptr != '\n')
2858 int in_brack = 0; /* count brackets, [[:alnum:]] allowed */
2860 * Counting brackets is non-trivial. [[] is ok,
2861 * and so is [\]], with a point being that /[/]/ as a regexp
2862 * constant has to work.
2864 * Do not count [ or ] if either one is preceded by a \.
2865 * A `[' should be counted if
2866 * a) it is the first one so far (in_brack == 0)
2867 * b) it is the `[' in `[:'
2868 * A ']' should be counted if not preceded by a \, since
2869 * it is either closing `:]' or just a plain list.
2870 * According to POSIX, []] is how you put a ] into a set.
2871 * Try to handle that too.
2873 * The code for \ handles \[ and \].
2876 want_regexp = FALSE;
2881 if (gawk_mb_cur_max == 1 || nextc_is_1stbyte) switch (c) {
2883 /* one day check for `.' and `=' too */
2884 if (nextc() == ':' || in_brack == 0)
2889 if (tokstart[0] == '['
2890 && (tok == tokstart + 1
2891 || (tok == tokstart + 2
2892 && tokstart[1] == '^')))
2898 if ((c = nextc()) == END_FILE) {
2900 yyerror(_("unterminated regexp ends with `\\' at end of file"));
2901 goto end_regexp; /* kludge */
2902 } else if (c == '\n') {
2911 case '/': /* end of the regexp */
2915 yylval = GET_INSTRUCTION(Op_token);
2916 yylval->lextok = estrdup(tokstart, tok - tokstart);
2921 if (peek == 'i' || peek == 's') {
2924 _("%s: %d: tawk regex modifier `/.../%c' doesn't work in gawk"),
2925 source, sourceline, peek);
2928 _("tawk regex modifier `/.../%c' doesn't work in gawk"),
2932 return lasttok = REGEXP;
2935 yyerror(_("unterminated regexp"));
2936 goto end_regexp; /* kludge */
2939 yyerror(_("unterminated regexp at end of file"));
2940 goto end_regexp; /* kludge */
2947 /* skipping \r is a hack, but windows is just too pervasive. sigh. */
2948 while ((c = nextc()) == ' ' || c == '\t' || c == '\r')
2951 lexeme = lexptr ? lexptr - 1 : lexptr;
2956 if (gawk_mb_cur_max == 1 || nextc_is_1stbyte)
2963 return lasttok = NEWLINE_EOF;
2967 return lasttok = NEWLINE;
2969 case '#': /* it's a comment */
2970 while ((c = nextc()) != '\n') {
2972 return lasttok = NEWLINE_EOF;
2975 return lasttok = NEWLINE;
2978 return lasttok = '@';
2981 #ifdef RELAXED_CONTINUATION
2983 * This code puports to allow comments and/or whitespace
2984 * after the `\' at the end of a line used for continuation.
2985 * Use it at your own risk. We think it's a bad idea, which
2986 * is why it's not on by default.
2988 if (! do_traditional) {
2989 /* strip trailing white-space and/or comment */
2990 while ((c = nextc()) == ' ' || c == '\t' || c == '\r')
2993 static short warned = FALSE;
2995 if (do_lint && ! warned) {
2998 _("use of `\\ #...' line continuation is not portable"));
3000 while ((c = nextc()) != '\n')
3006 #endif /* RELAXED_CONTINUATION */
3008 if (c == '\r') /* allow MS-DOS files. bleah */
3014 yyerror(_("backslash not last character on line"));
3015 return lasttok = LEX_EOF;
3021 yylval = GET_INSTRUCTION(Op_cond_exp);
3027 * in_parens is undefined unless we are parsing a print
3028 * statement (in_print), but why bother with a check?
3038 yylval = GET_INSTRUCTION(Op_field_spec);
3041 if (++in_braces == 1)
3042 firstline = sourceline;
3051 yylval = GET_INSTRUCTION(Op_sub_array);
3054 yylval = GET_INSTRUCTION(Op_subscript);
3055 lasttok = SUBSCRIPT; /* end of subscripts */
3060 if ((c = nextc()) == '=') {
3061 yylval = GET_INSTRUCTION(Op_assign_times);
3062 return lasttok = ASSIGNOP;
3063 } else if (do_posix) {
3065 yylval = GET_INSTRUCTION(Op_times);
3066 return lasttok = '*';
3067 } else if (c == '*') {
3068 /* make ** and **= aliases for ^ and ^= */
3069 static int did_warn_op = FALSE, did_warn_assgn = FALSE;
3071 if (nextc() == '=') {
3072 if (! did_warn_assgn) {
3073 did_warn_assgn = TRUE;
3075 lintwarn(_("POSIX does not allow operator `**='"));
3077 warning(_("old awk does not support operator `**='"));
3079 yylval = GET_INSTRUCTION(Op_assign_exp);
3083 if (! did_warn_op) {
3086 lintwarn(_("POSIX does not allow operator `**'"));
3088 warning(_("old awk does not support operator `**'"));
3090 yylval = GET_INSTRUCTION(Op_exp);
3091 return lasttok = '^';
3095 yylval = GET_INSTRUCTION(Op_times);
3096 return lasttok = '*';
3099 if (nextc() == '=') {
3101 return lasttok = SLASH_BEFORE_EQUAL;
3104 yylval = GET_INSTRUCTION(Op_quotient);
3105 return lasttok = '/';
3108 if (nextc() == '=') {
3109 yylval = GET_INSTRUCTION(Op_assign_mod);
3110 return lasttok = ASSIGNOP;
3113 yylval = GET_INSTRUCTION(Op_mod);
3114 return lasttok = '%';
3118 static int did_warn_op = FALSE, did_warn_assgn = FALSE;
3120 if (nextc() == '=') {
3121 if (do_lint_old && ! did_warn_assgn) {
3122 did_warn_assgn = TRUE;
3123 warning(_("operator `^=' is not supported in old awk"));
3125 yylval = GET_INSTRUCTION(Op_assign_exp);
3126 return lasttok = ASSIGNOP;
3129 if (do_lint_old && ! did_warn_op) {
3131 warning(_("operator `^' is not supported in old awk"));
3133 yylval = GET_INSTRUCTION(Op_exp);
3134 return lasttok = '^';
3138 if ((c = nextc()) == '=') {
3139 yylval = GET_INSTRUCTION(Op_assign_plus);
3140 return lasttok = ASSIGNOP;
3143 yylval = GET_INSTRUCTION(Op_symbol);
3144 return lasttok = INCREMENT;
3147 yylval = GET_INSTRUCTION(Op_plus);
3148 return lasttok = '+';
3151 if ((c = nextc()) == '=') {
3152 yylval = GET_INSTRUCTION(Op_notequal);
3153 return lasttok = RELOP;
3156 yylval = GET_INSTRUCTION(Op_nomatch);
3157 return lasttok = MATCHOP;
3160 yylval = GET_INSTRUCTION(Op_symbol);
3161 return lasttok = '!';
3164 if (nextc() == '=') {
3165 yylval = GET_INSTRUCTION(Op_leq);
3166 return lasttok = RELOP;
3168 yylval = GET_INSTRUCTION(Op_less);
3170 return lasttok = '<';
3173 if (nextc() == '=') {
3174 yylval = GET_INSTRUCTION(Op_equal);
3175 return lasttok = RELOP;
3177 yylval = GET_INSTRUCTION(Op_assign);
3179 return lasttok = ASSIGN;
3182 if ((c = nextc()) == '=') {
3183 yylval = GET_INSTRUCTION(Op_geq);
3184 return lasttok = RELOP;
3185 } else if (c == '>') {
3186 yylval = GET_INSTRUCTION(Op_symbol);
3187 yylval->redir_type = redirect_append;
3188 return lasttok = IO_OUT;
3191 if (in_print && in_parens == 0) {
3192 yylval = GET_INSTRUCTION(Op_symbol);
3193 yylval->redir_type = redirect_output;
3194 return lasttok = IO_OUT;
3196 yylval = GET_INSTRUCTION(Op_greater);
3197 return lasttok = '>';
3200 yylval = GET_INSTRUCTION(Op_match);
3201 return lasttok = MATCHOP;
3205 * Added did newline stuff. Easier than
3206 * hacking the grammar.
3209 did_newline = FALSE;
3210 if (--in_braces == 0)
3211 lastline = sourceline;
3215 --lexptr; /* pick up } next time */
3216 return lasttok = NEWLINE;
3221 while ((c = nextc()) != '"') {
3224 yyerror(_("unterminated string"));
3225 return lasttok = LEX_EOF;
3227 if ((gawk_mb_cur_max == 1 || nextc_is_1stbyte) &&
3235 if (! want_source || c != '"')
3238 if (c == END_FILE) {
3240 yyerror(_("unterminated string"));
3241 return lasttok = LEX_EOF;
3245 yylval = GET_INSTRUCTION(Op_token);
3247 yylval->lextok = estrdup(tokstart, tok - tokstart);
3248 return lasttok = FILENAME;
3251 yylval->opcode = Op_push_i;
3252 yylval->memory = make_str_node(tokstart,
3253 tok - tokstart, esc_seen ? SCAN : 0);
3254 yylval->memory->flags &= ~MALLOC;
3255 yylval->memory->flags |= PERM;
3257 yylval->memory->flags |= INTLSTR;
3260 dumpintlstr(yylval->memory->stptr, yylval->memory->stlen);
3262 return lasttok = YSTRING;
3265 if ((c = nextc()) == '=') {
3266 yylval = GET_INSTRUCTION(Op_assign_minus);
3267 return lasttok = ASSIGNOP;
3270 yylval = GET_INSTRUCTION(Op_symbol);
3271 return lasttok = DECREMENT;
3274 yylval = GET_INSTRUCTION(Op_minus);
3275 return lasttok = '-';
3281 return lasttok = '.';
3297 int gotnumber = FALSE;
3305 if (tok == tokstart + 2) {
3308 if (isxdigit(peek)) {
3310 pushback(); /* following digit */
3312 pushback(); /* x or X */
3318 /* period ends exponent part of floating point number */
3319 if (seen_point || seen_e) {
3334 if ((c = nextc()) == '-' || c == '+') {
3341 pushback(); /* non-digit after + or - */
3342 pushback(); /* + or - */
3343 pushback(); /* e or E */
3345 } else if (! isdigit(c)) {
3346 pushback(); /* character after e or E */
3347 pushback(); /* e or E */
3349 pushback(); /* digit */
3362 if (do_traditional || ! inhex)
3387 yylval = GET_INSTRUCTION(Op_push_i);
3388 if (! do_traditional && isnondecimal(tokstart, FALSE)) {
3390 if (isdigit((unsigned char) tokstart[1])) /* not an 'x' or 'X' */
3391 lintwarn("numeric constant `%.*s' treated as octal",
3392 (int) strlen(tokstart)-1, tokstart);
3393 else if (tokstart[1] == 'x' || tokstart[1] == 'X')
3394 lintwarn("numeric constant `%.*s' treated as hexadecimal",
3395 (int) strlen(tokstart)-1, tokstart);
3397 yylval->memory = mk_number(nondec2awknum(tokstart, strlen(tokstart)),
3398 PERM|NUMCUR|NUMBER);
3400 yylval->memory = mk_number(atof(tokstart), PERM|NUMCUR|NUMBER);
3401 return lasttok = YNUMBER;
3404 if ((c = nextc()) == '&') {
3405 yylval = GET_INSTRUCTION(Op_and);
3407 return lasttok = LEX_AND;
3410 yylval = GET_INSTRUCTION(Op_symbol);
3411 return lasttok = '&';
3414 if ((c = nextc()) == '|') {
3415 yylval = GET_INSTRUCTION(Op_or);
3417 return lasttok = LEX_OR;
3418 } else if (! do_traditional && c == '&') {
3419 yylval = GET_INSTRUCTION(Op_symbol);
3420 yylval->redir_type = redirect_twoway;
3421 return lasttok = (in_print && in_parens == 0 ? IO_OUT : IO_IN);
3424 if (in_print && in_parens == 0) {
3425 yylval = GET_INSTRUCTION(Op_symbol);
3426 yylval->redir_type = redirect_pipe;
3427 return lasttok = IO_OUT;
3429 yylval = GET_INSTRUCTION(Op_symbol);
3430 yylval->redir_type = redirect_pipein;
3431 return lasttok = IO_IN;
3435 if (c != '_' && ! isalpha(c)) {
3436 yyerror(_("invalid char '%c' in expression"), c);
3437 return lasttok = LEX_EOF;
3441 * Lots of fog here. Consider:
3443 * print "xyzzy"$_"foo"
3445 * Without the check for ` lasttok != '$' ', this is parsed as
3447 * print "xxyzz" $(_"foo")
3449 * With the check, it is "correctly" parsed as three
3450 * string concatenations. Sigh. This seems to be
3451 * "more correct", but this is definitely one of those
3452 * occasions where the interactions are funny.
3454 if (! do_traditional && c == '_' && lasttok != '$') {
3455 if ((c = nextc()) == '"') {
3463 /* it's some type of name-type-thing. Find its length. */
3465 while (c != END_FILE && is_identchar(c)) {
3472 /* See if it is a special token. */
3473 if ((mid = check_special(tokstart)) >= 0) {
3474 static int warntab[sizeof(tokentab) / sizeof(tokentab[0])];
3475 int class = tokentab[mid].class;
3477 if ((class == LEX_INCLUDE || class == LEX_EVAL)
3482 if ((tokentab[mid].flags & GAWKX) && ! (warntab[mid] & GAWKX)) {
3483 lintwarn(_("`%s' is a gawk extension"),
3484 tokentab[mid].operator);
3485 warntab[mid] |= GAWKX;
3487 if ((tokentab[mid].flags & RESX) && ! (warntab[mid] & RESX)) {
3488 lintwarn(_("`%s' is a Bell Labs extension"),
3489 tokentab[mid].operator);
3490 warntab[mid] |= RESX;
3492 if ((tokentab[mid].flags & NOT_POSIX) && ! (warntab[mid] & NOT_POSIX)) {
3493 lintwarn(_("POSIX does not allow `%s'"),
3494 tokentab[mid].operator);
3495 warntab[mid] |= NOT_POSIX;
3498 if (do_lint_old && (tokentab[mid].flags & NOT_OLD)
3499 && ! (warntab[mid] & NOT_OLD)
3501 warning(_("`%s' is not supported in old awk"),
3502 tokentab[mid].operator);
3503 warntab[mid] |= NOT_OLD;
3506 if (tokentab[mid].flags & BREAK)
3508 if (tokentab[mid].flags & CONTINUE)
3516 if (in_main_context())
3518 emalloc(tokkey, char *, tok - tokstart + 1, "yylex");
3520 memcpy(tokkey + 1, tokstart, tok - tokstart);
3521 yylval = GET_INSTRUCTION(Op_token);
3522 yylval->lextok = tokkey;
3530 yylval = bcalloc(tokentab[mid].value, 3, sourceline);
3538 return lasttok = class;
3541 yylval = bcalloc(tokentab[mid].value, 2, sourceline);
3545 yylval = GET_INSTRUCTION(tokentab[mid].value);
3546 if (class == LEX_BUILTIN || class == LEX_LENGTH)
3547 yylval->builtin_idx = mid;
3550 return lasttok = class;
3553 tokkey = estrdup(tokstart, tok - tokstart);
3554 if (*lexptr == '(') {
3555 yylval = bcalloc(Op_token, 2, sourceline);
3556 yylval->lextok = tokkey;
3557 return lasttok = FUNC_CALL;
3559 static short goto_warned = FALSE;
3561 yylval = GET_INSTRUCTION(Op_token);
3562 yylval->lextok = tokkey;
3564 #define SMART_ALECK 1
3565 if (SMART_ALECK && do_lint
3566 && ! goto_warned && strcasecmp(tokkey, "goto") == 0) {
3568 lintwarn(_("`goto' considered harmful!\n"));
3570 return lasttok = NAME;
3573 #undef GET_INSTRUCTION
3577 /* mk_symbol --- allocates a symbol for the symbol table. */
3580 mk_symbol(NODETYPE type, NODE *value)
3589 r->parent_array = NULL;
3590 r->var_assign = (Func_ptr) 0;
3594 /* snode --- instructions for builtin functions. Checks for arg. count
3595 and supplies defaults where possible. */
3597 static INSTRUCTION *
3598 snode(INSTRUCTION *subn, INSTRUCTION *r)
3605 int idx = r->builtin_idx;
3609 for (tp = subn->nexti; tp; tp = tp->nexti) {
3616 /* check against how many args. are allowed for this builtin */
3617 args_allowed = tokentab[idx].flags & ARGS;
3618 if (args_allowed && (args_allowed & A(nexp)) == 0) {
3619 yyerror(_("%d is invalid as number of arguments for %s"),
3620 nexp, tokentab[idx].operator);
3624 /* special processing for sub, gsub and gensub */
3626 if (tokentab[idx].value == Op_sub_builtin) {
3627 const char *operator = tokentab[idx].operator;
3631 arg = subn->nexti; /* first arg list */
3632 (void) mk_rexp(arg);
3634 if (strcmp(operator, "gensub") != 0) {
3637 if (strcmp(operator, "gsub") == 0)
3638 r->sub_flags |= GSUB;
3640 arg = arg->lasti->nexti; /* 2nd arg list */
3644 expr = list_create(instruction(Op_push_i));
3645 expr->nexti->memory = mk_number((AWKNUM) 0.0, (PERM|NUMCUR|NUMBER));
3646 (void) mk_expression_list(subn,
3647 list_append(expr, instruction(Op_field_spec)));
3650 arg = arg->lasti->nexti; /* third arg list */
3652 if (ip->opcode == Op_push_i) {
3654 lintwarn(_("%s: string literal as last arg of substitute has no effect"),
3656 r->sub_flags |= LITERAL;
3658 if (make_assignable(ip) == NULL)
3659 yyerror(_("%s third parameter is not a changeable object"),
3662 ip->do_reference = TRUE;
3665 r->expr_count = count_expressions(&subn, FALSE);
3668 (void) list_append(subn, r);
3670 /* add after_assign code */
3671 if (ip->opcode == Op_push_lhs && ip->memory->type == Node_var && ip->memory->var_assign) {
3672 (void) list_append(subn, instruction(Op_var_assign));
3673 subn->lasti->assign_ctxt = Op_sub_builtin;
3674 subn->lasti->assign_var = ip->memory->var_assign;
3675 } else if (ip->opcode == Op_field_spec_lhs) {
3676 (void) list_append(subn, instruction(Op_field_assign));
3677 subn->lasti->assign_ctxt = Op_sub_builtin;
3678 subn->lasti->field_assign = (Func_ptr) 0;
3679 ip->target_assign = subn->lasti;
3686 r->sub_flags |= GENSUB;
3688 ip = instruction(Op_push_i);
3689 ip->memory = mk_number((AWKNUM) 0.0, (PERM|NUMCUR|NUMBER));
3690 (void) mk_expression_list(subn,
3691 list_append(list_create(ip), instruction(Op_field_spec)));
3694 r->expr_count = count_expressions(&subn, FALSE);
3695 return list_append(subn, r);
3699 r->builtin = tokentab[idx].ptr;
3701 /* special case processing for a few builtins */
3703 if (r->builtin == do_length) {
3705 /* no args. Use $0 */
3709 list = list_create(r);
3710 (void) list_prepend(list, instruction(Op_field_spec));
3711 (void) list_prepend(list, instruction(Op_push_i));
3712 list->nexti->memory = mk_number((AWKNUM) 0.0, (PERM|NUMCUR|NUMBER));
3716 if (arg->nexti == arg->lasti && arg->nexti->opcode == Op_push)
3717 arg->nexti->opcode = Op_push_arg; /* argument may be array */
3719 } else if (r->builtin == do_isarray) {
3721 if (arg->nexti == arg->lasti && arg->nexti->opcode == Op_push)
3722 arg->nexti->opcode = Op_push_arg; /* argument may be array */
3723 } else if (r->builtin == do_match) {
3724 static short warned = FALSE;
3726 arg = subn->nexti->lasti->nexti; /* 2nd arg list */
3727 (void) mk_rexp(arg);
3729 if (nexp == 3) { /* 3rd argument there */
3730 if (do_lint && ! warned) {
3732 lintwarn(_("match: third argument is a gawk extension"));
3734 if (do_traditional) {
3735 yyerror(_("match: third argument is a gawk extension"));
3739 arg = arg->lasti->nexti; /* third arg list */
3741 if (/*ip == arg->nexti && */ ip->opcode == Op_push)
3742 ip->opcode = Op_push_array;
3744 } else if (r->builtin == do_split) {
3745 arg = subn->nexti->lasti->nexti; /* 2nd arg list */
3747 if (ip->opcode == Op_push)
3748 ip->opcode = Op_push_array;
3751 expr = list_create(instruction(Op_push));
3752 expr->nexti->memory = FS_node;
3753 (void) mk_expression_list(subn, expr);
3755 arg = arg->lasti->nexti;
3758 n->re_flags |= FS_DFLT;
3760 arg = arg->lasti->nexti;
3762 if (ip->opcode == Op_push)
3763 ip->opcode = Op_push_array;
3765 } else if (r->builtin == do_patsplit) {
3766 arg = subn->nexti->lasti->nexti; /* 2nd arg list */
3768 if (ip->opcode == Op_push)
3769 ip->opcode = Op_push_array;
3772 expr = list_create(instruction(Op_push));
3773 expr->nexti->memory = FPAT_node;
3774 (void) mk_expression_list(subn, expr);
3776 arg = arg->lasti->nexti;
3779 arg = arg->lasti->nexti;
3781 if (ip->opcode == Op_push)
3782 ip->opcode = Op_push_array;
3784 } else if (r->builtin == do_close) {
3785 static short warned = FALSE;
3787 if (do_lint && ! warned) {
3789 lintwarn(_("close: second argument is a gawk extension"));
3791 if (do_traditional) {
3792 yyerror(_("close: second argument is a gawk extension"));
3796 } else if (do_intl /* --gen-po */
3797 && r->builtin == do_dcgettext /* dcgettext(...) */
3798 && subn->nexti->lasti->opcode == Op_push_i /* 1st arg is constant */
3799 && (subn->nexti->lasti->memory->flags & STRCUR) != 0) { /* it's a string constant */
3800 /* ala xgettext, dcgettext("some string" ...) dumps the string */
3801 NODE *str = subn->nexti->lasti->memory;
3803 if ((str->flags & INTLSTR) != 0)
3804 warning(_("use of dcgettext(_\"...\") is incorrect: remove leading underscore"));
3805 /* don't dump it, the lexer already did */
3807 dumpintlstr(str->stptr, str->stlen);
3808 } else if (do_intl /* --gen-po */
3809 && r->builtin == do_dcngettext /* dcngettext(...) */
3810 && subn->nexti->lasti->opcode == Op_push_i /* 1st arg is constant */
3811 && (subn->nexti->lasti->memory->flags & STRCUR) != 0 /* it's a string constant */
3812 && subn->nexti->lasti->nexti->lasti->opcode == Op_push_i /* 2nd arg is constant too */
3813 && (subn->nexti->lasti->nexti->lasti->memory->flags & STRCUR) != 0) { /* it's a string constant */
3814 /* ala xgettext, dcngettext("some string", "some plural" ...) dumps the string */
3815 NODE *str1 = subn->nexti->lasti->memory;
3816 NODE *str2 = subn->nexti->lasti->nexti->lasti->memory;
3818 if (((str1->flags | str2->flags) & INTLSTR) != 0)
3819 warning(_("use of dcngettext(_\"...\") is incorrect: remove leading underscore"));
3821 dumpintlstr2(str1->stptr, str1->stlen, str2->stptr, str2->stlen);
3822 } else if (r->builtin == do_asort || r->builtin == do_asorti) {
3823 arg = subn->nexti; /* 1st arg list */
3825 if (ip->opcode == Op_push)
3826 ip->opcode = Op_push_array;
3830 if (ip->opcode == Op_push)
3831 ip->opcode = Op_push_array;
3835 else if (r->builtin == do_adump) {
3836 ip = subn->nexti->lasti;
3837 if (ip->opcode == Op_push)
3838 ip->opcode = Op_push_array;
3843 r->expr_count = count_expressions(&subn, FALSE);
3844 return list_append(subn, r);
3848 return list_create(r);
3851 /* append_param --- append PNAME to the list of parameters
3852 * for the current function.
3856 append_param(char *pname)
3858 static NODE *savetail = NULL;
3861 p = make_param(pname);
3862 if (func_params == NULL) {
3865 } else if (savetail != NULL) {
3866 savetail->rnode = p;
3871 /* dup_parms --- return TRUE if there are duplicate parameters */
3874 dup_parms(INSTRUCTION *fp, NODE *func)
3877 const char *fname, **names;
3878 int count, i, j, dups;
3881 if (func == NULL) /* error earlier */
3884 fname = func->param;
3885 count = func->param_cnt;
3886 params = func->rnode;
3888 if (count == 0) /* no args, no problem */
3891 if (params == NULL) /* error earlier */
3894 emalloc(names, const char **, count * sizeof(char *), "dup_parms");
3897 for (np = params; np != NULL; np = np->rnode) {
3898 if (np->param == NULL) { /* error earlier, give up, go home */
3902 names[i++] = np->param;
3906 for (i = 1; i < count; i++) {
3907 for (j = 0; j < i; j++) {
3908 if (strcmp(names[i], names[j]) == 0) {
3910 error_ln(fp->source_line,
3911 _("function `%s': parameter #%d, `%s', duplicates parameter #%d"),
3912 fname, i + 1, names[j], j+1);
3918 return (dups > 0 ? TRUE : FALSE);
3921 /* parms_shadow --- check if parameters shadow globals */
3924 parms_shadow(INSTRUCTION *pc, int *shadow)
3931 func = pc->func_body;
3932 fname = func->lnode->param;
3934 #if 0 /* can't happen, already exited if error ? */
3935 if (fname == NULL || func == NULL) /* error earlier */
3939 pcount = func->lnode->param_cnt;
3941 if (pcount == 0) /* no args, no problem */
3944 source = pc->source_file;
3945 sourceline = pc->source_line;
3947 * Use warning() and not lintwarn() so that can warn
3948 * about all shadowed parameters.
3950 for (i = 0; i < pcount; i++) {
3951 if (lookup(func->parmlist[i]) != NULL) {
3953 _("function `%s': parameter `%s' shadows global variable"),
3954 fname, func->parmlist[i]);
3966 * Install a name in the symbol table, even if it is already there.
3967 * Caller must check against redefinition if that is desired.
3972 install_symbol(char *name, NODE *value)
3979 (*install_func)(name);
3983 bucket = hash(name, len, (unsigned long) HASHSIZE, NULL);
3985 hp->type = Node_hashnode;
3986 hp->hnext = variables[bucket];
3987 variables[bucket] = hp;
3991 hp->hvalue->vname = name;
3995 /* lookup --- find the most recent hash node for name installed by install_symbol */
3998 lookup(const char *name)
4004 for (bucket = variables[hash(name, len, (unsigned long) HASHSIZE, NULL)];
4005 bucket != NULL; bucket = bucket->hnext)
4006 if (bucket->hlength == len && strncmp(bucket->hname, name, len) == 0)
4007 return bucket->hvalue;
4011 /* sym_comp --- compare two symbol (variable or function) names */
4014 sym_comp(const void *v1, const void *v2)
4016 const NODE *const *npp1, *const *npp2;
4017 const NODE *n1, *n2;
4020 npp1 = (const NODE *const *) v1;
4021 npp2 = (const NODE *const *) v2;
4025 if (n1->hlength > n2->hlength)
4026 minlen = n1->hlength;
4028 minlen = n2->hlength;
4030 return strncmp(n1->hname, n2->hname, minlen);
4033 /* valinfo --- dump var info */
4036 valinfo(NODE *n, int (*print_func)(FILE *, const char *, ...), FILE *fp)
4038 if (n == Nnull_string)
4039 print_func(fp, "uninitialized scalar\n");
4040 else if (n->flags & STRING) {
4041 pp_string_fp(print_func, fp, n->stptr, n->stlen, '"', FALSE);
4042 print_func(fp, "\n");
4043 } else if (n->flags & NUMBER)
4044 print_func(fp, "%.17g\n", n->numbr);
4045 else if (n->flags & STRCUR) {
4046 pp_string_fp(print_func, fp, n->stptr, n->stlen, '"', FALSE);
4047 print_func(fp, "\n");
4048 } else if (n->flags & NUMCUR)
4049 print_func(fp, "%.17g\n", n->numbr);
4051 print_func(fp, "?? flags %s\n", flags2str(n->flags));
4054 /* get_varlist --- list of global variables */
4063 emalloc(table, NODE **, (var_count + 1) * sizeof(NODE *), "get_varlist");
4064 update_global_values();
4065 for (i = j = 0; i < HASHSIZE; i++)
4066 for (p = variables[i]; p != NULL; p = p->hnext)
4068 assert(j == var_count);
4071 qsort(table, j, sizeof(NODE *), sym_comp);
4077 /* print_vars --- print names and values of global variables */
4080 print_vars(int (*print_func)(FILE *, const char *, ...), FILE *fp)
4086 table = get_varlist();
4087 for (i = 0; (p = table[i]) != NULL; i++) {
4088 if (p->hvalue->type == Node_func)
4090 print_func(fp, "%.*s: ", (int) p->hlength, p->hname);
4091 if (p->hvalue->type == Node_var_array)
4092 print_func(fp, "array, %ld elements\n", p->hvalue->table_size);
4093 else if (p->hvalue->type == Node_var_new)
4094 print_func(fp, "untyped variable\n");
4095 else if (p->hvalue->type == Node_var)
4096 valinfo(p->hvalue->var_value, print_func, fp);
4101 /* dump_vars --- dump the symbol table */
4104 dump_vars(const char *fname)
4110 else if ((fp = fopen(fname, "w")) == NULL) {
4111 warning(_("could not open `%s' for writing (%s)"), fname, strerror(errno));
4112 warning(_("sending variable list to standard error"));
4116 print_vars(fprintf, fp);
4117 if (fp != stderr && fclose(fp) != 0)
4118 warning(_("%s: close failed (%s)"), fname, strerror(errno));
4121 /* release_all_vars --- free all variable memory */
4129 for (i = 0; i < HASHSIZE; i++) {
4130 for (p = variables[i]; p != NULL; p = next) {
4133 if (p->hvalue->type == Node_func)
4135 else if (p->hvalue->type == Node_var_array)
4136 assoc_clear(p->hvalue);
4137 else if (p->hvalue->type != Node_var_new)
4138 unref(p->hvalue->var_value);
4141 freenode(p->hvalue);
4147 /* dump_funcs --- print all functions */
4152 if (func_count <= 0)
4155 (void) foreach_func((int (*)(INSTRUCTION *, void *)) pp_func, TRUE, (void *) 0);
4158 /* shadow_funcs --- check all functions for parameters that shadow globals */
4163 static int calls = 0;
4166 if (func_count <= 0)
4170 fatal(_("shadow_funcs() called twice!"));
4172 (void) foreach_func((int (*)(INSTRUCTION *, void *)) parms_shadow, TRUE, &shadow);
4174 /* End with fatal if the user requested it. */
4175 if (shadow && lintfunc != warning)
4176 lintwarn(_("there were shadowed variables."));
4181 * check if name is already installed; if so, it had better have Null value,
4182 * in which case def is added as the value. Otherwise, install name with def
4185 * Extra work, build up and save a list of the parameter names in a table
4186 * and hang it off params->parmlist. This is used to set the `vname' field
4187 * of each function parameter during a function call. See eval.c.
4191 func_install(INSTRUCTION *func, INSTRUCTION *def)
4194 NODE *r, *n, *thisfunc, *hp;
4195 char **pnames = NULL;
4200 params = func_params;
4202 /* check for function foo(foo) { ... }. bleah. */
4203 for (n = params->rnode; n != NULL; n = n->rnode) {
4204 if (strcmp(n->param, params->param) == 0) {
4205 error_ln(func->source_line,
4206 _("function `%s': can't use function name as parameter name"), params->param);
4208 } else if (is_std_var(n->param)) {
4209 error_ln(func->source_line,
4210 _("function `%s': can't use special variable `%s' as a function parameter"),
4211 params->param, n->param);
4216 thisfunc = NULL; /* turn off warnings */
4218 fname = params->param;
4219 /* symbol table management */
4220 hp = remove_symbol(params->param); /* remove function name out of symbol table */
4225 error_ln(func->source_line,
4226 _("function name `%s' previously defined"), fname);
4228 } else if (fname == builtin_func) /* not a valid function name */
4231 /* add an implicit return at end;
4232 * also used by 'return' command in debugger
4235 (void) list_append(def, instruction(Op_push_i));
4236 def->lasti->memory = Nnull_string;
4237 (void) list_append(def, instruction(Op_K_return));
4240 (void) list_prepend(def, instruction(Op_exec_count));
4242 /* func->opcode is Op_func */
4243 (func + 1)->firsti = def->nexti;
4244 (func + 1)->lasti = def->lasti;
4245 (func + 2)->first_line = func->source_line;
4246 (func + 2)->last_line = lastline;
4248 func->nexti = def->nexti;
4251 (void) list_append(rule_list, func + 1); /* debugging */
4253 /* install the function */
4254 thisfunc = mk_symbol(Node_func, params);
4255 (void) install_symbol(fname, thisfunc);
4256 thisfunc->code_ptr = func;
4257 func->func_body = thisfunc;
4259 for (n = params->rnode; n != NULL; n = n->rnode)
4263 emalloc(pnames, char **, (pcount + 1) * sizeof(char *), "func_install");
4264 for (i = 0, n = params->rnode; i < pcount; i++, n = n->rnode)
4265 pnames[i] = n->param;
4266 pnames[pcount] = NULL;
4268 thisfunc->parmlist = pnames;
4270 /* update lint table info */
4271 func_use(fname, FUNC_DEFINE);
4273 func_count++; /* used in profiler / pretty printer */
4276 /* remove params from symbol table */
4277 pop_params(params->rnode);
4281 /* remove_symbol --- remove a variable from the symbol table */
4284 remove_symbol(char *name)
4286 NODE *bucket, **save;
4290 save = &(variables[hash(name, len, (unsigned long) HASHSIZE, NULL)]);
4291 for (bucket = *save; bucket != NULL; bucket = bucket->hnext) {
4292 if (len == bucket->hlength && strncmp(bucket->hname, name, len) == 0) {
4294 *save = bucket->hnext;
4297 save = &(bucket->hnext);
4302 /* pop_params --- remove list of function parameters from symbol table */
4305 * pop parameters out of the symbol table. do this in reverse order to
4306 * avoid reading freed memory if there were duplicated parameters.
4309 pop_params(NODE *params)
4314 pop_params(params->rnode);
4315 hp = remove_symbol(params->param);
4320 /* make_param --- make NAME into a function parameter */
4323 make_param(char *name)
4328 r->type = Node_param_list;
4330 r->param_cnt = param_counter++;
4331 return (install_symbol(name, r));
4334 static struct fdesc {
4339 } *ftable[HASHSIZE];
4341 /* func_use --- track uses and definitions of functions */
4344 func_use(const char *name, enum defref how)
4351 ind = hash(name, len, HASHSIZE, NULL);
4353 for (fp = ftable[ind]; fp != NULL; fp = fp->next) {
4354 if (strcmp(fp->name, name) == 0) {
4355 if (how == FUNC_DEFINE)
4363 /* not in the table, fall through to allocate a new one */
4365 emalloc(fp, struct fdesc *, sizeof(struct fdesc), "func_use");
4366 memset(fp, '\0', sizeof(struct fdesc));
4367 emalloc(fp->name, char *, len + 1, "func_use");
4368 strcpy(fp->name, name);
4369 if (how == FUNC_DEFINE)
4373 fp->next = ftable[ind];
4377 /* check_funcs --- verify functions that are called but not defined */
4382 struct fdesc *fp, *next;
4385 if (! in_main_context())
4388 for (i = 0; i < HASHSIZE; i++) {
4389 for (fp = ftable[i]; fp != NULL; fp = fp->next) {
4391 /* making this the default breaks old code. sigh. */
4392 if (fp->defined == 0) {
4394 _("function `%s' called but never defined"), fp->name);
4398 if (do_lint && fp->defined == 0)
4400 _("function `%s' called but never defined"), fp->name);
4402 if (do_lint && fp->used == 0) {
4403 lintwarn(_("function `%s' defined but never called directly"),
4410 /* now let's free all the memory */
4411 for (i = 0; i < HASHSIZE; i++) {
4412 for (fp = ftable[i]; fp != NULL; fp = next) {
4421 /* param_sanity --- look for parameters that are regexp constants */
4424 param_sanity(INSTRUCTION *arglist)
4426 INSTRUCTION *argl, *arg;
4429 if (arglist == NULL)
4431 for (argl = arglist->nexti; argl; ) {
4433 if (arg->opcode == Op_match_rec)
4434 warning_ln(arg->source_line,
4435 _("regexp constant for parameter #%d yields boolean value"), i);
4441 /* foreach_func --- execute given function for each awk function in symbol table. */
4444 foreach_func(int (*pfunc)(INSTRUCTION *, void *), int sort, void *data)
4454 * Walk through symbol table counting functions.
4455 * Could be more than func_count if there are
4456 * extension functions.
4458 for (i = j = 0; i < HASHSIZE; i++) {
4459 for (p = variables[i]; p != NULL; p = p->hnext) {
4460 if (p->hvalue->type == Node_func) {
4469 emalloc(tab, NODE **, j * sizeof(NODE *), "foreach_func");
4471 /* now walk again, copying info */
4472 for (i = j = 0; i < HASHSIZE; i++) {
4473 for (p = variables[i]; p != NULL; p = p->hnext) {
4474 if (p->hvalue->type == Node_func) {
4482 qsort(tab, j, sizeof(NODE *), sym_comp);
4484 for (i = 0; i < j; i++) {
4485 if ((ret = pfunc(tab[i]->hvalue->code_ptr, data)) != 0)
4494 for (i = 0; i < HASHSIZE; i++) {
4495 for (p = variables[i]; p != NULL; p = p->hnext) {
4496 if (p->hvalue->type == Node_func
4497 && (ret = pfunc(p->hvalue->code_ptr, data)) != 0)
4504 /* deferred variables --- those that are only defined if needed. */
4507 * Is there any reason to use a hash table for deferred variables? At the
4508 * moment, there are only 1 to 3 such variables, so it may not be worth
4509 * the overhead. If more modules start using this facility, it should
4510 * probably be converted into a hash table.
4513 static struct deferred_variable {
4514 NODE *(*load_func)(void);
4515 struct deferred_variable *next;
4516 char name[1]; /* variable-length array */
4517 } *deferred_variables;
4519 /* register_deferred_variable --- add a var name and loading function to the list */
4522 register_deferred_variable(const char *name, NODE *(*load_func)(void))
4524 struct deferred_variable *dv;
4525 size_t sl = strlen(name);
4527 emalloc(dv, struct deferred_variable *, sizeof(*dv)+sl,
4528 "register_deferred_variable");
4529 dv->load_func = load_func;
4530 dv->next = deferred_variables;
4531 memcpy(dv->name, name, sl+1);
4532 deferred_variables = dv;
4535 /* variable --- make sure NAME is in the symbol table */
4538 variable(char *name, NODETYPE type)
4542 if ((r = lookup(name)) != NULL) {
4543 if (r->type == Node_func) {
4544 error(_("function `%s' called with space between name and `(',\nor used as a variable or an array"),
4547 r->type = Node_var_new; /* continue parsing instead of exiting */
4551 struct deferred_variable *dv;
4553 for (dv = deferred_variables; TRUE; dv = dv->next) {
4556 * This is the only case in which we may not free the string.
4558 if (type == Node_var)
4559 r = mk_symbol(type, Nnull_string);
4561 r = mk_symbol(type, (NODE *) NULL);
4562 return install_symbol(name, r);
4564 if (strcmp(name, dv->name) == 0) {
4565 r = (*dv->load_func)();
4574 /* make_regnode --- make a regular expression node */
4577 make_regnode(int type, NODE *exp)
4582 memset(n, 0, sizeof(NODE));
4586 if (type == Node_regex) {
4587 n->re_reg = make_regexp(exp->stptr, exp->stlen, FALSE, TRUE, FALSE);
4588 if (n->re_reg == NULL) {
4593 n->re_flags = CONSTANT;
4599 /* mk_rexp --- make a regular expression constant */
4602 mk_rexp(INSTRUCTION *list)
4607 if (ip == list->lasti && ip->opcode == Op_match_rec)
4608 ip->opcode = Op_push_re;
4610 ip = instruction(Op_push_re);
4611 ip->memory = make_regnode(Node_dynregex, NULL);
4612 ip->nexti = list->lasti->nexti;
4613 list->lasti->nexti = ip;
4619 /* isnoeffect --- when used as a statement, has no side effects */
4622 isnoeffect(OPCODE type)
4639 case Op_unary_minus:
4656 break; /* keeps gcc -Wall happy */
4662 /* make_assignable --- make this operand an assignable one if posiible */
4664 static INSTRUCTION *
4665 make_assignable(INSTRUCTION *ip)
4667 switch (ip->opcode) {
4669 if (ip->memory->type == Node_param_list
4670 && (ip->memory->flags & FUNC) != 0)
4672 ip->opcode = Op_push_lhs;
4675 ip->opcode = Op_field_spec_lhs;
4678 ip->opcode = Op_subscript_lhs;
4681 break; /* keeps gcc -Wall happy */
4686 /* stopme --- for debugging */
4689 stopme(int nargs ATTRIBUTE_UNUSED)
4694 /* dumpintlstr --- write out an initial .po file entry for the string */
4697 dumpintlstr(const char *str, size_t len)
4701 /* See the GNU gettext distribution for details on the file format */
4703 if (source != NULL) {
4704 /* ala the gettext sources, remove leading `./'s */
4705 for (cp = source; cp[0] == '.' && cp[1] == '/'; cp += 2)
4707 printf("#: %s:%d\n", cp, sourceline);
4711 pp_string_fp(fprintf, stdout, str, len, '"', TRUE);
4713 printf("msgstr \"\"\n\n");
4717 /* dumpintlstr2 --- write out an initial .po file entry for the string and its plural */
4720 dumpintlstr2(const char *str1, size_t len1, const char *str2, size_t len2)
4724 /* See the GNU gettext distribution for details on the file format */
4726 if (source != NULL) {
4727 /* ala the gettext sources, remove leading `./'s */
4728 for (cp = source; cp[0] == '.' && cp[1] == '/'; cp += 2)
4730 printf("#: %s:%d\n", cp, sourceline);
4734 pp_string_fp(fprintf, stdout, str1, len1, '"', TRUE);
4736 printf("msgid_plural ");
4737 pp_string_fp(fprintf, stdout, str2, len2, '"', TRUE);
4739 printf("msgstr[0] \"\"\nmsgstr[1] \"\"\n\n");
4743 /* isarray --- can this type be subscripted? */
4750 case Node_var_array:
4752 case Node_param_list:
4753 return (n->flags & FUNC) == 0;
4754 case Node_array_ref:
4758 break; /* keeps gcc -Wall happy */
4764 /* mk_binary --- instructions for binary operators */
4766 static INSTRUCTION *
4767 mk_binary(INSTRUCTION *s1, INSTRUCTION *s2, INSTRUCTION *op)
4769 INSTRUCTION *ip1,*ip2;
4773 if (s2->lasti == ip2 && ip2->opcode == Op_push_i) {
4774 /* do any numeric constant folding */
4777 && ip1 == s1->lasti && ip1->opcode == Op_push_i
4778 && (ip1->memory->flags & (STRCUR|STRING)) == 0
4779 && (ip2->memory->flags & (STRCUR|STRING)) == 0
4781 NODE *n1 = ip1->memory, *n2 = ip2->memory;
4782 res = force_number(n1);
4783 (void) force_number(n2);
4784 switch (op->opcode) {
4789 if (n2->numbr == 0.0) {
4790 /* don't fatalize, allow parsing rest of the input */
4791 error_ln(op->source_line, _("division by zero attempted"));
4798 if (n2->numbr == 0.0) {
4799 /* don't fatalize, allow parsing rest of the input */
4800 error_ln(op->source_line, _("division by zero attempted in `%%'"));
4804 res = fmod(res, n2->numbr);
4805 #else /* ! HAVE_FMOD */
4806 (void) modf(res / n2->numbr, &res);
4807 res = n1->numbr - res * n2->numbr;
4808 #endif /* ! HAVE_FMOD */
4817 res = calc_exp(res, n2->numbr);
4823 op->opcode = Op_push_i;
4824 op->memory = mk_number(res, (PERM|NUMCUR|NUMBER));
4826 n1->flags |= MALLOC;
4828 n2->flags |= MALLOC;
4835 return list_create(op);
4837 /* do basic arithmetic optimisation */
4838 /* convert (Op_push_i Node_val) + (Op_plus) to (Op_plus_i Node_val) */
4839 switch (op->opcode) {
4841 op->opcode = Op_times_i;
4844 op->opcode = Op_quotient_i;
4847 op->opcode = Op_mod_i;
4850 op->opcode = Op_plus_i;
4853 op->opcode = Op_minus_i;
4856 op->opcode = Op_exp_i;
4862 op->memory = ip2->memory;
4864 bcfree(s2); /* Op_list */
4865 return list_append(s1, op);
4870 /* append lists s1, s2 and add `op' bytecode */
4871 (void) list_merge(s1, s2);
4872 return list_append(s1, op);
4875 /* mk_boolean --- instructions for boolean and, or */
4877 static INSTRUCTION *
4878 mk_boolean(INSTRUCTION *left, INSTRUCTION *right, INSTRUCTION *op)
4881 OPCODE opc, final_opc;
4883 opc = op->opcode; /* Op_and or Op_or */
4884 final_opc = (opc == Op_or) ? Op_or_final : Op_and_final;
4886 add_lint(right, LINT_assign_in_cond);
4890 if (tp->opcode != final_opc) { /* x || y */
4891 list_append(right, instruction(final_opc));
4892 add_lint(left, LINT_assign_in_cond);
4893 (void) list_append(left, op);
4894 left->lasti->target_jmp = right->lasti;
4896 /* NB: target_stmt points to previous Op_and(Op_or) in a chain;
4897 * target_stmt only used in the parser (see below).
4900 left->lasti->target_stmt = left->lasti;
4901 right->lasti->target_stmt = left->lasti;
4902 } else { /* optimization for x || y || z || ... */
4905 op->opcode = final_opc;
4906 (void) list_append(right, op);
4907 op->target_stmt = tp;
4909 tp->target_jmp = op;
4911 /* update jump targets */
4912 for (ip = tp->target_stmt; ; ip = ip->target_stmt) {
4913 assert(ip->opcode == opc);
4914 assert(ip->target_jmp == tp);
4915 /* if (ip->opcode == opc && ip->target_jmp == tp) */
4916 ip->target_jmp = op;
4917 if (ip->target_stmt == ip)
4922 return list_merge(left, right);
4925 /* mk_condition --- if-else and conditional */
4927 static INSTRUCTION *
4928 mk_condition(INSTRUCTION *cond, INSTRUCTION *ifp, INSTRUCTION *true_branch,
4929 INSTRUCTION *elsep, INSTRUCTION *false_branch)
4935 * t: [Op_jmp_false f ]
4951 if (false_branch == NULL) {
4952 false_branch = list_create(instruction(Op_no_op));
4953 if (elsep != NULL) { /* else { } */
4955 (void) list_prepend(false_branch, elsep);
4960 /* assert(elsep != NULL); */
4962 /* avoid a series of no_op's: if .. else if .. else if .. */
4963 if (false_branch->lasti->opcode != Op_no_op)
4964 (void) list_append(false_branch, instruction(Op_no_op));
4966 (void) list_prepend(false_branch, elsep);
4967 false_branch->nexti->branch_end = false_branch->lasti;
4968 (void) list_prepend(false_branch, instruction(Op_exec_count));
4973 (void) list_prepend(false_branch, instruction(Op_jmp));
4974 false_branch->nexti->target_jmp = false_branch->lasti;
4976 add_lint(cond, LINT_assign_in_cond);
4977 ip = list_append(cond, instruction(Op_jmp_false));
4978 ip->lasti->target_jmp = false_branch->nexti->nexti;
4981 (void) list_prepend(ip, ifp);
4982 (void) list_append(ip, instruction(Op_exec_count));
4983 ip->nexti->branch_if = ip->lasti;
4984 ip->nexti->branch_else = false_branch->nexti;
4988 if (true_branch != NULL)
4989 list_merge(ip, true_branch);
4990 return list_merge(ip, false_branch);
4993 enum defline { FIRST_LINE, LAST_LINE };
4995 /* find_line -- find the first(last) line in a list of (pattern) instructions */
4998 find_line(INSTRUCTION *pattern, enum defline what)
5003 for (ip = pattern->nexti; ip; ip = ip->nexti) {
5004 if (what == LAST_LINE) {
5005 if (ip->source_line > lineno)
5006 lineno = ip->source_line;
5007 } else { /* FIRST_LINE */
5008 if (ip->source_line > 0
5009 && (lineno == 0 || ip->source_line < lineno))
5010 lineno = ip->source_line;
5012 if (ip == pattern->lasti)
5019 /* append_rule --- pattern-action instructions */
5021 static INSTRUCTION *
5022 append_rule(INSTRUCTION *pattern, INSTRUCTION *action)
5043 (void) list_append(action, instruction(Op_no_op));
5044 (rp + 1)->firsti = action->nexti;
5045 (rp + 1)->lasti = action->lasti;
5046 (rp + 2)->first_line = pattern->source_line;
5047 (rp + 2)->last_line = lastline;
5048 ip = list_prepend(action, rp);
5051 rp = bcalloc(Op_rule, 3, 0);
5053 rp->source_file = source;
5054 tp = instruction(Op_no_op);
5056 if (pattern == NULL) {
5057 /* assert(action != NULL); */
5059 (void) list_prepend(action, instruction(Op_exec_count));
5060 (rp + 1)->firsti = action->nexti;
5061 (rp + 1)->lasti = tp;
5062 (rp + 2)->first_line = firstline;
5063 (rp + 2)->last_line = lastline;
5064 rp->source_line = firstline;
5065 ip = list_prepend(list_append(action, tp), rp);
5067 (void) list_append(pattern, instruction(Op_jmp_false));
5068 pattern->lasti->target_jmp = tp;
5069 (rp + 2)->first_line = find_line(pattern, FIRST_LINE);
5070 rp->source_line = (rp + 2)->first_line;
5071 if (action == NULL) {
5072 (rp + 2)->last_line = find_line(pattern, LAST_LINE);
5073 action = list_create(instruction(Op_K_print_rec));
5075 (void) list_prepend(action, instruction(Op_exec_count));
5077 (rp + 2)->last_line = lastline;
5080 (void) list_prepend(pattern, instruction(Op_exec_count));
5081 (void) list_prepend(action, instruction(Op_exec_count));
5083 (rp + 1)->firsti = action->nexti;
5084 (rp + 1)->lasti = tp;
5086 list_merge(list_prepend(pattern, rp),
5093 list_append(rule_list, rp + 1);
5095 if (rule_block[rule] == NULL)
5096 rule_block[rule] = ip;
5098 (void) list_merge(rule_block[rule], ip);
5100 return rule_block[rule];
5103 /* mk_assignment --- assignment bytecodes */
5105 static INSTRUCTION *
5106 mk_assignment(INSTRUCTION *lhs, INSTRUCTION *rhs, INSTRUCTION *op)
5112 switch (tp->opcode) {
5114 tp->opcode = Op_field_spec_lhs;
5117 tp->opcode = Op_subscript_lhs;
5121 tp->opcode = Op_push_lhs;
5127 tp->do_reference = (op->opcode != Op_assign); /* check for uninitialized reference */
5130 ip = list_merge(rhs, lhs);
5134 (void) list_append(ip, op);
5136 if (tp->opcode == Op_push_lhs
5137 && tp->memory->type == Node_var
5138 && tp->memory->var_assign
5140 tp->do_reference = FALSE; /* no uninitialized reference checking
5141 * for a special variable.
5143 (void) list_append(ip, instruction(Op_var_assign));
5144 ip->lasti->assign_var = tp->memory->var_assign;
5145 } else if (tp->opcode == Op_field_spec_lhs) {
5146 (void) list_append(ip, instruction(Op_field_assign));
5147 ip->lasti->field_assign = (Func_ptr) 0;
5148 tp->target_assign = ip->lasti;
5154 /* optimize_assignment --- peephole optimization for assignment */
5156 static INSTRUCTION *
5157 optimize_assignment(INSTRUCTION *exp)
5164 * Optimize assignment statements array[subs] = x; var = x; $n = x;
5165 * string concatenation of the form s = s t.
5167 * 1) Array element assignment array[subs] = x:
5168 * Replaces Op_push_array + Op_subscript_lhs + Op_assign + Op_pop
5169 * with single instruction Op_store_sub.
5170 * Limitation: 1 dimension and sub is simple var/value.
5172 * 2) Simple variable assignment var = x:
5173 * Replaces Op_push_lhs + Op_assign + Op_pop with Op_store_var.
5175 * 3) Field assignment $n = x:
5176 * Replaces Op_field_spec_lhs + Op_assign + Op_field_assign + Op_pop
5177 * with Op_store_field.
5179 * 4) Optimization for string concatenation:
5180 * For cases like x = x y, uses realloc to include y in x;
5181 * also eliminates instructions Op_push_lhs and Op_pop.
5185 * N.B.: do not append Op_pop instruction to the returned
5186 * instruction list if optimized. None of these
5187 * optimized instructions pushes the r-value of assignment
5188 * onto the runtime stack.
5195 || ( i1->opcode != Op_assign
5196 && i1->opcode != Op_field_assign)
5198 return list_append(exp, instruction(Op_pop));
5200 for (i2 = exp->nexti; i2 != i1; i2 = i2->nexti) {
5201 switch (i2->opcode) {
5203 if (i2->nexti->opcode == Op_push_lhs /* l.h.s is a simple variable */
5204 && (i2->concat_flag & CSVAR) /* 1st exp in r.h.s is a simple variable;
5205 * see Op_concat in the grammer above.
5207 && i2->nexti->memory == exp->nexti->memory /* and the same as in l.h.s */
5208 && i2->nexti->nexti == i1
5209 && i1->opcode == Op_assign
5211 /* s = s ... optimization */
5213 /* avoid stuff like x = x (x = y) or x = x gsub(/./, "b", x);
5214 * check for l-value reference to this variable in the r.h.s.
5215 * Also, avoid function calls in general to guard against
5216 * global variable assignment.
5219 for (i3 = exp->nexti->nexti; i3 != i2; i3 = i3->nexti) {
5220 if ((i3->opcode == Op_push_lhs && i3->memory == i2->nexti->memory)
5221 || i3->opcode == Op_func_call)
5222 return list_append(exp, instruction(Op_pop)); /* no optimization */
5225 /* remove the variable from r.h.s */
5227 exp->nexti = i3->nexti;
5230 if (--i2->expr_count == 1) /* one less expression in Op_concat */
5231 i2->opcode = Op_no_op;
5234 assert(i3->opcode == Op_push_lhs);
5235 i3->opcode = Op_assign_concat; /* change Op_push_lhs to Op_assign_concat */
5237 bcfree(i1); /* Op_assign */
5238 exp->lasti = i3; /* update Op_list */
5243 case Op_field_spec_lhs:
5244 if (i2->nexti->opcode == Op_assign
5245 && i2->nexti->nexti == i1
5246 && i1->opcode == Op_field_assign
5249 i2->opcode = Op_store_field;
5250 bcfree(i2->nexti); /* Op_assign */
5252 bcfree(i1); /* Op_field_assign */
5253 exp->lasti = i2; /* update Op_list */
5259 if (i2->nexti->nexti->opcode == Op_subscript_lhs) {
5260 i3 = i2->nexti->nexti;
5261 if (i3->sub_count == 1
5263 && i1->opcode == Op_assign
5265 /* array[sub] = .. */
5266 i3->opcode = Op_store_sub;
5267 i3->memory = i2->memory;
5268 i3->expr_count = 1; /* sub_count shadows memory,
5269 * so use expr_count instead.
5272 i2->opcode = Op_no_op;
5273 bcfree(i1); /* Op_assign */
5274 exp->lasti = i3; /* update Op_list */
5282 && i1->opcode == Op_assign
5285 i2->opcode = Op_store_var;
5287 bcfree(i1); /* Op_assign */
5288 exp->lasti = i2; /* update Op_list */
5298 /* no optimization */
5299 return list_append(exp, instruction(Op_pop));
5303 /* mk_getline --- make instructions for getline */
5305 static INSTRUCTION *
5306 mk_getline(INSTRUCTION *op, INSTRUCTION *var, INSTRUCTION *redir, int redirtype)
5310 INSTRUCTION *asgn = NULL;
5313 * getline [var] < [file]
5315 * [ file (simp_exp)]
5317 * [ Op_K_getline_redir|NULL|redir_type|into_var]
5322 if (redir == NULL) {
5323 int sline = op->source_line;
5325 op = bcalloc(Op_K_getline, 2, sline);
5326 (op + 1)->target_endfile = ip_endfile;
5327 (op + 1)->target_beginfile = ip_beginfile;
5331 tp = make_assignable(var->lasti);
5334 /* check if we need after_assign bytecode */
5335 if (tp->opcode == Op_push_lhs
5336 && tp->memory->type == Node_var
5337 && tp->memory->var_assign
5339 asgn = instruction(Op_var_assign);
5340 asgn->assign_ctxt = op->opcode;
5341 asgn->assign_var = tp->memory->var_assign;
5342 } else if (tp->opcode == Op_field_spec_lhs) {
5343 asgn = instruction(Op_field_assign);
5344 asgn->assign_ctxt = op->opcode;
5345 asgn->field_assign = (Func_ptr) 0; /* determined at run time */
5346 tp->target_assign = asgn;
5348 if (redir != NULL) {
5349 ip = list_merge(redir, var);
5350 (void) list_append(ip, op);
5352 ip = list_append(var, op);
5353 } else if (redir != NULL)
5354 ip = list_append(redir, op);
5356 ip = list_create(op);
5357 op->into_var = (var != NULL);
5358 op->redir_type = (redir != NULL) ? redirtype : 0;
5360 return (asgn == NULL ? ip : list_append(ip, asgn));
5364 /* mk_for_loop --- for loop bytecodes */
5366 static INSTRUCTION *
5367 mk_for_loop(INSTRUCTION *forp, INSTRUCTION *init, INSTRUCTION *cond,
5368 INSTRUCTION *incr, INSTRUCTION *body)
5371 * ------------------------
5372 * init (may be NULL)
5373 * ------------------------
5375 * cond (Op_no_op if NULL)
5376 * ------------------------
5377 * [ Op_jmp_false tb ]
5378 * ------------------------
5379 * body (may be NULL)
5380 * ------------------------
5382 * incr (may be NULL)
5384 * ------------------------
5388 INSTRUCTION *ip, *tbreak, *tcont;
5390 INSTRUCTION *pp_cond;
5393 tbreak = instruction(Op_no_op);
5396 add_lint(cond, LINT_assign_in_cond);
5397 pp_cond = cond->nexti;
5399 (void) list_append(ip, instruction(Op_jmp_false));
5400 ip->lasti->target_jmp = tbreak;
5402 pp_cond = instruction(Op_no_op);
5403 ip = list_create(pp_cond);
5407 ip = list_merge(init, ip);
5410 (void) list_append(ip, instruction(Op_exec_count));
5411 (forp + 1)->forloop_cond = pp_cond;
5412 (forp + 1)->forloop_body = ip->lasti;
5416 (void) list_merge(ip, body);
5418 jmp = instruction(Op_jmp);
5419 jmp->target_jmp = pp_cond;
5423 tcont = incr->nexti;
5424 (void) list_merge(ip, incr);
5427 (void) list_append(ip, jmp);
5428 ret = list_append(ip, tbreak);
5429 fix_break_continue(ret, tbreak, tcont);
5432 forp->target_break = tbreak;
5433 forp->target_continue = tcont;
5434 ret = list_prepend(ret, forp);
5441 /* add_lint --- add lint warning bytecode if needed */
5444 add_lint(INSTRUCTION *list, LINTTYPE linttype)
5450 case LINT_assign_in_cond:
5452 if (ip->opcode == Op_var_assign || ip->opcode == Op_field_assign) {
5453 assert(ip != list->nexti);
5454 for (ip = list->nexti; ip->nexti != list->lasti; ip = ip->nexti)
5458 if (ip->opcode == Op_assign || ip->opcode == Op_assign_concat) {
5459 list_append(list, instruction(Op_lint));
5460 list->lasti->lint_type = linttype;
5464 case LINT_no_effect:
5465 if (list->lasti->opcode == Op_pop && list->nexti != list->lasti) {
5466 for (ip = list->nexti; ip->nexti != list->lasti; ip = ip->nexti)
5469 if (do_lint) { /* compile-time warning */
5470 if (isnoeffect(ip->opcode))
5471 lintwarn_ln(ip->source_line, ("statement may have no effect"));
5474 if (ip->opcode == Op_push) { /* run-time warning */
5475 list_append(list, instruction(Op_lint));
5476 list->lasti->lint_type = linttype;
5487 /* mk_expression_list --- list of bytecode lists */
5489 static INSTRUCTION *
5490 mk_expression_list(INSTRUCTION *list, INSTRUCTION *s1)
5494 /* we can't just combine all bytecodes, since we need to
5495 * process individual expressions for a few builtins in snode() (-:
5498 /* -- list of lists */
5499 /* [Op_list| ... ]------
5501 * [Op_list| ... ] -- |
5504 * [Op_list| ... ] -- |
5510 assert(s1 != NULL && s1->opcode == Op_list);
5512 list = instruction(Op_list);
5514 list->lasti = s1->lasti;
5518 /* append expression to the end of the list */
5522 list->lasti = s1->lasti;
5526 /* count_expressions --- fixup expression_list from mk_expression_list.
5527 * returns no of expressions in list. isarg is true
5528 * for function arguments.
5532 count_expressions(INSTRUCTION **list, int isarg)
5535 INSTRUCTION *r = NULL;
5538 if (*list == NULL) /* error earlier */
5541 for (expr = (*list)->nexti; expr; ) {
5542 INSTRUCTION *t1, *t2;
5545 if (isarg && t1 == t2 && t1->opcode == Op_push)
5546 t1->opcode = Op_push_param;
5550 (void) list_merge(r, expr);
5555 if (! isarg && count > max_args)
5562 /* fix_break_continue --- fix up break & continue codes in loop bodies */
5565 fix_break_continue(INSTRUCTION *list, INSTRUCTION *b_target, INSTRUCTION *c_target)
5569 list->lasti->nexti = NULL; /* just to make sure */
5571 for (ip = list->nexti; ip != NULL; ip = ip->nexti) {
5572 switch (ip->opcode) {
5574 if (ip->target_jmp == NULL)
5575 ip->target_jmp = b_target;
5579 if (ip->target_jmp == NULL)
5580 ip->target_jmp = c_target;
5584 /* this is to keep the compiler happy. sheesh. */
5591 /* append_symbol --- append symbol to the list of symbols
5592 * installed in the symbol table.
5596 append_symbol(char *name)
5600 /* N.B.: func_install removes func name and reinstalls it;
5601 * and we get two entries for it here!. destroy_symbol()
5602 * will find and destroy the Node_func which is what we want.
5606 hp->hname = name; /* shallow copy */
5607 hp->hnext = symbol_list->hnext;
5608 symbol_list->hnext = hp;
5611 /* release_symbol --- free symbol list and optionally remove symbol from symbol table */
5614 release_symbols(NODE *symlist, int keep_globals)
5618 for (hp = symlist->hnext; hp != NULL; hp = n) {
5619 if (! keep_globals) {
5620 /* destroys globals, function, and params
5621 * if still in symbol table and not removed by func_install
5622 * due to parse error.
5624 destroy_symbol(hp->hname);
5629 symlist->hnext = NULL;
5632 /* destroy_symbol --- remove a symbol from symbol table
5633 * and free all associated memory.
5637 destroy_symbol(char *name)
5641 symbol = lookup(name);
5645 if (symbol->type == Node_func) {
5650 varnames = func->parmlist;
5651 if (varnames != NULL)
5654 /* function parameters of type Node_param_list */
5655 for (n = func->lnode->rnode; n != NULL; ) {
5662 freenode(func->lnode);
5665 } else if (symbol->type == Node_var_array)
5666 assoc_clear(symbol);
5667 else if (symbol->type == Node_var)
5668 unref(symbol->var_value);
5670 /* remove from symbol table */
5671 hp = remove_symbol(name);
5673 freenode(hp->hvalue);
5677 #define pool_size d.dl
5679 static INSTRUCTION *pool_list;
5680 static AWK_CONTEXT *curr_ctxt = NULL;
5682 /* new_context --- create a new execution context. */
5689 emalloc(ctxt, AWK_CONTEXT *, sizeof(AWK_CONTEXT), "new_context");
5690 memset(ctxt, 0, sizeof(AWK_CONTEXT));
5691 ctxt->srcfiles.next = ctxt->srcfiles.prev = &ctxt->srcfiles;
5692 ctxt->rule_list.opcode = Op_list;
5693 ctxt->rule_list.lasti = &ctxt->rule_list;
5697 /* set_context --- change current execution context. */
5700 set_context(AWK_CONTEXT *ctxt)
5702 pool_list = &ctxt->pools;
5703 symbol_list = &ctxt->symbols;
5704 srcfiles = &ctxt->srcfiles;
5705 rule_list = &ctxt->rule_list;
5706 install_func = ctxt->install_func;
5713 * Switch to the given context after saving the current one. The set
5714 * of active execution contexts forms a stack; the global or main context
5715 * is at the bottom of the stack.
5719 push_context(AWK_CONTEXT *ctxt)
5721 ctxt->prev = curr_ctxt;
5722 /* save current source and sourceline */
5723 if (curr_ctxt != NULL) {
5724 curr_ctxt->sourceline = sourceline;
5725 curr_ctxt->source = source;
5732 /* pop_context --- switch to previous execution context. */
5739 assert(curr_ctxt != NULL);
5740 ctxt = curr_ctxt->prev;
5741 /* restore source and sourceline */
5742 sourceline = ctxt->sourceline;
5743 source = ctxt->source;
5747 /* in_main_context --- are we in the main context ? */
5752 assert(curr_ctxt != NULL);
5753 return (curr_ctxt->prev == NULL);
5756 /* free_context --- free context structure and related data. */
5759 free_context(AWK_CONTEXT *ctxt, int keep_globals)
5766 assert(curr_ctxt != ctxt);
5768 /* free all code including function codes */
5769 free_bcpool(&ctxt->pools);
5771 release_symbols(&ctxt->symbols, keep_globals);
5773 for (s = &ctxt->srcfiles; s != &ctxt->srcfiles; s = sn) {
5775 if (s->stype != SRC_CMDLINE && s->stype != SRC_STDIN)
5783 /* free_bc_internal --- free internal memory of an instruction. */
5786 free_bc_internal(INSTRUCTION *cp)
5790 switch(cp->opcode) {
5792 if (cp->func_name != NULL
5793 && cp->func_name != builtin_func
5795 efree(cp->func_name);
5802 if (m->re_reg != NULL)
5804 if (m->re_exp != NULL)
5806 if (m->re_text != NULL)
5810 case Op_token: /* token lost during error recovery in yyparse */
5811 if (cp->lextok != NULL)
5822 /* INSTR_CHUNK must be > largest code size (3) */
5823 #define INSTR_CHUNK 127
5825 /* bcfree --- deallocate instruction */
5828 bcfree(INSTRUCTION *cp)
5831 cp->nexti = pool_list->freei;
5832 pool_list->freei = cp;
5835 /* bcalloc --- allocate a new instruction */
5838 bcalloc(OPCODE op, int size, int srcline)
5843 /* wide instructions Op_rule, Op_func_call .. */
5844 emalloc(cp, INSTRUCTION *, (size + 1) * sizeof(INSTRUCTION), "bcalloc");
5845 cp->pool_size = size;
5846 cp->nexti = pool_list->nexti;
5847 pool_list->nexti = cp++;
5851 pool = pool_list->freei;
5854 emalloc(cp, INSTRUCTION *, (INSTR_CHUNK + 1) * sizeof(INSTRUCTION), "bcalloc");
5856 cp->pool_size = INSTR_CHUNK;
5857 cp->nexti = pool_list->nexti;
5858 pool_list->nexti = cp;
5860 last = &pool[INSTR_CHUNK - 1];
5861 for (; cp <= last; cp++) {
5869 pool_list->freei = cp->nexti;
5872 memset(cp, 0, size * sizeof(INSTRUCTION));
5874 cp->source_line = srcline;
5878 /* free_bcpool --- free list of instruction memory pools */
5881 free_bcpool(INSTRUCTION *pl)
5883 INSTRUCTION *pool, *tmp;
5885 for (pool = pl->nexti; pool != NULL; pool = tmp) {
5886 INSTRUCTION *cp, *last;
5888 psiz = pool->pool_size;
5889 if (psiz == INSTR_CHUNK)
5893 for (cp = pool + 1; cp <= last ; cp++) {
5894 if (cp->opcode != 0)
5895 free_bc_internal(cp);
5900 memset(pl, 0, sizeof(INSTRUCTION));
5904 static inline INSTRUCTION *
5905 list_create(INSTRUCTION *x)
5909 l = instruction(Op_list);
5915 static inline INSTRUCTION *
5916 list_append(INSTRUCTION *l, INSTRUCTION *x)
5919 if (l->opcode != Op_list)
5922 l->lasti->nexti = x;
5927 static inline INSTRUCTION *
5928 list_prepend(INSTRUCTION *l, INSTRUCTION *x)
5931 if (l->opcode != Op_list)
5934 x->nexti = l->nexti;
5939 static inline INSTRUCTION *
5940 list_merge(INSTRUCTION *l1, INSTRUCTION *l2)
5943 if (l1->opcode != Op_list)
5945 if (l2->opcode != Op_list)
5948 l1->lasti->nexti = l2->nexti;
5949 l1->lasti = l2->lasti;
5954 /* See if name is a special token. */
5957 check_special(const char *name)
5961 #if 'a' == 0x81 /* it's EBCDIC */
5962 static int did_sort = FALSE;
5965 qsort((void *) tokentab,
5966 sizeof(tokentab) / sizeof(tokentab[0]),
5967 sizeof(tokentab[0]), tokcompare);
5973 high = (sizeof(tokentab) / sizeof(tokentab[0])) - 1;
5974 while (low <= high) {
5975 mid = (low + high) / 2;
5976 i = *name - tokentab[mid].operator[0];
5978 i = strcmp(name, tokentab[mid].operator);
5980 if (i < 0) /* token < mid */
5982 else if (i > 0) /* token > mid */
5985 if ((do_traditional && (tokentab[mid].flags & GAWKX))
5986 || (do_posix && (tokentab[mid].flags & NOT_POSIX)))
5995 * This provides a private version of functions that act like VMS's
5996 * variable-length record filesystem, where there was a bug on
5997 * certain source files.
6000 static FILE *fp = NULL;
6002 /* read_one_line --- return one input line at a time. mainly for debugging. */
6005 read_one_line(int fd, void *buffer, size_t count)
6009 /* Minor potential memory leak here. Too bad. */
6011 fp = fdopen(fd, "r");
6013 fprintf(stderr, "ugh. fdopen: %s\n", strerror(errno));
6014 gawk_exit(EXIT_FAILURE);
6018 if (fgets(buf, sizeof buf, fp) == NULL)
6021 memcpy(buffer, buf, strlen(buf));
6025 /* one_line_close --- close the open file being read with read_one_line() */
6028 one_line_close(int fd)
6032 if (fp == NULL || fd != fileno(fp))
6033 fatal("debugging read/close screwed up!");