2 * awkgram.y --- yacc/bison parser
6 * Copyright (C) 1986, 1988, 1989, 1991-2014 the Free Software Foundation, Inc.
8 * This file is part of GAWK, the GNU implementation of the
9 * AWK Programming Language.
11 * GAWK is free software; you can redistribute it and/or modify
12 * it under the terms of the GNU General Public License as published by
13 * the Free Software Foundation; either version 3 of the License, or
14 * (at your option) any later version.
16 * GAWK is distributed in the hope that it will be useful,
17 * but WITHOUT ANY WARRANTY; without even the implied warranty of
18 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
19 * GNU General Public License for more details.
21 * You should have received a copy of the GNU General Public License
22 * along with this program; if not, write to the Free Software
23 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
33 #if defined(__STDC__) && __STDC__ < 1 /* VMS weirdness, maybe elsewhere */
37 static void yyerror(const char *m, ...) ATTRIBUTE_PRINTF_1;
38 static void error_ln(int line, const char *m, ...) ATTRIBUTE_PRINTF_2;
39 static void lintwarn_ln(int line, const char *m, ...) ATTRIBUTE_PRINTF_2;
40 static void warning_ln(int line, const char *m, ...) ATTRIBUTE_PRINTF_2;
41 static char *get_src_buf(void);
42 static int yylex(void);
44 static INSTRUCTION *snode(INSTRUCTION *subn, INSTRUCTION *op);
45 static char **check_params(char *fname, int pcount, INSTRUCTION *list);
46 static int install_function(char *fname, INSTRUCTION *fi, INSTRUCTION *plist);
47 static NODE *mk_rexp(INSTRUCTION *exp);
48 static void param_sanity(INSTRUCTION *arglist);
49 static int parms_shadow(INSTRUCTION *pc, bool *shadow);
51 static int isnoeffect(OPCODE type);
53 static INSTRUCTION *make_assignable(INSTRUCTION *ip);
54 static void dumpintlstr(const char *str, size_t len);
55 static void dumpintlstr2(const char *str1, size_t len1, const char *str2, size_t len2);
56 static int include_source(INSTRUCTION *file);
57 static int load_library(INSTRUCTION *file);
58 static void next_sourcefile(void);
59 static char *tokexpand(void);
60 static bool is_deferred_variable(const char *name);
62 #define instruction(t) bcalloc(t, 1, 0)
64 static INSTRUCTION *mk_program(void);
65 static INSTRUCTION *append_rule(INSTRUCTION *pattern, INSTRUCTION *action);
66 static INSTRUCTION *mk_function(INSTRUCTION *fi, INSTRUCTION *def);
67 static INSTRUCTION *mk_condition(INSTRUCTION *cond, INSTRUCTION *ifp, INSTRUCTION *true_branch,
68 INSTRUCTION *elsep, INSTRUCTION *false_branch);
69 static INSTRUCTION *mk_expression_list(INSTRUCTION *list, INSTRUCTION *s1);
70 static INSTRUCTION *mk_for_loop(INSTRUCTION *forp, INSTRUCTION *init, INSTRUCTION *cond,
71 INSTRUCTION *incr, INSTRUCTION *body);
72 static void fix_break_continue(INSTRUCTION *list, INSTRUCTION *b_target, INSTRUCTION *c_target);
73 static INSTRUCTION *mk_binary(INSTRUCTION *s1, INSTRUCTION *s2, INSTRUCTION *op);
74 static INSTRUCTION *mk_boolean(INSTRUCTION *left, INSTRUCTION *right, INSTRUCTION *op);
75 static INSTRUCTION *mk_assignment(INSTRUCTION *lhs, INSTRUCTION *rhs, INSTRUCTION *op);
76 static INSTRUCTION *mk_getline(INSTRUCTION *op, INSTRUCTION *opt_var, INSTRUCTION *redir, int redirtype);
77 static NODE *make_regnode(int type, NODE *exp);
78 static int count_expressions(INSTRUCTION **list, bool isarg);
79 static INSTRUCTION *optimize_assignment(INSTRUCTION *exp);
80 static void add_lint(INSTRUCTION *list, LINTTYPE linttype);
82 static void process_deferred();
84 enum defref { FUNC_DEFINE, FUNC_USE, FUNC_EXT };
85 static void func_use(const char *name, enum defref how);
86 static void check_funcs(void);
88 static ssize_t read_one_line(int fd, void *buffer, size_t count);
89 static int one_line_close(int fd);
91 static bool want_source = false;
92 static bool want_regexp = false; /* lexical scanning kludge */
93 static char *in_function; /* parsing kludge */
94 static bool symtab_used = false; /* program used SYMTAB */
97 const char *const ruletab[] = {
106 static bool in_print = false; /* lexical scanning kludge for print */
107 static int in_parens = 0; /* lexical scanning kludge for print */
108 static int sub_counter = 0; /* array dimension counter for use in delete */
109 static char *lexptr = NULL; /* pointer to next char during parsing */
111 static char *lexptr_begin; /* keep track of where we were for error msgs */
112 static char *lexeme; /* beginning of lexeme for debugging */
113 static bool lexeof; /* seen EOF for current source? */
114 static char *thisline = NULL;
115 static int in_braces = 0; /* count braces for firstline, lastline in an 'action' */
116 static int lastline = 0;
117 static int firstline = 0;
118 static SRCFILE *sourcefile = NULL; /* current program source */
119 static int lasttok = 0;
120 static bool eof_warned = false; /* GLOBAL: want warning for each file */
121 static int break_allowed; /* kludge for break */
122 static int continue_allowed; /* kludge for continue */
124 #define END_FILE -1000
125 #define END_SRC -2000
127 #define YYDEBUG_LEXER_TEXT (lexeme)
128 static char *tokstart = NULL;
129 static char *tok = NULL;
131 static int errcount = 0;
134 extern int sourceline;
135 extern SRCFILE *srcfiles;
136 extern INSTRUCTION *rule_list;
138 extern NODE **args_array;
140 static INSTRUCTION *rule_block[sizeof(ruletab)];
142 static INSTRUCTION *ip_rec;
143 static INSTRUCTION *ip_newfile;
144 static INSTRUCTION *ip_atexit = NULL;
145 static INSTRUCTION *ip_end;
146 static INSTRUCTION *ip_endfile;
147 static INSTRUCTION *ip_beginfile;
149 static inline INSTRUCTION *list_create(INSTRUCTION *x);
150 static inline INSTRUCTION *list_append(INSTRUCTION *l, INSTRUCTION *x);
151 static inline INSTRUCTION *list_prepend(INSTRUCTION *l, INSTRUCTION *x);
152 static inline INSTRUCTION *list_merge(INSTRUCTION *l1, INSTRUCTION *l2);
154 extern double fmod(double x, double y);
156 #define YYSTYPE INSTRUCTION *
158 #define is_identchar(c) (isalnum(c) || (c) == '_')
161 %token FUNC_CALL NAME REGEXP FILENAME
162 %token YNUMBER YSTRING
163 %token RELOP IO_OUT IO_IN
164 %token ASSIGNOP ASSIGN MATCHOP CONCAT_OP
166 %token LEX_BEGIN LEX_END LEX_IF LEX_ELSE LEX_RETURN LEX_DELETE
167 %token LEX_SWITCH LEX_CASE LEX_DEFAULT LEX_WHILE LEX_DO LEX_FOR LEX_BREAK LEX_CONTINUE
168 %token LEX_PRINT LEX_PRINTF LEX_NEXT LEX_EXIT LEX_FUNCTION
169 %token LEX_BEGINFILE LEX_ENDFILE
170 %token LEX_GETLINE LEX_NEXTFILE
172 %token LEX_AND LEX_OR INCREMENT DECREMENT
173 %token LEX_BUILTIN LEX_LENGTH
175 %token LEX_INCLUDE LEX_EVAL LEX_LOAD
178 /* Lowest to highest */
179 %right ASSIGNOP ASSIGN SLASH_BEFORE_EQUAL
185 %left FUNC_CALL LEX_BUILTIN LEX_LENGTH
188 %nonassoc RELOP '<' '>' IO_IN IO_OUT
190 %left YSTRING YNUMBER
195 %left INCREMENT DECREMENT
211 if (sourcefile == srcfiles)
218 * If errors, give up, don't produce an infinite
219 * stream of syntax error messages.
228 (void) append_rule($1, $2);
230 | pattern statement_term
233 msg(_("%s blocks must have an action part"), ruletab[rule]);
235 } else if ($1 == NULL) {
236 msg(_("each rule must have a pattern or an action part"));
238 } else /* pattern rule with non-empty pattern */
239 (void) append_rule($1, NULL);
241 | function_prologue action
244 (void) mk_function($1, $2);
247 | '@' LEX_INCLUDE source statement_term
252 | '@' LEX_LOAD library statement_term
262 if (include_source($1) < 0)
277 if (load_library($1) < 0)
291 { $$ = NULL; rule = Rule; }
293 { $$ = $1; rule = Rule; }
294 | exp ',' opt_nls exp
298 add_lint($1, LINT_assign_in_cond);
299 add_lint($4, LINT_assign_in_cond);
301 tp = instruction(Op_no_op);
302 list_prepend($1, bcalloc(Op_line_range, !!do_pretty_print + 1, 0));
303 $1->nexti->triggered = false;
304 $1->nexti->target_jmp = $4->nexti;
306 list_append($1, instruction(Op_cond_pair));
307 $1->lasti->line_range = $1->nexti;
308 $1->lasti->target_jmp = tp;
310 list_append($4, instruction(Op_cond_pair));
311 $4->lasti->line_range = $1->nexti;
312 $4->lasti->target_jmp = tp;
313 if (do_pretty_print) {
314 ($1->nexti + 1)->condpair_left = $1->lasti;
315 ($1->nexti + 1)->condpair_right = $4->lasti;
317 $$ = list_append(list_merge($1, $4), tp);
322 static int begin_seen = 0;
323 if (do_lint_old && ++begin_seen == 2)
324 warning_ln($1->source_line,
325 _("old awk does not support multiple `BEGIN' or `END' rules"));
327 $1->in_rule = rule = BEGIN;
328 $1->source_file = source;
333 static int end_seen = 0;
334 if (do_lint_old && ++end_seen == 2)
335 warning_ln($1->source_line,
336 _("old awk does not support multiple `BEGIN' or `END' rules"));
338 $1->in_rule = rule = END;
339 $1->source_file = source;
344 $1->in_rule = rule = BEGINFILE;
345 $1->source_file = source;
350 $1->in_rule = rule = ENDFILE;
351 $1->source_file = source;
357 : l_brace statements r_brace opt_semi opt_nls
360 $$ = list_create(instruction(Op_no_op));
373 yyerror(_("`%s' is a built-in function, it cannot be redefined"),
387 : LEX_FUNCTION func_name '(' opt_param_list r_paren opt_nls
389 $1->source_file = source;
390 if (install_function($2->lextok, $1, $4) < 0)
392 in_function = $2->lextok;
395 /* $4 already free'd in install_function */
402 * In this rule, want_regexp tells yylex that the next thing
403 * is a regexp so it should read up to the closing slash.
406 { want_regexp = true; }
407 REGEXP /* The terminating '/' is consumed by yylex(). */
418 lintwarn_ln($3->source_line,
419 _("regexp constant `//' looks like a C++ comment, but is not"));
420 else if (re[0] == '*' && re[len-1] == '*')
421 /* possible C comment */
422 lintwarn_ln($3->source_line,
423 _("regexp constant `/%s/' looks like a C comment, but is not"), re);
426 exp = make_str_node(re, len, ALREADY_MALLOCED);
427 n = make_regnode(Node_regex, exp);
433 $$->opcode = Op_match_rec;
447 | statements statement
452 add_lint($2, LINT_no_effect);
456 $$ = list_merge($1, $2);
472 | l_brace statements r_brace
477 $$ = list_prepend($1, instruction(Op_exec_count));
481 | LEX_SWITCH '(' exp r_paren opt_nls l_brace case_statements opt_nls r_brace
483 INSTRUCTION *dflt, *curr = NULL, *cexp, *cstmt;
484 INSTRUCTION *ip, *nextc, *tbreak;
485 const char **case_values = NULL;
490 tbreak = instruction(Op_no_op);
491 cstmt = list_create(tbreak);
492 cexp = list_create(instruction(Op_pop));
493 dflt = instruction(Op_jmp);
494 dflt->target_jmp = tbreak; /* if no case match and no explicit default */
498 bcfree($7); /* Op_list */
502 for(; curr != NULL; curr = nextc) {
503 INSTRUCTION *caseexp = curr->case_exp;
504 INSTRUCTION *casestmt = curr->case_stmt;
507 if (curr->opcode == Op_K_case) {
508 if (caseexp->opcode == Op_push_i) {
509 /* a constant scalar */
511 caseval = force_string(caseexp->memory)->stptr;
512 for (i = 0; i < case_count; i++) {
513 if (strcmp(caseval, case_values[i]) == 0)
514 error_ln(curr->source_line,
515 _("duplicate case values in switch body: %s"), caseval);
518 if (case_values == NULL)
519 emalloc(case_values, const char **, sizeof(char *) * maxcount, "statement");
520 else if (case_count >= maxcount) {
522 erealloc(case_values, const char **, sizeof(char*) * maxcount, "statement");
524 case_values[case_count++] = caseval;
526 /* match a constant regex against switch expression. */
527 (curr + 1)->match_exp = true;
529 curr->stmt_start = casestmt->nexti;
530 curr->stmt_end = casestmt->lasti;
531 (void) list_prepend(cexp, curr);
532 (void) list_prepend(cexp, caseexp);
534 if (dflt->target_jmp != tbreak)
535 error_ln(curr->source_line,
536 _("duplicate `default' detected in switch body"));
538 dflt->target_jmp = casestmt->nexti;
540 if (do_pretty_print) {
541 curr->stmt_start = casestmt->nexti;
542 curr->stmt_end = casestmt->lasti;
543 (void) list_prepend(cexp, curr);
548 cstmt = list_merge(casestmt, cstmt);
551 if (case_values != NULL)
555 if (do_pretty_print) {
556 (void) list_prepend(ip, $1);
557 (void) list_prepend(ip, instruction(Op_exec_count));
558 $1->target_break = tbreak;
559 ($1 + 1)->switch_start = cexp->nexti;
560 ($1 + 1)->switch_end = cexp->lasti;
564 (void) list_append(cexp, dflt);
565 (void) list_merge(ip, cexp);
566 $$ = list_merge(ip, cstmt);
569 fix_break_continue(ip, tbreak, NULL);
571 | LEX_WHILE '(' exp r_paren opt_nls statement
586 INSTRUCTION *ip, *tbreak, *tcont;
588 tbreak = instruction(Op_no_op);
589 add_lint($3, LINT_assign_in_cond);
591 ip = list_append($3, instruction(Op_jmp_false));
592 ip->lasti->target_jmp = tbreak;
594 if (do_pretty_print) {
595 (void) list_append(ip, instruction(Op_exec_count));
596 $1->target_break = tbreak;
597 $1->target_continue = tcont;
598 ($1 + 1)->while_body = ip->lasti;
599 (void) list_prepend(ip, $1);
604 (void) list_merge(ip, $6);
605 (void) list_append(ip, instruction(Op_jmp));
606 ip->lasti->target_jmp = tcont;
607 $$ = list_append(ip, tbreak);
611 fix_break_continue(ip, tbreak, tcont);
613 | LEX_DO opt_nls statement LEX_WHILE '(' exp r_paren opt_nls
627 INSTRUCTION *ip, *tbreak, *tcont;
629 tbreak = instruction(Op_no_op);
631 add_lint($6, LINT_assign_in_cond);
633 ip = list_merge($3, $6);
635 ip = list_prepend($6, instruction(Op_no_op));
637 (void) list_prepend(ip, instruction(Op_exec_count));
638 (void) list_append(ip, instruction(Op_jmp_true));
639 ip->lasti->target_jmp = ip->nexti;
640 $$ = list_append(ip, tbreak);
644 fix_break_continue(ip, tbreak, tcont);
646 if (do_pretty_print) {
647 $1->target_break = tbreak;
648 $1->target_continue = tcont;
649 ($1 + 1)->doloop_cond = tcont;
650 $$ = list_prepend(ip, $1);
653 $1 and $4 are NULLs */
655 | LEX_FOR '(' NAME LEX_IN simple_variable r_paren opt_nls statement
658 char *var_name = $3->lextok;
661 && $8->lasti->opcode == Op_K_delete
662 && $8->lasti->expr_count == 1
663 && $8->nexti->opcode == Op_push
664 && ($8->nexti->memory->type != Node_var || !($8->nexti->memory->var_update))
665 && strcmp($8->nexti->memory->vname, var_name) == 0
668 /* Efficiency hack. Recognize the special case of
673 * and treat it as if it were
677 * Check that the body is a `delete a[i]' statement,
678 * and that both the loop var and array names match.
682 ip = $8->nexti->nexti;
683 if ($5->nexti->opcode == Op_push && $5->lasti == $5->nexti)
684 arr = $5->nexti->memory;
686 && ip->opcode == Op_no_op
687 && ip->nexti->opcode == Op_push_array
688 && strcmp(ip->nexti->memory->vname, arr->vname) == 0
689 && ip->nexti->nexti == $8->lasti
691 (void) make_assignable($8->nexti);
692 $8->lasti->opcode = Op_K_delete_loop;
693 $8->lasti->expr_count = 0;
704 INSTRUCTION *tbreak, *tcont;
706 /* [ Op_push_array a ]
707 * [ Op_arrayfor_init | ib ]
708 * ic:[ Op_arrayfor_incr | ib ]
709 * [ Op_var_assign if any ]
714 * ib:[Op_arrayfor_final ]
718 ip->nexti->opcode = Op_push_array;
720 tbreak = instruction(Op_arrayfor_final);
721 $4->opcode = Op_arrayfor_incr;
722 $4->array_var = variable($3->source_line, var_name, Node_var);
723 $4->target_jmp = tbreak;
725 $3->opcode = Op_arrayfor_init;
726 $3->target_jmp = tbreak;
727 (void) list_append(ip, $3);
729 if (do_pretty_print) {
730 $1->opcode = Op_K_arrayfor;
731 $1->target_continue = tcont;
732 $1->target_break = tbreak;
733 (void) list_append(ip, $1);
737 /* add update_FOO instruction if necessary */
738 if ($4->array_var->type == Node_var && $4->array_var->var_update) {
739 (void) list_append(ip, instruction(Op_var_update));
740 ip->lasti->update_var = $4->array_var->var_update;
742 (void) list_append(ip, $4);
744 /* add set_FOO instruction if necessary */
745 if ($4->array_var->type == Node_var && $4->array_var->var_assign) {
746 (void) list_append(ip, instruction(Op_var_assign));
747 ip->lasti->assign_var = $4->array_var->var_assign;
750 if (do_pretty_print) {
751 (void) list_append(ip, instruction(Op_exec_count));
752 ($1 + 1)->forloop_cond = $4;
753 ($1 + 1)->forloop_body = ip->lasti;
757 (void) list_merge(ip, $8);
759 (void) list_append(ip, instruction(Op_jmp));
760 ip->lasti->target_jmp = $4;
761 $$ = list_append(ip, tbreak);
762 fix_break_continue(ip, tbreak, tcont);
768 | LEX_FOR '(' opt_simple_stmt semi opt_nls exp semi opt_nls opt_simple_stmt r_paren opt_nls statement
770 $$ = mk_for_loop($1, $3, $6, $9, $12);
775 | LEX_FOR '(' opt_simple_stmt semi opt_nls semi opt_nls opt_simple_stmt r_paren opt_nls statement
777 $$ = mk_for_loop($1, $3, (INSTRUCTION *) NULL, $8, $11);
785 $$ = list_prepend($1, instruction(Op_exec_count));
792 : LEX_BREAK statement_term
795 error_ln($1->source_line,
796 _("`break' is not allowed outside a loop or switch"));
797 $1->target_jmp = NULL;
798 $$ = list_create($1);
801 | LEX_CONTINUE statement_term
803 if (! continue_allowed)
804 error_ln($1->source_line,
805 _("`continue' is not allowed outside a loop"));
806 $1->target_jmp = NULL;
807 $$ = list_create($1);
810 | LEX_NEXT statement_term
812 /* if inside function (rule = 0), resolve context at run-time */
813 if (rule && rule != Rule)
814 error_ln($1->source_line,
815 _("`next' used in %s action"), ruletab[rule]);
816 $1->target_jmp = ip_rec;
817 $$ = list_create($1);
819 | LEX_NEXTFILE statement_term
821 /* if inside function (rule = 0), resolve context at run-time */
822 if (rule == BEGIN || rule == END || rule == ENDFILE)
823 error_ln($1->source_line,
824 _("`nextfile' used in %s action"), ruletab[rule]);
826 $1->target_newfile = ip_newfile;
827 $1->target_endfile = ip_endfile;
828 $$ = list_create($1);
830 | LEX_EXIT opt_exp statement_term
832 /* Initialize the two possible jump targets, the actual target
833 * is resolved at run-time.
835 $1->target_end = ip_end; /* first instruction in end_block */
836 $1->target_atexit = ip_atexit; /* cleanup and go home */
839 $$ = list_create($1);
840 (void) list_prepend($$, instruction(Op_push_i));
841 $$->nexti->memory = dupnode(Nnull_string);
843 $$ = list_append($2, $1);
848 yyerror(_("`return' used outside function context"));
849 } opt_exp statement_term {
851 $$ = list_create($1);
852 (void) list_prepend($$, instruction(Op_push_i));
853 $$->nexti->memory = dupnode(Nnull_string);
856 && $3->lasti->opcode == Op_func_call
857 && strcmp($3->lasti->func_name, in_function) == 0
859 /* Do tail recursion optimization. Tail
860 * call without a return value is recognized
863 ($3->lasti + 1)->tail_call = true;
866 $$ = list_append($3, $1);
869 | simple_stmt statement_term
873 * A simple_stmt exists to satisfy a constraint in the POSIX
874 * grammar allowing them to occur as the 1st and 3rd parts
875 * in a `for (...;...;...)' loop. This is a historical oddity
876 * inherited from Unix awk, not at all documented in the AK&W
877 * awk book. We support it, as this was reported as a bug.
878 * We don't bother to document it though. So there.
881 : print { in_print = true; in_parens = 0; } print_expression_list output_redir
884 * Optimization: plain `print' has no expression list, so $3 is null.
885 * If $3 is NULL or is a bytecode list for $0 use Op_K_print_rec,
886 * which is faster for these two cases.
889 if ($1->opcode == Op_K_print &&
891 || ($3->lasti->opcode == Op_field_spec
892 && $3->nexti->nexti->nexti == $3->lasti
893 && $3->nexti->nexti->opcode == Op_push_i
894 && $3->nexti->nexti->memory->type == Node_val)
897 static bool warned = false;
904 * [Op_K_print_rec | NULL | redir_type | expr_count]
908 NODE *n = $3->nexti->nexti->memory;
913 bcfree($3->lasti); /* Op_field_spec */
914 unref(n); /* Node_val */
915 bcfree($3->nexti->nexti); /* Op_push_i */
916 bcfree($3->nexti); /* Op_list */
917 bcfree($3); /* Op_list */
919 if (do_lint && (rule == BEGIN || rule == END) && ! warned) {
921 lintwarn_ln($1->source_line,
922 _("plain `print' in BEGIN or END rule should probably be `print \"\"'"));
927 $1->opcode = Op_K_print_rec;
928 if ($4 == NULL) { /* no redircetion */
929 $1->redir_type = redirect_none;
930 $$ = list_create($1);
934 $1->redir_type = ip->redir_type;
935 $4->nexti = ip->nexti;
937 $$ = list_append($4, $1);
944 * [ expression_list ]
946 * [$1 | NULL | redir_type | expr_count]
950 if ($4 == NULL) { /* no redirection */
951 if ($3 == NULL) { /* printf without arg */
953 $1->redir_type = redirect_none;
954 $$ = list_create($1);
957 $1->expr_count = count_expressions(&t, false);
958 $1->redir_type = redirect_none;
959 $$ = list_append(t, $1);
964 $1->redir_type = ip->redir_type;
965 $4->nexti = ip->nexti;
969 $$ = list_append($4, $1);
972 $1->expr_count = count_expressions(&t, false);
973 $$ = list_append(list_merge($4, t), $1);
979 | LEX_DELETE NAME { sub_counter = 0; } delete_subscript_list
981 char *arr = $2->lextok;
983 $2->opcode = Op_push_array;
984 $2->memory = variable($2->source_line, arr, Node_var_new);
986 if (! do_posix && ! do_traditional) {
987 if ($2->memory == symbol_table)
988 fatal(_("`delete' is not allowed with SYMTAB"));
989 else if ($2->memory == func_table)
990 fatal(_("`delete' is not allowed with FUNCTAB"));
995 * As of September 2012, POSIX has added support
996 * for `delete array'. See:
997 * http://austingroupbugs.net/view.php?id=544
999 * Thanks to Nathan Weeks for the initiative.
1001 * Thus we no longer warn or check do_posix.
1002 * Also, since BWK awk supports it, we don't have to
1003 * check do_traditional either.
1006 $$ = list_append(list_create($2), $1);
1008 $1->expr_count = sub_counter;
1009 $$ = list_append(list_append($4, $2), $1);
1012 | LEX_DELETE '(' NAME ')'
1014 * this is for tawk compatibility. maybe the warnings
1015 * should always be done.
1018 static bool warned = false;
1019 char *arr = $3->lextok;
1021 if (do_lint && ! warned) {
1023 lintwarn_ln($1->source_line,
1024 _("`delete(array)' is a non-portable tawk extension"));
1026 if (do_traditional) {
1027 error_ln($1->source_line,
1028 _("`delete(array)' is a non-portable tawk extension"));
1030 $3->memory = variable($3->source_line, arr, Node_var_new);
1031 $3->opcode = Op_push_array;
1033 $$ = list_append(list_create($3), $1);
1035 if (! do_posix && ! do_traditional) {
1036 if ($3->memory == symbol_table)
1037 fatal(_("`delete' is not allowed with SYMTAB"));
1038 else if ($3->memory == func_table)
1039 fatal(_("`delete' is not allowed with FUNCTAB"));
1043 { $$ = optimize_assignment($1); }
1056 | case_statements case_statement
1059 $$ = list_create($2);
1061 $$ = list_prepend($1, $2);
1063 | case_statements error
1068 : LEX_CASE case_value colon opt_nls statements
1070 INSTRUCTION *casestmt = $5;
1072 casestmt = list_create(instruction(Op_no_op));
1073 if (do_pretty_print)
1074 (void) list_prepend(casestmt, instruction(Op_exec_count));
1076 $1->case_stmt = casestmt;
1080 | LEX_DEFAULT colon opt_nls statements
1082 INSTRUCTION *casestmt = $4;
1084 casestmt = list_create(instruction(Op_no_op));
1085 if (do_pretty_print)
1086 (void) list_prepend(casestmt, instruction(Op_exec_count));
1088 $1->case_stmt = casestmt;
1096 | '-' YNUMBER %prec UNARY
1098 NODE *n = $2->memory;
1099 (void) force_number(n);
1104 | '+' YNUMBER %prec UNARY
1113 $1->opcode = Op_push_re;
1126 * Note: ``print(x)'' is already parsed by the first rule,
1127 * so there is no good in covering it by the second one too.
1129 print_expression_list
1130 : opt_expression_list
1131 | '(' expression_list r_paren
1144 | IO_OUT { in_print = false; in_parens = 0; } common_exp
1146 if ($1->redir_type == redirect_twoway
1147 && $3->lasti->opcode == Op_K_getline_redir
1148 && $3->lasti->redir_type == redirect_twoway)
1149 yyerror(_("multistage two-way pipelines don't work"));
1150 $$ = list_prepend($3, $1);
1155 : LEX_IF '(' exp r_paren opt_nls statement
1157 $$ = mk_condition($3, $1, $6, NULL, NULL);
1159 | LEX_IF '(' exp r_paren opt_nls statement
1160 LEX_ELSE opt_nls statement
1162 $$ = mk_condition($3, $1, $6, $7, $9);
1196 $1->param_count = 0;
1197 $$ = list_create($1);
1199 | param_list comma NAME
1201 $3->param_count = $1->lasti->param_count + 1;
1202 $$ = list_append($1, $3);
1209 | param_list comma error
1213 /* optional expression, as in for loop */
1230 { $$ = mk_expression_list(NULL, $1); }
1231 | expression_list comma exp
1233 $$ = mk_expression_list($1, $3);
1238 | expression_list error
1241 * Returning the expression list instead of NULL lets
1242 * snode get a list of arguments that it can count.
1246 | expression_list error exp
1249 $$ = mk_expression_list($1, $3);
1251 | expression_list comma error
1258 /* Expressions, not including the comma operator. */
1260 : variable assign_operator exp %prec ASSIGNOP
1262 if (do_lint && $3->lasti->opcode == Op_match_rec)
1263 lintwarn_ln($2->source_line,
1264 _("regular expression on right of assignment"));
1265 $$ = mk_assignment($1, $3, $2);
1268 { $$ = mk_boolean($1, $3, $2); }
1270 { $$ = mk_boolean($1, $3, $2); }
1273 if ($1->lasti->opcode == Op_match_rec)
1274 warning_ln($2->source_line,
1275 _("regular expression on left of `~' or `!~' operator"));
1277 if ($3->lasti == $3->nexti && $3->nexti->opcode == Op_match_rec) {
1278 $2->memory = $3->nexti->memory;
1279 bcfree($3->nexti); /* Op_match_rec */
1280 bcfree($3); /* Op_list */
1281 $$ = list_append($1, $2);
1283 $2->memory = make_regnode(Node_dynregex, NULL);
1284 $$ = list_append(list_merge($1, $3), $2);
1287 | exp LEX_IN simple_variable
1290 warning_ln($2->source_line,
1291 _("old awk does not support the keyword `in' except after `for'"));
1292 $3->nexti->opcode = Op_push_array;
1293 $2->opcode = Op_in_array;
1295 $$ = list_append(list_merge($1, $3), $2);
1297 | exp a_relop exp %prec RELOP
1299 if (do_lint && $3->lasti->opcode == Op_match_rec)
1300 lintwarn_ln($2->source_line,
1301 _("regular expression on right of comparison"));
1302 $$ = list_append(list_merge($1, $3), $2);
1304 | exp '?' exp ':' exp
1305 { $$ = mk_condition($1, $2, $3, $4, $5); }
1315 | SLASH_BEFORE_EQUAL ASSIGN /* `/=' */
1317 $2->opcode = Op_assign_quotient;
1341 | common_exp simp_exp %prec CONCAT_OP
1344 bool is_simple_var = false;
1346 if ($1->lasti->opcode == Op_concat) {
1347 /* multiple (> 2) adjacent strings optimization */
1348 is_simple_var = ($1->lasti->concat_flag & CSVAR);
1349 count = $1->lasti->expr_count + 1;
1350 $1->lasti->opcode = Op_no_op;
1352 is_simple_var = ($1->nexti->opcode == Op_push
1353 && $1->lasti == $1->nexti); /* first exp. is a simple
1354 * variable?; kludge for use
1355 * in Op_assign_concat.
1360 && $1->nexti == $1->lasti && $1->nexti->opcode == Op_push_i
1361 && $2->nexti == $2->lasti && $2->nexti->opcode == Op_push_i
1363 NODE *n1 = $1->nexti->memory;
1364 NODE *n2 = $2->nexti->memory;
1367 n1 = force_string(n1);
1368 n2 = force_string(n2);
1369 nlen = n1->stlen + n2->stlen;
1370 erealloc(n1->stptr, char *, nlen + 2, "constant fold");
1371 memcpy(n1->stptr + n1->stlen, n2->stptr, n2->stlen);
1373 n1->stptr[nlen] = '\0';
1374 n1->flags &= ~(NUMCUR|NUMBER|NUMINT);
1375 n1->flags |= (STRING|STRCUR);
1381 $$ = list_append(list_merge($1, $2), instruction(Op_concat));
1382 $$->lasti->concat_flag = (is_simple_var ? CSVAR : 0);
1383 $$->lasti->expr_count = count;
1384 if (count > max_args)
1392 /* Binary operators in order of decreasing precedence. */
1393 | simp_exp '^' simp_exp
1394 { $$ = mk_binary($1, $3, $2); }
1395 | simp_exp '*' simp_exp
1396 { $$ = mk_binary($1, $3, $2); }
1397 | simp_exp '/' simp_exp
1398 { $$ = mk_binary($1, $3, $2); }
1399 | simp_exp '%' simp_exp
1400 { $$ = mk_binary($1, $3, $2); }
1401 | simp_exp '+' simp_exp
1402 { $$ = mk_binary($1, $3, $2); }
1403 | simp_exp '-' simp_exp
1404 { $$ = mk_binary($1, $3, $2); }
1405 | LEX_GETLINE opt_variable input_redir
1408 * In BEGINFILE/ENDFILE, allow `getline var < file'
1411 if (rule == BEGINFILE || rule == ENDFILE) {
1412 if ($2 != NULL && $3 != NULL)
1416 error_ln($1->source_line,
1417 _("`getline var' invalid inside `%s' rule"), ruletab[rule]);
1419 error_ln($1->source_line,
1420 _("`getline' invalid inside `%s' rule"), ruletab[rule]);
1423 if (do_lint && rule == END && $3 == NULL)
1424 lintwarn_ln($1->source_line,
1425 _("non-redirected `getline' undefined inside END action"));
1426 $$ = mk_getline($1, $2, $3, redirect_input);
1428 | variable INCREMENT
1430 $2->opcode = Op_postincrement;
1431 $$ = mk_assignment($1, NULL, $2);
1433 | variable DECREMENT
1435 $2->opcode = Op_postdecrement;
1436 $$ = mk_assignment($1, NULL, $2);
1438 | '(' expression_list r_paren LEX_IN simple_variable
1441 warning_ln($4->source_line,
1442 _("old awk does not support the keyword `in' except after `for'"));
1443 warning_ln($4->source_line,
1444 _("old awk does not support multidimensional arrays"));
1446 $5->nexti->opcode = Op_push_array;
1447 $4->opcode = Op_in_array;
1448 if ($2 == NULL) { /* error */
1451 $$ = list_merge($5, $4);
1453 INSTRUCTION *t = $2;
1454 $4->expr_count = count_expressions(&t, false);
1455 $$ = list_append(list_merge(t, $5), $4);
1460 /* Expressions containing "| getline" lose the ability to be on the
1461 right-hand side of a concatenation. */
1463 : common_exp IO_IN LEX_GETLINE opt_variable
1465 $$ = mk_getline($3, $4, $1, $2->redir_type);
1468 /* Binary operators in order of decreasing precedence. */
1469 | simp_exp_nc '^' simp_exp
1470 { $$ = mk_binary($1, $3, $2); }
1471 | simp_exp_nc '*' simp_exp
1472 { $$ = mk_binary($1, $3, $2); }
1473 | simp_exp_nc '/' simp_exp
1474 { $$ = mk_binary($1, $3, $2); }
1475 | simp_exp_nc '%' simp_exp
1476 { $$ = mk_binary($1, $3, $2); }
1477 | simp_exp_nc '+' simp_exp
1478 { $$ = mk_binary($1, $3, $2); }
1479 | simp_exp_nc '-' simp_exp
1480 { $$ = mk_binary($1, $3, $2); }
1486 $$ = list_create($1);
1488 | '!' simp_exp %prec UNARY
1490 if ($2->opcode == Op_match_rec) {
1491 $2->opcode = Op_nomatch;
1492 $1->opcode = Op_push_i;
1493 $1->memory = make_number(0.0);
1494 $$ = list_append(list_append(list_create($1),
1495 instruction(Op_field_spec)), $2);
1497 if (do_optimize && $2->nexti == $2->lasti
1498 && $2->nexti->opcode == Op_push_i
1499 && ($2->nexti->memory->flags & (MPFN|MPZN)) == 0
1501 NODE *n = $2->nexti->memory;
1502 if ((n->flags & (STRCUR|STRING)) != 0) {
1503 n->numbr = (AWKNUM) (n->stlen == 0);
1504 n->flags &= ~(STRCUR|STRING);
1505 n->flags |= (NUMCUR|NUMBER);
1510 n->numbr = (AWKNUM) (n->numbr == 0.0);
1514 $1->opcode = Op_not;
1515 add_lint($2, LINT_assign_in_cond);
1516 $$ = list_append($2, $1);
1522 | LEX_BUILTIN '(' opt_expression_list r_paren
1528 | LEX_LENGTH '(' opt_expression_list r_paren
1536 static bool warned = false;
1538 if (do_lint && ! warned) {
1540 lintwarn_ln($1->source_line,
1541 _("call of `length' without parentheses is not portable"));
1543 $$ = snode(NULL, $1);
1549 | INCREMENT variable
1551 $1->opcode = Op_preincrement;
1552 $$ = mk_assignment($2, NULL, $1);
1554 | DECREMENT variable
1556 $1->opcode = Op_predecrement;
1557 $$ = mk_assignment($2, NULL, $1);
1561 $$ = list_create($1);
1565 $$ = list_create($1);
1567 | '-' simp_exp %prec UNARY
1569 if ($2->lasti->opcode == Op_push_i
1570 && ($2->lasti->memory->flags & (STRCUR|STRING)) == 0
1572 NODE *n = $2->lasti->memory;
1573 (void) force_number(n);
1578 $1->opcode = Op_unary_minus;
1579 $$ = list_append($2, $1);
1582 | '+' simp_exp %prec UNARY
1586 * POSIX semantics: force a conversion to numeric type
1588 $1->opcode = Op_plus_i;
1589 $1->memory = make_number(0.0);
1590 $$ = list_append($2, $1);
1597 func_use($1->lasti->func_name, FUNC_USE);
1600 | '@' direct_func_call
1602 /* indirect function call */
1606 static bool warned = false;
1607 const char *msg = _("indirect function calls are a gawk extension");
1609 if (do_traditional || do_posix)
1611 else if (do_lint && ! warned) {
1613 lintwarn("%s", msg);
1617 f->opcode = Op_indirect_func_call;
1618 name = estrdup(f->func_name, strlen(f->func_name));
1619 if (is_std_var(name))
1620 yyerror(_("can not use special variable `%s' for indirect function call"), name);
1621 indirect_var = variable(f->source_line, name, Node_var_new);
1622 t = instruction(Op_push);
1623 t->memory = indirect_var;
1625 /* prepend indirect var instead of appending to arguments (opt_expression_list),
1626 * and pop it off in setup_frame (eval.c) (left to right evaluation order); Test case:
1631 $$ = list_prepend($2, t);
1636 : FUNC_CALL '(' opt_expression_list r_paren
1639 $1->opcode = Op_func_call;
1640 $1->func_body = NULL;
1641 if ($3 == NULL) { /* no argument or error */
1642 ($1 + 1)->expr_count = 0;
1643 $$ = list_create($1);
1645 INSTRUCTION *t = $3;
1646 ($1 + 1)->expr_count = count_expressions(&t, true);
1647 $$ = list_append(t, $1);
1659 delete_subscript_list
1662 | delete_subscript SUBSCRIPT
1669 | delete_subscript delete_exp_list
1671 $$ = list_merge($1, $2);
1676 : bracketed_exp_list
1678 INSTRUCTION *ip = $1->lasti;
1679 int count = ip->sub_count; /* # of SUBSEP-seperated expressions */
1681 /* change Op_subscript or Op_sub_array to Op_concat */
1682 ip->opcode = Op_concat;
1683 ip->concat_flag = CSUBSEP;
1684 ip->expr_count = count;
1686 ip->opcode = Op_no_op;
1687 sub_counter++; /* count # of dimensions */
1693 : '[' expression_list ']'
1695 INSTRUCTION *t = $2;
1697 error_ln($3->source_line,
1698 _("invalid subscript expression"));
1699 /* install Null string as subscript. */
1700 t = list_create(instruction(Op_push_i));
1701 t->nexti->memory = dupnode(Nnull_string);
1704 $3->sub_count = count_expressions(&t, false);
1705 $$ = list_append(t, $3);
1710 : bracketed_exp_list
1712 | subscript bracketed_exp_list
1714 $$ = list_merge($1, $2);
1719 : subscript SUBSCRIPT
1726 char *var_name = $1->lextok;
1728 $1->opcode = Op_push;
1729 $1->memory = variable($1->source_line, var_name, Node_var_new);
1730 $$ = list_create($1);
1732 | NAME subscript_list
1734 char *arr = $1->lextok;
1735 $1->memory = variable($1->source_line, arr, Node_var_new);
1736 $1->opcode = Op_push_array;
1737 $$ = list_prepend($2, $1);
1744 INSTRUCTION *ip = $1->nexti;
1745 if (ip->opcode == Op_push
1746 && ip->memory->type == Node_var
1747 && ip->memory->var_update
1749 $$ = list_prepend($1, instruction(Op_var_update));
1750 $$->nexti->update_var = ip->memory->var_update;
1754 | '$' non_post_simp_exp opt_incdec
1756 $$ = list_append($2, $1);
1758 mk_assignment($2, NULL, $3);
1765 $1->opcode = Op_postincrement;
1769 $1->opcode = Op_postdecrement;
1771 | /* empty */ { $$ = NULL; }
1779 : '}' opt_nls { yyerrok; }
1796 : ':' { $$ = $1; yyerrok; }
1800 : ',' opt_nls { yyerrok; }
1805 const char *operator; /* text to match */
1806 OPCODE value; /* type */
1807 int class; /* lexical class */
1808 unsigned flags; /* # of args. allowed and compatability */
1809 # define ARGS 0xFF /* 0, 1, 2, 3 args allowed (any combination */
1810 # define A(n) (1<<(n))
1811 # define VERSION_MASK 0xFF00 /* old awk is zero */
1812 # define NOT_OLD 0x0100 /* feature not in old awk */
1813 # define NOT_POSIX 0x0200 /* feature not in POSIX */
1814 # define GAWKX 0x0400 /* gawk extension */
1815 # define BREAK 0x0800 /* break allowed inside */
1816 # define CONTINUE 0x1000 /* continue allowed inside */
1818 NODE *(*ptr)(int); /* function that implements this keyword */
1819 NODE *(*ptr2)(int); /* alternate arbitrary-precision function */
1822 #if 'a' == 0x81 /* it's EBCDIC */
1823 /* tokcompare --- lexicographically compare token names for sorting */
1826 tokcompare(const void *l, const void *r)
1828 struct token *lhs, *rhs;
1830 lhs = (struct token *) l;
1831 rhs = (struct token *) r;
1833 return strcmp(lhs->operator, rhs->operator);
1838 * Tokentab is sorted ASCII ascending order, so it can be binary searched.
1839 * See check_special(), which sorts the table on EBCDIC systems.
1840 * Function pointers come from declarations in awk.h.
1844 #define MPF(F) do_mpfr_##F
1849 static const struct token tokentab[] = {
1850 {"BEGIN", Op_rule, LEX_BEGIN, 0, 0, 0},
1851 {"BEGINFILE", Op_rule, LEX_BEGINFILE, GAWKX, 0, 0},
1852 {"END", Op_rule, LEX_END, 0, 0, 0},
1853 {"ENDFILE", Op_rule, LEX_ENDFILE, GAWKX, 0, 0},
1855 {"adump", Op_builtin, LEX_BUILTIN, GAWKX|A(1)|A(2), do_adump, 0},
1857 {"and", Op_builtin, LEX_BUILTIN, GAWKX, do_and, MPF(and)},
1858 {"asort", Op_builtin, LEX_BUILTIN, GAWKX|A(1)|A(2)|A(3), do_asort, 0},
1859 {"asorti", Op_builtin, LEX_BUILTIN, GAWKX|A(1)|A(2)|A(3), do_asorti, 0},
1860 {"atan2", Op_builtin, LEX_BUILTIN, NOT_OLD|A(2), do_atan2, MPF(atan2)},
1861 {"bindtextdomain", Op_builtin, LEX_BUILTIN, GAWKX|A(1)|A(2), do_bindtextdomain, 0},
1862 {"break", Op_K_break, LEX_BREAK, 0, 0, 0},
1863 {"case", Op_K_case, LEX_CASE, GAWKX, 0, 0},
1864 {"close", Op_builtin, LEX_BUILTIN, NOT_OLD|A(1)|A(2), do_close, 0},
1865 {"compl", Op_builtin, LEX_BUILTIN, GAWKX|A(1), do_compl, MPF(compl)},
1866 {"continue", Op_K_continue, LEX_CONTINUE, 0, 0, 0},
1867 {"cos", Op_builtin, LEX_BUILTIN, NOT_OLD|A(1), do_cos, MPF(cos)},
1868 {"dcgettext", Op_builtin, LEX_BUILTIN, GAWKX|A(1)|A(2)|A(3), do_dcgettext, 0},
1869 {"dcngettext", Op_builtin, LEX_BUILTIN, GAWKX|A(1)|A(2)|A(3)|A(4)|A(5), do_dcngettext, 0},
1870 {"default", Op_K_default, LEX_DEFAULT, GAWKX, 0, 0},
1871 {"delete", Op_K_delete, LEX_DELETE, NOT_OLD, 0, 0},
1872 {"do", Op_K_do, LEX_DO, NOT_OLD|BREAK|CONTINUE, 0, 0},
1873 {"else", Op_K_else, LEX_ELSE, 0, 0, 0},
1874 {"eval", Op_symbol, LEX_EVAL, 0, 0, 0},
1875 {"exit", Op_K_exit, LEX_EXIT, 0, 0, 0},
1876 {"exp", Op_builtin, LEX_BUILTIN, A(1), do_exp, MPF(exp)},
1878 {"extension", Op_builtin, LEX_BUILTIN, GAWKX|A(1)|A(2)|A(3), do_ext, 0},
1880 {"fflush", Op_builtin, LEX_BUILTIN, A(0)|A(1), do_fflush, 0},
1881 {"for", Op_K_for, LEX_FOR, BREAK|CONTINUE, 0, 0},
1882 {"func", Op_func, LEX_FUNCTION, NOT_POSIX|NOT_OLD, 0, 0},
1883 {"function",Op_func, LEX_FUNCTION, NOT_OLD, 0, 0},
1884 {"gensub", Op_sub_builtin, LEX_BUILTIN, GAWKX|A(3)|A(4), 0, 0},
1885 {"getline", Op_K_getline_redir, LEX_GETLINE, NOT_OLD, 0, 0},
1886 {"gsub", Op_sub_builtin, LEX_BUILTIN, NOT_OLD|A(2)|A(3), 0, 0},
1887 {"if", Op_K_if, LEX_IF, 0, 0, 0},
1888 {"in", Op_symbol, LEX_IN, 0, 0, 0},
1889 {"include", Op_symbol, LEX_INCLUDE, GAWKX, 0, 0},
1890 {"index", Op_builtin, LEX_BUILTIN, A(2), do_index, 0},
1891 {"int", Op_builtin, LEX_BUILTIN, A(1), do_int, MPF(int)},
1892 {"isarray", Op_builtin, LEX_BUILTIN, GAWKX|A(1), do_isarray, 0},
1893 {"length", Op_builtin, LEX_LENGTH, A(0)|A(1), do_length, 0},
1894 {"load", Op_symbol, LEX_LOAD, GAWKX, 0, 0},
1895 {"log", Op_builtin, LEX_BUILTIN, A(1), do_log, MPF(log)},
1896 {"lshift", Op_builtin, LEX_BUILTIN, GAWKX|A(2), do_lshift, MPF(lshift)},
1897 {"match", Op_builtin, LEX_BUILTIN, NOT_OLD|A(2)|A(3), do_match, 0},
1898 {"mktime", Op_builtin, LEX_BUILTIN, GAWKX|A(1), do_mktime, 0},
1899 {"next", Op_K_next, LEX_NEXT, 0, 0, 0},
1900 {"nextfile", Op_K_nextfile, LEX_NEXTFILE, 0, 0, 0},
1901 {"or", Op_builtin, LEX_BUILTIN, GAWKX, do_or, MPF(or)},
1902 {"patsplit", Op_builtin, LEX_BUILTIN, GAWKX|A(2)|A(3)|A(4), do_patsplit, 0},
1903 {"print", Op_K_print, LEX_PRINT, 0, 0, 0},
1904 {"printf", Op_K_printf, LEX_PRINTF, 0, 0, 0},
1905 {"rand", Op_builtin, LEX_BUILTIN, NOT_OLD|A(0), do_rand, MPF(rand)},
1906 {"return", Op_K_return, LEX_RETURN, NOT_OLD, 0, 0},
1907 {"rshift", Op_builtin, LEX_BUILTIN, GAWKX|A(2), do_rshift, MPF(rshift)},
1908 {"sin", Op_builtin, LEX_BUILTIN, NOT_OLD|A(1), do_sin, MPF(sin)},
1909 {"split", Op_builtin, LEX_BUILTIN, A(2)|A(3)|A(4), do_split, 0},
1910 {"sprintf", Op_builtin, LEX_BUILTIN, 0, do_sprintf, 0},
1911 {"sqrt", Op_builtin, LEX_BUILTIN, A(1), do_sqrt, MPF(sqrt)},
1912 {"srand", Op_builtin, LEX_BUILTIN, NOT_OLD|A(0)|A(1), do_srand, MPF(srand)},
1913 #if defined(GAWKDEBUG) || defined(ARRAYDEBUG) /* || ... */
1914 {"stopme", Op_builtin, LEX_BUILTIN, GAWKX|A(0), stopme, 0},
1916 {"strftime", Op_builtin, LEX_BUILTIN, GAWKX|A(0)|A(1)|A(2)|A(3), do_strftime, 0},
1917 {"strtonum", Op_builtin, LEX_BUILTIN, GAWKX|A(1), do_strtonum, MPF(strtonum)},
1918 {"sub", Op_sub_builtin, LEX_BUILTIN, NOT_OLD|A(2)|A(3), 0, 0},
1919 {"substr", Op_builtin, LEX_BUILTIN, A(2)|A(3), do_substr, 0},
1920 {"switch", Op_K_switch, LEX_SWITCH, GAWKX|BREAK, 0, 0},
1921 {"system", Op_builtin, LEX_BUILTIN, NOT_OLD|A(1), do_system, 0},
1922 {"systime", Op_builtin, LEX_BUILTIN, GAWKX|A(0), do_systime, 0},
1923 {"tolower", Op_builtin, LEX_BUILTIN, NOT_OLD|A(1), do_tolower, 0},
1924 {"toupper", Op_builtin, LEX_BUILTIN, NOT_OLD|A(1), do_toupper, 0},
1925 {"while", Op_K_while, LEX_WHILE, BREAK|CONTINUE, 0, 0},
1926 {"xor", Op_builtin, LEX_BUILTIN, GAWKX, do_xor, MPF(xor)},
1930 /* Variable containing the current shift state. */
1931 static mbstate_t cur_mbstate;
1932 /* Ring buffer containing current characters. */
1933 #define MAX_CHAR_IN_RING_BUFFER 8
1934 #define RING_BUFFER_SIZE (MAX_CHAR_IN_RING_BUFFER * MB_LEN_MAX)
1935 static char cur_char_ring[RING_BUFFER_SIZE];
1936 /* Index for ring buffers. */
1937 static int cur_ring_idx;
1938 /* This macro means that last nextc() return a singlebyte character
1939 or 1st byte of a multibyte character. */
1940 #define nextc_is_1stbyte (cur_char_ring[cur_ring_idx] == 1)
1941 #else /* MBS_SUPPORT */
1943 #define nextc_is_1stbyte 1
1944 #endif /* MBS_SUPPORT */
1946 /* getfname --- return name of a builtin function (for pretty printing) */
1949 getfname(NODE *(*fptr)(int))
1953 j = sizeof(tokentab) / sizeof(tokentab[0]);
1954 /* linear search, no other way to do it */
1955 for (i = 0; i < j; i++)
1956 if (tokentab[i].ptr == fptr)
1957 return tokentab[i].operator;
1962 /* negate_num --- negate a number in NODE */
1971 if (! is_mpg_number(n)) {
1972 n->numbr = -n->numbr;
1977 if (is_mpg_integer(n)) {
1979 mpz_neg(n->mpg_i, n->mpg_i);
1984 * 0 --> -0 conversion. Requires turning the MPG integer
1985 * into an MPFR float.
1988 mpz_clear(n->mpg_i); /* release the integer storage */
1990 /* Convert and fall through. */
1991 tval = mpfr_set_d(n->mpg_numbr, 0.0, ROUND_MODE);
1992 IEEE_FMT(n->mpg_numbr, tval);
1997 /* mpfr float case */
1998 tval = mpfr_neg(n->mpg_numbr, n->mpg_numbr, ROUND_MODE);
1999 IEEE_FMT(n->mpg_numbr, tval);
2003 /* print_included_from --- print `Included from ..' file names and locations */
2006 print_included_from()
2011 /* suppress current file name, line # from `.. included from ..' msgs */
2012 saveline = sourceline;
2015 for (s = sourcefile; s != NULL && s->stype == SRC_INC; ) {
2017 if (s == NULL || s->fd <= INVALID_HANDLE)
2021 /* if last token is NEWLINE, line number is off by 1. */
2022 if (s->lasttok == NEWLINE)
2025 s->prev == sourcefile ? "In file included from"
2027 (s->stype == SRC_INC ||
2028 s->stype == SRC_FILE) ? s->src : "cmd. line",
2030 s->stype == SRC_INC ? ',' : ':'
2033 sourceline = saveline;
2036 /* warning_ln --- print a warning message with location */
2039 warning_ln(int line, const char *mesg, ...)
2044 saveline = sourceline;
2046 print_included_from();
2047 va_start(args, mesg);
2048 err(false, _("warning: "), mesg, args);
2050 sourceline = saveline;
2053 /* lintwarn_ln --- print a lint warning and location */
2056 lintwarn_ln(int line, const char *mesg, ...)
2061 saveline = sourceline;
2063 print_included_from();
2064 va_start(args, mesg);
2065 if (lintfunc == r_fatal)
2066 err(true, _("fatal: "), mesg, args);
2068 err(false, _("warning: "), mesg, args);
2070 sourceline = saveline;
2071 if (lintfunc == r_fatal)
2072 gawk_exit(EXIT_FATAL);
2075 /* error_ln --- print an error message and location */
2078 error_ln(int line, const char *m, ...)
2083 saveline = sourceline;
2085 print_included_from();
2088 err(false, "error: ", m, args);
2090 sourceline = saveline;
2093 /* yyerror --- print a syntax error message, show where */
2096 yyerror(const char *m, ...)
2099 const char *mesg = NULL;
2104 static char end_of_file_line[] = "(END OF FILE)";
2107 print_included_from();
2110 /* Find the current line in the input file */
2111 if (lexptr && lexeme) {
2112 if (thisline == NULL) {
2116 mesg = _("unexpected newline or end of string");
2118 for (; cp != lexptr_begin && *cp != '\n'; --cp)
2124 /* NL isn't guaranteed */
2126 while (bp < lexend && *bp && *bp != '\n')
2129 thisline = end_of_file_line;
2130 bp = thisline + strlen(thisline);
2134 * Saving and restoring *bp keeps valgrind happy,
2135 * since the guts of glibc uses strlen, even though
2136 * we're passing an explict precision. Sigh.
2138 * 8/2003: We may not need this anymore.
2143 msg("%.*s", (int) (bp - thisline), thisline);
2150 count = (bp - thisline) + strlen(mesg) + 2 + 1;
2151 emalloc(buf, char *, count, "yyerror");
2155 if (lexptr != NULL) {
2157 while (scan < lexeme)
2158 if (*scan++ == '\t')
2166 err(false, "", buf, args);
2171 /* mk_program --- create a single list of instructions */
2173 static INSTRUCTION *
2176 INSTRUCTION *cp, *tmp;
2178 #define begin_block rule_block[BEGIN]
2179 #define end_block rule_block[END]
2180 #define prog_block rule_block[Rule]
2181 #define beginfile_block rule_block[BEGINFILE]
2182 #define endfile_block rule_block[ENDFILE]
2184 if (end_block == NULL)
2185 end_block = list_create(ip_end);
2187 (void) list_prepend(end_block, ip_end);
2189 if (! in_main_context()) {
2190 if (begin_block != NULL && prog_block != NULL)
2191 cp = list_merge(begin_block, prog_block);
2193 cp = (begin_block != NULL) ? begin_block : prog_block;
2196 (void) list_merge(cp, end_block);
2200 (void) list_append(cp, instruction(Op_stop));
2204 if (endfile_block == NULL)
2205 endfile_block = list_create(ip_endfile);
2207 ip_rec->has_endfile = true;
2208 (void) list_prepend(endfile_block, ip_endfile);
2211 if (beginfile_block == NULL)
2212 beginfile_block = list_create(ip_beginfile);
2214 (void) list_prepend(beginfile_block, ip_beginfile);
2216 if (prog_block == NULL) {
2217 if (end_block->nexti == end_block->lasti
2218 && beginfile_block->nexti == beginfile_block->lasti
2219 && endfile_block->nexti == endfile_block->lasti
2221 /* no pattern-action and (real) end, beginfile or endfile blocks */
2224 ip_rec = ip_newfile = NULL;
2226 list_append(beginfile_block, instruction(Op_after_beginfile));
2227 (void) list_append(endfile_block, instruction(Op_after_endfile));
2229 if (begin_block == NULL) /* no program at all */
2232 cp = list_merge(begin_block, end_block);
2233 (void) list_append(cp, ip_atexit);
2234 (void) list_append(cp, instruction(Op_stop));
2236 /* append beginfile_block and endfile_block for sole use
2237 * in getline without redirection (Op_K_getline).
2240 (void) list_merge(cp, beginfile_block);
2241 (void) list_merge(cp, endfile_block);
2246 /* install a do-nothing prog block */
2247 prog_block = list_create(instruction(Op_no_op));
2251 (void) list_append(endfile_block, instruction(Op_after_endfile));
2252 (void) list_prepend(prog_block, ip_rec);
2253 (void) list_append(prog_block, instruction(Op_jmp));
2254 prog_block->lasti->target_jmp = ip_rec;
2256 list_append(beginfile_block, instruction(Op_after_beginfile));
2258 cp = list_merge(beginfile_block, prog_block);
2259 (void) list_prepend(cp, ip_newfile);
2260 (void) list_merge(cp, endfile_block);
2261 (void) list_merge(cp, end_block);
2262 if (begin_block != NULL)
2263 cp = list_merge(begin_block, cp);
2265 (void) list_append(cp, ip_atexit);
2266 (void) list_append(cp, instruction(Op_stop));
2269 /* delete the Op_list, not needed */
2277 #undef beginfile_block
2278 #undef endfile_block
2281 /* parse_program --- read in the program and convert into a list of instructions */
2284 parse_program(INSTRUCTION **pcode)
2288 /* pre-create non-local jump targets
2289 * ip_end (Op_no_op) -- used as jump target for `exit'
2290 * outside an END block.
2292 ip_end = instruction(Op_no_op);
2294 if (! in_main_context())
2295 ip_newfile = ip_rec = ip_atexit = ip_beginfile = ip_endfile = NULL;
2297 ip_endfile = instruction(Op_no_op);
2298 ip_beginfile = instruction(Op_no_op);
2299 ip_rec = instruction(Op_get_record); /* target for `next', also ip_newfile */
2300 ip_newfile = bcalloc(Op_newfile, 2, 0); /* target for `nextfile' */
2301 ip_newfile->target_jmp = ip_end;
2302 ip_newfile->target_endfile = ip_endfile;
2303 (ip_newfile + 1)->target_get_record = ip_rec;
2304 ip_rec->target_newfile = ip_newfile;
2305 ip_atexit = instruction(Op_atexit); /* target for `exit' in END block */
2308 for (sourcefile = srcfiles->next; sourcefile->stype == SRC_EXTLIB;
2309 sourcefile = sourcefile->next)
2315 memset(rule_block, 0, sizeof(ruletab) * sizeof(INSTRUCTION *));
2317 tok = tokstart != NULL ? tokstart : tokexpand();
2320 *pcode = mk_program();
2322 /* avoid false source indications */
2325 if (ret == 0) /* avoid spurious warning if parser aborted with YYABORT */
2328 if (args_array == NULL)
2329 emalloc(args_array, NODE **, (max_args + 2) * sizeof(NODE *), "parse_program");
2331 erealloc(args_array, NODE **, (max_args + 2) * sizeof(NODE *), "parse_program");
2333 return (ret || errcount);
2336 /* do_add_srcfile --- add one item to srcfiles */
2339 do_add_srcfile(enum srctype stype, char *src, char *path, SRCFILE *thisfile)
2343 emalloc(s, SRCFILE *, sizeof(SRCFILE), "do_add_srcfile");
2344 memset(s, 0, sizeof(SRCFILE));
2345 s->src = estrdup(src, strlen(src));
2348 s->fd = INVALID_HANDLE;
2350 s->prev = thisfile->prev;
2351 thisfile->prev->next = s;
2356 /* add_srcfile --- add one item to srcfiles after checking if
2357 * a source file exists and not already in list.
2361 add_srcfile(enum srctype stype, char *src, SRCFILE *thisfile, bool *already_included, int *errcode)
2368 if (already_included)
2369 *already_included = false;
2372 if (stype == SRC_CMDLINE || stype == SRC_STDIN)
2373 return do_add_srcfile(stype, src, NULL, thisfile);
2375 path = find_source(src, & sbuf, &errno_val, stype == SRC_EXTLIB);
2378 *errcode = errno_val;
2381 /* use full messages to ease translation */
2382 fatal(stype != SRC_EXTLIB
2383 ? _("can't open source file `%s' for reading (%s)")
2384 : _("can't open shared library `%s' for reading (%s)"),
2386 errno_val ? strerror(errno_val) : _("reason unknown"));
2389 /* N.B. We do not eliminate duplicate SRC_FILE (-f) programs. */
2390 for (s = srcfiles->next; s != srcfiles; s = s->next) {
2391 if ((s->stype == SRC_FILE || s->stype == SRC_INC || s->stype == SRC_EXTLIB) && files_are_same(path, s)) {
2392 if (stype == SRC_INC || stype == SRC_EXTLIB) {
2393 /* eliminate duplicates */
2394 if ((stype == SRC_INC) && (s->stype == SRC_FILE))
2395 fatal(_("can't include `%s' and use it as a program file"), src);
2398 int line = sourceline;
2399 /* Kludge: the line number may be off for `@include file'.
2400 * Since, this function is also used for '-f file' in main.c,
2401 * sourceline > 1 check ensures that the call is at
2404 if (sourceline > 1 && lasttok == NEWLINE)
2408 ? _("already included source file `%s'")
2409 : _("already loaded shared library `%s'"),
2413 if (already_included)
2414 *already_included = true;
2417 /* duplicates are allowed for -f */
2418 if (s->stype == SRC_INC)
2419 fatal(_("can't include `%s' and use it as a program file"), src);
2420 /* no need to scan for further matches, since
2421 * they must be of homogeneous type */
2427 s = do_add_srcfile(stype, src, path, thisfile);
2429 s->mtime = sbuf.st_mtime;
2433 /* include_source --- read program from source included using `@include' */
2436 include_source(INSTRUCTION *file)
2439 char *src = file->lextok;
2441 bool already_included;
2443 if (do_traditional || do_posix) {
2444 error_ln(file->source_line, _("@include is a gawk extension"));
2448 if (strlen(src) == 0) {
2450 lintwarn_ln(file->source_line, _("empty filename after @include"));
2454 s = add_srcfile(SRC_INC, src, sourcefile, &already_included, &errcode);
2456 if (already_included)
2458 error_ln(file->source_line,
2459 _("can't open source file `%s' for reading (%s)"),
2460 src, errcode ? strerror(errcode) : _("reason unknown"));
2464 /* save scanner state for the current sourcefile */
2465 sourcefile->srclines = sourceline;
2466 sourcefile->lexptr = lexptr;
2467 sourcefile->lexend = lexend;
2468 sourcefile->lexptr_begin = lexptr_begin;
2469 sourcefile->lexeme = lexeme;
2470 sourcefile->lasttok = lasttok;
2472 /* included file becomes the current source */
2483 /* load_library --- load a shared library */
2486 load_library(INSTRUCTION *file)
2489 char *src = file->lextok;
2491 bool already_included;
2493 if (do_traditional || do_posix) {
2494 error_ln(file->source_line, _("@load is a gawk extension"));
2498 if (strlen(src) == 0) {
2500 lintwarn_ln(file->source_line, _("empty filename after @load"));
2504 s = add_srcfile(SRC_EXTLIB, src, sourcefile, &already_included, &errcode);
2506 if (already_included)
2508 error_ln(file->source_line,
2509 _("can't open shared library `%s' for reading (%s)"),
2510 src, errcode ? strerror(errcode) : _("reason unknown"));
2514 load_ext(s->fullpath);
2518 /* next_sourcefile --- read program from the next source in srcfiles */
2523 static int (*closefunc)(int fd) = NULL;
2525 if (closefunc == NULL) {
2526 char *cp = getenv("AWKREADFUNC");
2528 /* If necessary, one day, test value for different functions. */
2532 closefunc = one_line_close;
2536 * This won't be true if there's an invalid character in
2537 * the source file or source string (e.g., user typo).
2538 * Previous versions of gawk did not core dump in such a
2541 * assert(lexeof == true);
2546 sourcefile->srclines = sourceline; /* total no of lines in current file */
2547 if (sourcefile->fd > INVALID_HANDLE) {
2548 if (sourcefile->fd != fileno(stdin)) /* safety */
2549 (*closefunc)(sourcefile->fd);
2550 sourcefile->fd = INVALID_HANDLE;
2552 if (sourcefile->buf != NULL) {
2553 efree(sourcefile->buf);
2554 sourcefile->buf = NULL;
2555 sourcefile->lexptr_begin = NULL;
2558 while ((sourcefile = sourcefile->next) != NULL) {
2559 if (sourcefile == srcfiles)
2561 if (sourcefile->stype != SRC_EXTLIB)
2565 if (sourcefile->lexptr_begin != NULL) {
2566 /* resume reading from already opened file (postponed to process '@include') */
2567 lexptr = sourcefile->lexptr;
2568 lexend = sourcefile->lexend;
2569 lasttok = sourcefile->lasttok;
2570 lexptr_begin = sourcefile->lexptr_begin;
2571 lexeme = sourcefile->lexeme;
2572 sourceline = sourcefile->srclines;
2573 source = sourcefile->src;
2582 /* get_src_buf --- read the next buffer of source program */
2594 * No argument prototype on readfunc on purpose,
2595 * avoids problems with some ancient systems where
2596 * the types of arguments to read() aren't up to date.
2598 static ssize_t (*readfunc)() = 0;
2600 if (readfunc == NULL) {
2601 char *cp = getenv("AWKREADFUNC");
2603 /* If necessary, one day, test value for different functions. */
2606 * cast is to remove warnings on systems with
2607 * different return types for read.
2609 readfunc = ( ssize_t(*)() ) read;
2611 readfunc = read_one_line;
2615 if (sourcefile == srcfiles)
2618 if (sourcefile->stype == SRC_CMDLINE) {
2619 if (sourcefile->bufsize == 0) {
2620 sourcefile->bufsize = strlen(sourcefile->src);
2621 lexptr = lexptr_begin = lexeme = sourcefile->src;
2622 lexend = lexptr + sourcefile->bufsize;
2624 if (sourcefile->bufsize == 0) {
2626 * Yet Another Special case:
2627 * gawk '' /path/name
2630 static bool warned = false;
2632 if (do_lint && ! warned) {
2634 lintwarn(_("empty program text on command line"));
2638 } else if (sourcefile->buf == NULL && *(lexptr-1) != '\n') {
2640 * The following goop is to ensure that the source
2641 * ends with a newline and that the entire current
2642 * line is available for error messages.
2647 offset = lexptr - lexeme;
2648 for (scan = lexeme; scan > lexptr_begin; scan--)
2649 if (*scan == '\n') {
2653 savelen = lexptr - scan;
2654 emalloc(buf, char *, savelen + 1, "get_src_buf");
2655 memcpy(buf, scan, savelen);
2657 lexptr = buf + savelen;
2659 lexeme = lexptr - offset;
2661 lexend = lexptr + 1;
2662 sourcefile->buf = buf;
2668 if (sourcefile->fd <= INVALID_HANDLE) {
2672 source = sourcefile->src;
2675 fd = srcopen(sourcefile);
2676 if (fd <= INVALID_HANDLE) {
2679 /* suppress file name and line no. in error mesg */
2682 error(_("can't open source file `%s' for reading (%s)"),
2683 in, strerror(errno));
2686 return sourcefile->src;
2689 sourcefile->fd = fd;
2690 l = optimal_bufsize(fd, &sbuf);
2692 * Make sure that something silly like
2693 * AWKBUFSIZE=8 make check
2696 #define A_DECENT_BUFFER_SIZE 128
2697 if (l < A_DECENT_BUFFER_SIZE)
2698 l = A_DECENT_BUFFER_SIZE;
2699 #undef A_DECENT_BUFFER_SIZE
2700 sourcefile->bufsize = l;
2702 emalloc(sourcefile->buf, char *, sourcefile->bufsize, "get_src_buf");
2703 lexptr = lexptr_begin = lexeme = sourcefile->buf;
2709 * Here, we retain the current source line in the beginning of the buffer.
2712 for (scan = lexeme; scan > lexptr_begin; scan--)
2713 if (*scan == '\n') {
2718 savelen = lexptr - scan;
2719 offset = lexptr - lexeme;
2723 * Need to make sure we have room left for reading new text;
2724 * grow the buffer (by doubling, an arbitrary choice), if the retained line
2725 * takes up more than a certain percentage (50%, again an arbitrary figure)
2726 * of the available space.
2729 if (savelen > sourcefile->bufsize / 2) { /* long line or token */
2730 sourcefile->bufsize *= 2;
2731 erealloc(sourcefile->buf, char *, sourcefile->bufsize, "get_src_buf");
2732 scan = sourcefile->buf + (scan - lexptr_begin);
2733 lexptr_begin = sourcefile->buf;
2736 thisline = lexptr_begin;
2737 memmove(thisline, scan, savelen);
2738 lexptr = thisline + savelen;
2739 lexeme = lexptr - offset;
2742 lexptr = lexeme = lexptr_begin;
2747 n = (*readfunc)(sourcefile->fd, lexptr, sourcefile->bufsize - savelen);
2749 error(_("can't read sourcefile `%s' (%s)"),
2750 source, strerror(errno));
2754 lexend = lexptr + n;
2756 static bool warned = false;
2757 if (do_lint && newfile && ! warned){
2760 lintwarn(_("source file `%s' is empty"), source);
2765 return sourcefile->buf;
2768 /* tokadd --- add a character to the token buffer */
2770 #define tokadd(x) (*tok++ = (x), tok == tokend ? tokexpand() : tok)
2772 /* tokexpand --- grow the token buffer */
2780 if (tokstart != NULL) {
2781 tokoffset = tok - tokstart;
2783 erealloc(tokstart, char *, toksize, "tokexpand");
2784 tok = tokstart + tokoffset;
2787 emalloc(tokstart, char *, toksize, "tokexpand");
2790 tokend = tokstart + toksize;
2794 /* nextc --- get the next input character */
2801 if (gawk_mb_cur_max > 1) {
2805 if (lexptr == NULL || lexptr >= lexend) {
2811 /* Update the buffer index. */
2812 cur_ring_idx = (cur_ring_idx == RING_BUFFER_SIZE - 1)? 0 :
2815 /* Did we already check the current character? */
2816 if (cur_char_ring[cur_ring_idx] == 0) {
2817 /* No, we need to check the next character on the buffer. */
2818 int idx, work_ring_idx = cur_ring_idx;
2819 mbstate_t tmp_state;
2822 for (idx = 0 ; lexptr + idx < lexend ; idx++) {
2823 tmp_state = cur_mbstate;
2824 mbclen = mbrlen(lexptr, idx + 1, &tmp_state);
2826 if (mbclen == 1 || mbclen == (size_t)-1 || mbclen == 0) {
2827 /* It is a singlebyte character, non-complete multibyte
2828 character or EOF. We treat it as a singlebyte
2830 cur_char_ring[work_ring_idx] = 1;
2832 } else if (mbclen == (size_t)-2) {
2833 /* It is not a complete multibyte character. */
2834 cur_char_ring[work_ring_idx] = idx + 1;
2837 cur_char_ring[work_ring_idx] = mbclen;
2840 work_ring_idx = (work_ring_idx == RING_BUFFER_SIZE - 1)?
2841 0 : work_ring_idx + 1;
2843 cur_mbstate = tmp_state;
2845 /* Put a mark on the position on which we write next character. */
2846 work_ring_idx = (work_ring_idx == RING_BUFFER_SIZE - 1)?
2847 0 : work_ring_idx + 1;
2848 cur_char_ring[work_ring_idx] = 0;
2851 return (int) (unsigned char) *lexptr++;
2856 if (lexptr && lexptr < lexend)
2857 return ((int) (unsigned char) *lexptr++);
2858 } while (get_src_buf());
2863 #else /* MBS_SUPPORT */
2871 if (lexptr && lexptr < lexend)
2872 return ((int) (unsigned char) *lexptr++);
2873 } while (get_src_buf());
2877 #endif /* MBS_SUPPORT */
2879 /* pushback --- push a character back on the input */
2885 if (gawk_mb_cur_max > 1)
2886 cur_ring_idx = (cur_ring_idx == 0)? RING_BUFFER_SIZE - 1 :
2889 (! lexeof && lexptr && lexptr > lexptr_begin ? lexptr-- : lexptr);
2893 /* allow_newline --- allow newline after &&, ||, ? and : */
2902 if (c == END_FILE) {
2907 while ((c = nextc()) != '\n' && c != END_FILE)
2909 if (c == END_FILE) {
2923 /* newline_eof --- return newline or EOF as needed and adjust variables */
2926 * This routine used to be a macro, however GCC 4.6.2 warned about
2927 * the result of a computation not being used. Converting to a function
2928 * removes the warnings.
2931 static int newline_eof()
2933 /* NB: a newline at end does not start a source line. */
2934 if (lasttok != NEWLINE) {
2936 if (do_lint && ! eof_warned) {
2937 lintwarn(_("source file does not end in newline"));
2949 /* yylex --- Read the input and turn it into tokens. */
2955 bool seen_e = false; /* These are for numbers */
2956 bool seen_point = false;
2957 bool esc_seen; /* for literal strings */
2960 static bool did_newline = false;
2963 bool intlstr = false;
2966 #define GET_INSTRUCTION(op) bcalloc(op, 1, sourceline)
2968 #define NEWLINE_EOF newline_eof()
2970 yylval = (INSTRUCTION *) NULL;
2971 if (lasttok == SUBSCRIPT) {
2976 if (lasttok == LEX_EOF) /* error earlier in current source, must give up !! */
2983 return lasttok = NEWLINE_EOF;
2988 * added for OS/2's extproc feature of cmd.exe
2989 * (like #! in BSD sh)
2991 if (strncasecmp(lexptr, "extproc ", 8) == 0) {
2992 while (*lexptr && *lexptr != '\n')
3000 int in_brack = 0; /* count brackets, [[:alnum:]] allowed */
3002 * Counting brackets is non-trivial. [[] is ok,
3003 * and so is [\]], with a point being that /[/]/ as a regexp
3004 * constant has to work.
3006 * Do not count [ or ] if either one is preceded by a \.
3007 * A `[' should be counted if
3008 * a) it is the first one so far (in_brack == 0)
3009 * b) it is the `[' in `[:'
3010 * A ']' should be counted if not preceded by a \, since
3011 * it is either closing `:]' or just a plain list.
3012 * According to POSIX, []] is how you put a ] into a set.
3013 * Try to handle that too.
3015 * The code for \ handles \[ and \].
3018 want_regexp = false;
3023 if (gawk_mb_cur_max == 1 || nextc_is_1stbyte) switch (c) {
3025 /* one day check for `.' and `=' too */
3026 if (nextc() == ':' || in_brack == 0)
3031 if (tokstart[0] == '['
3032 && (tok == tokstart + 1
3033 || (tok == tokstart + 2
3034 && tokstart[1] == '^')))
3040 if ((c = nextc()) == END_FILE) {
3042 yyerror(_("unterminated regexp ends with `\\' at end of file"));
3043 goto end_regexp; /* kludge */
3044 } else if (c == '\n') {
3053 case '/': /* end of the regexp */
3057 yylval = GET_INSTRUCTION(Op_token);
3058 yylval->lextok = estrdup(tokstart, tok - tokstart);
3063 if (peek == 'i' || peek == 's') {
3066 _("%s: %d: tawk regex modifier `/.../%c' doesn't work in gawk"),
3067 source, sourceline, peek);
3070 _("tawk regex modifier `/.../%c' doesn't work in gawk"),
3074 return lasttok = REGEXP;
3077 yyerror(_("unterminated regexp"));
3078 goto end_regexp; /* kludge */
3081 yyerror(_("unterminated regexp at end of file"));
3082 goto end_regexp; /* kludge */
3089 /* skipping \r is a hack, but windows is just too pervasive. sigh. */
3090 while ((c = nextc()) == ' ' || c == '\t' || c == '\r')
3093 lexeme = lexptr ? lexptr - 1 : lexptr;
3098 if (gawk_mb_cur_max == 1 || nextc_is_1stbyte)
3105 return lasttok = NEWLINE_EOF;
3109 return lasttok = NEWLINE;
3111 case '#': /* it's a comment */
3112 while ((c = nextc()) != '\n') {
3114 return lasttok = NEWLINE_EOF;
3117 return lasttok = NEWLINE;
3120 return lasttok = '@';
3123 #ifdef RELAXED_CONTINUATION
3125 * This code puports to allow comments and/or whitespace
3126 * after the `\' at the end of a line used for continuation.
3127 * Use it at your own risk. We think it's a bad idea, which
3128 * is why it's not on by default.
3130 if (! do_traditional) {
3131 /* strip trailing white-space and/or comment */
3132 while ((c = nextc()) == ' ' || c == '\t' || c == '\r')
3135 static bool warned = false;
3137 if (do_lint && ! warned) {
3140 _("use of `\\ #...' line continuation is not portable"));
3142 while ((c = nextc()) != '\n')
3148 #endif /* RELAXED_CONTINUATION */
3150 if (c == '\r') /* allow MS-DOS files. bleah */
3156 yyerror(_("backslash not last character on line"));
3157 return lasttok = LEX_EOF;
3163 yylval = GET_INSTRUCTION(Op_cond_exp);
3169 * in_parens is undefined unless we are parsing a print
3170 * statement (in_print), but why bother with a check?
3180 yylval = GET_INSTRUCTION(Op_field_spec);
3183 if (++in_braces == 1)
3184 firstline = sourceline;
3193 yylval = GET_INSTRUCTION(Op_sub_array);
3196 yylval = GET_INSTRUCTION(Op_subscript);
3197 lasttok = SUBSCRIPT; /* end of subscripts */
3202 if ((c = nextc()) == '=') {
3203 yylval = GET_INSTRUCTION(Op_assign_times);
3204 return lasttok = ASSIGNOP;
3205 } else if (do_posix) {
3207 yylval = GET_INSTRUCTION(Op_times);
3208 return lasttok = '*';
3209 } else if (c == '*') {
3210 /* make ** and **= aliases for ^ and ^= */
3211 static bool did_warn_op = false, did_warn_assgn = false;
3213 if (nextc() == '=') {
3214 if (! did_warn_assgn) {
3215 did_warn_assgn = true;
3217 lintwarn(_("POSIX does not allow operator `**='"));
3219 warning(_("old awk does not support operator `**='"));
3221 yylval = GET_INSTRUCTION(Op_assign_exp);
3225 if (! did_warn_op) {
3228 lintwarn(_("POSIX does not allow operator `**'"));
3230 warning(_("old awk does not support operator `**'"));
3232 yylval = GET_INSTRUCTION(Op_exp);
3233 return lasttok = '^';
3237 yylval = GET_INSTRUCTION(Op_times);
3238 return lasttok = '*';
3241 if (nextc() == '=') {
3243 return lasttok = SLASH_BEFORE_EQUAL;
3246 yylval = GET_INSTRUCTION(Op_quotient);
3247 return lasttok = '/';
3250 if (nextc() == '=') {
3251 yylval = GET_INSTRUCTION(Op_assign_mod);
3252 return lasttok = ASSIGNOP;
3255 yylval = GET_INSTRUCTION(Op_mod);
3256 return lasttok = '%';
3260 static bool did_warn_op = false, did_warn_assgn = false;
3262 if (nextc() == '=') {
3263 if (do_lint_old && ! did_warn_assgn) {
3264 did_warn_assgn = true;
3265 warning(_("operator `^=' is not supported in old awk"));
3267 yylval = GET_INSTRUCTION(Op_assign_exp);
3268 return lasttok = ASSIGNOP;
3271 if (do_lint_old && ! did_warn_op) {
3273 warning(_("operator `^' is not supported in old awk"));
3275 yylval = GET_INSTRUCTION(Op_exp);
3276 return lasttok = '^';
3280 if ((c = nextc()) == '=') {
3281 yylval = GET_INSTRUCTION(Op_assign_plus);
3282 return lasttok = ASSIGNOP;
3285 yylval = GET_INSTRUCTION(Op_symbol);
3286 return lasttok = INCREMENT;
3289 yylval = GET_INSTRUCTION(Op_plus);
3290 return lasttok = '+';
3293 if ((c = nextc()) == '=') {
3294 yylval = GET_INSTRUCTION(Op_notequal);
3295 return lasttok = RELOP;
3298 yylval = GET_INSTRUCTION(Op_nomatch);
3299 return lasttok = MATCHOP;
3302 yylval = GET_INSTRUCTION(Op_symbol);
3303 return lasttok = '!';
3306 if (nextc() == '=') {
3307 yylval = GET_INSTRUCTION(Op_leq);
3308 return lasttok = RELOP;
3310 yylval = GET_INSTRUCTION(Op_less);
3312 return lasttok = '<';
3315 if (nextc() == '=') {
3316 yylval = GET_INSTRUCTION(Op_equal);
3317 return lasttok = RELOP;
3319 yylval = GET_INSTRUCTION(Op_assign);
3321 return lasttok = ASSIGN;
3324 if ((c = nextc()) == '=') {
3325 yylval = GET_INSTRUCTION(Op_geq);
3326 return lasttok = RELOP;
3327 } else if (c == '>') {
3328 yylval = GET_INSTRUCTION(Op_symbol);
3329 yylval->redir_type = redirect_append;
3330 return lasttok = IO_OUT;
3333 if (in_print && in_parens == 0) {
3334 yylval = GET_INSTRUCTION(Op_symbol);
3335 yylval->redir_type = redirect_output;
3336 return lasttok = IO_OUT;
3338 yylval = GET_INSTRUCTION(Op_greater);
3339 return lasttok = '>';
3342 yylval = GET_INSTRUCTION(Op_match);
3343 return lasttok = MATCHOP;
3347 * Added did newline stuff. Easier than
3348 * hacking the grammar.
3351 did_newline = false;
3352 if (--in_braces == 0)
3353 lastline = sourceline;
3357 --lexptr; /* pick up } next time */
3358 return lasttok = NEWLINE;
3363 while ((c = nextc()) != '"') {
3366 yyerror(_("unterminated string"));
3367 return lasttok = LEX_EOF;
3369 if ((gawk_mb_cur_max == 1 || nextc_is_1stbyte) &&
3377 if (! want_source || c != '"')
3380 if (c == END_FILE) {
3382 yyerror(_("unterminated string"));
3383 return lasttok = LEX_EOF;
3387 yylval = GET_INSTRUCTION(Op_token);
3389 yylval->lextok = estrdup(tokstart, tok - tokstart);
3390 return lasttok = FILENAME;
3393 yylval->opcode = Op_push_i;
3394 yylval->memory = make_str_node(tokstart,
3395 tok - tokstart, esc_seen ? SCAN : 0);
3397 yylval->memory->flags |= INTLSTR;
3400 dumpintlstr(yylval->memory->stptr, yylval->memory->stlen);
3402 return lasttok = YSTRING;
3405 if ((c = nextc()) == '=') {
3406 yylval = GET_INSTRUCTION(Op_assign_minus);
3407 return lasttok = ASSIGNOP;
3410 yylval = GET_INSTRUCTION(Op_symbol);
3411 return lasttok = DECREMENT;
3414 yylval = GET_INSTRUCTION(Op_minus);
3415 return lasttok = '-';
3421 return lasttok = '.';
3437 bool gotnumber = false;
3445 if (tok == tokstart + 2) {
3448 if (isxdigit(peek)) {
3450 pushback(); /* following digit */
3452 pushback(); /* x or X */
3458 /* period ends exponent part of floating point number */
3459 if (seen_point || seen_e) {
3474 if ((c = nextc()) == '-' || c == '+') {
3481 pushback(); /* non-digit after + or - */
3482 pushback(); /* + or - */
3483 pushback(); /* e or E */
3485 } else if (! isdigit(c)) {
3486 pushback(); /* character after e or E */
3487 pushback(); /* e or E */
3489 pushback(); /* digit */
3502 if (do_traditional || ! inhex)
3527 yylval = GET_INSTRUCTION(Op_push_i);
3530 if (! do_traditional) {
3531 base = get_numbase(tokstart, false);
3534 lintwarn("numeric constant `%.*s' treated as octal",
3535 (int) strlen(tokstart)-1, tokstart);
3536 else if (base == 16)
3537 lintwarn("numeric constant `%.*s' treated as hexadecimal",
3538 (int) strlen(tokstart)-1, tokstart);
3546 if (! seen_point && ! seen_e) {
3548 mpg_strtoui(r->mpg_i, tokstart, strlen(tokstart), NULL, base);
3553 tval = mpfr_strtofr(r->mpg_numbr, tokstart, NULL, base, ROUND_MODE);
3555 IEEE_FMT(r->mpg_numbr, tval);
3558 return lasttok = YNUMBER;
3562 d = nondec2awknum(tokstart, strlen(tokstart));
3565 yylval->memory = make_number(d);
3566 if (d <= INT32_MAX && d >= INT32_MIN && d == (int32_t) d)
3567 yylval->memory->flags |= NUMINT;
3568 return lasttok = YNUMBER;
3571 if ((c = nextc()) == '&') {
3572 yylval = GET_INSTRUCTION(Op_and);
3574 return lasttok = LEX_AND;
3577 yylval = GET_INSTRUCTION(Op_symbol);
3578 return lasttok = '&';
3581 if ((c = nextc()) == '|') {
3582 yylval = GET_INSTRUCTION(Op_or);
3584 return lasttok = LEX_OR;
3585 } else if (! do_traditional && c == '&') {
3586 yylval = GET_INSTRUCTION(Op_symbol);
3587 yylval->redir_type = redirect_twoway;
3588 return lasttok = (in_print && in_parens == 0 ? IO_OUT : IO_IN);
3591 if (in_print && in_parens == 0) {
3592 yylval = GET_INSTRUCTION(Op_symbol);
3593 yylval->redir_type = redirect_pipe;
3594 return lasttok = IO_OUT;
3596 yylval = GET_INSTRUCTION(Op_symbol);
3597 yylval->redir_type = redirect_pipein;
3598 return lasttok = IO_IN;
3602 if (c != '_' && ! isalpha(c)) {
3603 yyerror(_("invalid char '%c' in expression"), c);
3604 return lasttok = LEX_EOF;
3608 * Lots of fog here. Consider:
3610 * print "xyzzy"$_"foo"
3612 * Without the check for ` lasttok != '$' ', this is parsed as
3614 * print "xxyzz" $(_"foo")
3616 * With the check, it is "correctly" parsed as three
3617 * string concatenations. Sigh. This seems to be
3618 * "more correct", but this is definitely one of those
3619 * occasions where the interactions are funny.
3621 if (! do_traditional && c == '_' && lasttok != '$') {
3622 if ((c = nextc()) == '"') {
3630 /* it's some type of name-type-thing. Find its length. */
3632 while (c != END_FILE && is_identchar(c)) {
3639 /* See if it is a special token. */
3640 if ((mid = check_special(tokstart)) >= 0) {
3641 static int warntab[sizeof(tokentab) / sizeof(tokentab[0])];
3642 int class = tokentab[mid].class;
3644 if ((class == LEX_INCLUDE || class == LEX_LOAD || class == LEX_EVAL)
3649 if ((tokentab[mid].flags & GAWKX) != 0 && (warntab[mid] & GAWKX) == 0) {
3650 lintwarn(_("`%s' is a gawk extension"),
3651 tokentab[mid].operator);
3652 warntab[mid] |= GAWKX;
3654 if ((tokentab[mid].flags & NOT_POSIX) != 0 && (warntab[mid] & NOT_POSIX) == 0) {
3655 lintwarn(_("POSIX does not allow `%s'"),
3656 tokentab[mid].operator);
3657 warntab[mid] |= NOT_POSIX;
3660 if (do_lint_old && (tokentab[mid].flags & NOT_OLD) != 0
3661 && (warntab[mid] & NOT_OLD) == 0
3663 warning(_("`%s' is not supported in old awk"),
3664 tokentab[mid].operator);
3665 warntab[mid] |= NOT_OLD;
3668 if ((tokentab[mid].flags & BREAK) != 0)
3670 if ((tokentab[mid].flags & CONTINUE) != 0)
3679 if (in_main_context())
3681 emalloc(tokkey, char *, tok - tokstart + 1, "yylex");
3683 memcpy(tokkey + 1, tokstart, tok - tokstart);
3684 yylval = GET_INSTRUCTION(Op_token);
3685 yylval->lextok = tokkey;
3693 yylval = bcalloc(tokentab[mid].value, 3, sourceline);
3700 if (! do_pretty_print)
3701 return lasttok = class;
3704 yylval = bcalloc(tokentab[mid].value, 2, sourceline);
3708 * These must be checked here, due to the LALR nature of the parser,
3709 * the rules for continue and break may not be reduced until after
3710 * a token that increments the xxx_allowed varibles is seen. Bleah.
3713 if (! continue_allowed) {
3714 error_ln(sourceline,
3715 _("`continue' is not allowed outside a loop"));
3718 goto make_instruction;
3721 if (! break_allowed) {
3722 error_ln(sourceline,
3723 _("`break' is not allowed outside a loop or switch"));
3726 goto make_instruction;
3730 yylval = GET_INSTRUCTION(tokentab[mid].value);
3731 if (class == LEX_BUILTIN || class == LEX_LENGTH)
3732 yylval->builtin_idx = mid;
3735 return lasttok = class;
3738 tokkey = estrdup(tokstart, tok - tokstart);
3739 if (*lexptr == '(') {
3740 yylval = bcalloc(Op_token, 2, sourceline);
3741 yylval->lextok = tokkey;
3742 return lasttok = FUNC_CALL;
3744 static bool goto_warned = false;
3746 yylval = GET_INSTRUCTION(Op_token);
3747 yylval->lextok = tokkey;
3749 #define SMART_ALECK 1
3750 if (SMART_ALECK && do_lint
3751 && ! goto_warned && strcasecmp(tokkey, "goto") == 0) {
3753 lintwarn(_("`goto' considered harmful!\n"));
3755 return lasttok = NAME;
3758 #undef GET_INSTRUCTION
3762 /* snode --- instructions for builtin functions. Checks for arg. count
3763 and supplies defaults where possible. */
3765 static INSTRUCTION *
3766 snode(INSTRUCTION *subn, INSTRUCTION *r)
3773 int idx = r->builtin_idx;
3777 for (tp = subn->nexti; tp; tp = tp->nexti) {
3784 /* check against how many args. are allowed for this builtin */
3785 args_allowed = tokentab[idx].flags & ARGS;
3786 if (args_allowed && (args_allowed & A(nexp)) == 0) {
3787 yyerror(_("%d is invalid as number of arguments for %s"),
3788 nexp, tokentab[idx].operator);
3792 /* special processing for sub, gsub and gensub */
3794 if (tokentab[idx].value == Op_sub_builtin) {
3795 const char *operator = tokentab[idx].operator;
3799 arg = subn->nexti; /* first arg list */
3800 (void) mk_rexp(arg);
3802 if (strcmp(operator, "gensub") != 0) {
3805 if (strcmp(operator, "gsub") == 0)
3806 r->sub_flags |= GSUB;
3808 arg = arg->lasti->nexti; /* 2nd arg list */
3812 expr = list_create(instruction(Op_push_i));
3813 expr->nexti->memory = make_number(0.0);
3814 (void) mk_expression_list(subn,
3815 list_append(expr, instruction(Op_field_spec)));
3818 arg = arg->lasti->nexti; /* third arg list */
3820 if (ip->opcode == Op_push_i) {
3822 lintwarn(_("%s: string literal as last arg of substitute has no effect"),
3824 r->sub_flags |= LITERAL;
3826 if (make_assignable(ip) == NULL)
3827 yyerror(_("%s third parameter is not a changeable object"),
3830 ip->do_reference = true;
3833 r->expr_count = count_expressions(&subn, false);
3836 (void) list_append(subn, r);
3838 /* add after_assign code */
3839 if (ip->opcode == Op_push_lhs && ip->memory->type == Node_var && ip->memory->var_assign) {
3840 (void) list_append(subn, instruction(Op_var_assign));
3841 subn->lasti->assign_ctxt = Op_sub_builtin;
3842 subn->lasti->assign_var = ip->memory->var_assign;
3843 } else if (ip->opcode == Op_field_spec_lhs) {
3844 (void) list_append(subn, instruction(Op_field_assign));
3845 subn->lasti->assign_ctxt = Op_sub_builtin;
3846 subn->lasti->field_assign = (Func_ptr) 0;
3847 ip->target_assign = subn->lasti;
3848 } else if (ip->opcode == Op_subscript_lhs) {
3849 (void) list_append(subn, instruction(Op_subscript_assign));
3850 subn->lasti->assign_ctxt = Op_sub_builtin;
3858 r->sub_flags |= GENSUB;
3860 ip = instruction(Op_push_i);
3861 ip->memory = make_number(0.0);
3862 (void) mk_expression_list(subn,
3863 list_append(list_create(ip), instruction(Op_field_spec)));
3866 r->expr_count = count_expressions(&subn, false);
3867 return list_append(subn, r);
3872 /* N.B.: There isn't any special processing for an alternate function below */
3873 if (do_mpfr && tokentab[idx].ptr2)
3874 r->builtin = tokentab[idx].ptr2;
3877 r->builtin = tokentab[idx].ptr;
3879 /* special case processing for a few builtins */
3881 if (r->builtin == do_length) {
3883 /* no args. Use $0 */
3887 list = list_create(r);
3888 (void) list_prepend(list, instruction(Op_field_spec));
3889 (void) list_prepend(list, instruction(Op_push_i));
3890 list->nexti->memory = make_number(0.0);
3894 if (arg->nexti == arg->lasti && arg->nexti->opcode == Op_push)
3895 arg->nexti->opcode = Op_push_arg; /* argument may be array */
3897 } else if (r->builtin == do_isarray) {
3899 if (arg->nexti == arg->lasti && arg->nexti->opcode == Op_push)
3900 arg->nexti->opcode = Op_push_arg; /* argument may be array */
3901 } else if (r->builtin == do_match) {
3902 static bool warned = false;
3904 arg = subn->nexti->lasti->nexti; /* 2nd arg list */
3905 (void) mk_rexp(arg);
3907 if (nexp == 3) { /* 3rd argument there */
3908 if (do_lint && ! warned) {
3910 lintwarn(_("match: third argument is a gawk extension"));
3912 if (do_traditional) {
3913 yyerror(_("match: third argument is a gawk extension"));
3917 arg = arg->lasti->nexti; /* third arg list */
3919 if (/*ip == arg->nexti && */ ip->opcode == Op_push)
3920 ip->opcode = Op_push_array;
3922 } else if (r->builtin == do_split) {
3923 arg = subn->nexti->lasti->nexti; /* 2nd arg list */
3925 if (ip->opcode == Op_push)
3926 ip->opcode = Op_push_array;
3929 expr = list_create(instruction(Op_push));
3930 expr->nexti->memory = FS_node;
3931 (void) mk_expression_list(subn, expr);
3933 arg = arg->lasti->nexti;
3936 n->re_flags |= FS_DFLT;
3938 arg = arg->lasti->nexti;
3940 if (ip->opcode == Op_push)
3941 ip->opcode = Op_push_array;
3943 } else if (r->builtin == do_patsplit) {
3944 arg = subn->nexti->lasti->nexti; /* 2nd arg list */
3946 if (ip->opcode == Op_push)
3947 ip->opcode = Op_push_array;
3950 expr = list_create(instruction(Op_push));
3951 expr->nexti->memory = FPAT_node;
3952 (void) mk_expression_list(subn, expr);
3954 arg = arg->lasti->nexti;
3957 arg = arg->lasti->nexti;
3959 if (ip->opcode == Op_push)
3960 ip->opcode = Op_push_array;
3962 } else if (r->builtin == do_close) {
3963 static bool warned = false;
3965 if (do_lint && ! warned) {
3967 lintwarn(_("close: second argument is a gawk extension"));
3969 if (do_traditional) {
3970 yyerror(_("close: second argument is a gawk extension"));
3974 } else if (do_intl /* --gen-po */
3975 && r->builtin == do_dcgettext /* dcgettext(...) */
3976 && subn->nexti->lasti->opcode == Op_push_i /* 1st arg is constant */
3977 && (subn->nexti->lasti->memory->flags & STRCUR) != 0) { /* it's a string constant */
3978 /* ala xgettext, dcgettext("some string" ...) dumps the string */
3979 NODE *str = subn->nexti->lasti->memory;
3981 if ((str->flags & INTLSTR) != 0)
3982 warning(_("use of dcgettext(_\"...\") is incorrect: remove leading underscore"));
3983 /* don't dump it, the lexer already did */
3985 dumpintlstr(str->stptr, str->stlen);
3986 } else if (do_intl /* --gen-po */
3987 && r->builtin == do_dcngettext /* dcngettext(...) */
3988 && subn->nexti->lasti->opcode == Op_push_i /* 1st arg is constant */
3989 && (subn->nexti->lasti->memory->flags & STRCUR) != 0 /* it's a string constant */
3990 && subn->nexti->lasti->nexti->lasti->opcode == Op_push_i /* 2nd arg is constant too */
3991 && (subn->nexti->lasti->nexti->lasti->memory->flags & STRCUR) != 0) { /* it's a string constant */
3992 /* ala xgettext, dcngettext("some string", "some plural" ...) dumps the string */
3993 NODE *str1 = subn->nexti->lasti->memory;
3994 NODE *str2 = subn->nexti->lasti->nexti->lasti->memory;
3996 if (((str1->flags | str2->flags) & INTLSTR) != 0)
3997 warning(_("use of dcngettext(_\"...\") is incorrect: remove leading underscore"));
3999 dumpintlstr2(str1->stptr, str1->stlen, str2->stptr, str2->stlen);
4000 } else if (r->builtin == do_asort || r->builtin == do_asorti) {
4001 arg = subn->nexti; /* 1st arg list */
4003 if (ip->opcode == Op_push)
4004 ip->opcode = Op_push_array;
4008 if (ip->opcode == Op_push)
4009 ip->opcode = Op_push_array;
4012 else if (r->builtin == do_index) {
4013 arg = subn->nexti->lasti->nexti; /* 2nd arg list */
4015 if (ip->opcode == Op_match_rec)
4016 fatal(_("index: regexp constant as second argument is not allowed"));
4019 else if (r->builtin == do_adump) {
4020 ip = subn->nexti->lasti;
4021 if (ip->opcode == Op_push)
4022 ip->opcode = Op_push_array;
4027 r->expr_count = count_expressions(&subn, false);
4028 return list_append(subn, r);
4032 return list_create(r);
4036 /* parms_shadow --- check if parameters shadow globals */
4039 parms_shadow(INSTRUCTION *pc, bool *shadow)
4046 func = pc->func_body;
4047 fname = func->vname;
4050 #if 0 /* can't happen, already exited if error ? */
4051 if (fname == NULL || func == NULL) /* error earlier */
4055 pcount = func->param_cnt;
4057 if (pcount == 0) /* no args, no problem */
4060 source = pc->source_file;
4061 sourceline = pc->source_line;
4063 * Use warning() and not lintwarn() so that can warn
4064 * about all shadowed parameters.
4066 for (i = 0; i < pcount; i++) {
4067 if (lookup(fp[i].param) != NULL) {
4069 _("function `%s': parameter `%s' shadows global variable"),
4070 fname, fp[i].param);
4079 /* valinfo --- dump var info */
4082 valinfo(NODE *n, Func_print print_func, FILE *fp)
4084 if (n == Nnull_string)
4085 print_func(fp, "uninitialized scalar\n");
4086 else if (n->flags & STRING) {
4087 pp_string_fp(print_func, fp, n->stptr, n->stlen, '"', false);
4088 print_func(fp, "\n");
4089 } else if (n->flags & NUMBER) {
4091 if (is_mpg_float(n))
4092 print_func(fp, "%s\n", mpg_fmt("%.17R*g", ROUND_MODE, n->mpg_numbr));
4093 else if (is_mpg_integer(n))
4094 print_func(fp, "%s\n", mpg_fmt("%Zd", n->mpg_i));
4097 print_func(fp, "%.17g\n", n->numbr);
4098 } else if (n->flags & STRCUR) {
4099 pp_string_fp(print_func, fp, n->stptr, n->stlen, '"', false);
4100 print_func(fp, "\n");
4101 } else if (n->flags & NUMCUR) {
4103 if (is_mpg_float(n))
4104 print_func(fp, "%s\n", mpg_fmt("%.17R*g", ROUND_MODE, n->mpg_numbr));
4105 else if (is_mpg_integer(n))
4106 print_func(fp, "%s\n", mpg_fmt("%Zd", n->mpg_i));
4109 print_func(fp, "%.17g\n", n->numbr);
4111 print_func(fp, "?? flags %s\n", flags2str(n->flags));
4115 /* dump_vars --- dump the symbol table */
4118 dump_vars(const char *fname)
4125 else if ((fp = fopen(fname, "w")) == NULL) {
4126 warning(_("could not open `%s' for writing (%s)"), fname, strerror(errno));
4127 warning(_("sending variable list to standard error"));
4131 vars = variable_list();
4132 print_vars(vars, fprintf, fp);
4134 if (fp != stderr && fclose(fp) != 0)
4135 warning(_("%s: close failed (%s)"), fname, strerror(errno));
4138 /* dump_funcs --- print all functions */
4144 funcs = function_list(true);
4145 (void) foreach_func(funcs, (int (*)(INSTRUCTION *, void *)) pp_func, (void *) 0);
4150 /* shadow_funcs --- check all functions for parameters that shadow globals */
4155 static int calls = 0;
4156 bool shadow = false;
4160 fatal(_("shadow_funcs() called twice!"));
4162 funcs = function_list(true);
4163 (void) foreach_func(funcs, (int (*)(INSTRUCTION *, void *)) parms_shadow, & shadow);
4166 /* End with fatal if the user requested it. */
4167 if (shadow && lintfunc != warning)
4168 lintwarn(_("there were shadowed variables."));
4172 /* mk_function --- finalize function definition node; remove parameters
4173 * out of the symbol table.
4176 static INSTRUCTION *
4177 mk_function(INSTRUCTION *fi, INSTRUCTION *def)
4181 thisfunc = fi->func_body;
4182 assert(thisfunc != NULL);
4184 if (do_optimize && def->lasti->opcode == Op_pop) {
4185 /* tail call which does not return any value. */
4189 for (t = def->nexti; t->nexti != def->lasti; t = t->nexti)
4191 if (t->opcode == Op_func_call
4192 && strcmp(t->func_name, thisfunc->vname) == 0)
4193 (t + 1)->tail_call = true;
4196 /* add an implicit return at end;
4197 * also used by 'return' command in debugger
4200 (void) list_append(def, instruction(Op_push_i));
4201 def->lasti->memory = dupnode(Nnull_string);
4202 (void) list_append(def, instruction(Op_K_return));
4204 if (do_pretty_print)
4205 (void) list_prepend(def, instruction(Op_exec_count));
4207 /* fi->opcode = Op_func */
4208 (fi + 1)->firsti = def->nexti;
4209 (fi + 1)->lasti = def->lasti;
4210 (fi + 2)->first_line = fi->source_line;
4211 (fi + 2)->last_line = lastline;
4212 fi->nexti = def->nexti;
4215 (void) list_append(rule_list, fi + 1); /* debugging */
4217 /* update lint table info */
4218 func_use(thisfunc->vname, FUNC_DEFINE);
4220 /* remove params from symbol table */
4221 remove_params(thisfunc);
4227 * install function name in the symbol table.
4228 * Extra work, build up and install a list of the parameter names.
4232 install_function(char *fname, INSTRUCTION *fi, INSTRUCTION *plist)
4238 if (r != NULL || is_deferred_variable(fname)) {
4239 error_ln(fi->source_line, _("function name `%s' previously defined"), fname);
4244 pcount = plist->lasti->param_count + 1;
4245 f = install_symbol(fname, Node_func);
4247 f->param_cnt = pcount;
4252 pnames = check_params(fname, pcount, plist); /* frees plist */
4253 f->fparms = make_params(pnames, pcount);
4261 /* check_params --- build a list of function parameter names after
4262 * making sure that the names are valid and there are no duplicates.
4266 check_params(char *fname, int pcount, INSTRUCTION *list)
4268 INSTRUCTION *p, *np;
4275 emalloc(pnames, char **, pcount * sizeof(char *), "check_params");
4277 for (i = 0, p = list->nexti; p != NULL; i++, p = np) {
4282 if (strcmp(name, fname) == 0) {
4283 /* check for function foo(foo) { ... }. bleah. */
4284 error_ln(p->source_line,
4285 _("function `%s': can't use function name as parameter name"), fname);
4286 } else if (is_std_var(name)) {
4287 error_ln(p->source_line,
4288 _("function `%s': can't use special variable `%s' as a function parameter"),
4292 /* check for duplicate parameters */
4293 for (j = 0; j < i; j++) {
4294 if (strcmp(name, pnames[j]) == 0) {
4295 error_ln(p->source_line,
4296 _("function `%s': parameter #%d, `%s', duplicates parameter #%d"),
4297 fname, i + 1, name, j + 1);
4313 #define HASHSIZE 1021
4315 static struct fdesc {
4321 } *ftable[HASHSIZE];
4323 /* func_use --- track uses and definitions of functions */
4326 func_use(const char *name, enum defref how)
4333 ind = hash(name, len, HASHSIZE, NULL);
4335 for (fp = ftable[ind]; fp != NULL; fp = fp->next)
4336 if (strcmp(fp->name, name) == 0)
4339 /* not in the table, fall through to allocate a new one */
4341 emalloc(fp, struct fdesc *, sizeof(struct fdesc), "func_use");
4342 memset(fp, '\0', sizeof(struct fdesc));
4343 emalloc(fp->name, char *, len + 1, "func_use");
4344 strcpy(fp->name, name);
4345 fp->next = ftable[ind];
4349 if (how == FUNC_DEFINE)
4351 else if (how == FUNC_EXT) {
4358 /* track_ext_func --- add an extension function to the table */
4361 track_ext_func(const char *name)
4363 func_use(name, FUNC_EXT);
4366 /* check_funcs --- verify functions that are called but not defined */
4371 struct fdesc *fp, *next;
4374 if (! in_main_context())
4377 for (i = 0; i < HASHSIZE; i++) {
4378 for (fp = ftable[i]; fp != NULL; fp = fp->next) {
4380 /* making this the default breaks old code. sigh. */
4381 if (fp->defined == 0 && ! fp->extension) {
4383 _("function `%s' called but never defined"), fp->name);
4387 if (do_lint && fp->defined == 0 && ! fp->extension)
4389 _("function `%s' called but never defined"), fp->name);
4392 if (do_lint && fp->used == 0 && ! fp->extension) {
4393 lintwarn(_("function `%s' defined but never called directly"),
4400 /* now let's free all the memory */
4401 for (i = 0; i < HASHSIZE; i++) {
4402 for (fp = ftable[i]; fp != NULL; fp = next) {
4411 /* param_sanity --- look for parameters that are regexp constants */
4414 param_sanity(INSTRUCTION *arglist)
4416 INSTRUCTION *argl, *arg;
4419 if (arglist == NULL)
4421 for (argl = arglist->nexti; argl; ) {
4423 if (arg->opcode == Op_match_rec)
4424 warning_ln(arg->source_line,
4425 _("regexp constant for parameter #%d yields boolean value"), i);
4431 /* deferred variables --- those that are only defined if needed. */
4434 * Is there any reason to use a hash table for deferred variables? At the
4435 * moment, there are only 1 to 3 such variables, so it may not be worth
4436 * the overhead. If more modules start using this facility, it should
4437 * probably be converted into a hash table.
4440 static struct deferred_variable {
4441 NODE *(*load_func)(void);
4442 struct deferred_variable *next;
4443 char name[1]; /* variable-length array */
4444 } *deferred_variables;
4446 /* register_deferred_variable --- add a var name and loading function to the list */
4449 register_deferred_variable(const char *name, NODE *(*load_func)(void))
4451 struct deferred_variable *dv;
4452 size_t sl = strlen(name);
4454 emalloc(dv, struct deferred_variable *, sizeof(*dv)+sl,
4455 "register_deferred_variable");
4456 dv->load_func = load_func;
4457 dv->next = deferred_variables;
4458 memcpy(dv->name, name, sl+1);
4459 deferred_variables = dv;
4462 /* is_deferred_variable --- check if NAME is a deferred variable */
4465 is_deferred_variable(const char *name)
4467 struct deferred_variable *dv;
4468 for (dv = deferred_variables; dv != NULL; dv = dv->next)
4469 if (strcmp(name, dv->name) == 0)
4475 /* variable --- make sure NAME is in the symbol table */
4478 variable(int location, char *name, NODETYPE type)
4482 if ((r = lookup(name)) != NULL) {
4483 if (r->type == Node_func || r->type == Node_ext_func )
4484 error_ln(location, _("function `%s' called with space between name and `(',\nor used as a variable or an array"),
4486 if (r == symbol_table)
4490 struct deferred_variable *dv;
4492 for (dv = deferred_variables; true; dv = dv->next) {
4495 * This is the only case in which we may not free the string.
4497 return install_symbol(name, type);
4499 if (strcmp(name, dv->name) == 0) {
4500 r = (*dv->load_func)();
4509 /* process_deferred --- if the program uses SYMTAB, load deferred variables */
4514 struct deferred_variable *dv;
4519 for (dv = deferred_variables; dv != NULL; dv = dv->next) {
4520 (void) dv->load_func();
4524 /* make_regnode --- make a regular expression node */
4527 make_regnode(int type, NODE *exp)
4532 memset(n, 0, sizeof(NODE));
4536 if (type == Node_regex) {
4537 n->re_reg = make_regexp(exp->stptr, exp->stlen, false, true, false);
4538 if (n->re_reg == NULL) {
4543 n->re_flags = CONSTANT;
4549 /* mk_rexp --- make a regular expression constant */
4552 mk_rexp(INSTRUCTION *list)
4557 if (ip == list->lasti && ip->opcode == Op_match_rec)
4558 ip->opcode = Op_push_re;
4560 ip = instruction(Op_push_re);
4561 ip->memory = make_regnode(Node_dynregex, NULL);
4562 ip->nexti = list->lasti->nexti;
4563 list->lasti->nexti = ip;
4570 /* isnoeffect --- when used as a statement, has no side effects */
4573 isnoeffect(OPCODE type)
4590 case Op_unary_minus:
4607 break; /* keeps gcc -Wall happy */
4612 #endif /* NO_LINT */
4615 /* make_assignable --- make this operand an assignable one if posiible */
4617 static INSTRUCTION *
4618 make_assignable(INSTRUCTION *ip)
4620 switch (ip->opcode) {
4622 ip->opcode = Op_push_lhs;
4625 ip->opcode = Op_field_spec_lhs;
4628 ip->opcode = Op_subscript_lhs;
4631 break; /* keeps gcc -Wall happy */
4636 /* stopme --- for debugging */
4639 stopme(int nargs ATTRIBUTE_UNUSED)
4641 return make_number(0.0);
4644 /* dumpintlstr --- write out an initial .po file entry for the string */
4647 dumpintlstr(const char *str, size_t len)
4651 /* See the GNU gettext distribution for details on the file format */
4653 if (source != NULL) {
4654 /* ala the gettext sources, remove leading `./'s */
4655 for (cp = source; cp[0] == '.' && cp[1] == '/'; cp += 2)
4657 printf("#: %s:%d\n", cp, sourceline);
4661 pp_string_fp(fprintf, stdout, str, len, '"', true);
4663 printf("msgstr \"\"\n\n");
4667 /* dumpintlstr2 --- write out an initial .po file entry for the string and its plural */
4670 dumpintlstr2(const char *str1, size_t len1, const char *str2, size_t len2)
4674 /* See the GNU gettext distribution for details on the file format */
4676 if (source != NULL) {
4677 /* ala the gettext sources, remove leading `./'s */
4678 for (cp = source; cp[0] == '.' && cp[1] == '/'; cp += 2)
4680 printf("#: %s:%d\n", cp, sourceline);
4684 pp_string_fp(fprintf, stdout, str1, len1, '"', true);
4686 printf("msgid_plural ");
4687 pp_string_fp(fprintf, stdout, str2, len2, '"', true);
4689 printf("msgstr[0] \"\"\nmsgstr[1] \"\"\n\n");
4693 /* mk_binary --- instructions for binary operators */
4695 static INSTRUCTION *
4696 mk_binary(INSTRUCTION *s1, INSTRUCTION *s2, INSTRUCTION *op)
4698 INSTRUCTION *ip1,*ip2;
4702 if (s2->lasti == ip2 && ip2->opcode == Op_push_i) {
4703 /* do any numeric constant folding */
4706 && ip1 == s1->lasti && ip1->opcode == Op_push_i
4707 && (ip1->memory->flags & (MPFN|MPZN|STRCUR|STRING)) == 0
4708 && (ip2->memory->flags & (MPFN|MPZN|STRCUR|STRING)) == 0
4710 NODE *n1 = ip1->memory, *n2 = ip2->memory;
4711 res = force_number(n1)->numbr;
4712 (void) force_number(n2);
4713 switch (op->opcode) {
4718 if (n2->numbr == 0.0) {
4719 /* don't fatalize, allow parsing rest of the input */
4720 error_ln(op->source_line, _("division by zero attempted"));
4727 if (n2->numbr == 0.0) {
4728 /* don't fatalize, allow parsing rest of the input */
4729 error_ln(op->source_line, _("division by zero attempted in `%%'"));
4733 res = fmod(res, n2->numbr);
4734 #else /* ! HAVE_FMOD */
4735 (void) modf(res / n2->numbr, &res);
4736 res = n1->numbr - res * n2->numbr;
4737 #endif /* ! HAVE_FMOD */
4746 res = calc_exp(res, n2->numbr);
4752 op->opcode = Op_push_i;
4753 op->memory = make_number(res);
4760 return list_create(op);
4762 /* do basic arithmetic optimisation */
4763 /* convert (Op_push_i Node_val) + (Op_plus) to (Op_plus_i Node_val) */
4764 switch (op->opcode) {
4766 op->opcode = Op_times_i;
4769 op->opcode = Op_quotient_i;
4772 op->opcode = Op_mod_i;
4775 op->opcode = Op_plus_i;
4778 op->opcode = Op_minus_i;
4781 op->opcode = Op_exp_i;
4787 op->memory = ip2->memory;
4789 bcfree(s2); /* Op_list */
4790 return list_append(s1, op);
4795 /* append lists s1, s2 and add `op' bytecode */
4796 (void) list_merge(s1, s2);
4797 return list_append(s1, op);
4800 /* mk_boolean --- instructions for boolean and, or */
4802 static INSTRUCTION *
4803 mk_boolean(INSTRUCTION *left, INSTRUCTION *right, INSTRUCTION *op)
4806 OPCODE opc, final_opc;
4808 opc = op->opcode; /* Op_and or Op_or */
4809 final_opc = (opc == Op_or) ? Op_or_final : Op_and_final;
4811 add_lint(right, LINT_assign_in_cond);
4815 if (tp->opcode != final_opc) { /* x || y */
4816 list_append(right, instruction(final_opc));
4817 add_lint(left, LINT_assign_in_cond);
4818 (void) list_append(left, op);
4819 left->lasti->target_jmp = right->lasti;
4821 /* NB: target_stmt points to previous Op_and(Op_or) in a chain;
4822 * target_stmt only used in the parser (see below).
4825 left->lasti->target_stmt = left->lasti;
4826 right->lasti->target_stmt = left->lasti;
4827 } else { /* optimization for x || y || z || ... */
4830 op->opcode = final_opc;
4831 (void) list_append(right, op);
4832 op->target_stmt = tp;
4834 tp->target_jmp = op;
4836 /* update jump targets */
4837 for (ip = tp->target_stmt; ; ip = ip->target_stmt) {
4838 assert(ip->opcode == opc);
4839 assert(ip->target_jmp == tp);
4840 /* if (ip->opcode == opc && ip->target_jmp == tp) */
4841 ip->target_jmp = op;
4842 if (ip->target_stmt == ip)
4847 return list_merge(left, right);
4850 /* mk_condition --- if-else and conditional */
4852 static INSTRUCTION *
4853 mk_condition(INSTRUCTION *cond, INSTRUCTION *ifp, INSTRUCTION *true_branch,
4854 INSTRUCTION *elsep, INSTRUCTION *false_branch)
4860 * t: [Op_jmp_false f ]
4876 if (false_branch == NULL) {
4877 false_branch = list_create(instruction(Op_no_op));
4878 if (elsep != NULL) { /* else { } */
4879 if (do_pretty_print)
4880 (void) list_prepend(false_branch, elsep);
4885 /* assert(elsep != NULL); */
4887 /* avoid a series of no_op's: if .. else if .. else if .. */
4888 if (false_branch->lasti->opcode != Op_no_op)
4889 (void) list_append(false_branch, instruction(Op_no_op));
4890 if (do_pretty_print) {
4891 (void) list_prepend(false_branch, elsep);
4892 false_branch->nexti->branch_end = false_branch->lasti;
4893 (void) list_prepend(false_branch, instruction(Op_exec_count));
4898 (void) list_prepend(false_branch, instruction(Op_jmp));
4899 false_branch->nexti->target_jmp = false_branch->lasti;
4901 add_lint(cond, LINT_assign_in_cond);
4902 ip = list_append(cond, instruction(Op_jmp_false));
4903 ip->lasti->target_jmp = false_branch->nexti->nexti;
4905 if (do_pretty_print) {
4906 (void) list_prepend(ip, ifp);
4907 (void) list_append(ip, instruction(Op_exec_count));
4908 ip->nexti->branch_if = ip->lasti;
4909 ip->nexti->branch_else = false_branch->nexti;
4913 if (true_branch != NULL)
4914 list_merge(ip, true_branch);
4915 return list_merge(ip, false_branch);
4918 enum defline { FIRST_LINE, LAST_LINE };
4920 /* find_line -- find the first(last) line in a list of (pattern) instructions */
4923 find_line(INSTRUCTION *pattern, enum defline what)
4928 for (ip = pattern->nexti; ip; ip = ip->nexti) {
4929 if (what == LAST_LINE) {
4930 if (ip->source_line > lineno)
4931 lineno = ip->source_line;
4932 } else { /* FIRST_LINE */
4933 if (ip->source_line > 0
4934 && (lineno == 0 || ip->source_line < lineno))
4935 lineno = ip->source_line;
4937 if (ip == pattern->lasti)
4944 /* append_rule --- pattern-action instructions */
4946 static INSTRUCTION *
4947 append_rule(INSTRUCTION *pattern, INSTRUCTION *action)
4967 if (do_pretty_print)
4968 (void) list_append(action, instruction(Op_no_op));
4969 (rp + 1)->firsti = action->nexti;
4970 (rp + 1)->lasti = action->lasti;
4971 (rp + 2)->first_line = pattern->source_line;
4972 (rp + 2)->last_line = lastline;
4973 ip = list_prepend(action, rp);
4976 rp = bcalloc(Op_rule, 3, 0);
4978 rp->source_file = source;
4979 tp = instruction(Op_no_op);
4981 if (pattern == NULL) {
4982 /* assert(action != NULL); */
4983 if (do_pretty_print)
4984 (void) list_prepend(action, instruction(Op_exec_count));
4985 (rp + 1)->firsti = action->nexti;
4986 (rp + 1)->lasti = tp;
4987 (rp + 2)->first_line = firstline;
4988 (rp + 2)->last_line = lastline;
4989 rp->source_line = firstline;
4990 ip = list_prepend(list_append(action, tp), rp);
4992 (void) list_append(pattern, instruction(Op_jmp_false));
4993 pattern->lasti->target_jmp = tp;
4994 (rp + 2)->first_line = find_line(pattern, FIRST_LINE);
4995 rp->source_line = (rp + 2)->first_line;
4996 if (action == NULL) {
4997 (rp + 2)->last_line = find_line(pattern, LAST_LINE);
4998 action = list_create(instruction(Op_K_print_rec));
4999 if (do_pretty_print)
5000 (void) list_prepend(action, instruction(Op_exec_count));
5002 (rp + 2)->last_line = lastline;
5004 if (do_pretty_print) {
5005 (void) list_prepend(pattern, instruction(Op_exec_count));
5006 (void) list_prepend(action, instruction(Op_exec_count));
5008 (rp + 1)->firsti = action->nexti;
5009 (rp + 1)->lasti = tp;
5011 list_merge(list_prepend(pattern, rp),
5018 list_append(rule_list, rp + 1);
5020 if (rule_block[rule] == NULL)
5021 rule_block[rule] = ip;
5023 (void) list_merge(rule_block[rule], ip);
5025 return rule_block[rule];
5028 /* mk_assignment --- assignment bytecodes */
5030 static INSTRUCTION *
5031 mk_assignment(INSTRUCTION *lhs, INSTRUCTION *rhs, INSTRUCTION *op)
5037 switch (tp->opcode) {
5039 tp->opcode = Op_field_spec_lhs;
5042 tp->opcode = Op_subscript_lhs;
5046 tp->opcode = Op_push_lhs;
5048 case Op_field_assign:
5049 yyerror(_("cannot assign a value to the result of a field post-increment expression"));
5052 yyerror(_("invalid target of assignment (opcode %s)"),
5053 opcode2str(tp->opcode));
5057 tp->do_reference = (op->opcode != Op_assign); /* check for uninitialized reference */
5060 ip = list_merge(rhs, lhs);
5064 (void) list_append(ip, op);
5066 if (tp->opcode == Op_push_lhs
5067 && tp->memory->type == Node_var
5068 && tp->memory->var_assign
5070 tp->do_reference = false; /* no uninitialized reference checking
5071 * for a special variable.
5073 (void) list_append(ip, instruction(Op_var_assign));
5074 ip->lasti->assign_var = tp->memory->var_assign;
5075 } else if (tp->opcode == Op_field_spec_lhs) {
5076 (void) list_append(ip, instruction(Op_field_assign));
5077 ip->lasti->field_assign = (Func_ptr) 0;
5078 tp->target_assign = ip->lasti;
5079 } else if (tp->opcode == Op_subscript_lhs) {
5080 (void) list_append(ip, instruction(Op_subscript_assign));
5086 /* optimize_assignment --- peephole optimization for assignment */
5088 static INSTRUCTION *
5089 optimize_assignment(INSTRUCTION *exp)
5091 INSTRUCTION *i1, *i2, *i3;
5094 * Optimize assignment statements array[subs] = x; var = x; $n = x;
5095 * string concatenation of the form s = s t.
5097 * 1) Array element assignment array[subs] = x:
5098 * Replaces Op_push_array + Op_subscript_lhs + Op_assign + Op_pop
5099 * with single instruction Op_store_sub.
5100 * Limitation: 1 dimension and sub is simple var/value.
5102 * 2) Simple variable assignment var = x:
5103 * Replaces Op_push_lhs + Op_assign + Op_pop with Op_store_var.
5105 * 3) Field assignment $n = x:
5106 * Replaces Op_field_spec_lhs + Op_assign + Op_field_assign + Op_pop
5107 * with Op_store_field.
5109 * 4) Optimization for string concatenation:
5110 * For cases like x = x y, uses realloc to include y in x;
5111 * also eliminates instructions Op_push_lhs and Op_pop.
5115 * N.B.: do not append Op_pop instruction to the returned
5116 * instruction list if optimized. None of these
5117 * optimized instructions pushes the r-value of assignment
5118 * onto the runtime stack.
5124 if ( i1->opcode != Op_assign
5125 && i1->opcode != Op_field_assign)
5126 return list_append(exp, instruction(Op_pop));
5128 for (i2 = exp->nexti; i2 != i1; i2 = i2->nexti) {
5129 switch (i2->opcode) {
5131 if (i2->nexti->opcode == Op_push_lhs /* l.h.s is a simple variable */
5132 && (i2->concat_flag & CSVAR) /* 1st exp in r.h.s is a simple variable;
5133 * see Op_concat in the grammer above.
5135 && i2->nexti->memory == exp->nexti->memory /* and the same as in l.h.s */
5136 && i2->nexti->nexti == i1
5137 && i1->opcode == Op_assign
5139 /* s = s ... optimization */
5141 /* avoid stuff like x = x (x = y) or x = x gsub(/./, "b", x);
5142 * check for l-value reference to this variable in the r.h.s.
5143 * Also, avoid function calls in general to guard against
5144 * global variable assignment.
5147 for (i3 = exp->nexti->nexti; i3 != i2; i3 = i3->nexti) {
5148 if ((i3->opcode == Op_push_lhs && i3->memory == i2->nexti->memory)
5149 || i3->opcode == Op_func_call)
5150 return list_append(exp, instruction(Op_pop)); /* no optimization */
5153 /* remove the variable from r.h.s */
5155 exp->nexti = i3->nexti;
5158 if (--i2->expr_count == 1) /* one less expression in Op_concat */
5159 i2->opcode = Op_no_op;
5162 assert(i3->opcode == Op_push_lhs);
5163 i3->opcode = Op_assign_concat; /* change Op_push_lhs to Op_assign_concat */
5165 bcfree(i1); /* Op_assign */
5166 exp->lasti = i3; /* update Op_list */
5171 case Op_field_spec_lhs:
5172 if (i2->nexti->opcode == Op_assign
5173 && i2->nexti->nexti == i1
5174 && i1->opcode == Op_field_assign
5177 i2->opcode = Op_store_field;
5178 bcfree(i2->nexti); /* Op_assign */
5180 bcfree(i1); /* Op_field_assign */
5181 exp->lasti = i2; /* update Op_list */
5187 if (i2->nexti->nexti->opcode == Op_subscript_lhs) {
5188 i3 = i2->nexti->nexti;
5189 if (i3->sub_count == 1
5191 && i1->opcode == Op_assign
5193 /* array[sub] = .. */
5194 i3->opcode = Op_store_sub;
5195 i3->memory = i2->memory;
5196 i3->expr_count = 1; /* sub_count shadows memory,
5197 * so use expr_count instead.
5200 i2->opcode = Op_no_op;
5201 bcfree(i1); /* Op_assign */
5202 exp->lasti = i3; /* update Op_list */
5210 && i1->opcode == Op_assign
5213 i2->opcode = Op_store_var;
5215 bcfree(i1); /* Op_assign */
5216 exp->lasti = i2; /* update Op_list */
5219 if (i3->opcode == Op_push_i
5220 && (i3->memory->flags & INTLSTR) == 0
5223 /* constant initializer */
5224 i2->initval = i3->memory;
5239 /* no optimization */
5240 return list_append(exp, instruction(Op_pop));
5244 /* mk_getline --- make instructions for getline */
5246 static INSTRUCTION *
5247 mk_getline(INSTRUCTION *op, INSTRUCTION *var, INSTRUCTION *redir, int redirtype)
5251 INSTRUCTION *asgn = NULL;
5254 * getline [var] < [file]
5256 * [ file (simp_exp)]
5258 * [ Op_K_getline_redir|NULL|redir_type|into_var]
5263 if (redir == NULL) {
5264 int sline = op->source_line;
5266 op = bcalloc(Op_K_getline, 2, sline);
5267 (op + 1)->target_endfile = ip_endfile;
5268 (op + 1)->target_beginfile = ip_beginfile;
5272 tp = make_assignable(var->lasti);
5275 /* check if we need after_assign bytecode */
5276 if (tp->opcode == Op_push_lhs
5277 && tp->memory->type == Node_var
5278 && tp->memory->var_assign
5280 asgn = instruction(Op_var_assign);
5281 asgn->assign_ctxt = op->opcode;
5282 asgn->assign_var = tp->memory->var_assign;
5283 } else if (tp->opcode == Op_field_spec_lhs) {
5284 asgn = instruction(Op_field_assign);
5285 asgn->assign_ctxt = op->opcode;
5286 asgn->field_assign = (Func_ptr) 0; /* determined at run time */
5287 tp->target_assign = asgn;
5288 } else if (tp->opcode == Op_subscript_lhs) {
5289 asgn = instruction(Op_subscript_assign);
5290 asgn->assign_ctxt = op->opcode;
5293 if (redir != NULL) {
5294 ip = list_merge(redir, var);
5295 (void) list_append(ip, op);
5297 ip = list_append(var, op);
5298 } else if (redir != NULL)
5299 ip = list_append(redir, op);
5301 ip = list_create(op);
5302 op->into_var = (var != NULL);
5303 op->redir_type = (redir != NULL) ? redirtype : redirect_none;
5305 return (asgn == NULL ? ip : list_append(ip, asgn));
5309 /* mk_for_loop --- for loop bytecodes */
5311 static INSTRUCTION *
5312 mk_for_loop(INSTRUCTION *forp, INSTRUCTION *init, INSTRUCTION *cond,
5313 INSTRUCTION *incr, INSTRUCTION *body)
5316 * ------------------------
5317 * init (may be NULL)
5318 * ------------------------
5320 * cond (Op_no_op if NULL)
5321 * ------------------------
5322 * [ Op_jmp_false tb ]
5323 * ------------------------
5324 * body (may be NULL)
5325 * ------------------------
5327 * incr (may be NULL)
5329 * ------------------------
5333 INSTRUCTION *ip, *tbreak, *tcont;
5335 INSTRUCTION *pp_cond;
5338 tbreak = instruction(Op_no_op);
5341 add_lint(cond, LINT_assign_in_cond);
5342 pp_cond = cond->nexti;
5344 (void) list_append(ip, instruction(Op_jmp_false));
5345 ip->lasti->target_jmp = tbreak;
5347 pp_cond = instruction(Op_no_op);
5348 ip = list_create(pp_cond);
5352 ip = list_merge(init, ip);
5354 if (do_pretty_print) {
5355 (void) list_append(ip, instruction(Op_exec_count));
5356 (forp + 1)->forloop_cond = pp_cond;
5357 (forp + 1)->forloop_body = ip->lasti;
5361 (void) list_merge(ip, body);
5363 jmp = instruction(Op_jmp);
5364 jmp->target_jmp = pp_cond;
5368 tcont = incr->nexti;
5369 (void) list_merge(ip, incr);
5372 (void) list_append(ip, jmp);
5373 ret = list_append(ip, tbreak);
5374 fix_break_continue(ret, tbreak, tcont);
5376 if (do_pretty_print) {
5377 forp->target_break = tbreak;
5378 forp->target_continue = tcont;
5379 ret = list_prepend(ret, forp);
5386 /* add_lint --- add lint warning bytecode if needed */
5389 add_lint(INSTRUCTION *list, LINTTYPE linttype)
5395 case LINT_assign_in_cond:
5397 if (ip->opcode == Op_var_assign || ip->opcode == Op_field_assign) {
5398 assert(ip != list->nexti);
5399 for (ip = list->nexti; ip->nexti != list->lasti; ip = ip->nexti)
5403 if (ip->opcode == Op_assign || ip->opcode == Op_assign_concat) {
5404 list_append(list, instruction(Op_lint));
5405 list->lasti->lint_type = linttype;
5409 case LINT_no_effect:
5410 if (list->lasti->opcode == Op_pop && list->nexti != list->lasti) {
5411 for (ip = list->nexti; ip->nexti != list->lasti; ip = ip->nexti)
5414 if (do_lint) { /* compile-time warning */
5415 if (isnoeffect(ip->opcode))
5416 lintwarn_ln(ip->source_line, ("statement may have no effect"));
5419 if (ip->opcode == Op_push) { /* run-time warning */
5420 list_append(list, instruction(Op_lint));
5421 list->lasti->lint_type = linttype;
5432 /* mk_expression_list --- list of bytecode lists */
5434 static INSTRUCTION *
5435 mk_expression_list(INSTRUCTION *list, INSTRUCTION *s1)
5439 /* we can't just combine all bytecodes, since we need to
5440 * process individual expressions for a few builtins in snode() (-:
5443 /* -- list of lists */
5444 /* [Op_list| ... ]------
5446 * [Op_list| ... ] -- |
5449 * [Op_list| ... ] -- |
5455 assert(s1 != NULL && s1->opcode == Op_list);
5457 list = instruction(Op_list);
5459 list->lasti = s1->lasti;
5463 /* append expression to the end of the list */
5467 list->lasti = s1->lasti;
5471 /* count_expressions --- fixup expression_list from mk_expression_list.
5472 * returns no of expressions in list. isarg is true
5473 * for function arguments.
5477 count_expressions(INSTRUCTION **list, bool isarg)
5480 INSTRUCTION *r = NULL;
5483 if (*list == NULL) /* error earlier */
5486 for (expr = (*list)->nexti; expr; ) {
5487 INSTRUCTION *t1, *t2;
5490 if (isarg && t1 == t2 && t1->opcode == Op_push)
5491 t1->opcode = Op_push_param;
5495 (void) list_merge(r, expr);
5500 if (! isarg && count > max_args)
5507 /* fix_break_continue --- fix up break & continue codes in loop bodies */
5510 fix_break_continue(INSTRUCTION *list, INSTRUCTION *b_target, INSTRUCTION *c_target)
5514 list->lasti->nexti = NULL; /* just to make sure */
5516 for (ip = list->nexti; ip != NULL; ip = ip->nexti) {
5517 switch (ip->opcode) {
5519 if (ip->target_jmp == NULL)
5520 ip->target_jmp = b_target;
5524 if (ip->target_jmp == NULL)
5525 ip->target_jmp = c_target;
5529 /* this is to keep the compiler happy. sheesh. */
5535 static inline INSTRUCTION *
5536 list_create(INSTRUCTION *x)
5540 l = instruction(Op_list);
5546 static inline INSTRUCTION *
5547 list_append(INSTRUCTION *l, INSTRUCTION *x)
5550 if (l->opcode != Op_list)
5553 l->lasti->nexti = x;
5558 static inline INSTRUCTION *
5559 list_prepend(INSTRUCTION *l, INSTRUCTION *x)
5562 if (l->opcode != Op_list)
5565 x->nexti = l->nexti;
5570 static inline INSTRUCTION *
5571 list_merge(INSTRUCTION *l1, INSTRUCTION *l2)
5574 if (l1->opcode != Op_list)
5576 if (l2->opcode != Op_list)
5579 l1->lasti->nexti = l2->nexti;
5580 l1->lasti = l2->lasti;
5585 /* See if name is a special token. */
5588 check_special(const char *name)
5592 #if 'a' == 0x81 /* it's EBCDIC */
5593 static bool did_sort = false;
5596 qsort((void *) tokentab,
5597 sizeof(tokentab) / sizeof(tokentab[0]),
5598 sizeof(tokentab[0]), tokcompare);
5604 high = (sizeof(tokentab) / sizeof(tokentab[0])) - 1;
5605 while (low <= high) {
5606 mid = (low + high) / 2;
5607 i = *name - tokentab[mid].operator[0];
5609 i = strcmp(name, tokentab[mid].operator);
5611 if (i < 0) /* token < mid */
5613 else if (i > 0) /* token > mid */
5616 if ((do_traditional && (tokentab[mid].flags & GAWKX))
5617 || (do_posix && (tokentab[mid].flags & NOT_POSIX)))
5626 * This provides a private version of functions that act like VMS's
5627 * variable-length record filesystem, where there was a bug on
5628 * certain source files.
5631 static FILE *fp = NULL;
5633 /* read_one_line --- return one input line at a time. mainly for debugging. */
5636 read_one_line(int fd, void *buffer, size_t count)
5640 /* Minor potential memory leak here. Too bad. */
5642 fp = fdopen(fd, "r");
5644 fprintf(stderr, "ugh. fdopen: %s\n", strerror(errno));
5645 gawk_exit(EXIT_FAILURE);
5649 if (fgets(buf, sizeof buf, fp) == NULL)
5652 memcpy(buffer, buf, strlen(buf));
5656 /* one_line_close --- close the open file being read with read_one_line() */
5659 one_line_close(int fd)
5663 if (fp == NULL || fd != fileno(fp))
5664 fatal("debugging read/close screwed up!");