do not delete regex.c
[platform/upstream/gawk.git] / awkgram.y
1 /*
2  * awkgram.y --- yacc/bison parser
3  */
4
5 /* 
6  * Copyright (C) 1986, 1988, 1989, 1991-2014 the Free Software Foundation, Inc.
7  * 
8  * This file is part of GAWK, the GNU implementation of the
9  * AWK Programming Language.
10  * 
11  * GAWK is free software; you can redistribute it and/or modify
12  * it under the terms of the GNU General Public License as published by
13  * the Free Software Foundation; either version 3 of the License, or
14  * (at your option) any later version.
15  * 
16  * GAWK is distributed in the hope that it will be useful,
17  * but WITHOUT ANY WARRANTY; without even the implied warranty of
18  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
19  * GNU General Public License for more details.
20  * 
21  * You should have received a copy of the GNU General Public License
22  * along with this program; if not, write to the Free Software
23  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA
24  */
25
26 %{
27 #ifdef GAWKDEBUG
28 #define YYDEBUG 12
29 #endif
30
31 #include "awk.h"
32
33 #if defined(__STDC__) && __STDC__ < 1   /* VMS weirdness, maybe elsewhere */
34 #define signed /**/
35 #endif
36
37 static void yyerror(const char *m, ...) ATTRIBUTE_PRINTF_1;
38 static void error_ln(int line, const char *m, ...) ATTRIBUTE_PRINTF_2;
39 static void lintwarn_ln(int line, const char *m, ...) ATTRIBUTE_PRINTF_2;
40 static void warning_ln(int line, const char *m, ...) ATTRIBUTE_PRINTF_2;
41 static char *get_src_buf(void);
42 static int yylex(void);
43 int     yyparse(void); 
44 static INSTRUCTION *snode(INSTRUCTION *subn, INSTRUCTION *op);
45 static char **check_params(char *fname, int pcount, INSTRUCTION *list);
46 static int install_function(char *fname, INSTRUCTION *fi, INSTRUCTION *plist);
47 static NODE *mk_rexp(INSTRUCTION *exp);
48 static void param_sanity(INSTRUCTION *arglist);
49 static int parms_shadow(INSTRUCTION *pc, bool *shadow);
50 #ifndef NO_LINT
51 static int isnoeffect(OPCODE type);
52 #endif
53 static INSTRUCTION *make_assignable(INSTRUCTION *ip);
54 static void dumpintlstr(const char *str, size_t len);
55 static void dumpintlstr2(const char *str1, size_t len1, const char *str2, size_t len2);
56 static int include_source(INSTRUCTION *file);
57 static int load_library(INSTRUCTION *file);
58 static void next_sourcefile(void);
59 static char *tokexpand(void);
60 static bool is_deferred_variable(const char *name);
61
62 #define instruction(t)  bcalloc(t, 1, 0)
63
64 static INSTRUCTION *mk_program(void);
65 static INSTRUCTION *append_rule(INSTRUCTION *pattern, INSTRUCTION *action);
66 static INSTRUCTION *mk_function(INSTRUCTION *fi, INSTRUCTION *def);
67 static INSTRUCTION *mk_condition(INSTRUCTION *cond, INSTRUCTION *ifp, INSTRUCTION *true_branch,
68                 INSTRUCTION *elsep,     INSTRUCTION *false_branch);
69 static INSTRUCTION *mk_expression_list(INSTRUCTION *list, INSTRUCTION *s1);
70 static INSTRUCTION *mk_for_loop(INSTRUCTION *forp, INSTRUCTION *init, INSTRUCTION *cond,
71                 INSTRUCTION *incr, INSTRUCTION *body);
72 static void fix_break_continue(INSTRUCTION *list, INSTRUCTION *b_target, INSTRUCTION *c_target);
73 static INSTRUCTION *mk_binary(INSTRUCTION *s1, INSTRUCTION *s2, INSTRUCTION *op);
74 static INSTRUCTION *mk_boolean(INSTRUCTION *left, INSTRUCTION *right, INSTRUCTION *op);
75 static INSTRUCTION *mk_assignment(INSTRUCTION *lhs, INSTRUCTION *rhs, INSTRUCTION *op);
76 static INSTRUCTION *mk_getline(INSTRUCTION *op, INSTRUCTION *opt_var, INSTRUCTION *redir, int redirtype);
77 static NODE *make_regnode(int type, NODE *exp);
78 static int count_expressions(INSTRUCTION **list, bool isarg);
79 static INSTRUCTION *optimize_assignment(INSTRUCTION *exp);
80 static void add_lint(INSTRUCTION *list, LINTTYPE linttype);
81
82 static void process_deferred();
83
84 enum defref { FUNC_DEFINE, FUNC_USE, FUNC_EXT };
85 static void func_use(const char *name, enum defref how);
86 static void check_funcs(void);
87
88 static ssize_t read_one_line(int fd, void *buffer, size_t count);
89 static int one_line_close(int fd);
90
91 static bool want_source = false;
92 static bool want_regexp = false;        /* lexical scanning kludge */
93 static char *in_function;               /* parsing kludge */
94 static bool symtab_used = false;        /* program used SYMTAB */
95 static int rule = 0;
96
97 const char *const ruletab[] = {
98         "?",
99         "BEGIN",
100         "Rule",
101         "END",
102         "BEGINFILE",
103         "ENDFILE",
104 };
105
106 static bool in_print = false;   /* lexical scanning kludge for print */
107 static int in_parens = 0;       /* lexical scanning kludge for print */
108 static int sub_counter = 0;     /* array dimension counter for use in delete */
109 static char *lexptr = NULL;             /* pointer to next char during parsing */
110 static char *lexend;
111 static char *lexptr_begin;      /* keep track of where we were for error msgs */
112 static char *lexeme;            /* beginning of lexeme for debugging */
113 static bool lexeof;             /* seen EOF for current source? */  
114 static char *thisline = NULL;
115 static int in_braces = 0;       /* count braces for firstline, lastline in an 'action' */
116 static int lastline = 0;
117 static int firstline = 0;
118 static SRCFILE *sourcefile = NULL;      /* current program source */
119 static int lasttok = 0;
120 static bool eof_warned = false; /* GLOBAL: want warning for each file */
121 static int break_allowed;       /* kludge for break */
122 static int continue_allowed;    /* kludge for continue */
123
124 #define END_FILE        -1000
125 #define END_SRC         -2000
126
127 #define YYDEBUG_LEXER_TEXT (lexeme)
128 static char *tokstart = NULL;
129 static char *tok = NULL;
130 static char *tokend;
131 static int errcount = 0;
132
133 extern char *source;
134 extern int sourceline;
135 extern SRCFILE *srcfiles;
136 extern INSTRUCTION *rule_list;
137 extern int max_args;
138 extern NODE **args_array;
139
140 static INSTRUCTION *rule_block[sizeof(ruletab)];
141
142 static INSTRUCTION *ip_rec;
143 static INSTRUCTION *ip_newfile;
144 static INSTRUCTION *ip_atexit = NULL;
145 static INSTRUCTION *ip_end;
146 static INSTRUCTION *ip_endfile;
147 static INSTRUCTION *ip_beginfile;
148
149 static inline INSTRUCTION *list_create(INSTRUCTION *x);
150 static inline INSTRUCTION *list_append(INSTRUCTION *l, INSTRUCTION *x);
151 static inline INSTRUCTION *list_prepend(INSTRUCTION *l, INSTRUCTION *x);
152 static inline INSTRUCTION *list_merge(INSTRUCTION *l1, INSTRUCTION *l2);
153
154 extern double fmod(double x, double y);
155
156 #define YYSTYPE INSTRUCTION *
157
158 #define is_identchar(c)         (isalnum(c) || (c) == '_')
159 %}
160
161 %token FUNC_CALL NAME REGEXP FILENAME
162 %token YNUMBER YSTRING
163 %token RELOP IO_OUT IO_IN
164 %token ASSIGNOP ASSIGN MATCHOP CONCAT_OP
165 %token SUBSCRIPT
166 %token LEX_BEGIN LEX_END LEX_IF LEX_ELSE LEX_RETURN LEX_DELETE
167 %token LEX_SWITCH LEX_CASE LEX_DEFAULT LEX_WHILE LEX_DO LEX_FOR LEX_BREAK LEX_CONTINUE
168 %token LEX_PRINT LEX_PRINTF LEX_NEXT LEX_EXIT LEX_FUNCTION
169 %token LEX_BEGINFILE LEX_ENDFILE 
170 %token LEX_GETLINE LEX_NEXTFILE
171 %token LEX_IN
172 %token LEX_AND LEX_OR INCREMENT DECREMENT
173 %token LEX_BUILTIN LEX_LENGTH
174 %token LEX_EOF
175 %token LEX_INCLUDE LEX_EVAL LEX_LOAD
176 %token NEWLINE
177
178 /* Lowest to highest */
179 %right ASSIGNOP ASSIGN SLASH_BEFORE_EQUAL
180 %right '?' ':'
181 %left LEX_OR
182 %left LEX_AND
183 %left LEX_GETLINE
184 %nonassoc LEX_IN
185 %left FUNC_CALL LEX_BUILTIN LEX_LENGTH
186 %nonassoc ','
187 %left MATCHOP
188 %nonassoc RELOP '<' '>' IO_IN IO_OUT
189 %left CONCAT_OP
190 %left YSTRING YNUMBER
191 %left '+' '-'
192 %left '*' '/' '%'
193 %right '!' UNARY
194 %right '^'
195 %left INCREMENT DECREMENT
196 %left '$'
197 %left '(' ')'
198 %%
199
200 program
201         : /* empty */
202         | program rule
203           {
204                 rule = 0;
205                 yyerrok;
206           }
207         | program nls
208         | program LEX_EOF
209           {
210                 next_sourcefile();
211                 if (sourcefile == srcfiles)
212                         process_deferred();
213           }
214         | program error
215           {
216                 rule = 0;
217                 /*
218                  * If errors, give up, don't produce an infinite
219                  * stream of syntax error messages.
220                  */
221                 /* yyerrok; */
222           }
223         ;
224
225 rule
226         : pattern action
227           {
228                 (void) append_rule($1, $2);
229           }
230         | pattern statement_term
231           {
232                 if (rule != Rule) {
233                         msg(_("%s blocks must have an action part"), ruletab[rule]);
234                         errcount++;
235                 } else if ($1 == NULL) {
236                         msg(_("each rule must have a pattern or an action part"));
237                         errcount++;
238                 } else          /* pattern rule with non-empty pattern */
239                         (void) append_rule($1, NULL);
240           }
241         | function_prologue action
242           {
243                 in_function = NULL;
244                 (void) mk_function($1, $2);
245                 yyerrok;
246           }
247         | '@' LEX_INCLUDE source statement_term
248           {
249                 want_source = false;
250                 yyerrok;
251           }
252         | '@' LEX_LOAD library statement_term
253           {
254                 want_source = false;
255                 yyerrok;
256           }
257         ;
258
259 source
260         : FILENAME
261           {
262                 if (include_source($1) < 0)
263                         YYABORT;
264                 efree($1->lextok);
265                 bcfree($1);
266                 $$ = NULL;
267           }
268         | FILENAME error
269           { $$ = NULL; }
270         | error
271           { $$ = NULL; }
272         ;
273
274 library
275         : FILENAME
276           {
277                 if (load_library($1) < 0)
278                         YYABORT;
279                 efree($1->lextok);
280                 bcfree($1);
281                 $$ = NULL;
282           }
283         | FILENAME error
284           { $$ = NULL; }
285         | error
286           { $$ = NULL; }
287         ;
288
289 pattern
290         : /* empty */
291           {     $$ = NULL; rule = Rule; }
292         | exp
293           {     $$ = $1; rule = Rule; }
294         | exp ',' opt_nls exp
295           {
296                 INSTRUCTION *tp;
297
298                 add_lint($1, LINT_assign_in_cond);
299                 add_lint($4, LINT_assign_in_cond);
300
301                 tp = instruction(Op_no_op);
302                 list_prepend($1, bcalloc(Op_line_range, !!do_pretty_print + 1, 0));
303                 $1->nexti->triggered = false;
304                 $1->nexti->target_jmp = $4->nexti;
305
306                 list_append($1, instruction(Op_cond_pair));
307                 $1->lasti->line_range = $1->nexti;
308                 $1->lasti->target_jmp = tp;
309
310                 list_append($4, instruction(Op_cond_pair));
311                 $4->lasti->line_range = $1->nexti;
312                 $4->lasti->target_jmp = tp;
313                 if (do_pretty_print) {
314                         ($1->nexti + 1)->condpair_left = $1->lasti;
315                         ($1->nexti + 1)->condpair_right = $4->lasti;
316                 }
317                 $$ = list_append(list_merge($1, $4), tp);
318                 rule = Rule;
319           }
320         | LEX_BEGIN
321           {
322                 static int begin_seen = 0;
323                 if (do_lint_old && ++begin_seen == 2)
324                         warning_ln($1->source_line,
325                                 _("old awk does not support multiple `BEGIN' or `END' rules"));
326
327                 $1->in_rule = rule = BEGIN;
328                 $1->source_file = source;
329                 $$ = $1;
330           }
331         | LEX_END
332           {
333                 static int end_seen = 0;
334                 if (do_lint_old && ++end_seen == 2)
335                         warning_ln($1->source_line,
336                                 _("old awk does not support multiple `BEGIN' or `END' rules"));
337
338                 $1->in_rule = rule = END;
339                 $1->source_file = source;
340                 $$ = $1;
341           }
342         | LEX_BEGINFILE
343           {
344                 $1->in_rule = rule = BEGINFILE;
345                 $1->source_file = source;
346                 $$ = $1;
347           }
348         | LEX_ENDFILE
349           {
350                 $1->in_rule = rule = ENDFILE;
351                 $1->source_file = source;
352                 $$ = $1;
353           }
354         ;
355
356 action
357         : l_brace statements r_brace opt_semi opt_nls
358           {
359                 if ($2 == NULL)
360                         $$ = list_create(instruction(Op_no_op));
361                 else
362                         $$ = $2;
363           }
364         ;
365
366 func_name
367         : NAME
368           { $$ = $1; }
369         | FUNC_CALL
370           { $$ = $1; }
371         | lex_builtin
372           {
373                 yyerror(_("`%s' is a built-in function, it cannot be redefined"),
374                                         tokstart);
375                 YYABORT;
376           }
377         | '@' LEX_EVAL
378           { $$ = $2; }
379         ;
380
381 lex_builtin
382         : LEX_BUILTIN
383         | LEX_LENGTH
384         ;
385                 
386 function_prologue
387         : LEX_FUNCTION func_name '(' opt_param_list r_paren opt_nls
388           {
389                 $1->source_file = source;
390                 if (install_function($2->lextok, $1, $4) < 0)
391                         YYABORT;
392                 in_function = $2->lextok;
393                 $2->lextok = NULL;
394                 bcfree($2);
395                 /* $4 already free'd in install_function */
396                 $$ = $1;
397           }
398         ;
399
400 regexp
401         /*
402          * In this rule, want_regexp tells yylex that the next thing
403          * is a regexp so it should read up to the closing slash.
404          */
405         : a_slash
406                 { want_regexp = true; }
407           REGEXP        /* The terminating '/' is consumed by yylex(). */
408                 {
409                   NODE *n, *exp;
410                   char *re;
411                   size_t len;
412
413                   re = $3->lextok;
414                   $3->lextok = NULL;
415                   len = strlen(re);
416                   if (do_lint) {
417                         if (len == 0)
418                                 lintwarn_ln($3->source_line,
419                                         _("regexp constant `//' looks like a C++ comment, but is not"));
420                         else if (re[0] == '*' && re[len-1] == '*')
421                                 /* possible C comment */
422                                 lintwarn_ln($3->source_line,
423                                         _("regexp constant `/%s/' looks like a C comment, but is not"), re);
424                   }
425
426                   exp = make_str_node(re, len, ALREADY_MALLOCED);
427                   n = make_regnode(Node_regex, exp);
428                   if (n == NULL) {
429                         unref(exp);
430                         YYABORT;
431                   }
432                   $$ = $3;
433                   $$->opcode = Op_match_rec;
434                   $$->memory = n;
435                 }
436         ;
437
438 a_slash
439         : '/'
440           { bcfree($1); }
441         | SLASH_BEFORE_EQUAL
442         ;
443
444 statements
445         : /* empty */
446           {     $$ = NULL; }
447         | statements statement
448           {
449                 if ($2 == NULL)
450                         $$ = $1;
451                 else {
452                         add_lint($2, LINT_no_effect);
453                         if ($1 == NULL)
454                                 $$ = $2;
455                         else
456                                 $$ = list_merge($1, $2);
457                 }
458             yyerrok;
459           }
460         | statements error
461           {     $$ = NULL; }
462         ;
463
464 statement_term
465         : nls
466         | semi opt_nls
467         ;
468
469 statement
470         : semi opt_nls
471           { $$ = NULL; }
472         | l_brace statements r_brace
473           { $$ = $2; }
474         | if_statement
475           {
476                 if (do_pretty_print)
477                         $$ = list_prepend($1, instruction(Op_exec_count));
478                 else
479                         $$ = $1;
480           }
481         | LEX_SWITCH '(' exp r_paren opt_nls l_brace case_statements opt_nls r_brace
482           {
483                 INSTRUCTION *dflt, *curr = NULL, *cexp, *cstmt;
484                 INSTRUCTION *ip, *nextc, *tbreak;
485                 const char **case_values = NULL;
486                 int maxcount = 128;
487                 int case_count = 0;
488                 int i;
489
490                 tbreak = instruction(Op_no_op); 
491                 cstmt = list_create(tbreak);
492                 cexp = list_create(instruction(Op_pop));
493                 dflt = instruction(Op_jmp);
494                 dflt->target_jmp = tbreak;      /* if no case match and no explicit default */
495
496                 if ($7 != NULL) {
497                         curr = $7->nexti;
498                         bcfree($7);     /* Op_list */
499                 } /*  else
500                                 curr = NULL; */
501
502                 for(; curr != NULL; curr = nextc) {
503                         INSTRUCTION *caseexp = curr->case_exp;
504                         INSTRUCTION *casestmt = curr->case_stmt;
505
506                         nextc = curr->nexti;
507                         if (curr->opcode == Op_K_case) {
508                                 if (caseexp->opcode == Op_push_i) {
509                                         /* a constant scalar */
510                                         char *caseval;
511                                         caseval = force_string(caseexp->memory)->stptr;
512                                         for (i = 0; i < case_count; i++) {
513                                                 if (strcmp(caseval, case_values[i]) == 0)
514                                                         error_ln(curr->source_line,
515                                                                 _("duplicate case values in switch body: %s"), caseval);
516                                         }
517  
518                                         if (case_values == NULL)
519                                                 emalloc(case_values, const char **, sizeof(char *) * maxcount, "statement");
520                                         else if (case_count >= maxcount) {
521                                                 maxcount += 128;
522                                                 erealloc(case_values, const char **, sizeof(char*) * maxcount, "statement");
523                                         }
524                                         case_values[case_count++] = caseval;
525                                 } else {
526                                         /* match a constant regex against switch expression. */
527                                         (curr + 1)->match_exp = true;
528                                 }
529                                 curr->stmt_start = casestmt->nexti;
530                                 curr->stmt_end  = casestmt->lasti;
531                                 (void) list_prepend(cexp, curr);
532                                 (void) list_prepend(cexp, caseexp);
533                         } else {
534                                 if (dflt->target_jmp != tbreak)
535                                         error_ln(curr->source_line,
536                                                 _("duplicate `default' detected in switch body"));
537                                 else
538                                         dflt->target_jmp = casestmt->nexti;
539
540                                 if (do_pretty_print) {
541                                         curr->stmt_start = casestmt->nexti;
542                                         curr->stmt_end = casestmt->lasti;
543                                         (void) list_prepend(cexp, curr);
544                                 } else
545                                         bcfree(curr);
546                         }
547
548                         cstmt = list_merge(casestmt, cstmt);
549                 }
550
551                 if (case_values != NULL)
552                         efree(case_values);
553
554                 ip = $3;
555                 if (do_pretty_print) {
556                         (void) list_prepend(ip, $1);
557                         (void) list_prepend(ip, instruction(Op_exec_count));
558                         $1->target_break = tbreak;
559                         ($1 + 1)->switch_start = cexp->nexti;
560                         ($1 + 1)->switch_end = cexp->lasti;
561                 }/* else
562                                 $1 is NULL */
563
564                 (void) list_append(cexp, dflt);
565                 (void) list_merge(ip, cexp);
566                 $$ = list_merge(ip, cstmt);
567
568                 break_allowed--;                        
569                 fix_break_continue(ip, tbreak, NULL);
570           }
571         | LEX_WHILE '(' exp r_paren opt_nls statement
572           { 
573                 /*
574                  *    -----------------
575                  * tc:
576                  *         cond
577                  *    -----------------
578                  *    [Op_jmp_false tb   ]
579                  *    -----------------   
580                  *         body
581                  *    -----------------
582                  *    [Op_jmp      tc    ]
583                  * tb:[Op_no_op          ]
584                  */
585
586                 INSTRUCTION *ip, *tbreak, *tcont;
587
588                 tbreak = instruction(Op_no_op);
589                 add_lint($3, LINT_assign_in_cond);
590                 tcont = $3->nexti;
591                 ip = list_append($3, instruction(Op_jmp_false));
592                 ip->lasti->target_jmp = tbreak;
593
594                 if (do_pretty_print) {
595                         (void) list_append(ip, instruction(Op_exec_count));
596                         $1->target_break = tbreak;
597                         $1->target_continue = tcont;
598                         ($1 + 1)->while_body = ip->lasti;
599                         (void) list_prepend(ip, $1);
600                 }/* else
601                                 $1 is NULL */
602
603                 if ($6 != NULL)
604                         (void) list_merge(ip, $6);
605                 (void) list_append(ip, instruction(Op_jmp));
606                 ip->lasti->target_jmp = tcont;
607                 $$ = list_append(ip, tbreak);
608
609                 break_allowed--;
610                 continue_allowed--;
611                 fix_break_continue(ip, tbreak, tcont);
612           }
613         | LEX_DO opt_nls statement LEX_WHILE '(' exp r_paren opt_nls
614           {
615                 /*
616                  *    -----------------
617                  * z:
618                  *         body
619                  *    -----------------
620                  * tc: 
621                  *         cond
622                  *    -----------------
623                  *    [Op_jmp_true | z  ]
624                  * tb:[Op_no_op         ]
625                  */
626
627                 INSTRUCTION *ip, *tbreak, *tcont;
628
629                 tbreak = instruction(Op_no_op);
630                 tcont = $6->nexti;
631                 add_lint($6, LINT_assign_in_cond);
632                 if ($3 != NULL)
633                         ip = list_merge($3, $6);
634                 else
635                         ip = list_prepend($6, instruction(Op_no_op));
636                 if (do_pretty_print)
637                         (void) list_prepend(ip, instruction(Op_exec_count));
638                 (void) list_append(ip, instruction(Op_jmp_true));
639                 ip->lasti->target_jmp = ip->nexti;
640                 $$ = list_append(ip, tbreak);
641
642                 break_allowed--;
643                 continue_allowed--;
644                 fix_break_continue(ip, tbreak, tcont);
645
646                 if (do_pretty_print) {
647                         $1->target_break = tbreak;
648                         $1->target_continue = tcont;
649                         ($1 + 1)->doloop_cond = tcont;
650                         $$ = list_prepend(ip, $1);
651                         bcfree($4);
652                 } /* else
653                         $1 and $4 are NULLs */
654           }
655         | LEX_FOR '(' NAME LEX_IN simple_variable r_paren opt_nls statement
656           {
657                 INSTRUCTION *ip;
658                 char *var_name = $3->lextok;
659
660                 if ($8 != NULL
661                                 && $8->lasti->opcode == Op_K_delete
662                                 && $8->lasti->expr_count == 1
663                                 && $8->nexti->opcode == Op_push
664                                 && ($8->nexti->memory->type != Node_var || !($8->nexti->memory->var_update))
665                                 && strcmp($8->nexti->memory->vname, var_name) == 0
666                 ) {
667                 
668                 /* Efficiency hack.  Recognize the special case of
669                  *
670                  *      for (iggy in foo)
671                  *              delete foo[iggy]
672                  *
673                  * and treat it as if it were
674                  *
675                  *      delete foo
676                  *
677                  * Check that the body is a `delete a[i]' statement,
678                  * and that both the loop var and array names match.
679                  */              
680                         NODE *arr = NULL;
681
682                         ip = $8->nexti->nexti; 
683                         if ($5->nexti->opcode == Op_push && $5->lasti == $5->nexti)
684                                 arr = $5->nexti->memory;
685                         if (arr != NULL
686                                         && ip->opcode == Op_no_op
687                                         && ip->nexti->opcode == Op_push_array
688                                         && strcmp(ip->nexti->memory->vname, arr->vname) == 0
689                                         && ip->nexti->nexti == $8->lasti
690                         ) {
691                                 (void) make_assignable($8->nexti);
692                                 $8->lasti->opcode = Op_K_delete_loop;
693                                 $8->lasti->expr_count = 0;
694                                 if ($1 != NULL)
695                                         bcfree($1);
696                                 efree(var_name);
697                                 bcfree($3);
698                                 bcfree($4);
699                                 bcfree($5);
700                                 $$ = $8;
701                         } else
702                                 goto regular_loop;
703                 } else {
704                         INSTRUCTION *tbreak, *tcont;
705
706                         /*    [ Op_push_array a       ]
707                          *    [ Op_arrayfor_init | ib ]
708                          * ic:[ Op_arrayfor_incr | ib ] 
709                          *    [ Op_var_assign if any  ]
710                          *
711                          *              body
712                          *
713                          *    [Op_jmp | ic            ]
714                          * ib:[Op_arrayfor_final      ]
715                          */
716 regular_loop:
717                         ip = $5;
718                         ip->nexti->opcode = Op_push_array;
719
720                         tbreak = instruction(Op_arrayfor_final);
721                         $4->opcode = Op_arrayfor_incr;
722                         $4->array_var = variable($3->source_line, var_name, Node_var);
723                         $4->target_jmp = tbreak;
724                         tcont = $4;
725                         $3->opcode = Op_arrayfor_init;
726                         $3->target_jmp = tbreak;
727                         (void) list_append(ip, $3);
728
729                         if (do_pretty_print) {
730                                 $1->opcode = Op_K_arrayfor;
731                                 $1->target_continue = tcont;
732                                 $1->target_break = tbreak;
733                                 (void) list_append(ip, $1);
734                         } /* else
735                                         $1 is NULL */
736
737                         /* add update_FOO instruction if necessary */ 
738                         if ($4->array_var->type == Node_var && $4->array_var->var_update) {
739                                 (void) list_append(ip, instruction(Op_var_update));
740                                 ip->lasti->update_var = $4->array_var->var_update;
741                         }
742                         (void) list_append(ip, $4);
743
744                         /* add set_FOO instruction if necessary */
745                         if ($4->array_var->type == Node_var && $4->array_var->var_assign) {
746                                 (void) list_append(ip, instruction(Op_var_assign));
747                                 ip->lasti->assign_var = $4->array_var->var_assign;
748                         }
749
750                         if (do_pretty_print) {
751                                 (void) list_append(ip, instruction(Op_exec_count));
752                                 ($1 + 1)->forloop_cond = $4;
753                                 ($1 + 1)->forloop_body = ip->lasti; 
754                         }
755
756                         if ($8 != NULL)
757                                 (void) list_merge(ip, $8);
758
759                         (void) list_append(ip, instruction(Op_jmp));
760                         ip->lasti->target_jmp = $4;
761                         $$ = list_append(ip, tbreak);
762                         fix_break_continue(ip, tbreak, tcont);
763                 } 
764
765                 break_allowed--;
766                 continue_allowed--;
767           }
768         | LEX_FOR '(' opt_simple_stmt semi opt_nls exp semi opt_nls opt_simple_stmt r_paren opt_nls statement
769           {
770                 $$ = mk_for_loop($1, $3, $6, $9, $12);
771
772                 break_allowed--;
773                 continue_allowed--;
774           }
775         | LEX_FOR '(' opt_simple_stmt semi opt_nls semi opt_nls opt_simple_stmt r_paren opt_nls statement
776           {
777                 $$ = mk_for_loop($1, $3, (INSTRUCTION *) NULL, $8, $11);
778
779                 break_allowed--;
780                 continue_allowed--;
781           }
782         | non_compound_stmt
783           {
784                 if (do_pretty_print)
785                         $$ = list_prepend($1, instruction(Op_exec_count));
786                 else
787                         $$ = $1;
788           }
789         ;
790
791 non_compound_stmt
792         : LEX_BREAK statement_term
793           { 
794                 if (! break_allowed)
795                         error_ln($1->source_line,
796                                 _("`break' is not allowed outside a loop or switch"));
797                 $1->target_jmp = NULL;
798                 $$ = list_create($1);
799
800           }
801         | LEX_CONTINUE statement_term
802           {
803                 if (! continue_allowed)
804                         error_ln($1->source_line,
805                                 _("`continue' is not allowed outside a loop"));
806                 $1->target_jmp = NULL;
807                 $$ = list_create($1);
808
809           }
810         | LEX_NEXT statement_term
811           {
812                 /* if inside function (rule = 0), resolve context at run-time */
813                 if (rule && rule != Rule)
814                         error_ln($1->source_line,
815                                 _("`next' used in %s action"), ruletab[rule]);
816                 $1->target_jmp = ip_rec;
817                 $$ = list_create($1);
818           }
819         | LEX_NEXTFILE statement_term
820           {
821                 /* if inside function (rule = 0), resolve context at run-time */
822                 if (rule == BEGIN || rule == END || rule == ENDFILE)
823                         error_ln($1->source_line,
824                                 _("`nextfile' used in %s action"), ruletab[rule]);
825
826                 $1->target_newfile = ip_newfile;
827                 $1->target_endfile = ip_endfile;
828                 $$ = list_create($1);
829           }
830         | LEX_EXIT opt_exp statement_term
831           {
832                 /* Initialize the two possible jump targets, the actual target
833                  * is resolved at run-time. 
834                  */
835                 $1->target_end = ip_end;        /* first instruction in end_block */
836                 $1->target_atexit = ip_atexit;  /* cleanup and go home */
837
838                 if ($2 == NULL) {
839                         $$ = list_create($1);
840                         (void) list_prepend($$, instruction(Op_push_i));
841                         $$->nexti->memory = dupnode(Nnull_string);
842                 } else
843                         $$ = list_append($2, $1);
844           }
845         | LEX_RETURN
846           {
847                 if (! in_function)
848                         yyerror(_("`return' used outside function context"));
849           } opt_exp statement_term {
850                 if ($3 == NULL) {
851                         $$ = list_create($1);
852                         (void) list_prepend($$, instruction(Op_push_i));
853                         $$->nexti->memory = dupnode(Nnull_string);
854                 } else {
855                         if (do_optimize
856                                 && $3->lasti->opcode == Op_func_call
857                                 && strcmp($3->lasti->func_name, in_function) == 0
858                         ) {
859                                 /* Do tail recursion optimization. Tail
860                                  * call without a return value is recognized
861                                  * in mk_function().
862                                  */
863                                 ($3->lasti + 1)->tail_call = true;
864                         }
865
866                         $$ = list_append($3, $1);
867                 }
868           }
869         | simple_stmt statement_term
870         ;
871
872         /*
873          * A simple_stmt exists to satisfy a constraint in the POSIX
874          * grammar allowing them to occur as the 1st and 3rd parts
875          * in a `for (...;...;...)' loop.  This is a historical oddity
876          * inherited from Unix awk, not at all documented in the AK&W
877          * awk book.  We support it, as this was reported as a bug.
878          * We don't bother to document it though. So there.
879          */
880 simple_stmt
881         : print { in_print = true; in_parens = 0; } print_expression_list output_redir
882           {
883                 /*
884                  * Optimization: plain `print' has no expression list, so $3 is null.
885                  * If $3 is NULL or is a bytecode list for $0 use Op_K_print_rec,
886                  * which is faster for these two cases.
887                  */
888
889                 if ($1->opcode == Op_K_print &&
890                         ($3 == NULL
891                                 || ($3->lasti->opcode == Op_field_spec
892                                         && $3->nexti->nexti->nexti == $3->lasti
893                                         && $3->nexti->nexti->opcode == Op_push_i
894                                         && $3->nexti->nexti->memory->type == Node_val)
895                         )
896                 ) {
897                         static bool warned = false;
898                         /*   -----------------
899                          *      output_redir
900                          *    [ redirect exp ]
901                          *   -----------------
902                          *     expression_list
903                          *   ------------------
904                          *    [Op_K_print_rec | NULL | redir_type | expr_count]
905                          */
906
907                         if ($3 != NULL) {
908                                 NODE *n = $3->nexti->nexti->memory;
909
910                                 if (! iszero(n))
911                                         goto regular_print;
912
913                                 bcfree($3->lasti);                      /* Op_field_spec */
914                                 unref(n);                               /* Node_val */
915                                 bcfree($3->nexti->nexti);               /* Op_push_i */
916                                 bcfree($3->nexti);                      /* Op_list */
917                                 bcfree($3);                             /* Op_list */
918                         } else {
919                                 if (do_lint && (rule == BEGIN || rule == END) && ! warned) {
920                                         warned = true;
921                                         lintwarn_ln($1->source_line,
922                 _("plain `print' in BEGIN or END rule should probably be `print \"\"'"));
923                                 }
924                         }
925
926                         $1->expr_count = 0;
927                         $1->opcode = Op_K_print_rec;
928                         if ($4 == NULL) {    /* no redircetion */
929                                 $1->redir_type = redirect_none;
930                                 $$ = list_create($1);
931                         } else {
932                                 INSTRUCTION *ip;
933                                 ip = $4->nexti;
934                                 $1->redir_type = ip->redir_type;
935                                 $4->nexti = ip->nexti;
936                                 bcfree(ip);
937                                 $$ = list_append($4, $1);
938                         }
939                 } else {
940                         /*   -----------------
941                          *    [ output_redir    ]
942                          *    [ redirect exp    ]
943                          *   -----------------
944                          *    [ expression_list ]
945                          *   ------------------
946                          *    [$1 | NULL | redir_type | expr_count]
947                          *
948                          */
949 regular_print:   
950                         if ($4 == NULL) {               /* no redirection */
951                                 if ($3 == NULL) {       /* printf without arg */
952                                         $1->expr_count = 0;
953                                         $1->redir_type = redirect_none;
954                                         $$ = list_create($1);
955                                 } else {
956                                         INSTRUCTION *t = $3;
957                                         $1->expr_count = count_expressions(&t, false);
958                                         $1->redir_type = redirect_none;
959                                         $$ = list_append(t, $1);
960                                 }
961                         } else {
962                                 INSTRUCTION *ip;
963                                 ip = $4->nexti;
964                                 $1->redir_type = ip->redir_type;
965                                 $4->nexti = ip->nexti;
966                                 bcfree(ip);
967                                 if ($3 == NULL) {
968                                         $1->expr_count = 0;
969                                         $$ = list_append($4, $1);
970                                 } else {
971                                         INSTRUCTION *t = $3;
972                                         $1->expr_count = count_expressions(&t, false);
973                                         $$ = list_append(list_merge($4, t), $1);
974                                 }
975                         }
976                 }
977           }
978
979         | LEX_DELETE NAME { sub_counter = 0; } delete_subscript_list
980           {
981                 char *arr = $2->lextok;
982
983                 $2->opcode = Op_push_array;
984                 $2->memory = variable($2->source_line, arr, Node_var_new);
985
986                 if (! do_posix && ! do_traditional) {
987                         if ($2->memory == symbol_table)
988                                 fatal(_("`delete' is not allowed with SYMTAB"));
989                         else if ($2->memory == func_table)
990                                 fatal(_("`delete' is not allowed with FUNCTAB"));
991                 }
992
993                 if ($4 == NULL) {
994                         /*
995                          * As of September 2012, POSIX has added support
996                          * for `delete array'. See:
997                          * http://austingroupbugs.net/view.php?id=544
998                          *
999                          * Thanks to Nathan Weeks for the initiative.
1000                          *
1001                          * Thus we no longer warn or check do_posix.
1002                          * Also, since BWK awk supports it, we don't have to
1003                          * check do_traditional either.
1004                          */
1005                         $1->expr_count = 0;
1006                         $$ = list_append(list_create($2), $1);
1007                 } else {
1008                         $1->expr_count = sub_counter;
1009                         $$ = list_append(list_append($4, $2), $1);
1010                 }
1011           }     
1012         | LEX_DELETE '(' NAME ')'
1013                   /*
1014                    * this is for tawk compatibility. maybe the warnings
1015                    * should always be done.
1016                    */
1017           {
1018                 static bool warned = false;
1019                 char *arr = $3->lextok;
1020
1021                 if (do_lint && ! warned) {
1022                         warned = true;
1023                         lintwarn_ln($1->source_line,
1024                                 _("`delete(array)' is a non-portable tawk extension"));
1025                 }
1026                 if (do_traditional) {
1027                         error_ln($1->source_line,
1028                                 _("`delete(array)' is a non-portable tawk extension"));
1029                 }
1030                 $3->memory = variable($3->source_line, arr, Node_var_new);
1031                 $3->opcode = Op_push_array;
1032                 $1->expr_count = 0;
1033                 $$ = list_append(list_create($3), $1);
1034
1035                 if (! do_posix && ! do_traditional) {
1036                         if ($3->memory == symbol_table)
1037                                 fatal(_("`delete' is not allowed with SYMTAB"));
1038                         else if ($3->memory == func_table)
1039                                 fatal(_("`delete' is not allowed with FUNCTAB"));
1040                 }
1041           }
1042         | exp
1043           {     $$ = optimize_assignment($1); }
1044         ;
1045
1046 opt_simple_stmt
1047         : /* empty */
1048           { $$ = NULL; }
1049         | simple_stmt
1050           { $$ = $1; }
1051         ;
1052
1053 case_statements
1054         : /* empty */
1055           { $$ = NULL; }
1056         | case_statements case_statement
1057           {
1058                 if ($1 == NULL)
1059                         $$ = list_create($2);
1060                 else
1061                         $$ = list_prepend($1, $2);
1062           }
1063         | case_statements error
1064           { $$ = NULL; }
1065         ;
1066
1067 case_statement
1068         : LEX_CASE case_value colon opt_nls statements
1069           {
1070                 INSTRUCTION *casestmt = $5;
1071                 if ($5 == NULL)
1072                         casestmt = list_create(instruction(Op_no_op));  
1073                 if (do_pretty_print)
1074                         (void) list_prepend(casestmt, instruction(Op_exec_count));
1075                 $1->case_exp = $2;
1076                 $1->case_stmt = casestmt;
1077                 bcfree($3);
1078                 $$ = $1;
1079           }
1080         | LEX_DEFAULT colon opt_nls statements
1081           {
1082                 INSTRUCTION *casestmt = $4;
1083                 if ($4 == NULL)
1084                         casestmt = list_create(instruction(Op_no_op));
1085                 if (do_pretty_print)
1086                         (void) list_prepend(casestmt, instruction(Op_exec_count));
1087                 bcfree($2);
1088                 $1->case_stmt = casestmt;
1089                 $$ = $1;
1090           }
1091         ;
1092
1093 case_value
1094         : YNUMBER
1095           {     $$ = $1; }
1096         | '-' YNUMBER    %prec UNARY
1097           { 
1098                 NODE *n = $2->memory;
1099                 (void) force_number(n);
1100                 negate_num(n);
1101                 bcfree($1);
1102                 $$ = $2;
1103           }
1104         | '+' YNUMBER    %prec UNARY
1105           {
1106                 bcfree($1);
1107                 $$ = $2;
1108           }
1109         | YSTRING 
1110           {     $$ = $1; }
1111         | regexp  
1112           {
1113                 $1->opcode = Op_push_re;
1114                 $$ = $1;
1115           }
1116         ;
1117
1118 print
1119         : LEX_PRINT
1120           { $$ = $1; }
1121         | LEX_PRINTF
1122           { $$ = $1; }
1123         ;
1124
1125         /*
1126          * Note: ``print(x)'' is already parsed by the first rule,
1127          * so there is no good in covering it by the second one too.
1128          */
1129 print_expression_list
1130         : opt_expression_list
1131         | '(' expression_list r_paren
1132           {
1133                 $$ = $2;
1134           }
1135         ;
1136
1137 output_redir
1138         : /* empty */
1139           {
1140                 in_print = false;
1141                 in_parens = 0;
1142                 $$ = NULL;
1143           }
1144         | IO_OUT { in_print = false; in_parens = 0; } common_exp
1145           {
1146                 if ($1->redir_type == redirect_twoway
1147                         && $3->lasti->opcode == Op_K_getline_redir
1148                                 && $3->lasti->redir_type == redirect_twoway)
1149                         yyerror(_("multistage two-way pipelines don't work"));
1150                 $$ = list_prepend($3, $1);
1151           }
1152         ;
1153
1154 if_statement
1155         : LEX_IF '(' exp r_paren opt_nls statement
1156           {
1157                 $$ = mk_condition($3, $1, $6, NULL, NULL);
1158           }
1159         | LEX_IF '(' exp r_paren opt_nls statement
1160              LEX_ELSE opt_nls statement
1161           {
1162                 $$ = mk_condition($3, $1, $6, $7, $9);
1163           }
1164         ;
1165
1166 nls
1167         : NEWLINE
1168         | nls NEWLINE
1169         ;
1170
1171 opt_nls
1172         : /* empty */
1173         | nls
1174         ;
1175
1176 input_redir
1177         : /* empty */
1178           { $$ = NULL; }
1179         | '<' simp_exp
1180           {
1181                 bcfree($1);
1182                 $$ = $2;
1183           }
1184         ;
1185
1186 opt_param_list
1187         : /* empty */
1188           { $$ = NULL; }
1189         | param_list
1190           { $$ = $1 ; }
1191         ;
1192
1193 param_list
1194         : NAME
1195           {
1196                 $1->param_count = 0;
1197                 $$ = list_create($1);
1198           }
1199         | param_list comma NAME
1200           {
1201                 $3->param_count =  $1->lasti->param_count + 1;
1202                 $$ = list_append($1, $3);
1203                 yyerrok;
1204           }
1205         | error
1206           { $$ = NULL; }
1207         | param_list error
1208           { $$ = $1; }
1209         | param_list comma error
1210           { $$ = $1; }
1211         ;
1212
1213 /* optional expression, as in for loop */
1214 opt_exp
1215         : /* empty */
1216           { $$ = NULL; }
1217         | exp
1218           { $$ = $1; }
1219         ;
1220
1221 opt_expression_list
1222         : /* empty */
1223           { $$ = NULL; }
1224         | expression_list
1225           { $$ = $1; }
1226         ;
1227
1228 expression_list
1229         : exp
1230           {     $$ = mk_expression_list(NULL, $1); }
1231         | expression_list comma exp
1232           {
1233                 $$ = mk_expression_list($1, $3);
1234                 yyerrok;
1235           }
1236         | error
1237           { $$ = NULL; }
1238         | expression_list error
1239           {
1240                 /*
1241                  * Returning the expression list instead of NULL lets
1242                  * snode get a list of arguments that it can count.
1243                  */
1244                 $$ = $1;
1245           }
1246         | expression_list error exp
1247           {
1248                 /* Ditto */
1249                 $$ = mk_expression_list($1, $3);
1250           }
1251         | expression_list comma error
1252           {
1253                 /* Ditto */
1254                 $$ = $1;
1255           }
1256         ;
1257
1258 /* Expressions, not including the comma operator.  */
1259 exp
1260         : variable assign_operator exp %prec ASSIGNOP
1261           {
1262                 if (do_lint && $3->lasti->opcode == Op_match_rec)
1263                         lintwarn_ln($2->source_line,
1264                                 _("regular expression on right of assignment"));
1265                 $$ = mk_assignment($1, $3, $2);
1266           }
1267         | exp LEX_AND exp
1268           {     $$ = mk_boolean($1, $3, $2); }
1269         | exp LEX_OR exp
1270           {     $$ = mk_boolean($1, $3, $2); }
1271         | exp MATCHOP exp
1272           {
1273                 if ($1->lasti->opcode == Op_match_rec)
1274                         warning_ln($2->source_line,
1275                                 _("regular expression on left of `~' or `!~' operator"));
1276
1277                 if ($3->lasti == $3->nexti && $3->nexti->opcode == Op_match_rec) {
1278                         $2->memory = $3->nexti->memory;
1279                         bcfree($3->nexti);      /* Op_match_rec */
1280                         bcfree($3);                     /* Op_list */
1281                         $$ = list_append($1, $2);
1282                 } else {
1283                         $2->memory = make_regnode(Node_dynregex, NULL);
1284                         $$ = list_append(list_merge($1, $3), $2);
1285                 }
1286           }
1287         | exp LEX_IN simple_variable
1288           {
1289                 if (do_lint_old)
1290                         warning_ln($2->source_line,
1291                                 _("old awk does not support the keyword `in' except after `for'"));
1292                 $3->nexti->opcode = Op_push_array;
1293                 $2->opcode = Op_in_array;
1294                 $2->expr_count = 1;
1295                 $$ = list_append(list_merge($1, $3), $2);
1296           }
1297         | exp a_relop exp %prec RELOP
1298           {
1299                 if (do_lint && $3->lasti->opcode == Op_match_rec)
1300                         lintwarn_ln($2->source_line,
1301                                 _("regular expression on right of comparison"));
1302                 $$ = list_append(list_merge($1, $3), $2);
1303           }
1304         | exp '?' exp ':' exp
1305           { $$ = mk_condition($1, $2, $3, $4, $5); }
1306         | common_exp
1307           { $$ = $1; }
1308         ;
1309
1310 assign_operator
1311         : ASSIGN
1312           { $$ = $1; }
1313         | ASSIGNOP
1314           { $$ = $1; }
1315         | SLASH_BEFORE_EQUAL ASSIGN   /* `/=' */
1316           {     
1317                 $2->opcode = Op_assign_quotient;
1318                 $$ = $2;
1319           }
1320         ;
1321
1322 relop_or_less
1323         : RELOP
1324           { $$ = $1; }
1325         | '<'
1326           { $$ = $1; }
1327         ;
1328
1329 a_relop
1330         : relop_or_less
1331           { $$ = $1; }
1332         | '>'
1333           { $$ = $1; }
1334         ;
1335
1336 common_exp
1337         : simp_exp
1338           { $$ = $1; }
1339         | simp_exp_nc
1340           { $$ = $1; }
1341         | common_exp simp_exp %prec CONCAT_OP
1342           {
1343                 int count = 2;
1344                 bool is_simple_var = false;
1345
1346                 if ($1->lasti->opcode == Op_concat) {
1347                         /* multiple (> 2) adjacent strings optimization */
1348                         is_simple_var = ($1->lasti->concat_flag & CSVAR);
1349                         count = $1->lasti->expr_count + 1;
1350                         $1->lasti->opcode = Op_no_op;
1351                 } else {
1352                         is_simple_var = ($1->nexti->opcode == Op_push
1353                                         && $1->lasti == $1->nexti); /* first exp. is a simple
1354                                                                      * variable?; kludge for use
1355                                                                      * in Op_assign_concat.
1356                                                                      */
1357                 }
1358
1359                 if (do_optimize
1360                         && $1->nexti == $1->lasti && $1->nexti->opcode == Op_push_i
1361                         && $2->nexti == $2->lasti && $2->nexti->opcode == Op_push_i
1362                 ) {
1363                         NODE *n1 = $1->nexti->memory;
1364                         NODE *n2 = $2->nexti->memory;
1365                         size_t nlen;
1366
1367                         n1 = force_string(n1);
1368                         n2 = force_string(n2);
1369                         nlen = n1->stlen + n2->stlen;
1370                         erealloc(n1->stptr, char *, nlen + 2, "constant fold");
1371                         memcpy(n1->stptr + n1->stlen, n2->stptr, n2->stlen);
1372                         n1->stlen = nlen;
1373                         n1->stptr[nlen] = '\0';
1374                         n1->flags &= ~(NUMCUR|NUMBER|NUMINT);
1375                         n1->flags |= (STRING|STRCUR);
1376                         unref(n2);
1377                         bcfree($2->nexti);
1378                         bcfree($2);
1379                         $$ = $1;
1380                 } else {
1381                         $$ = list_append(list_merge($1, $2), instruction(Op_concat));
1382                         $$->lasti->concat_flag = (is_simple_var ? CSVAR : 0);
1383                         $$->lasti->expr_count = count;
1384                         if (count > max_args)
1385                                 max_args = count;
1386                 }
1387           }
1388         ;
1389
1390 simp_exp
1391         : non_post_simp_exp
1392         /* Binary operators in order of decreasing precedence.  */
1393         | simp_exp '^' simp_exp
1394           { $$ = mk_binary($1, $3, $2); }
1395         | simp_exp '*' simp_exp
1396           { $$ = mk_binary($1, $3, $2); }
1397         | simp_exp '/' simp_exp
1398           { $$ = mk_binary($1, $3, $2); }
1399         | simp_exp '%' simp_exp
1400           { $$ = mk_binary($1, $3, $2); }
1401         | simp_exp '+' simp_exp
1402           { $$ = mk_binary($1, $3, $2); }
1403         | simp_exp '-' simp_exp
1404           { $$ = mk_binary($1, $3, $2); }
1405         | LEX_GETLINE opt_variable input_redir
1406           {
1407                 /*
1408                  * In BEGINFILE/ENDFILE, allow `getline var < file'
1409                  */
1410
1411                 if (rule == BEGINFILE || rule == ENDFILE) {
1412                         if ($2 != NULL && $3 != NULL)
1413                                 ;        /* all  ok */
1414                         else {
1415                                 if ($2 != NULL)
1416                                         error_ln($1->source_line,
1417                                                 _("`getline var' invalid inside `%s' rule"), ruletab[rule]);
1418                                 else
1419                                         error_ln($1->source_line,
1420                                                 _("`getline' invalid inside `%s' rule"), ruletab[rule]);
1421                         }
1422                 }
1423                 if (do_lint && rule == END && $3 == NULL)
1424                         lintwarn_ln($1->source_line,
1425                                 _("non-redirected `getline' undefined inside END action"));
1426                 $$ = mk_getline($1, $2, $3, redirect_input);
1427           }
1428         | variable INCREMENT
1429           {
1430                 $2->opcode = Op_postincrement;
1431                 $$ = mk_assignment($1, NULL, $2);
1432           }
1433         | variable DECREMENT
1434           {
1435                 $2->opcode = Op_postdecrement;
1436                 $$ = mk_assignment($1, NULL, $2);
1437           }
1438         | '(' expression_list r_paren LEX_IN simple_variable
1439           {
1440                 if (do_lint_old) {
1441                     warning_ln($4->source_line,
1442                                 _("old awk does not support the keyword `in' except after `for'"));
1443                     warning_ln($4->source_line,
1444                                 _("old awk does not support multidimensional arrays"));
1445                 }
1446                 $5->nexti->opcode = Op_push_array;
1447                 $4->opcode = Op_in_array;
1448                 if ($2 == NULL) {       /* error */
1449                         errcount++;
1450                         $4->expr_count = 0;
1451                         $$ = list_merge($5, $4);
1452                 } else {
1453                         INSTRUCTION *t = $2;
1454                         $4->expr_count = count_expressions(&t, false);
1455                         $$ = list_append(list_merge(t, $5), $4);
1456                 }
1457           }
1458         ;
1459
1460 /* Expressions containing "| getline" lose the ability to be on the
1461    right-hand side of a concatenation. */
1462 simp_exp_nc
1463         : common_exp IO_IN LEX_GETLINE opt_variable
1464                 {
1465                   $$ = mk_getline($3, $4, $1, $2->redir_type);
1466                   bcfree($2);
1467                 }
1468         /* Binary operators in order of decreasing precedence.  */
1469         | simp_exp_nc '^' simp_exp
1470           { $$ = mk_binary($1, $3, $2); }
1471         | simp_exp_nc '*' simp_exp
1472           { $$ = mk_binary($1, $3, $2); }
1473         | simp_exp_nc '/' simp_exp
1474           { $$ = mk_binary($1, $3, $2); }
1475         | simp_exp_nc '%' simp_exp
1476           { $$ = mk_binary($1, $3, $2); }
1477         | simp_exp_nc '+' simp_exp
1478           { $$ = mk_binary($1, $3, $2); }
1479         | simp_exp_nc '-' simp_exp
1480           { $$ = mk_binary($1, $3, $2); }
1481         ;
1482
1483 non_post_simp_exp
1484         : regexp
1485           {
1486                 $$ = list_create($1);
1487           }
1488         | '!' simp_exp %prec UNARY
1489           {
1490                 if ($2->opcode == Op_match_rec) {
1491                         $2->opcode = Op_nomatch;
1492                         $1->opcode = Op_push_i;
1493                         $1->memory = make_number(0.0);  
1494                         $$ = list_append(list_append(list_create($1),
1495                                                 instruction(Op_field_spec)), $2);
1496                 } else {
1497                         if (do_optimize && $2->nexti == $2->lasti
1498                                         && $2->nexti->opcode == Op_push_i
1499                                         && ($2->nexti->memory->flags & (MPFN|MPZN)) == 0
1500                         ) {
1501                                 NODE *n = $2->nexti->memory;
1502                                 if ((n->flags & (STRCUR|STRING)) != 0) {
1503                                         n->numbr = (AWKNUM) (n->stlen == 0);
1504                                         n->flags &= ~(STRCUR|STRING);
1505                                         n->flags |= (NUMCUR|NUMBER);
1506                                         efree(n->stptr);
1507                                         n->stptr = NULL;
1508                                         n->stlen = 0;
1509                                 } else
1510                                         n->numbr = (AWKNUM) (n->numbr == 0.0);
1511                                 bcfree($1);
1512                                 $$ = $2;
1513                         } else {
1514                                 $1->opcode = Op_not;
1515                                 add_lint($2, LINT_assign_in_cond);
1516                                 $$ = list_append($2, $1);
1517                         }
1518                 }
1519            }
1520         | '(' exp r_paren
1521           { $$ = $2; }
1522         | LEX_BUILTIN '(' opt_expression_list r_paren
1523           {
1524                 $$ = snode($3, $1);
1525                 if ($$ == NULL)
1526                         YYABORT;
1527           }
1528         | LEX_LENGTH '(' opt_expression_list r_paren
1529           {
1530                 $$ = snode($3, $1);
1531                 if ($$ == NULL)
1532                         YYABORT;
1533           }
1534         | LEX_LENGTH
1535           {
1536                 static bool warned = false;
1537
1538                 if (do_lint && ! warned) {
1539                         warned = true;
1540                         lintwarn_ln($1->source_line,
1541                                 _("call of `length' without parentheses is not portable"));
1542                 }
1543                 $$ = snode(NULL, $1);
1544                 if ($$ == NULL)
1545                         YYABORT;
1546           }
1547         | func_call
1548         | variable
1549         | INCREMENT variable
1550           {
1551                 $1->opcode = Op_preincrement;
1552                 $$ = mk_assignment($2, NULL, $1);
1553           }
1554         | DECREMENT variable
1555           {
1556                 $1->opcode = Op_predecrement;
1557                 $$ = mk_assignment($2, NULL, $1);
1558           }
1559         | YNUMBER
1560           {
1561                 $$ = list_create($1);
1562           }
1563         | YSTRING
1564           {
1565                 $$ = list_create($1);
1566           }
1567         | '-' simp_exp    %prec UNARY
1568           {
1569                 if ($2->lasti->opcode == Op_push_i
1570                         && ($2->lasti->memory->flags & (STRCUR|STRING)) == 0
1571                 ) {
1572                         NODE *n = $2->lasti->memory;
1573                         (void) force_number(n);
1574                         negate_num(n);                  
1575                         $$ = $2;
1576                         bcfree($1);
1577                 } else {
1578                         $1->opcode = Op_unary_minus;
1579                         $$ = list_append($2, $1);
1580                 }
1581           }
1582         | '+' simp_exp    %prec UNARY
1583           {
1584             /*
1585              * was: $$ = $2
1586              * POSIX semantics: force a conversion to numeric type
1587              */
1588                 $1->opcode = Op_plus_i;
1589                 $1->memory = make_number(0.0);
1590                 $$ = list_append($2, $1);
1591           }
1592         ;
1593
1594 func_call
1595         : direct_func_call
1596           {
1597                 func_use($1->lasti->func_name, FUNC_USE);
1598                 $$ = $1;
1599           }
1600         | '@' direct_func_call
1601           {
1602                 /* indirect function call */
1603                 INSTRUCTION *f, *t;
1604                 char *name;
1605                 NODE *indirect_var;
1606                 static bool warned = false;
1607                 const char *msg = _("indirect function calls are a gawk extension");
1608
1609                 if (do_traditional || do_posix)
1610                         yyerror("%s", msg);
1611                 else if (do_lint && ! warned) {
1612                         warned = true;
1613                         lintwarn("%s", msg);
1614                 }
1615                 
1616                 f = $2->lasti;
1617                 f->opcode = Op_indirect_func_call;
1618                 name = estrdup(f->func_name, strlen(f->func_name));
1619                 if (is_std_var(name))
1620                         yyerror(_("can not use special variable `%s' for indirect function call"), name);
1621                 indirect_var = variable(f->source_line, name, Node_var_new);
1622                 t = instruction(Op_push);
1623                 t->memory = indirect_var;
1624
1625                 /* prepend indirect var instead of appending to arguments (opt_expression_list),
1626                  * and pop it off in setup_frame (eval.c) (left to right evaluation order); Test case:
1627                  *              f = "fun"
1628                  *              @f(f="real_fun")
1629                  */
1630
1631                 $$ = list_prepend($2, t);
1632           }
1633         ;
1634
1635 direct_func_call
1636         : FUNC_CALL '(' opt_expression_list r_paren
1637           {
1638                 param_sanity($3);
1639                 $1->opcode = Op_func_call;
1640                 $1->func_body = NULL;
1641                 if ($3 == NULL) {       /* no argument or error */
1642                         ($1 + 1)->expr_count = 0;
1643                         $$ = list_create($1);
1644                 } else {
1645                         INSTRUCTION *t = $3;
1646                         ($1 + 1)->expr_count = count_expressions(&t, true); 
1647                         $$ = list_append(t, $1);
1648                 }
1649           }
1650         ;
1651
1652 opt_variable
1653         : /* empty */
1654           { $$ = NULL; }
1655         | variable
1656           { $$ = $1; }
1657         ;
1658
1659 delete_subscript_list
1660         : /* empty */
1661           { $$ = NULL; }
1662         | delete_subscript SUBSCRIPT
1663           { $$ = $1; }
1664         ;
1665
1666 delete_subscript
1667         : delete_exp_list
1668           {     $$ = $1; }
1669         | delete_subscript delete_exp_list
1670           {
1671                 $$ = list_merge($1, $2);
1672           }
1673         ;
1674
1675 delete_exp_list
1676         : bracketed_exp_list
1677           {
1678                 INSTRUCTION *ip = $1->lasti; 
1679                 int count = ip->sub_count;      /* # of SUBSEP-seperated expressions */
1680                 if (count > 1) {
1681                         /* change Op_subscript or Op_sub_array to Op_concat */
1682                         ip->opcode = Op_concat;
1683                         ip->concat_flag = CSUBSEP;
1684                         ip->expr_count = count;
1685                 } else
1686                         ip->opcode = Op_no_op;
1687                 sub_counter++;  /* count # of dimensions */
1688                 $$ = $1;
1689           }
1690         ;
1691
1692 bracketed_exp_list
1693         : '[' expression_list ']'
1694           {
1695                 INSTRUCTION *t = $2;
1696                 if ($2 == NULL) {
1697                         error_ln($3->source_line,
1698                                 _("invalid subscript expression"));
1699                         /* install Null string as subscript. */
1700                         t = list_create(instruction(Op_push_i));
1701                         t->nexti->memory = dupnode(Nnull_string);
1702                         $3->sub_count = 1;                      
1703                 } else
1704                         $3->sub_count = count_expressions(&t, false);
1705                 $$ = list_append(t, $3);
1706           }
1707         ;
1708
1709 subscript
1710         : bracketed_exp_list
1711           {     $$ = $1; }
1712         | subscript bracketed_exp_list
1713           {
1714                 $$ = list_merge($1, $2);
1715           }
1716         ;
1717
1718 subscript_list
1719         : subscript SUBSCRIPT
1720           { $$ = $1; }
1721         ;
1722
1723 simple_variable
1724         : NAME
1725           {
1726                 char *var_name = $1->lextok;
1727
1728                 $1->opcode = Op_push;
1729                 $1->memory = variable($1->source_line, var_name, Node_var_new);
1730                 $$ = list_create($1);
1731           }
1732         | NAME subscript_list
1733           {
1734                 char *arr = $1->lextok;
1735                 $1->memory = variable($1->source_line, arr, Node_var_new);
1736                 $1->opcode = Op_push_array;
1737                 $$ = list_prepend($2, $1);
1738           }
1739         ;
1740
1741 variable
1742         : simple_variable
1743           {
1744                 INSTRUCTION *ip = $1->nexti;
1745                 if (ip->opcode == Op_push
1746                         && ip->memory->type == Node_var
1747                         && ip->memory->var_update
1748                 ) {
1749                         $$ = list_prepend($1, instruction(Op_var_update));
1750                         $$->nexti->update_var = ip->memory->var_update;
1751                 } else
1752                         $$ = $1;
1753           }
1754         | '$' non_post_simp_exp opt_incdec
1755           {
1756                 $$ = list_append($2, $1);
1757                 if ($3 != NULL)
1758                         mk_assignment($2, NULL, $3);
1759           }
1760         ;
1761
1762 opt_incdec
1763         : INCREMENT
1764           {
1765                 $1->opcode = Op_postincrement;
1766           }
1767         | DECREMENT
1768           {
1769                 $1->opcode = Op_postdecrement;
1770           }
1771         | /* empty */   { $$ = NULL; }
1772         ;
1773
1774 l_brace
1775         : '{' opt_nls
1776         ;
1777
1778 r_brace
1779         : '}' opt_nls   { yyerrok; }
1780         ;
1781
1782 r_paren
1783         : ')' { yyerrok; }
1784         ;
1785
1786 opt_semi
1787         : /* empty */
1788         | semi
1789         ;
1790
1791 semi
1792         : ';'   { yyerrok; }
1793         ;
1794
1795 colon
1796         : ':'   { $$ = $1; yyerrok; }
1797         ;
1798
1799 comma
1800         : ',' opt_nls   { yyerrok; }
1801         ;
1802 %%
1803
1804 struct token {
1805         const char *operator;   /* text to match */
1806         OPCODE value;                   /*  type */
1807         int class;                              /* lexical class */
1808         unsigned flags;                 /* # of args. allowed and compatability */
1809 #       define  ARGS    0xFF    /* 0, 1, 2, 3 args allowed (any combination */
1810 #       define  A(n)    (1<<(n))
1811 #       define  VERSION_MASK    0xFF00  /* old awk is zero */
1812 #       define  NOT_OLD         0x0100  /* feature not in old awk */
1813 #       define  NOT_POSIX       0x0200  /* feature not in POSIX */
1814 #       define  GAWKX           0x0400  /* gawk extension */
1815 #       define  BREAK           0x0800  /* break allowed inside */
1816 #       define  CONTINUE        0x1000  /* continue allowed inside */
1817         
1818         NODE *(*ptr)(int);      /* function that implements this keyword */
1819         NODE *(*ptr2)(int);     /* alternate arbitrary-precision function */
1820 };
1821
1822 #if 'a' == 0x81 /* it's EBCDIC */
1823 /* tokcompare --- lexicographically compare token names for sorting */
1824
1825 static int
1826 tokcompare(const void *l, const void *r)
1827 {
1828         struct token *lhs, *rhs;
1829
1830         lhs = (struct token *) l;
1831         rhs = (struct token *) r;
1832
1833         return strcmp(lhs->operator, rhs->operator);
1834 }
1835 #endif
1836
1837 /*
1838  * Tokentab is sorted ASCII ascending order, so it can be binary searched.
1839  * See check_special(), which sorts the table on EBCDIC systems.
1840  * Function pointers come from declarations in awk.h.
1841  */
1842
1843 #ifdef HAVE_MPFR
1844 #define MPF(F) do_mpfr_##F
1845 #else
1846 #define MPF(F) 0
1847 #endif
1848
1849 static const struct token tokentab[] = {
1850 {"BEGIN",       Op_rule,         LEX_BEGIN,     0,              0,      0},
1851 {"BEGINFILE",   Op_rule,         LEX_BEGINFILE, GAWKX,          0,      0},
1852 {"END",         Op_rule,         LEX_END,       0,              0,      0},
1853 {"ENDFILE",             Op_rule,         LEX_ENDFILE,   GAWKX,          0,      0},
1854 #ifdef ARRAYDEBUG
1855 {"adump",       Op_builtin,    LEX_BUILTIN,     GAWKX|A(1)|A(2),        do_adump,       0},
1856 #endif
1857 {"and",         Op_builtin,    LEX_BUILTIN,     GAWKX,          do_and, MPF(and)},
1858 {"asort",       Op_builtin,      LEX_BUILTIN,   GAWKX|A(1)|A(2)|A(3),   do_asort,       0},
1859 {"asorti",      Op_builtin,      LEX_BUILTIN,   GAWKX|A(1)|A(2)|A(3),   do_asorti,      0},
1860 {"atan2",       Op_builtin,      LEX_BUILTIN,   NOT_OLD|A(2),   do_atan2,       MPF(atan2)},
1861 {"bindtextdomain",      Op_builtin,      LEX_BUILTIN,   GAWKX|A(1)|A(2),        do_bindtextdomain,      0},
1862 {"break",       Op_K_break,      LEX_BREAK,     0,              0,      0},
1863 {"case",        Op_K_case,       LEX_CASE,      GAWKX,          0,      0},
1864 {"close",       Op_builtin,      LEX_BUILTIN,   NOT_OLD|A(1)|A(2),      do_close,       0},
1865 {"compl",       Op_builtin,    LEX_BUILTIN,     GAWKX|A(1),     do_compl,       MPF(compl)},
1866 {"continue",    Op_K_continue, LEX_CONTINUE,    0,              0,      0},
1867 {"cos",         Op_builtin,      LEX_BUILTIN,   NOT_OLD|A(1),   do_cos, MPF(cos)},
1868 {"dcgettext",   Op_builtin,      LEX_BUILTIN,   GAWKX|A(1)|A(2)|A(3),   do_dcgettext,   0},
1869 {"dcngettext",  Op_builtin,      LEX_BUILTIN,   GAWKX|A(1)|A(2)|A(3)|A(4)|A(5), do_dcngettext,  0},
1870 {"default",     Op_K_default,    LEX_DEFAULT,   GAWKX,          0,      0},
1871 {"delete",      Op_K_delete,     LEX_DELETE,    NOT_OLD,        0,      0},
1872 {"do",          Op_K_do,         LEX_DO,        NOT_OLD|BREAK|CONTINUE, 0,      0},
1873 {"else",        Op_K_else,       LEX_ELSE,      0,              0,      0},
1874 {"eval",        Op_symbol,       LEX_EVAL,      0,              0,      0},
1875 {"exit",        Op_K_exit,       LEX_EXIT,      0,              0,      0},
1876 {"exp",         Op_builtin,      LEX_BUILTIN,   A(1),           do_exp, MPF(exp)},
1877 #ifdef DYNAMIC
1878 {"extension",   Op_builtin,      LEX_BUILTIN,   GAWKX|A(1)|A(2)|A(3),   do_ext, 0},
1879 #endif
1880 {"fflush",      Op_builtin,      LEX_BUILTIN,   A(0)|A(1), do_fflush,   0},
1881 {"for",         Op_K_for,        LEX_FOR,       BREAK|CONTINUE, 0,      0},
1882 {"func",        Op_func, LEX_FUNCTION,  NOT_POSIX|NOT_OLD,      0,      0},
1883 {"function",Op_func, LEX_FUNCTION,      NOT_OLD,        0,      0},
1884 {"gensub",      Op_sub_builtin,  LEX_BUILTIN,   GAWKX|A(3)|A(4), 0,     0},
1885 {"getline",     Op_K_getline_redir,      LEX_GETLINE,   NOT_OLD,        0,      0},
1886 {"gsub",        Op_sub_builtin,  LEX_BUILTIN,   NOT_OLD|A(2)|A(3), 0,   0},
1887 {"if",          Op_K_if,         LEX_IF,        0,              0,      0},
1888 {"in",          Op_symbol,       LEX_IN,        0,              0,      0},
1889 {"include",     Op_symbol,       LEX_INCLUDE,   GAWKX,  0,      0},
1890 {"index",       Op_builtin,      LEX_BUILTIN,   A(2),           do_index,       0},
1891 {"int",         Op_builtin,      LEX_BUILTIN,   A(1),           do_int, MPF(int)},
1892 {"isarray",     Op_builtin,      LEX_BUILTIN,   GAWKX|A(1),     do_isarray,     0},
1893 {"length",      Op_builtin,      LEX_LENGTH,    A(0)|A(1),      do_length,      0},
1894 {"load",        Op_symbol,       LEX_LOAD,      GAWKX,          0,      0},
1895 {"log",         Op_builtin,      LEX_BUILTIN,   A(1),           do_log, MPF(log)},
1896 {"lshift",      Op_builtin,    LEX_BUILTIN,     GAWKX|A(2),     do_lshift,      MPF(lshift)},
1897 {"match",       Op_builtin,      LEX_BUILTIN,   NOT_OLD|A(2)|A(3), do_match,    0},
1898 {"mktime",      Op_builtin,      LEX_BUILTIN,   GAWKX|A(1),     do_mktime,      0},
1899 {"next",        Op_K_next,       LEX_NEXT,      0,              0,      0},
1900 {"nextfile",    Op_K_nextfile, LEX_NEXTFILE,    0,              0,      0},
1901 {"or",          Op_builtin,    LEX_BUILTIN,     GAWKX,          do_or,  MPF(or)},
1902 {"patsplit",    Op_builtin,    LEX_BUILTIN,     GAWKX|A(2)|A(3)|A(4), do_patsplit,      0},
1903 {"print",       Op_K_print,      LEX_PRINT,     0,              0,      0},
1904 {"printf",      Op_K_printf,     LEX_PRINTF,    0,              0,      0},
1905 {"rand",        Op_builtin,      LEX_BUILTIN,   NOT_OLD|A(0),   do_rand,        MPF(rand)},
1906 {"return",      Op_K_return,     LEX_RETURN,    NOT_OLD,        0,      0},
1907 {"rshift",      Op_builtin,    LEX_BUILTIN,     GAWKX|A(2),     do_rshift,      MPF(rshift)},
1908 {"sin",         Op_builtin,      LEX_BUILTIN,   NOT_OLD|A(1),   do_sin, MPF(sin)},
1909 {"split",       Op_builtin,      LEX_BUILTIN,   A(2)|A(3)|A(4), do_split,       0},
1910 {"sprintf",     Op_builtin,      LEX_BUILTIN,   0,              do_sprintf,     0},
1911 {"sqrt",        Op_builtin,      LEX_BUILTIN,   A(1),           do_sqrt,        MPF(sqrt)},
1912 {"srand",       Op_builtin,      LEX_BUILTIN,   NOT_OLD|A(0)|A(1), do_srand,    MPF(srand)},
1913 #if defined(GAWKDEBUG) || defined(ARRAYDEBUG) /* || ... */
1914 {"stopme",      Op_builtin,     LEX_BUILTIN,    GAWKX|A(0),     stopme,         0},
1915 #endif
1916 {"strftime",    Op_builtin,      LEX_BUILTIN,   GAWKX|A(0)|A(1)|A(2)|A(3), do_strftime, 0},
1917 {"strtonum",    Op_builtin,    LEX_BUILTIN,     GAWKX|A(1),     do_strtonum, MPF(strtonum)},
1918 {"sub",         Op_sub_builtin,  LEX_BUILTIN,   NOT_OLD|A(2)|A(3), 0,   0},
1919 {"substr",      Op_builtin,      LEX_BUILTIN,   A(2)|A(3),      do_substr,      0},
1920 {"switch",      Op_K_switch,     LEX_SWITCH,    GAWKX|BREAK,    0,      0},
1921 {"system",      Op_builtin,      LEX_BUILTIN,   NOT_OLD|A(1),   do_system,      0},
1922 {"systime",     Op_builtin,      LEX_BUILTIN,   GAWKX|A(0),     do_systime,     0},
1923 {"tolower",     Op_builtin,      LEX_BUILTIN,   NOT_OLD|A(1),   do_tolower,     0},
1924 {"toupper",     Op_builtin,      LEX_BUILTIN,   NOT_OLD|A(1),   do_toupper,     0},
1925 {"while",       Op_K_while,      LEX_WHILE,     BREAK|CONTINUE, 0,      0},
1926 {"xor",         Op_builtin,    LEX_BUILTIN,     GAWKX,          do_xor, MPF(xor)},
1927 };
1928
1929 #if MBS_SUPPORT
1930 /* Variable containing the current shift state.  */
1931 static mbstate_t cur_mbstate;
1932 /* Ring buffer containing current characters.  */
1933 #define MAX_CHAR_IN_RING_BUFFER 8
1934 #define RING_BUFFER_SIZE (MAX_CHAR_IN_RING_BUFFER * MB_LEN_MAX)
1935 static char cur_char_ring[RING_BUFFER_SIZE];
1936 /* Index for ring buffers.  */
1937 static int cur_ring_idx;
1938 /* This macro means that last nextc() return a singlebyte character
1939    or 1st byte of a multibyte character.  */
1940 #define nextc_is_1stbyte (cur_char_ring[cur_ring_idx] == 1)
1941 #else /* MBS_SUPPORT */
1942 /* a dummy */
1943 #define nextc_is_1stbyte 1
1944 #endif /* MBS_SUPPORT */
1945
1946 /* getfname --- return name of a builtin function (for pretty printing) */
1947
1948 const char *
1949 getfname(NODE *(*fptr)(int))
1950 {
1951         int i, j;
1952
1953         j = sizeof(tokentab) / sizeof(tokentab[0]);
1954         /* linear search, no other way to do it */
1955         for (i = 0; i < j; i++) 
1956                 if (tokentab[i].ptr == fptr)
1957                         return tokentab[i].operator;
1958
1959         return NULL;
1960 }
1961
1962 /* negate_num --- negate a number in NODE */
1963
1964 void
1965 negate_num(NODE *n)
1966 {
1967 #ifdef HAVE_MPFR
1968         int tval = 0;
1969 #endif
1970
1971         if (! is_mpg_number(n)) {
1972                 n->numbr = -n->numbr;
1973                 return;
1974         }
1975
1976 #ifdef HAVE_MPFR
1977         if (is_mpg_integer(n)) {
1978                 if (! iszero(n)) {
1979                         mpz_neg(n->mpg_i, n->mpg_i);
1980                         return;
1981                 }
1982
1983                 /*
1984                  * 0 --> -0 conversion. Requires turning the MPG integer
1985                  * into an MPFR float.
1986                  */
1987
1988                 mpz_clear(n->mpg_i);    /* release the integer storage */
1989
1990                 /* Convert and fall through. */
1991                 tval = mpfr_set_d(n->mpg_numbr, 0.0, ROUND_MODE);
1992                 IEEE_FMT(n->mpg_numbr, tval);
1993                 n->flags &= ~MPZN;
1994                 n->flags |= MPFN;
1995         }
1996
1997         /* mpfr float case */
1998         tval = mpfr_neg(n->mpg_numbr, n->mpg_numbr, ROUND_MODE);
1999         IEEE_FMT(n->mpg_numbr, tval);
2000 #endif
2001 }
2002
2003 /* print_included_from --- print `Included from ..' file names and locations */
2004
2005 static void
2006 print_included_from()
2007 {
2008         int saveline, line;
2009         SRCFILE *s;
2010
2011         /* suppress current file name, line # from `.. included from ..' msgs */ 
2012         saveline = sourceline;
2013         sourceline = 0;
2014
2015         for (s = sourcefile; s != NULL && s->stype == SRC_INC; ) {
2016                 s = s->next;
2017                 if (s == NULL || s->fd <= INVALID_HANDLE)
2018                         continue;
2019                 line = s->srclines;
2020
2021                 /* if last token is NEWLINE, line number is off by 1. */
2022                 if (s->lasttok == NEWLINE)
2023                         line--;
2024                 msg("%s %s:%d%c",
2025                         s->prev == sourcefile ? "In file included from"
2026                                           : "                 from",
2027                         (s->stype == SRC_INC ||
2028                                  s->stype == SRC_FILE) ? s->src : "cmd. line",
2029                         line,
2030                         s->stype == SRC_INC ? ',' : ':'
2031                 );
2032         }
2033         sourceline = saveline;
2034 }
2035
2036 /* warning_ln --- print a warning message with location */
2037
2038 static void
2039 warning_ln(int line, const char *mesg, ...)
2040 {
2041         va_list args;
2042         int saveline;
2043
2044         saveline = sourceline;
2045         sourceline = line;
2046         print_included_from();
2047         va_start(args, mesg);
2048         err(false, _("warning: "), mesg, args);
2049         va_end(args);
2050         sourceline = saveline;
2051 }
2052
2053 /* lintwarn_ln --- print a lint warning and location */
2054
2055 static void
2056 lintwarn_ln(int line, const char *mesg, ...)
2057 {
2058         va_list args;
2059         int saveline;
2060
2061         saveline = sourceline;
2062         sourceline = line;
2063         print_included_from();
2064         va_start(args, mesg);
2065         if (lintfunc == r_fatal)
2066                 err(true, _("fatal: "), mesg, args);
2067         else
2068                 err(false, _("warning: "), mesg, args);
2069         va_end(args);
2070         sourceline = saveline;
2071         if (lintfunc == r_fatal)
2072                 gawk_exit(EXIT_FATAL);
2073 }
2074
2075 /* error_ln --- print an error message and location */
2076
2077 static void
2078 error_ln(int line, const char *m, ...)
2079 {
2080         va_list args;
2081         int saveline;
2082
2083         saveline = sourceline;
2084         sourceline = line;
2085         print_included_from();
2086         errcount++;
2087         va_start(args, m);
2088         err(false, "error: ", m, args);
2089         va_end(args);
2090         sourceline = saveline;
2091 }
2092
2093 /* yyerror --- print a syntax error message, show where */
2094
2095 static void
2096 yyerror(const char *m, ...)
2097 {
2098         va_list args;
2099         const char *mesg = NULL;
2100         char *bp, *cp;
2101         char *scan;
2102         char *buf;
2103         int count;
2104         static char end_of_file_line[] = "(END OF FILE)";
2105         char save;
2106
2107         print_included_from();
2108
2109         errcount++;
2110         /* Find the current line in the input file */
2111         if (lexptr && lexeme) {
2112                 if (thisline == NULL) {
2113                         cp = lexeme;
2114                         if (*cp == '\n') {
2115                                 cp--;
2116                                 mesg = _("unexpected newline or end of string");
2117                         }
2118                         for (; cp != lexptr_begin && *cp != '\n'; --cp)
2119                                 continue;
2120                         if (*cp == '\n')
2121                                 cp++;
2122                         thisline = cp;
2123                 }
2124                 /* NL isn't guaranteed */
2125                 bp = lexeme;
2126                 while (bp < lexend && *bp && *bp != '\n')
2127                         bp++;
2128         } else {
2129                 thisline = end_of_file_line;
2130                 bp = thisline + strlen(thisline);
2131         }
2132
2133         /*
2134          * Saving and restoring *bp keeps valgrind happy,
2135          * since the guts of glibc uses strlen, even though
2136          * we're passing an explict precision. Sigh.
2137          *
2138          * 8/2003: We may not need this anymore.
2139          */
2140         save = *bp;
2141         *bp = '\0';
2142
2143         msg("%.*s", (int) (bp - thisline), thisline);
2144
2145         *bp = save;
2146         va_start(args, m);
2147         if (mesg == NULL)
2148                 mesg = m;
2149
2150         count = (bp - thisline) + strlen(mesg) + 2 + 1;
2151         emalloc(buf, char *, count, "yyerror");
2152
2153         bp = buf;
2154
2155         if (lexptr != NULL) {
2156                 scan = thisline;
2157                 while (scan < lexeme)
2158                         if (*scan++ == '\t')
2159                                 *bp++ = '\t';
2160                         else
2161                                 *bp++ = ' ';
2162                 *bp++ = '^';
2163                 *bp++ = ' ';
2164         }
2165         strcpy(bp, mesg);
2166         err(false, "", buf, args);
2167         va_end(args);
2168         efree(buf);
2169 }
2170
2171 /* mk_program --- create a single list of instructions */
2172
2173 static INSTRUCTION *
2174 mk_program()
2175 {
2176         INSTRUCTION *cp, *tmp;
2177
2178 #define begin_block         rule_block[BEGIN]
2179 #define end_block           rule_block[END]
2180 #define prog_block          rule_block[Rule]
2181 #define beginfile_block     rule_block[BEGINFILE]
2182 #define endfile_block       rule_block[ENDFILE]
2183
2184         if (end_block == NULL)
2185                 end_block = list_create(ip_end);
2186         else
2187                 (void) list_prepend(end_block, ip_end);
2188
2189         if (! in_main_context()) {
2190                 if (begin_block != NULL && prog_block != NULL)
2191                         cp = list_merge(begin_block, prog_block);
2192                 else
2193                         cp = (begin_block != NULL) ? begin_block : prog_block;
2194
2195                 if (cp != NULL)
2196                         (void) list_merge(cp, end_block);
2197                 else
2198                         cp = end_block;
2199
2200                 (void) list_append(cp, instruction(Op_stop));
2201                 goto out;
2202         }
2203
2204         if (endfile_block == NULL)
2205                 endfile_block = list_create(ip_endfile);
2206         else {
2207                 ip_rec->has_endfile = true;
2208                 (void) list_prepend(endfile_block, ip_endfile);
2209         }
2210
2211         if (beginfile_block == NULL)
2212                 beginfile_block = list_create(ip_beginfile);
2213         else
2214                 (void) list_prepend(beginfile_block, ip_beginfile);
2215
2216         if (prog_block == NULL) {
2217                 if (end_block->nexti == end_block->lasti
2218                                 && beginfile_block->nexti == beginfile_block->lasti 
2219                                 && endfile_block->nexti == endfile_block->lasti
2220                 ) {
2221                         /* no pattern-action and (real) end, beginfile or endfile blocks */
2222                         bcfree(ip_rec);
2223                         bcfree(ip_newfile);
2224                         ip_rec = ip_newfile = NULL;
2225
2226                         list_append(beginfile_block, instruction(Op_after_beginfile));
2227                         (void) list_append(endfile_block, instruction(Op_after_endfile));
2228
2229                         if (begin_block == NULL)     /* no program at all */
2230                                 cp = end_block;
2231                         else
2232                                 cp = list_merge(begin_block, end_block);
2233                         (void) list_append(cp, ip_atexit);
2234                         (void) list_append(cp, instruction(Op_stop));
2235
2236                         /* append beginfile_block and endfile_block for sole use
2237                          * in getline without redirection (Op_K_getline).
2238                          */
2239
2240                         (void) list_merge(cp, beginfile_block);
2241                         (void) list_merge(cp, endfile_block);
2242
2243                         goto out;
2244
2245                 } else {
2246                         /* install a do-nothing prog block */
2247                         prog_block = list_create(instruction(Op_no_op));
2248                 }
2249         }
2250
2251         (void) list_append(endfile_block, instruction(Op_after_endfile));
2252         (void) list_prepend(prog_block, ip_rec);
2253         (void) list_append(prog_block, instruction(Op_jmp));
2254         prog_block->lasti->target_jmp = ip_rec;
2255                 
2256         list_append(beginfile_block, instruction(Op_after_beginfile));
2257
2258         cp = list_merge(beginfile_block, prog_block);
2259         (void) list_prepend(cp, ip_newfile);
2260         (void) list_merge(cp, endfile_block);
2261         (void) list_merge(cp, end_block);
2262         if (begin_block != NULL)
2263                 cp = list_merge(begin_block, cp);
2264
2265         (void) list_append(cp, ip_atexit);
2266         (void) list_append(cp, instruction(Op_stop));
2267
2268 out:
2269         /* delete the Op_list, not needed */
2270         tmp = cp->nexti;
2271         bcfree(cp);
2272         return tmp;
2273
2274 #undef begin_block
2275 #undef end_block
2276 #undef prog_block
2277 #undef beginfile_block
2278 #undef endfile_block 
2279 }
2280
2281 /* parse_program --- read in the program and convert into a list of instructions */
2282
2283 int
2284 parse_program(INSTRUCTION **pcode)
2285 {
2286         int ret;
2287
2288         /* pre-create non-local jump targets
2289          * ip_end (Op_no_op) -- used as jump target for `exit'
2290          * outside an END block.
2291          */
2292         ip_end = instruction(Op_no_op);
2293
2294         if (! in_main_context())
2295                 ip_newfile = ip_rec = ip_atexit = ip_beginfile = ip_endfile = NULL;
2296         else {
2297                 ip_endfile = instruction(Op_no_op);
2298                 ip_beginfile = instruction(Op_no_op);
2299                 ip_rec = instruction(Op_get_record); /* target for `next', also ip_newfile */
2300                 ip_newfile = bcalloc(Op_newfile, 2, 0); /* target for `nextfile' */
2301                 ip_newfile->target_jmp = ip_end;
2302                 ip_newfile->target_endfile = ip_endfile;
2303                 (ip_newfile + 1)->target_get_record = ip_rec;
2304                 ip_rec->target_newfile = ip_newfile;
2305                 ip_atexit = instruction(Op_atexit);     /* target for `exit' in END block */
2306         }
2307
2308         for (sourcefile = srcfiles->next; sourcefile->stype == SRC_EXTLIB;
2309                         sourcefile = sourcefile->next)
2310                 ;
2311
2312         lexeof = false;
2313         lexptr = NULL;
2314         lasttok = 0;
2315         memset(rule_block, 0, sizeof(ruletab) * sizeof(INSTRUCTION *));
2316         errcount = 0;
2317         tok = tokstart != NULL ? tokstart : tokexpand();
2318
2319         ret = yyparse();
2320         *pcode = mk_program();
2321
2322         /* avoid false source indications */
2323         source = NULL;
2324         sourceline = 0;
2325         if (ret == 0)   /* avoid spurious warning if parser aborted with YYABORT */
2326                 check_funcs();
2327
2328         if (args_array == NULL)
2329                 emalloc(args_array, NODE **, (max_args + 2) * sizeof(NODE *), "parse_program");
2330         else
2331                 erealloc(args_array, NODE **, (max_args + 2) * sizeof(NODE *), "parse_program");
2332
2333         return (ret || errcount);
2334 }
2335
2336 /* do_add_srcfile --- add one item to srcfiles */
2337
2338 static SRCFILE *
2339 do_add_srcfile(enum srctype stype, char *src, char *path, SRCFILE *thisfile)
2340 {
2341         SRCFILE *s;
2342
2343         emalloc(s, SRCFILE *, sizeof(SRCFILE), "do_add_srcfile");
2344         memset(s, 0, sizeof(SRCFILE));
2345         s->src = estrdup(src, strlen(src));
2346         s->fullpath = path;
2347         s->stype = stype;
2348         s->fd = INVALID_HANDLE;
2349         s->next = thisfile;
2350         s->prev = thisfile->prev;
2351         thisfile->prev->next = s;
2352         thisfile->prev = s;
2353         return s;
2354 }
2355
2356 /* add_srcfile --- add one item to srcfiles after checking if
2357  *                              a source file exists and not already in list.
2358  */
2359
2360 SRCFILE *
2361 add_srcfile(enum srctype stype, char *src, SRCFILE *thisfile, bool *already_included, int *errcode)
2362 {
2363         SRCFILE *s;
2364         struct stat sbuf;
2365         char *path;
2366         int errno_val = 0;
2367
2368         if (already_included)
2369                 *already_included = false;
2370         if (errcode)
2371                 *errcode = 0;
2372         if (stype == SRC_CMDLINE || stype == SRC_STDIN)
2373                 return do_add_srcfile(stype, src, NULL, thisfile);
2374
2375         path = find_source(src, & sbuf, &errno_val, stype == SRC_EXTLIB);
2376         if (path == NULL) {
2377                 if (errcode) {
2378                         *errcode = errno_val;
2379                         return NULL;
2380                 }
2381                 /* use full messages to ease translation */
2382                 fatal(stype != SRC_EXTLIB
2383                         ? _("can't open source file `%s' for reading (%s)")
2384                         : _("can't open shared library `%s' for reading (%s)"),
2385                                 src,
2386                                 errno_val ? strerror(errno_val) : _("reason unknown"));
2387         }
2388
2389         /* N.B. We do not eliminate duplicate SRC_FILE (-f) programs. */
2390         for (s = srcfiles->next; s != srcfiles; s = s->next) {
2391                 if ((s->stype == SRC_FILE || s->stype == SRC_INC || s->stype == SRC_EXTLIB) && files_are_same(path, s)) {
2392                         if (stype == SRC_INC || stype == SRC_EXTLIB) {
2393                                 /* eliminate duplicates */
2394                                 if ((stype == SRC_INC) && (s->stype == SRC_FILE))
2395                                         fatal(_("can't include `%s' and use it as a program file"), src);
2396
2397                                 if (do_lint) {
2398                                         int line = sourceline;
2399                                         /* Kludge: the line number may be off for `@include file'.
2400                                          * Since, this function is also used for '-f file' in main.c,
2401                                          * sourceline > 1 check ensures that the call is at
2402                                          * parse time.
2403                                          */
2404                                         if (sourceline > 1 && lasttok == NEWLINE)
2405                                                 line--;
2406                                         lintwarn_ln(line,
2407                                                     stype != SRC_EXTLIB
2408                                                       ? _("already included source file `%s'")
2409                                                       : _("already loaded shared library `%s'"),
2410                                                     src);
2411                                 }
2412                                 efree(path);
2413                                 if (already_included)
2414                                         *already_included = true;
2415                                 return NULL;
2416                         } else {
2417                                 /* duplicates are allowed for -f */ 
2418                                 if (s->stype == SRC_INC)
2419                                         fatal(_("can't include `%s' and use it as a program file"), src);
2420                                 /* no need to scan for further matches, since
2421                                  * they must be of homogeneous type */
2422                                 break;
2423                         }
2424                 }
2425         }
2426
2427         s = do_add_srcfile(stype, src, path, thisfile);
2428         s->sbuf = sbuf;
2429         s->mtime = sbuf.st_mtime;
2430         return s;
2431 }
2432
2433 /* include_source --- read program from source included using `@include' */
2434
2435 static int
2436 include_source(INSTRUCTION *file)
2437 {
2438         SRCFILE *s;
2439         char *src = file->lextok;
2440         int errcode;
2441         bool already_included;
2442
2443         if (do_traditional || do_posix) {
2444                 error_ln(file->source_line, _("@include is a gawk extension"));
2445                 return -1;
2446         }
2447
2448         if (strlen(src) == 0) {
2449                 if (do_lint)
2450                         lintwarn_ln(file->source_line, _("empty filename after @include"));
2451                 return 0;
2452         }
2453
2454         s = add_srcfile(SRC_INC, src, sourcefile, &already_included, &errcode);
2455         if (s == NULL) {
2456                 if (already_included)
2457                         return 0;
2458                 error_ln(file->source_line,
2459                         _("can't open source file `%s' for reading (%s)"),
2460                         src, errcode ? strerror(errcode) : _("reason unknown"));
2461                 return -1;
2462         }
2463
2464         /* save scanner state for the current sourcefile */
2465         sourcefile->srclines = sourceline;
2466         sourcefile->lexptr = lexptr;
2467         sourcefile->lexend = lexend;
2468         sourcefile->lexptr_begin = lexptr_begin;        
2469         sourcefile->lexeme = lexeme;
2470         sourcefile->lasttok = lasttok;
2471
2472         /* included file becomes the current source */ 
2473         sourcefile = s;
2474         lexptr = NULL;
2475         sourceline = 0;
2476         source = NULL;
2477         lasttok = 0;
2478         lexeof = false;
2479         eof_warned = false;
2480         return 0;
2481 }
2482
2483 /* load_library --- load a shared library */
2484
2485 static int
2486 load_library(INSTRUCTION *file)
2487 {
2488         SRCFILE *s;
2489         char *src = file->lextok;
2490         int errcode;
2491         bool already_included;
2492
2493         if (do_traditional || do_posix) {
2494                 error_ln(file->source_line, _("@load is a gawk extension"));
2495                 return -1;
2496         }
2497
2498         if (strlen(src) == 0) {
2499                 if (do_lint)
2500                         lintwarn_ln(file->source_line, _("empty filename after @load"));
2501                 return 0;
2502         }
2503
2504         s = add_srcfile(SRC_EXTLIB, src, sourcefile, &already_included, &errcode);
2505         if (s == NULL) {
2506                 if (already_included)
2507                         return 0;
2508                 error_ln(file->source_line,
2509                         _("can't open shared library `%s' for reading (%s)"),
2510                         src, errcode ? strerror(errcode) : _("reason unknown"));
2511                 return -1;
2512         }
2513
2514         load_ext(s->fullpath);
2515         return 0;
2516 }
2517
2518 /* next_sourcefile --- read program from the next source in srcfiles */
2519
2520 static void
2521 next_sourcefile()
2522 {
2523         static int (*closefunc)(int fd) = NULL;
2524
2525         if (closefunc == NULL) {
2526                 char *cp = getenv("AWKREADFUNC");
2527
2528                 /* If necessary, one day, test value for different functions.  */
2529                 if (cp == NULL)
2530                         closefunc = close;
2531                 else
2532                         closefunc = one_line_close;
2533         }
2534
2535         /*
2536          * This won't be true if there's an invalid character in
2537          * the source file or source string (e.g., user typo).
2538          * Previous versions of gawk did not core dump in such a
2539          * case.
2540          *
2541          * assert(lexeof == true);
2542          */
2543
2544         lexeof = false;
2545         eof_warned = false;
2546         sourcefile->srclines = sourceline;      /* total no of lines in current file */
2547         if (sourcefile->fd > INVALID_HANDLE) {
2548                 if (sourcefile->fd != fileno(stdin))  /* safety */
2549                         (*closefunc)(sourcefile->fd);
2550                 sourcefile->fd = INVALID_HANDLE;
2551         }
2552         if (sourcefile->buf != NULL) {
2553                 efree(sourcefile->buf);
2554                 sourcefile->buf = NULL;
2555                 sourcefile->lexptr_begin = NULL;
2556         }
2557
2558         while ((sourcefile = sourcefile->next) != NULL) {
2559                 if (sourcefile == srcfiles)
2560                         return;
2561                 if (sourcefile->stype != SRC_EXTLIB)
2562                         break;
2563         }
2564
2565         if (sourcefile->lexptr_begin != NULL) {
2566                 /* resume reading from already opened file (postponed to process '@include') */
2567                 lexptr = sourcefile->lexptr;
2568                 lexend = sourcefile->lexend;
2569                 lasttok = sourcefile->lasttok;
2570                 lexptr_begin = sourcefile->lexptr_begin;
2571                 lexeme = sourcefile->lexeme;
2572                 sourceline = sourcefile->srclines;
2573                 source = sourcefile->src;
2574         } else {
2575                 lexptr = NULL;
2576                 sourceline = 0;
2577                 source = NULL;
2578                 lasttok = 0;
2579         }
2580 }
2581
2582 /* get_src_buf --- read the next buffer of source program */
2583
2584 static char *
2585 get_src_buf()
2586 {
2587         int n;
2588         char *scan;
2589         bool newfile;
2590         int savelen;
2591         struct stat sbuf;
2592
2593         /*
2594          * No argument prototype on readfunc on purpose,
2595          * avoids problems with some ancient systems where
2596          * the types of arguments to read() aren't up to date.
2597          */
2598         static ssize_t (*readfunc)() = 0;
2599
2600         if (readfunc == NULL) {
2601                 char *cp = getenv("AWKREADFUNC");
2602
2603                 /* If necessary, one day, test value for different functions.  */
2604                 if (cp == NULL)
2605                         /*
2606                          * cast is to remove warnings on systems with
2607                          * different return types for read.
2608                          */
2609                         readfunc = ( ssize_t(*)() ) read;
2610                 else
2611                         readfunc = read_one_line;
2612         }
2613
2614         newfile = false;
2615         if (sourcefile == srcfiles)
2616                 return NULL;
2617
2618         if (sourcefile->stype == SRC_CMDLINE) {
2619                 if (sourcefile->bufsize == 0) {
2620                         sourcefile->bufsize = strlen(sourcefile->src);
2621                         lexptr = lexptr_begin = lexeme = sourcefile->src;
2622                         lexend = lexptr + sourcefile->bufsize;
2623                         sourceline = 1;
2624                         if (sourcefile->bufsize == 0) {
2625                                 /*
2626                                  * Yet Another Special case:
2627                                  *      gawk '' /path/name
2628                                  * Sigh.
2629                                  */
2630                                 static bool warned = false;
2631
2632                                 if (do_lint && ! warned) {
2633                                         warned = true;
2634                                         lintwarn(_("empty program text on command line"));
2635                                 }
2636                                 lexeof = true;
2637                         }
2638                 } else if (sourcefile->buf == NULL  && *(lexptr-1) != '\n') {
2639                         /*
2640                          * The following goop is to ensure that the source
2641                          * ends with a newline and that the entire current
2642                          * line is available for error messages.
2643                          */
2644                         int offset;
2645                         char *buf;
2646
2647                         offset = lexptr - lexeme;
2648                         for (scan = lexeme; scan > lexptr_begin; scan--)
2649                                 if (*scan == '\n') {
2650                                         scan++;
2651                                         break;
2652                                 }
2653                         savelen = lexptr - scan;
2654                         emalloc(buf, char *, savelen + 1, "get_src_buf");
2655                         memcpy(buf, scan, savelen);
2656                         thisline = buf;
2657                         lexptr = buf + savelen;
2658                         *lexptr = '\n';
2659                         lexeme = lexptr - offset;
2660                         lexptr_begin = buf;
2661                         lexend = lexptr + 1;
2662                         sourcefile->buf = buf;
2663                 } else
2664                         lexeof = true;
2665                 return lexptr;
2666         }
2667
2668         if (sourcefile->fd <= INVALID_HANDLE) {
2669                 int fd;
2670                 int l;
2671
2672                 source = sourcefile->src;
2673                 if (source == NULL)
2674                         return NULL;
2675                 fd = srcopen(sourcefile);
2676                 if (fd <= INVALID_HANDLE) {
2677                         char *in;
2678
2679                         /* suppress file name and line no. in error mesg */
2680                         in = source;
2681                         source = NULL;
2682                         error(_("can't open source file `%s' for reading (%s)"),
2683                                 in, strerror(errno));
2684                         errcount++;
2685                         lexeof = true;
2686                         return sourcefile->src;
2687                 }
2688
2689                 sourcefile->fd = fd;
2690                 l = optimal_bufsize(fd, &sbuf);
2691                 /*
2692                  * Make sure that something silly like
2693                  *      AWKBUFSIZE=8 make check
2694                  * works ok.
2695                  */
2696 #define A_DECENT_BUFFER_SIZE    128
2697                 if (l < A_DECENT_BUFFER_SIZE)
2698                         l = A_DECENT_BUFFER_SIZE;
2699 #undef A_DECENT_BUFFER_SIZE
2700                 sourcefile->bufsize = l;
2701                 newfile = true;
2702                 emalloc(sourcefile->buf, char *, sourcefile->bufsize, "get_src_buf");
2703                 lexptr = lexptr_begin = lexeme = sourcefile->buf;
2704                 savelen = 0;
2705                 sourceline = 1;
2706                 thisline = NULL;
2707         } else {
2708                 /*
2709                  * Here, we retain the current source line in the beginning of the buffer.
2710                  */
2711                 int offset;
2712                 for (scan = lexeme; scan > lexptr_begin; scan--)
2713                         if (*scan == '\n') {
2714                                 scan++;
2715                                 break;
2716                         }
2717
2718                 savelen = lexptr - scan;
2719                 offset = lexptr - lexeme;
2720
2721                 if (savelen > 0) {
2722                         /*
2723                          * Need to make sure we have room left for reading new text;
2724                          * grow the buffer (by doubling, an arbitrary choice), if the retained line
2725                          * takes up more than a certain percentage (50%, again an arbitrary figure)
2726                          * of the available space.
2727                          */
2728
2729                         if (savelen > sourcefile->bufsize / 2) { /* long line or token  */
2730                                 sourcefile->bufsize *= 2;
2731                                 erealloc(sourcefile->buf, char *, sourcefile->bufsize, "get_src_buf");
2732                                 scan = sourcefile->buf + (scan - lexptr_begin);
2733                                 lexptr_begin = sourcefile->buf;
2734                         }
2735
2736                         thisline = lexptr_begin;
2737                         memmove(thisline, scan, savelen);
2738                         lexptr = thisline + savelen;
2739                         lexeme = lexptr - offset;
2740                 } else {
2741                         savelen = 0;
2742                         lexptr = lexeme = lexptr_begin;
2743                         thisline = NULL;
2744                 }
2745         }
2746
2747         n = (*readfunc)(sourcefile->fd, lexptr, sourcefile->bufsize - savelen);
2748         if (n == -1) {
2749                 error(_("can't read sourcefile `%s' (%s)"),
2750                                 source, strerror(errno));
2751                 errcount++;
2752                 lexeof = true;
2753         } else {
2754                 lexend = lexptr + n;
2755                 if (n == 0) {
2756                         static bool warned = false;
2757                         if (do_lint && newfile && ! warned){
2758                                 warned = true;
2759                                 sourceline = 0;
2760                                 lintwarn(_("source file `%s' is empty"), source);
2761                         }
2762                         lexeof = true;
2763                 }
2764         }
2765         return sourcefile->buf;
2766 }
2767
2768 /* tokadd --- add a character to the token buffer */
2769
2770 #define tokadd(x) (*tok++ = (x), tok == tokend ? tokexpand() : tok)
2771
2772 /* tokexpand --- grow the token buffer */
2773
2774 static char *
2775 tokexpand()
2776 {
2777         static int toksize;
2778         int tokoffset;
2779                         
2780         if (tokstart != NULL) {
2781                 tokoffset = tok - tokstart;
2782                 toksize *= 2;
2783                 erealloc(tokstart, char *, toksize, "tokexpand");
2784                 tok = tokstart + tokoffset;
2785         } else {
2786                 toksize = 60;
2787                 emalloc(tokstart, char *, toksize, "tokexpand");
2788                 tok = tokstart;
2789         }
2790         tokend = tokstart + toksize;
2791         return tok;
2792 }
2793
2794 /* nextc --- get the next input character */
2795
2796 #if MBS_SUPPORT
2797
2798 static int
2799 nextc(void)
2800 {
2801         if (gawk_mb_cur_max > 1) {
2802 again:
2803                 if (lexeof)
2804                         return END_FILE;
2805                 if (lexptr == NULL || lexptr >= lexend) {
2806                         if (get_src_buf())
2807                                 goto again;
2808                         return END_SRC;
2809                 }
2810
2811                 /* Update the buffer index.  */
2812                 cur_ring_idx = (cur_ring_idx == RING_BUFFER_SIZE - 1)? 0 :
2813                         cur_ring_idx + 1;
2814
2815                 /* Did we already check the current character?  */
2816                 if (cur_char_ring[cur_ring_idx] == 0) {
2817                         /* No, we need to check the next character on the buffer.  */
2818                         int idx, work_ring_idx = cur_ring_idx;
2819                         mbstate_t tmp_state;
2820                         size_t mbclen;
2821         
2822                         for (idx = 0 ; lexptr + idx < lexend ; idx++) {
2823                                 tmp_state = cur_mbstate;
2824                                 mbclen = mbrlen(lexptr, idx + 1, &tmp_state);
2825
2826                                 if (mbclen == 1 || mbclen == (size_t)-1 || mbclen == 0) {
2827                                         /* It is a singlebyte character, non-complete multibyte
2828                                            character or EOF.  We treat it as a singlebyte
2829                                            character.  */
2830                                         cur_char_ring[work_ring_idx] = 1;
2831                                         break;
2832                                 } else if (mbclen == (size_t)-2) {
2833                                         /* It is not a complete multibyte character.  */
2834                                         cur_char_ring[work_ring_idx] = idx + 1;
2835                                 } else {
2836                                         /* mbclen > 1 */
2837                                         cur_char_ring[work_ring_idx] = mbclen;
2838                                         break;
2839                                 }
2840                                 work_ring_idx = (work_ring_idx == RING_BUFFER_SIZE - 1)?
2841                                         0 : work_ring_idx + 1;
2842                         }
2843                         cur_mbstate = tmp_state;
2844
2845                         /* Put a mark on the position on which we write next character.  */
2846                         work_ring_idx = (work_ring_idx == RING_BUFFER_SIZE - 1)?
2847                                 0 : work_ring_idx + 1;
2848                         cur_char_ring[work_ring_idx] = 0;
2849                 }
2850
2851                 return (int) (unsigned char) *lexptr++;
2852         } else {
2853                 do {
2854                         if (lexeof)
2855                                 return END_FILE;
2856                         if (lexptr && lexptr < lexend)
2857                                         return ((int) (unsigned char) *lexptr++);
2858                 } while (get_src_buf());
2859                 return END_SRC;
2860         }
2861 }
2862
2863 #else /* MBS_SUPPORT */
2864
2865 int
2866 nextc()
2867 {
2868         do {
2869                 if (lexeof)
2870                         return END_FILE;
2871                 if (lexptr && lexptr < lexend)
2872                         return ((int) (unsigned char) *lexptr++);
2873         } while (get_src_buf());
2874         return END_SRC;
2875 }
2876
2877 #endif /* MBS_SUPPORT */
2878
2879 /* pushback --- push a character back on the input */
2880
2881 static inline void
2882 pushback(void)
2883 {
2884 #if MBS_SUPPORT
2885         if (gawk_mb_cur_max > 1)
2886                 cur_ring_idx = (cur_ring_idx == 0)? RING_BUFFER_SIZE - 1 :
2887                         cur_ring_idx - 1;
2888 #endif
2889         (! lexeof && lexptr && lexptr > lexptr_begin ? lexptr-- : lexptr);
2890 }
2891
2892
2893 /* allow_newline --- allow newline after &&, ||, ? and : */
2894
2895 static void
2896 allow_newline(void)
2897 {
2898         int c;
2899
2900         for (;;) {
2901                 c = nextc();
2902                 if (c == END_FILE) {
2903                         pushback();
2904                         break;
2905                 }
2906                 if (c == '#') {
2907                         while ((c = nextc()) != '\n' && c != END_FILE)
2908                                 continue;
2909                         if (c == END_FILE) {
2910                                 pushback();
2911                                 break;
2912                         }
2913                 }
2914                 if (c == '\n')
2915                         sourceline++;
2916                 if (! isspace(c)) {
2917                         pushback();
2918                         break;
2919                 }
2920         }
2921 }
2922
2923 /* newline_eof --- return newline or EOF as needed and adjust variables */
2924
2925 /*
2926  * This routine used to be a macro, however GCC 4.6.2 warned about
2927  * the result of a computation not being used.  Converting to a function
2928  * removes the warnings.
2929  */
2930
2931 static int newline_eof()
2932 {
2933         /* NB: a newline at end does not start a source line. */
2934         if (lasttok != NEWLINE) {
2935                 pushback();
2936                 if (do_lint && ! eof_warned) {
2937                         lintwarn(_("source file does not end in newline"));
2938                         eof_warned = true;
2939                 }
2940                 sourceline++;
2941                 return NEWLINE;
2942         }
2943
2944         sourceline--;
2945         eof_warned = false;
2946         return LEX_EOF;
2947 }
2948
2949 /* yylex --- Read the input and turn it into tokens. */
2950
2951 static int
2952 yylex(void)
2953 {
2954         int c;
2955         bool seen_e = false;            /* These are for numbers */
2956         bool seen_point = false;
2957         bool esc_seen;          /* for literal strings */
2958         int mid;
2959         int base;
2960         static bool did_newline = false;
2961         char *tokkey;
2962         bool inhex = false;
2963         bool intlstr = false;
2964         AWKNUM d;
2965
2966 #define GET_INSTRUCTION(op) bcalloc(op, 1, sourceline)
2967
2968 #define NEWLINE_EOF newline_eof()
2969
2970         yylval = (INSTRUCTION *) NULL;
2971         if (lasttok == SUBSCRIPT) {
2972                 lasttok = 0;
2973                 return SUBSCRIPT;
2974         }
2975  
2976         if (lasttok == LEX_EOF)         /* error earlier in current source, must give up !! */
2977                 return 0;
2978
2979         c = nextc();
2980         if (c == END_SRC)
2981                 return 0;
2982         if (c == END_FILE)
2983                 return lasttok = NEWLINE_EOF;
2984         pushback();
2985
2986 #if defined __EMX__
2987         /*
2988          * added for OS/2's extproc feature of cmd.exe
2989          * (like #! in BSD sh)
2990          */
2991         if (strncasecmp(lexptr, "extproc ", 8) == 0) {
2992                 while (*lexptr && *lexptr != '\n')
2993                         lexptr++;
2994         }
2995 #endif
2996
2997         lexeme = lexptr;
2998         thisline = NULL;
2999         if (want_regexp) {
3000                 int in_brack = 0;       /* count brackets, [[:alnum:]] allowed */
3001                 /*
3002                  * Counting brackets is non-trivial. [[] is ok,
3003                  * and so is [\]], with a point being that /[/]/ as a regexp
3004                  * constant has to work.
3005                  *
3006                  * Do not count [ or ] if either one is preceded by a \.
3007                  * A `[' should be counted if
3008                  *  a) it is the first one so far (in_brack == 0)
3009                  *  b) it is the `[' in `[:'
3010                  * A ']' should be counted if not preceded by a \, since
3011                  * it is either closing `:]' or just a plain list.
3012                  * According to POSIX, []] is how you put a ] into a set.
3013                  * Try to handle that too.
3014                  *
3015                  * The code for \ handles \[ and \].
3016                  */
3017
3018                 want_regexp = false;
3019                 tok = tokstart;
3020                 for (;;) {
3021                         c = nextc();
3022
3023                         if (gawk_mb_cur_max == 1 || nextc_is_1stbyte) switch (c) {
3024                         case '[':
3025                                 /* one day check for `.' and `=' too */
3026                                 if (nextc() == ':' || in_brack == 0)
3027                                         in_brack++;
3028                                 pushback();
3029                                 break;
3030                         case ']':
3031                                 if (tokstart[0] == '['
3032                                     && (tok == tokstart + 1
3033                                         || (tok == tokstart + 2
3034                                             && tokstart[1] == '^')))
3035                                         /* do nothing */;
3036                                 else
3037                                         in_brack--;
3038                                 break;
3039                         case '\\':
3040                                 if ((c = nextc()) == END_FILE) {
3041                                         pushback();
3042                                         yyerror(_("unterminated regexp ends with `\\' at end of file"));
3043                                         goto end_regexp; /* kludge */
3044                                 } else if (c == '\n') {
3045                                         sourceline++;
3046                                         continue;
3047                                 } else {
3048                                         tokadd('\\');
3049                                         tokadd(c);
3050                                         continue;
3051                                 }
3052                                 break;
3053                         case '/':       /* end of the regexp */
3054                                 if (in_brack > 0)
3055                                         break;
3056 end_regexp:
3057                                 yylval = GET_INSTRUCTION(Op_token);
3058                                 yylval->lextok = estrdup(tokstart, tok - tokstart);
3059                                 if (do_lint) {
3060                                         int peek = nextc();
3061
3062                                         pushback();
3063                                         if (peek == 'i' || peek == 's') {
3064                                                 if (source)
3065                                                         lintwarn(
3066                                                 _("%s: %d: tawk regex modifier `/.../%c' doesn't work in gawk"),
3067                                                                 source, sourceline, peek);
3068                                                 else
3069                                                         lintwarn(
3070                                                 _("tawk regex modifier `/.../%c' doesn't work in gawk"),
3071                                                                 peek);
3072                                         }
3073                                 }
3074                                 return lasttok = REGEXP;
3075                         case '\n':
3076                                 pushback();
3077                                 yyerror(_("unterminated regexp"));
3078                                 goto end_regexp;        /* kludge */
3079                         case END_FILE:
3080                                 pushback();
3081                                 yyerror(_("unterminated regexp at end of file"));
3082                                 goto end_regexp;        /* kludge */
3083                         }
3084                         tokadd(c);
3085                 }
3086         }
3087 retry:
3088
3089         /* skipping \r is a hack, but windows is just too pervasive. sigh. */
3090         while ((c = nextc()) == ' ' || c == '\t' || c == '\r')
3091                 continue;
3092
3093         lexeme = lexptr ? lexptr - 1 : lexptr;
3094         thisline = NULL;
3095         tok = tokstart;
3096
3097 #if MBS_SUPPORT
3098         if (gawk_mb_cur_max == 1 || nextc_is_1stbyte)
3099 #endif
3100         switch (c) {
3101         case END_SRC:
3102                 return 0;
3103
3104         case END_FILE:
3105                 return lasttok = NEWLINE_EOF;
3106
3107         case '\n':
3108                 sourceline++;
3109                 return lasttok = NEWLINE;
3110
3111         case '#':               /* it's a comment */
3112                 while ((c = nextc()) != '\n') {
3113                         if (c == END_FILE)
3114                                 return lasttok = NEWLINE_EOF;
3115                 }
3116                 sourceline++;
3117                 return lasttok = NEWLINE;
3118
3119         case '@':
3120                 return lasttok = '@';
3121
3122         case '\\':
3123 #ifdef RELAXED_CONTINUATION
3124                 /*
3125                  * This code puports to allow comments and/or whitespace
3126                  * after the `\' at the end of a line used for continuation.
3127                  * Use it at your own risk. We think it's a bad idea, which
3128                  * is why it's not on by default.
3129                  */
3130                 if (! do_traditional) {
3131                         /* strip trailing white-space and/or comment */
3132                         while ((c = nextc()) == ' ' || c == '\t' || c == '\r')
3133                                 continue;
3134                         if (c == '#') {
3135                                 static bool warned = false;
3136
3137                                 if (do_lint && ! warned) {
3138                                         warned = true;
3139                                         lintwarn(
3140                 _("use of `\\ #...' line continuation is not portable"));
3141                                 }
3142                                 while ((c = nextc()) != '\n')
3143                                         if (c == END_FILE)
3144                                                 break;
3145                         }
3146                         pushback();
3147                 }
3148 #endif /* RELAXED_CONTINUATION */
3149                 c = nextc();
3150                 if (c == '\r')  /* allow MS-DOS files. bleah */
3151                         c = nextc();
3152                 if (c == '\n') {
3153                         sourceline++;
3154                         goto retry;
3155                 } else {
3156                         yyerror(_("backslash not last character on line"));
3157                         return lasttok = LEX_EOF;
3158                 }
3159                 break;
3160
3161         case ':':
3162         case '?':
3163                 yylval = GET_INSTRUCTION(Op_cond_exp);
3164                 if (! do_posix)
3165                         allow_newline();
3166                 return lasttok = c;
3167
3168                 /*
3169                  * in_parens is undefined unless we are parsing a print
3170                  * statement (in_print), but why bother with a check?
3171                  */
3172         case ')':
3173                 in_parens--;
3174                 return lasttok = c;
3175
3176         case '(':       
3177                 in_parens++;
3178                 return lasttok = c;
3179         case '$':
3180                 yylval = GET_INSTRUCTION(Op_field_spec);
3181                 return lasttok = c;
3182         case '{':
3183                 if (++in_braces == 1)
3184                         firstline = sourceline;
3185         case ';':
3186         case ',':
3187         case '[':
3188                         return lasttok = c;
3189         case ']':
3190                 c = nextc();
3191                 pushback();
3192                 if (c == '[') {
3193                         yylval = GET_INSTRUCTION(Op_sub_array);
3194                         lasttok = ']';
3195                 } else {
3196                         yylval = GET_INSTRUCTION(Op_subscript);
3197                         lasttok = SUBSCRIPT;    /* end of subscripts */
3198                 }
3199                 return ']';
3200
3201         case '*':
3202                 if ((c = nextc()) == '=') {
3203                         yylval = GET_INSTRUCTION(Op_assign_times);
3204                         return lasttok = ASSIGNOP;
3205                 } else if (do_posix) {
3206                         pushback();
3207                         yylval = GET_INSTRUCTION(Op_times);
3208                         return lasttok = '*';
3209                 } else if (c == '*') {
3210                         /* make ** and **= aliases for ^ and ^= */
3211                         static bool did_warn_op = false, did_warn_assgn = false;
3212
3213                         if (nextc() == '=') {
3214                                 if (! did_warn_assgn) {
3215                                         did_warn_assgn = true;
3216                                         if (do_lint)
3217                                                 lintwarn(_("POSIX does not allow operator `**='"));
3218                                         if (do_lint_old)
3219                                                 warning(_("old awk does not support operator `**='"));
3220                                 }
3221                                 yylval = GET_INSTRUCTION(Op_assign_exp);
3222                                 return ASSIGNOP;
3223                         } else {
3224                                 pushback();
3225                                 if (! did_warn_op) {
3226                                         did_warn_op = true;
3227                                         if (do_lint)
3228                                                 lintwarn(_("POSIX does not allow operator `**'"));
3229                                         if (do_lint_old)
3230                                                 warning(_("old awk does not support operator `**'"));
3231                                 }
3232                                 yylval = GET_INSTRUCTION(Op_exp);
3233                                 return lasttok = '^';
3234                         }
3235                 }
3236                 pushback();
3237                 yylval = GET_INSTRUCTION(Op_times);
3238                 return lasttok = '*';
3239
3240         case '/':
3241                 if (nextc() == '=') {
3242                         pushback();
3243                         return lasttok = SLASH_BEFORE_EQUAL;
3244                 }
3245                 pushback();
3246                 yylval = GET_INSTRUCTION(Op_quotient);
3247                 return lasttok = '/';
3248
3249         case '%':
3250                 if (nextc() == '=') {
3251                         yylval = GET_INSTRUCTION(Op_assign_mod);
3252                         return lasttok = ASSIGNOP;
3253                 }
3254                 pushback();
3255                 yylval = GET_INSTRUCTION(Op_mod);
3256                 return lasttok = '%';
3257
3258         case '^':
3259         {
3260                 static bool did_warn_op = false, did_warn_assgn = false;
3261
3262                 if (nextc() == '=') {
3263                         if (do_lint_old && ! did_warn_assgn) {
3264                                 did_warn_assgn = true;
3265                                 warning(_("operator `^=' is not supported in old awk"));
3266                         }
3267                         yylval = GET_INSTRUCTION(Op_assign_exp);
3268                         return lasttok = ASSIGNOP;
3269                 }
3270                 pushback();
3271                 if (do_lint_old && ! did_warn_op) {
3272                         did_warn_op = true;
3273                         warning(_("operator `^' is not supported in old awk"));
3274                 }
3275                 yylval = GET_INSTRUCTION(Op_exp);       
3276                 return lasttok = '^';
3277         }
3278
3279         case '+':
3280                 if ((c = nextc()) == '=') {
3281                         yylval = GET_INSTRUCTION(Op_assign_plus);
3282                         return lasttok = ASSIGNOP;
3283                 }
3284                 if (c == '+') {
3285                         yylval = GET_INSTRUCTION(Op_symbol);
3286                         return lasttok = INCREMENT;
3287                 }
3288                 pushback();
3289                 yylval = GET_INSTRUCTION(Op_plus);
3290                 return lasttok = '+';
3291
3292         case '!':
3293                 if ((c = nextc()) == '=') {
3294                         yylval = GET_INSTRUCTION(Op_notequal);
3295                         return lasttok = RELOP;
3296                 }
3297                 if (c == '~') {
3298                         yylval = GET_INSTRUCTION(Op_nomatch);
3299                         return lasttok = MATCHOP;
3300                 }
3301                 pushback();
3302                 yylval = GET_INSTRUCTION(Op_symbol);
3303                 return lasttok = '!';
3304
3305         case '<':
3306                 if (nextc() == '=') {
3307                         yylval = GET_INSTRUCTION(Op_leq);
3308                         return lasttok = RELOP;
3309                 }
3310                 yylval = GET_INSTRUCTION(Op_less);
3311                 pushback();
3312                 return lasttok = '<';
3313
3314         case '=':
3315                 if (nextc() == '=') {
3316                         yylval = GET_INSTRUCTION(Op_equal);
3317                         return lasttok = RELOP;
3318                 }
3319                 yylval = GET_INSTRUCTION(Op_assign);
3320                 pushback();
3321                 return lasttok = ASSIGN;
3322
3323         case '>':
3324                 if ((c = nextc()) == '=') {
3325                         yylval = GET_INSTRUCTION(Op_geq);
3326                         return lasttok = RELOP;
3327                 } else if (c == '>') {
3328                         yylval = GET_INSTRUCTION(Op_symbol);
3329                         yylval->redir_type = redirect_append;
3330                         return lasttok = IO_OUT;
3331                 }
3332                 pushback();
3333                 if (in_print && in_parens == 0) {
3334                         yylval = GET_INSTRUCTION(Op_symbol);
3335                         yylval->redir_type = redirect_output;
3336                         return lasttok = IO_OUT;
3337                 }
3338                 yylval = GET_INSTRUCTION(Op_greater);
3339                 return lasttok = '>';
3340
3341         case '~':
3342                 yylval = GET_INSTRUCTION(Op_match);
3343                 return lasttok = MATCHOP;
3344
3345         case '}':
3346                 /*
3347                  * Added did newline stuff.  Easier than
3348                  * hacking the grammar.
3349                  */
3350                 if (did_newline) {
3351                         did_newline = false;
3352                         if (--in_braces == 0)
3353                                 lastline = sourceline;
3354                         return lasttok = c;
3355                 }
3356                 did_newline++;
3357                 --lexptr;       /* pick up } next time */
3358                 return lasttok = NEWLINE;
3359
3360         case '"':
3361         string:
3362                 esc_seen = false;
3363                 while ((c = nextc()) != '"') {
3364                         if (c == '\n') {
3365                                 pushback();
3366                                 yyerror(_("unterminated string"));
3367                                 return lasttok = LEX_EOF;
3368                         }
3369                         if ((gawk_mb_cur_max == 1 || nextc_is_1stbyte) &&
3370                             c == '\\') {
3371                                 c = nextc();
3372                                 if (c == '\n') {
3373                                         sourceline++;
3374                                         continue;
3375                                 }
3376                                 esc_seen = true;
3377                                 if (! want_source || c != '"')
3378                                         tokadd('\\');
3379                         }
3380                         if (c == END_FILE) {
3381                                 pushback();
3382                                 yyerror(_("unterminated string"));
3383                                 return lasttok = LEX_EOF;
3384                         }
3385                         tokadd(c);
3386                 }
3387                 yylval = GET_INSTRUCTION(Op_token);
3388                 if (want_source) {
3389                         yylval->lextok = estrdup(tokstart, tok - tokstart);
3390                         return lasttok = FILENAME;
3391                 }
3392                 
3393                 yylval->opcode = Op_push_i;
3394                 yylval->memory = make_str_node(tokstart,
3395                                         tok - tokstart, esc_seen ? SCAN : 0);
3396                 if (intlstr) {
3397                         yylval->memory->flags |= INTLSTR;
3398                         intlstr = false;
3399                         if (do_intl)
3400                                 dumpintlstr(yylval->memory->stptr, yylval->memory->stlen);
3401                 }
3402                 return lasttok = YSTRING;
3403
3404         case '-':
3405                 if ((c = nextc()) == '=') {
3406                         yylval = GET_INSTRUCTION(Op_assign_minus);
3407                         return lasttok = ASSIGNOP;
3408                 }
3409                 if (c == '-') {
3410                         yylval = GET_INSTRUCTION(Op_symbol);
3411                         return lasttok = DECREMENT;
3412                 }
3413                 pushback();
3414                 yylval = GET_INSTRUCTION(Op_minus);
3415                 return lasttok = '-';
3416
3417         case '.':
3418                 c = nextc();
3419                 pushback();
3420                 if (! isdigit(c))
3421                         return lasttok = '.';
3422                 else
3423                         c = '.';
3424                 /* FALL THROUGH */
3425         case '0':
3426         case '1':
3427         case '2':
3428         case '3':
3429         case '4':
3430         case '5':
3431         case '6':
3432         case '7':
3433         case '8':
3434         case '9':
3435                 /* It's a number */
3436                 for (;;) {
3437                         bool gotnumber = false;
3438
3439                         tokadd(c);
3440                         switch (c) {
3441                         case 'x':
3442                         case 'X':
3443                                 if (do_traditional)
3444                                         goto done;
3445                                 if (tok == tokstart + 2) {
3446                                         int peek = nextc();
3447
3448                                         if (isxdigit(peek)) {
3449                                                 inhex = true;
3450                                                 pushback();     /* following digit */
3451                                         } else {
3452                                                 pushback();     /* x or X */
3453                                                 goto done;
3454                                         }
3455                                 }
3456                                 break;
3457                         case '.':
3458                                 /* period ends exponent part of floating point number */
3459                                 if (seen_point || seen_e) {
3460                                         gotnumber = true;
3461                                         break;
3462                                 }
3463                                 seen_point = true;
3464                                 break;
3465                         case 'e':
3466                         case 'E':
3467                                 if (inhex)
3468                                         break;
3469                                 if (seen_e) {
3470                                         gotnumber = true;
3471                                         break;
3472                                 }
3473                                 seen_e = true;
3474                                 if ((c = nextc()) == '-' || c == '+') {
3475                                         int c2 = nextc();
3476
3477                                         if (isdigit(c2)) {
3478                                                 tokadd(c);
3479                                                 tokadd(c2);
3480                                         } else {
3481                                                 pushback();     /* non-digit after + or - */
3482                                                 pushback();     /* + or - */
3483                                                 pushback();     /* e or E */
3484                                         }
3485                                 } else if (! isdigit(c)) {
3486                                         pushback();     /* character after e or E */
3487                                         pushback();     /* e or E */
3488                                 } else {
3489                                         pushback();     /* digit */
3490                                 }
3491                                 break;
3492                         case 'a':
3493                         case 'A':
3494                         case 'b':
3495                         case 'B':
3496                         case 'c':
3497                         case 'C':
3498                         case 'D':
3499                         case 'd':
3500                         case 'f':
3501                         case 'F':
3502                                 if (do_traditional || ! inhex)
3503                                         goto done;
3504                                 /* fall through */
3505                         case '0':
3506                         case '1':
3507                         case '2':
3508                         case '3':
3509                         case '4':
3510                         case '5':
3511                         case '6':
3512                         case '7':
3513                         case '8':
3514                         case '9':
3515                                 break;
3516                         default:
3517                         done:
3518                                 gotnumber = true;
3519                         }
3520                         if (gotnumber)
3521                                 break;
3522                         c = nextc();
3523                 }
3524                 pushback();
3525
3526                 tokadd('\0');
3527                 yylval = GET_INSTRUCTION(Op_push_i);
3528
3529                 base = 10;
3530                 if (! do_traditional) {
3531                         base = get_numbase(tokstart, false);
3532                         if (do_lint) {
3533                                 if (base == 8)
3534                                         lintwarn("numeric constant `%.*s' treated as octal",
3535                                                 (int) strlen(tokstart)-1, tokstart);
3536                                 else if (base == 16)
3537                                         lintwarn("numeric constant `%.*s' treated as hexadecimal",
3538                                                 (int) strlen(tokstart)-1, tokstart);
3539                         }
3540                 }
3541
3542 #ifdef HAVE_MPFR
3543                 if (do_mpfr) {
3544                         NODE *r;
3545
3546                         if (! seen_point && ! seen_e) {
3547                                 r = mpg_integer();
3548                                 mpg_strtoui(r->mpg_i, tokstart, strlen(tokstart), NULL, base);
3549                                 errno = 0;
3550                         } else {
3551                                 int tval;
3552                                 r = mpg_float();
3553                                 tval = mpfr_strtofr(r->mpg_numbr, tokstart, NULL, base, ROUND_MODE);
3554                                 errno = 0;
3555                                 IEEE_FMT(r->mpg_numbr, tval);
3556                         }
3557                         yylval->memory = r;
3558                         return lasttok = YNUMBER;
3559                 }
3560 #endif
3561                 if (base != 10)
3562                         d = nondec2awknum(tokstart, strlen(tokstart));
3563                 else
3564                         d = atof(tokstart);
3565                 yylval->memory = make_number(d);
3566                 if (d <= INT32_MAX && d >= INT32_MIN && d == (int32_t) d)
3567                         yylval->memory->flags |= NUMINT;
3568                 return lasttok = YNUMBER;
3569
3570         case '&':
3571                 if ((c = nextc()) == '&') {
3572                         yylval = GET_INSTRUCTION(Op_and);
3573                         allow_newline();
3574                         return lasttok = LEX_AND;
3575                 }
3576                 pushback();
3577                 yylval = GET_INSTRUCTION(Op_symbol);
3578                 return lasttok = '&';
3579
3580         case '|':
3581                 if ((c = nextc()) == '|') {
3582                         yylval = GET_INSTRUCTION(Op_or);
3583                         allow_newline();
3584                         return lasttok = LEX_OR;
3585                 } else if (! do_traditional && c == '&') {
3586                         yylval = GET_INSTRUCTION(Op_symbol);
3587                         yylval->redir_type = redirect_twoway;
3588                         return lasttok = (in_print && in_parens == 0 ? IO_OUT : IO_IN);
3589                 }
3590                 pushback();
3591                 if (in_print && in_parens == 0) {
3592                         yylval = GET_INSTRUCTION(Op_symbol);
3593                         yylval->redir_type = redirect_pipe;
3594                         return lasttok = IO_OUT;
3595                 } else {
3596                         yylval = GET_INSTRUCTION(Op_symbol);
3597                         yylval->redir_type = redirect_pipein;
3598                         return lasttok = IO_IN;
3599                 }
3600         }
3601
3602         if (c != '_' && ! isalpha(c)) {
3603                 yyerror(_("invalid char '%c' in expression"), c);
3604                 return lasttok = LEX_EOF;
3605         }
3606
3607         /*
3608          * Lots of fog here.  Consider:
3609          *
3610          * print "xyzzy"$_"foo"
3611          *
3612          * Without the check for ` lasttok != '$' ', this is parsed as
3613          *
3614          * print "xxyzz" $(_"foo")
3615          *
3616          * With the check, it is "correctly" parsed as three
3617          * string concatenations.  Sigh.  This seems to be
3618          * "more correct", but this is definitely one of those
3619          * occasions where the interactions are funny.
3620          */
3621         if (! do_traditional && c == '_' && lasttok != '$') {
3622                 if ((c = nextc()) == '"') {
3623                         intlstr = true;
3624                         goto string;
3625                 }
3626                 pushback();
3627                 c = '_';
3628         }
3629
3630         /* it's some type of name-type-thing.  Find its length. */
3631         tok = tokstart;
3632         while (c != END_FILE && is_identchar(c)) {
3633                 tokadd(c);
3634                 c = nextc();
3635         }
3636         tokadd('\0');
3637         pushback();
3638
3639         /* See if it is a special token. */
3640         if ((mid = check_special(tokstart)) >= 0) {
3641                 static int warntab[sizeof(tokentab) / sizeof(tokentab[0])];
3642                 int class = tokentab[mid].class;
3643
3644                 if ((class == LEX_INCLUDE || class == LEX_LOAD || class == LEX_EVAL)
3645                                 && lasttok != '@')
3646                         goto out;
3647
3648                 if (do_lint) {
3649                         if ((tokentab[mid].flags & GAWKX) != 0 && (warntab[mid] & GAWKX) == 0) {
3650                                 lintwarn(_("`%s' is a gawk extension"),
3651                                         tokentab[mid].operator);
3652                                 warntab[mid] |= GAWKX;
3653                         }
3654                         if ((tokentab[mid].flags & NOT_POSIX) != 0 && (warntab[mid] & NOT_POSIX) == 0) {
3655                                 lintwarn(_("POSIX does not allow `%s'"),
3656                                         tokentab[mid].operator);
3657                                 warntab[mid] |= NOT_POSIX;
3658                         }
3659                 }
3660                 if (do_lint_old && (tokentab[mid].flags & NOT_OLD) != 0
3661                                  && (warntab[mid] & NOT_OLD) == 0
3662                 ) {
3663                         warning(_("`%s' is not supported in old awk"),
3664                                         tokentab[mid].operator);
3665                         warntab[mid] |= NOT_OLD;
3666                 }
3667
3668                 if ((tokentab[mid].flags & BREAK) != 0)
3669                         break_allowed++;
3670                 if ((tokentab[mid].flags & CONTINUE) != 0)
3671                         continue_allowed++;
3672
3673                 switch (class) {
3674                 case LEX_INCLUDE:
3675                 case LEX_LOAD:
3676                         want_source = true;
3677                         break;
3678                 case LEX_EVAL:
3679                         if (in_main_context())
3680                                 goto out;
3681                         emalloc(tokkey, char *, tok - tokstart + 1, "yylex");
3682                         tokkey[0] = '@';
3683                         memcpy(tokkey + 1, tokstart, tok - tokstart);
3684                         yylval = GET_INSTRUCTION(Op_token);
3685                         yylval->lextok = tokkey;
3686                         break;
3687
3688                 case LEX_FUNCTION:
3689                 case LEX_BEGIN:
3690                 case LEX_END:
3691                 case LEX_BEGINFILE:
3692                 case LEX_ENDFILE:               
3693                         yylval = bcalloc(tokentab[mid].value, 3, sourceline);
3694                         break;
3695
3696                 case LEX_FOR:
3697                 case LEX_WHILE:
3698                 case LEX_DO:
3699                 case LEX_SWITCH:
3700                         if (! do_pretty_print)
3701                                 return lasttok = class;
3702                         /* fall through */
3703                 case LEX_CASE:
3704                         yylval = bcalloc(tokentab[mid].value, 2, sourceline);
3705                         break;
3706
3707                 /*
3708                  * These must be checked here, due to the LALR nature of the parser,
3709                  * the rules for continue and break may not be reduced until after
3710                  * a token that increments the xxx_allowed varibles is seen. Bleah.
3711                  */
3712                 case LEX_CONTINUE:
3713                         if (! continue_allowed) {
3714                                 error_ln(sourceline,
3715                                         _("`continue' is not allowed outside a loop"));
3716                                 errcount++;
3717                         }
3718                         goto make_instruction;
3719
3720                 case LEX_BREAK:
3721                         if (! break_allowed) {
3722                                 error_ln(sourceline,
3723                                         _("`break' is not allowed outside a loop or switch"));
3724                                 errcount++;
3725                         }
3726                         goto make_instruction;
3727
3728                 default:
3729 make_instruction:
3730                         yylval = GET_INSTRUCTION(tokentab[mid].value);
3731                         if (class == LEX_BUILTIN || class == LEX_LENGTH)
3732                                 yylval->builtin_idx = mid;
3733                         break;
3734                 }
3735                 return lasttok = class;
3736         }
3737 out:
3738         tokkey = estrdup(tokstart, tok - tokstart);
3739         if (*lexptr == '(') {
3740                 yylval = bcalloc(Op_token, 2, sourceline);
3741                 yylval->lextok = tokkey;        
3742                 return lasttok = FUNC_CALL;
3743         } else {
3744                 static bool goto_warned = false;
3745
3746                 yylval = GET_INSTRUCTION(Op_token);
3747                 yylval->lextok = tokkey;
3748
3749 #define SMART_ALECK     1
3750                 if (SMART_ALECK && do_lint
3751                     && ! goto_warned && strcasecmp(tokkey, "goto") == 0) {
3752                         goto_warned = true;
3753                         lintwarn(_("`goto' considered harmful!\n"));
3754                 }
3755                 return lasttok = NAME;
3756         }
3757
3758 #undef GET_INSTRUCTION
3759 #undef NEWLINE_EOF
3760 }
3761
3762 /* snode --- instructions for builtin functions. Checks for arg. count
3763              and supplies defaults where possible. */
3764
3765 static INSTRUCTION *
3766 snode(INSTRUCTION *subn, INSTRUCTION *r)
3767 {
3768         INSTRUCTION *arg;
3769         INSTRUCTION *ip;
3770         NODE *n;
3771         int nexp = 0;
3772         int args_allowed;
3773         int idx = r->builtin_idx;
3774
3775         if (subn != NULL) {
3776                 INSTRUCTION *tp;
3777                 for (tp = subn->nexti; tp; tp = tp->nexti) {
3778                         tp = tp->lasti;
3779                         nexp++;
3780                 }
3781                 assert(nexp > 0);
3782         }               
3783
3784         /* check against how many args. are allowed for this builtin */
3785         args_allowed = tokentab[idx].flags & ARGS;
3786         if (args_allowed && (args_allowed & A(nexp)) == 0) {
3787                 yyerror(_("%d is invalid as number of arguments for %s"),
3788                                 nexp, tokentab[idx].operator);
3789                 return NULL;
3790         }
3791
3792         /* special processing for sub, gsub and gensub */
3793
3794         if (tokentab[idx].value == Op_sub_builtin) {
3795                 const char *operator = tokentab[idx].operator;
3796
3797                 r->sub_flags = 0;
3798
3799                 arg = subn->nexti;              /* first arg list */
3800                 (void) mk_rexp(arg);
3801
3802                 if (strcmp(operator, "gensub") != 0) {
3803                         /* sub and gsub */
3804
3805                         if (strcmp(operator, "gsub") == 0)
3806                                 r->sub_flags |= GSUB;
3807
3808                         arg = arg->lasti->nexti;        /* 2nd arg list */
3809                         if (nexp == 2) {
3810                                 INSTRUCTION *expr;
3811
3812                                 expr = list_create(instruction(Op_push_i));
3813                                 expr->nexti->memory = make_number(0.0);
3814                                 (void) mk_expression_list(subn,
3815                                                 list_append(expr, instruction(Op_field_spec)));
3816                         }
3817
3818                         arg = arg->lasti->nexti;        /* third arg list */
3819                         ip = arg->lasti;
3820                         if (ip->opcode == Op_push_i) {
3821                                 if (do_lint)
3822                                         lintwarn(_("%s: string literal as last arg of substitute has no effect"),
3823                                                 operator);
3824                                 r->sub_flags |= LITERAL;
3825                         } else {
3826                                 if (make_assignable(ip) == NULL)
3827                                         yyerror(_("%s third parameter is not a changeable object"),
3828                                                 operator);
3829                                 else
3830                                         ip->do_reference = true;
3831                         }
3832
3833                         r->expr_count = count_expressions(&subn, false);
3834                         ip = subn->lasti;
3835
3836                         (void) list_append(subn, r);
3837
3838                         /* add after_assign code */
3839                         if (ip->opcode == Op_push_lhs && ip->memory->type == Node_var && ip->memory->var_assign) {
3840                                 (void) list_append(subn, instruction(Op_var_assign));
3841                                 subn->lasti->assign_ctxt = Op_sub_builtin;
3842                                 subn->lasti->assign_var = ip->memory->var_assign;
3843                         } else if (ip->opcode == Op_field_spec_lhs) {
3844                                 (void) list_append(subn, instruction(Op_field_assign));
3845                                 subn->lasti->assign_ctxt = Op_sub_builtin;
3846                                 subn->lasti->field_assign = (Func_ptr) 0;
3847                                 ip->target_assign = subn->lasti;
3848                         } else if (ip->opcode == Op_subscript_lhs) {
3849                                 (void) list_append(subn, instruction(Op_subscript_assign));
3850                                 subn->lasti->assign_ctxt = Op_sub_builtin;
3851                         }
3852
3853                         return subn;    
3854
3855                 } else {
3856                         /* gensub */
3857
3858                         r->sub_flags |= GENSUB;
3859                         if (nexp == 3) {
3860                                 ip = instruction(Op_push_i);
3861                                 ip->memory = make_number(0.0);
3862                                 (void) mk_expression_list(subn,
3863                                                 list_append(list_create(ip), instruction(Op_field_spec)));
3864                         }
3865
3866                         r->expr_count = count_expressions(&subn, false);
3867                         return list_append(subn, r);
3868                 }
3869         }
3870
3871 #ifdef HAVE_MPFR
3872         /* N.B.: There isn't any special processing for an alternate function below */
3873         if (do_mpfr && tokentab[idx].ptr2)
3874                 r->builtin =  tokentab[idx].ptr2;
3875         else
3876 #endif
3877                 r->builtin = tokentab[idx].ptr;
3878
3879         /* special case processing for a few builtins */
3880
3881         if (r->builtin == do_length) {
3882                 if (nexp == 0) {                
3883                     /* no args. Use $0 */
3884
3885                         INSTRUCTION *list;
3886                         r->expr_count = 1;                      
3887                         list = list_create(r);
3888                         (void) list_prepend(list, instruction(Op_field_spec));
3889                         (void) list_prepend(list, instruction(Op_push_i));
3890                         list->nexti->memory = make_number(0.0);
3891                         return list; 
3892                 } else {
3893                         arg = subn->nexti;
3894                         if (arg->nexti == arg->lasti && arg->nexti->opcode == Op_push)
3895                                 arg->nexti->opcode = Op_push_arg;       /* argument may be array */
3896                 }
3897         } else if (r->builtin == do_isarray) {
3898                 arg = subn->nexti;
3899                 if (arg->nexti == arg->lasti && arg->nexti->opcode == Op_push)
3900                         arg->nexti->opcode = Op_push_arg;       /* argument may be array */
3901         } else if (r->builtin == do_match) {
3902                 static bool warned = false;
3903
3904                 arg = subn->nexti->lasti->nexti;        /* 2nd arg list */
3905                 (void) mk_rexp(arg);
3906
3907                 if (nexp == 3) {        /* 3rd argument there */
3908                         if (do_lint && ! warned) {
3909                                 warned = true;
3910                                 lintwarn(_("match: third argument is a gawk extension"));
3911                         }
3912                         if (do_traditional) {
3913                                 yyerror(_("match: third argument is a gawk extension"));
3914                                 return NULL;
3915                         }
3916
3917                         arg = arg->lasti->nexti;        /* third arg list */
3918                         ip = arg->lasti;
3919                         if (/*ip == arg->nexti  && */ ip->opcode == Op_push)
3920                                 ip->opcode = Op_push_array;
3921                 }
3922         } else if (r->builtin == do_split) {
3923                 arg = subn->nexti->lasti->nexti;        /* 2nd arg list */
3924                 ip = arg->lasti;
3925                 if (ip->opcode == Op_push)
3926                         ip->opcode = Op_push_array;
3927                 if (nexp == 2) {
3928                         INSTRUCTION *expr;
3929                         expr = list_create(instruction(Op_push));
3930                         expr->nexti->memory = FS_node;
3931                         (void) mk_expression_list(subn, expr);
3932                 }
3933                 arg = arg->lasti->nexti;
3934                 n = mk_rexp(arg);
3935                 if (nexp == 2)
3936                         n->re_flags |= FS_DFLT;
3937                 if (nexp == 4) {
3938                         arg = arg->lasti->nexti;
3939                         ip = arg->lasti;
3940                         if (ip->opcode == Op_push)
3941                                 ip->opcode = Op_push_array;
3942                 }
3943         } else if (r->builtin == do_patsplit) {
3944                 arg = subn->nexti->lasti->nexti;        /* 2nd arg list */
3945                 ip = arg->lasti;
3946                 if (ip->opcode == Op_push)
3947                         ip->opcode = Op_push_array;
3948                 if (nexp == 2) {
3949                         INSTRUCTION *expr;
3950                         expr = list_create(instruction(Op_push));
3951                         expr->nexti->memory = FPAT_node;
3952                         (void) mk_expression_list(subn, expr);
3953                 }
3954                 arg = arg->lasti->nexti;
3955                 n = mk_rexp(arg);
3956                 if (nexp == 4) {
3957                         arg = arg->lasti->nexti;
3958                         ip = arg->lasti;
3959                         if (ip->opcode == Op_push)
3960                                 ip->opcode = Op_push_array;
3961                 }
3962         } else if (r->builtin == do_close) {
3963                 static bool warned = false;
3964                 if (nexp == 2) {
3965                         if (do_lint && ! warned) {
3966                                 warned = true;
3967                                 lintwarn(_("close: second argument is a gawk extension"));
3968                         }
3969                         if (do_traditional) {
3970                                 yyerror(_("close: second argument is a gawk extension"));
3971                                 return NULL;
3972                         }
3973                 }
3974         } else if (do_intl                                      /* --gen-po */
3975                         && r->builtin == do_dcgettext           /* dcgettext(...) */
3976                         && subn->nexti->lasti->opcode == Op_push_i      /* 1st arg is constant */
3977                         && (subn->nexti->lasti->memory->flags & STRCUR) != 0) { /* it's a string constant */
3978                 /* ala xgettext, dcgettext("some string" ...) dumps the string */
3979                 NODE *str = subn->nexti->lasti->memory;
3980
3981                 if ((str->flags & INTLSTR) != 0)
3982                         warning(_("use of dcgettext(_\"...\") is incorrect: remove leading underscore"));
3983                         /* don't dump it, the lexer already did */
3984                 else
3985                         dumpintlstr(str->stptr, str->stlen);
3986         } else if (do_intl                                      /* --gen-po */
3987                         && r->builtin == do_dcngettext          /* dcngettext(...) */
3988                         && subn->nexti->lasti->opcode == Op_push_i      /* 1st arg is constant */
3989                         && (subn->nexti->lasti->memory->flags & STRCUR) != 0    /* it's a string constant */
3990                         && subn->nexti->lasti->nexti->lasti->opcode == Op_push_i        /* 2nd arg is constant too */
3991                         && (subn->nexti->lasti->nexti->lasti->memory->flags & STRCUR) != 0) {   /* it's a string constant */
3992                 /* ala xgettext, dcngettext("some string", "some plural" ...) dumps the string */
3993                 NODE *str1 = subn->nexti->lasti->memory;
3994                 NODE *str2 = subn->nexti->lasti->nexti->lasti->memory;
3995
3996                 if (((str1->flags | str2->flags) & INTLSTR) != 0)
3997                         warning(_("use of dcngettext(_\"...\") is incorrect: remove leading underscore"));
3998                 else
3999                         dumpintlstr2(str1->stptr, str1->stlen, str2->stptr, str2->stlen);
4000         } else if (r->builtin == do_asort || r->builtin == do_asorti) {
4001                 arg = subn->nexti;      /* 1st arg list */
4002                 ip = arg->lasti;
4003                 if (ip->opcode == Op_push)
4004                         ip->opcode = Op_push_array;
4005                 if (nexp >= 2) {
4006                         arg = ip->nexti;
4007                         ip = arg->lasti;
4008                         if (ip->opcode == Op_push)
4009                                 ip->opcode = Op_push_array;
4010                 }
4011         }
4012         else if (r->builtin == do_index) {
4013                 arg = subn->nexti->lasti->nexti;        /* 2nd arg list */
4014                 ip = arg->lasti;
4015                 if (ip->opcode == Op_match_rec)
4016                         fatal(_("index: regexp constant as second argument is not allowed"));
4017         }
4018 #ifdef ARRAYDEBUG
4019         else if (r->builtin == do_adump) {
4020                 ip = subn->nexti->lasti;
4021                 if (ip->opcode == Op_push)
4022                         ip->opcode = Op_push_array;
4023         }
4024 #endif
4025
4026         if (subn != NULL) {
4027                 r->expr_count = count_expressions(&subn, false);
4028                 return list_append(subn, r);
4029         }
4030
4031         r->expr_count = 0;
4032         return list_create(r);
4033 }
4034
4035
4036 /* parms_shadow --- check if parameters shadow globals */
4037
4038 static int
4039 parms_shadow(INSTRUCTION *pc, bool *shadow)
4040 {
4041         int pcount, i;
4042         bool ret = false;
4043         NODE *func, *fp;
4044         char *fname;
4045
4046         func = pc->func_body;
4047         fname = func->vname;
4048         fp = func->fparms;
4049
4050 #if 0   /* can't happen, already exited if error ? */
4051         if (fname == NULL || func == NULL)      /* error earlier */
4052                 return false;
4053 #endif
4054
4055         pcount = func->param_cnt;
4056
4057         if (pcount == 0)                /* no args, no problem */
4058                 return 0;
4059
4060         source = pc->source_file;
4061         sourceline = pc->source_line;
4062         /*
4063          * Use warning() and not lintwarn() so that can warn
4064          * about all shadowed parameters.
4065          */
4066         for (i = 0; i < pcount; i++) {
4067                 if (lookup(fp[i].param) != NULL) {
4068                         warning(
4069         _("function `%s': parameter `%s' shadows global variable"),
4070                                         fname, fp[i].param);
4071                         ret = true;
4072                 }
4073         }
4074
4075         *shadow |= ret;
4076         return 0;
4077 }
4078
4079 /* valinfo --- dump var info */
4080
4081 void
4082 valinfo(NODE *n, Func_print print_func, FILE *fp)
4083 {
4084         if (n == Nnull_string)
4085                 print_func(fp, "uninitialized scalar\n");
4086         else if (n->flags & STRING) {
4087                 pp_string_fp(print_func, fp, n->stptr, n->stlen, '"', false);
4088                 print_func(fp, "\n");
4089         } else if (n->flags & NUMBER) {
4090 #ifdef HAVE_MPFR
4091                 if (is_mpg_float(n))
4092                         print_func(fp, "%s\n", mpg_fmt("%.17R*g", ROUND_MODE, n->mpg_numbr));
4093                 else if (is_mpg_integer(n))
4094                         print_func(fp, "%s\n", mpg_fmt("%Zd", n->mpg_i));
4095                 else
4096 #endif
4097                 print_func(fp, "%.17g\n", n->numbr);
4098         } else if (n->flags & STRCUR) {
4099                 pp_string_fp(print_func, fp, n->stptr, n->stlen, '"', false);
4100                 print_func(fp, "\n");
4101         } else if (n->flags & NUMCUR) {
4102 #ifdef HAVE_MPFR
4103                 if (is_mpg_float(n))
4104                         print_func(fp, "%s\n", mpg_fmt("%.17R*g", ROUND_MODE, n->mpg_numbr));
4105                 else if (is_mpg_integer(n))
4106                         print_func(fp, "%s\n", mpg_fmt("%Zd", n->mpg_i));
4107                 else
4108 #endif
4109                 print_func(fp, "%.17g\n", n->numbr);
4110         } else
4111                 print_func(fp, "?? flags %s\n", flags2str(n->flags));
4112 }
4113
4114
4115 /* dump_vars --- dump the symbol table */
4116
4117 void
4118 dump_vars(const char *fname)
4119 {
4120         FILE *fp;
4121         NODE **vars;
4122
4123         if (fname == NULL)
4124                 fp = stderr;
4125         else if ((fp = fopen(fname, "w")) == NULL) {
4126                 warning(_("could not open `%s' for writing (%s)"), fname, strerror(errno));
4127                 warning(_("sending variable list to standard error"));
4128                 fp = stderr;
4129         }
4130
4131         vars = variable_list();
4132         print_vars(vars, fprintf, fp);
4133         efree(vars);
4134         if (fp != stderr && fclose(fp) != 0)
4135                 warning(_("%s: close failed (%s)"), fname, strerror(errno));
4136 }
4137
4138 /* dump_funcs --- print all functions */
4139
4140 void
4141 dump_funcs()
4142 {
4143         NODE **funcs;
4144         funcs = function_list(true);
4145         (void) foreach_func(funcs, (int (*)(INSTRUCTION *, void *)) pp_func, (void *) 0);
4146         efree(funcs);
4147 }
4148
4149
4150 /* shadow_funcs --- check all functions for parameters that shadow globals */
4151
4152 void
4153 shadow_funcs()
4154 {
4155         static int calls = 0;
4156         bool shadow = false;
4157         NODE **funcs;
4158
4159         if (calls++ != 0)
4160                 fatal(_("shadow_funcs() called twice!"));
4161
4162         funcs = function_list(true);
4163         (void) foreach_func(funcs, (int (*)(INSTRUCTION *, void *)) parms_shadow, & shadow);
4164         efree(funcs);
4165
4166         /* End with fatal if the user requested it.  */
4167         if (shadow && lintfunc != warning)
4168                 lintwarn(_("there were shadowed variables."));
4169 }
4170
4171
4172 /* mk_function --- finalize function definition node; remove parameters
4173  *      out of the symbol table.
4174  */
4175
4176 static INSTRUCTION *
4177 mk_function(INSTRUCTION *fi, INSTRUCTION *def)
4178 {
4179         NODE *thisfunc;
4180
4181         thisfunc = fi->func_body;
4182         assert(thisfunc != NULL);
4183
4184         if (do_optimize && def->lasti->opcode == Op_pop) {
4185                 /* tail call which does not return any value. */
4186
4187                 INSTRUCTION *t;
4188
4189                 for (t = def->nexti; t->nexti != def->lasti; t = t->nexti)
4190                         ;
4191                 if (t->opcode == Op_func_call
4192                     && strcmp(t->func_name, thisfunc->vname) == 0)
4193                         (t + 1)->tail_call = true;
4194         }
4195
4196         /* add an implicit return at end;
4197          * also used by 'return' command in debugger
4198          */
4199
4200         (void) list_append(def, instruction(Op_push_i));
4201         def->lasti->memory = dupnode(Nnull_string);
4202         (void) list_append(def, instruction(Op_K_return));
4203
4204         if (do_pretty_print)
4205                 (void) list_prepend(def, instruction(Op_exec_count));
4206
4207         /* fi->opcode = Op_func */
4208         (fi + 1)->firsti = def->nexti;
4209         (fi + 1)->lasti = def->lasti;
4210         (fi + 2)->first_line = fi->source_line;
4211         (fi + 2)->last_line = lastline;
4212         fi->nexti = def->nexti;
4213         bcfree(def);
4214
4215         (void) list_append(rule_list, fi + 1);  /* debugging */
4216
4217         /* update lint table info */
4218         func_use(thisfunc->vname, FUNC_DEFINE);
4219
4220         /* remove params from symbol table */
4221         remove_params(thisfunc);
4222         return fi;
4223 }
4224
4225 /* 
4226  * install_function:
4227  * install function name in the symbol table.
4228  * Extra work, build up and install a list of the parameter names.
4229  */
4230
4231 static int
4232 install_function(char *fname, INSTRUCTION *fi, INSTRUCTION *plist)
4233 {
4234         NODE *r, *f;
4235         int pcount = 0;
4236
4237         r = lookup(fname);
4238         if (r != NULL || is_deferred_variable(fname)) {
4239                 error_ln(fi->source_line, _("function name `%s' previously defined"), fname);
4240                 return -1;
4241         }
4242
4243         if (plist != NULL)
4244                 pcount = plist->lasti->param_count + 1;
4245         f = install_symbol(fname, Node_func);
4246         fi->func_body = f;
4247         f->param_cnt = pcount;
4248         f->code_ptr = fi;
4249         f->fparms = NULL; 
4250         if (pcount > 0) {
4251                 char **pnames;
4252                 pnames = check_params(fname, pcount, plist);    /* frees plist */
4253                 f->fparms = make_params(pnames, pcount);
4254                 efree(pnames);
4255                 install_params(f);
4256         }
4257         return 0;
4258 }
4259
4260
4261 /* check_params --- build a list of function parameter names after
4262  *      making sure that the names are valid and there are no duplicates.
4263  */
4264
4265 static char **
4266 check_params(char *fname, int pcount, INSTRUCTION *list)
4267 {
4268         INSTRUCTION *p, *np;
4269         int i, j;
4270         char *name;
4271         char **pnames;
4272
4273         assert(pcount > 0);
4274
4275         emalloc(pnames, char **, pcount * sizeof(char *), "check_params");
4276
4277         for (i = 0, p = list->nexti; p != NULL; i++, p = np) {
4278                 np = p->nexti;
4279                 name = p->lextok;
4280                 p->lextok = NULL;
4281
4282                 if (strcmp(name, fname) == 0) {
4283                         /* check for function foo(foo) { ... }.  bleah. */
4284                         error_ln(p->source_line,
4285                                 _("function `%s': can't use function name as parameter name"), fname);
4286                 } else if (is_std_var(name)) {
4287                         error_ln(p->source_line,
4288                                 _("function `%s': can't use special variable `%s' as a function parameter"),
4289                                         fname, name);
4290                 }
4291
4292                 /* check for duplicate parameters */
4293                 for (j = 0; j < i; j++) {
4294                         if (strcmp(name, pnames[j]) == 0) {
4295                                 error_ln(p->source_line,
4296                                         _("function `%s': parameter #%d, `%s', duplicates parameter #%d"),
4297                                         fname, i + 1, name, j + 1);
4298                         }
4299                 }
4300
4301                 pnames[i] = name;
4302                 bcfree(p);
4303         }
4304         bcfree(list);
4305
4306         return pnames; 
4307 }
4308
4309
4310 #ifdef HASHSIZE
4311 undef HASHSIZE
4312 #endif
4313 #define HASHSIZE 1021
4314  
4315 static struct fdesc {
4316         char *name;
4317         short used;
4318         short defined;
4319         short extension;
4320         struct fdesc *next;
4321 } *ftable[HASHSIZE];
4322
4323 /* func_use --- track uses and definitions of functions */
4324
4325 static void
4326 func_use(const char *name, enum defref how)
4327 {
4328         struct fdesc *fp;
4329         int len;
4330         int ind;
4331
4332         len = strlen(name);
4333         ind = hash(name, len, HASHSIZE, NULL);
4334
4335         for (fp = ftable[ind]; fp != NULL; fp = fp->next)
4336                 if (strcmp(fp->name, name) == 0)
4337                         goto update_value;
4338
4339         /* not in the table, fall through to allocate a new one */
4340
4341         emalloc(fp, struct fdesc *, sizeof(struct fdesc), "func_use");
4342         memset(fp, '\0', sizeof(struct fdesc));
4343         emalloc(fp->name, char *, len + 1, "func_use");
4344         strcpy(fp->name, name);
4345         fp->next = ftable[ind];
4346         ftable[ind] = fp;
4347
4348 update_value:
4349         if (how == FUNC_DEFINE)
4350                 fp->defined++;
4351         else if (how == FUNC_EXT) {
4352                 fp->defined++;
4353                 fp->extension++;
4354         } else
4355                 fp->used++;
4356 }
4357
4358 /* track_ext_func --- add an extension function to the table */
4359
4360 void
4361 track_ext_func(const char *name)
4362 {
4363         func_use(name, FUNC_EXT);
4364 }
4365
4366 /* check_funcs --- verify functions that are called but not defined */
4367
4368 static void
4369 check_funcs()
4370 {
4371         struct fdesc *fp, *next;
4372         int i;
4373
4374         if (! in_main_context())
4375                 goto free_mem;
4376  
4377         for (i = 0; i < HASHSIZE; i++) {
4378                 for (fp = ftable[i]; fp != NULL; fp = fp->next) {
4379 #ifdef REALLYMEAN
4380                         /* making this the default breaks old code. sigh. */
4381                         if (fp->defined == 0 && ! fp->extension) {
4382                                 error(
4383                 _("function `%s' called but never defined"), fp->name);
4384                                 errcount++;
4385                         }
4386 #else
4387                         if (do_lint && fp->defined == 0 && ! fp->extension)
4388                                 lintwarn(
4389                 _("function `%s' called but never defined"), fp->name);
4390 #endif
4391
4392                         if (do_lint && fp->used == 0 && ! fp->extension) {
4393                                 lintwarn(_("function `%s' defined but never called directly"),
4394                                         fp->name);
4395                         }
4396                 }
4397         }
4398
4399 free_mem:
4400         /* now let's free all the memory */
4401         for (i = 0; i < HASHSIZE; i++) {
4402                 for (fp = ftable[i]; fp != NULL; fp = next) {
4403                         next = fp->next;
4404                         efree(fp->name);
4405                         efree(fp);
4406                 }
4407                 ftable[i] = NULL;
4408         }
4409 }
4410
4411 /* param_sanity --- look for parameters that are regexp constants */
4412
4413 static void
4414 param_sanity(INSTRUCTION *arglist)
4415 {
4416         INSTRUCTION *argl, *arg;
4417         int i = 1;
4418
4419         if (arglist == NULL)
4420                 return;
4421         for (argl = arglist->nexti; argl; ) {
4422                 arg = argl->lasti;
4423                 if (arg->opcode == Op_match_rec)
4424                         warning_ln(arg->source_line,
4425                                 _("regexp constant for parameter #%d yields boolean value"), i);
4426                 argl = arg->nexti;
4427                 i++;
4428         }
4429 }
4430
4431 /* deferred variables --- those that are only defined if needed. */
4432
4433 /*
4434  * Is there any reason to use a hash table for deferred variables?  At the
4435  * moment, there are only 1 to 3 such variables, so it may not be worth
4436  * the overhead.  If more modules start using this facility, it should
4437  * probably be converted into a hash table.
4438  */
4439
4440 static struct deferred_variable {
4441         NODE *(*load_func)(void);
4442         struct deferred_variable *next;
4443         char name[1];   /* variable-length array */
4444 } *deferred_variables;
4445
4446 /* register_deferred_variable --- add a var name and loading function to the list */
4447
4448 void
4449 register_deferred_variable(const char *name, NODE *(*load_func)(void))
4450 {
4451         struct deferred_variable *dv;
4452         size_t sl = strlen(name);
4453
4454         emalloc(dv, struct deferred_variable *, sizeof(*dv)+sl,
4455                 "register_deferred_variable");
4456         dv->load_func = load_func;
4457         dv->next = deferred_variables;
4458         memcpy(dv->name, name, sl+1);
4459         deferred_variables = dv;
4460 }
4461
4462 /* is_deferred_variable --- check if NAME is a deferred variable */
4463
4464 static bool
4465 is_deferred_variable(const char *name)
4466 {
4467         struct deferred_variable *dv;
4468         for (dv = deferred_variables; dv != NULL; dv = dv->next)
4469                 if (strcmp(name, dv->name) == 0)
4470                         return true;
4471         return false;
4472 }
4473
4474
4475 /* variable --- make sure NAME is in the symbol table */
4476
4477 NODE *
4478 variable(int location, char *name, NODETYPE type)
4479 {
4480         NODE *r;
4481
4482         if ((r = lookup(name)) != NULL) {
4483                 if (r->type == Node_func || r->type == Node_ext_func )
4484                         error_ln(location, _("function `%s' called with space between name and `(',\nor used as a variable or an array"),
4485                                 r->vname);
4486                 if (r == symbol_table)
4487                         symtab_used = true;
4488         } else {
4489                 /* not found */
4490                 struct deferred_variable *dv;
4491
4492                 for (dv = deferred_variables; true; dv = dv->next) {
4493                         if (dv == NULL) {
4494                                 /*
4495                                  * This is the only case in which we may not free the string.
4496                                  */
4497                                 return install_symbol(name, type);
4498                         }
4499                         if (strcmp(name, dv->name) == 0) {
4500                                 r = (*dv->load_func)();
4501                                 break;
4502                         }
4503                 }
4504         }
4505         efree(name);
4506         return r;
4507 }
4508
4509 /* process_deferred --- if the program uses SYMTAB, load deferred variables */
4510
4511 static void
4512 process_deferred()
4513 {
4514         struct deferred_variable *dv;
4515
4516         if (! symtab_used)
4517                 return;
4518
4519         for (dv = deferred_variables; dv != NULL; dv = dv->next) {
4520                 (void) dv->load_func();
4521         }
4522 }
4523
4524 /* make_regnode --- make a regular expression node */
4525
4526 static NODE *
4527 make_regnode(int type, NODE *exp)
4528 {
4529         NODE *n;
4530
4531         getnode(n);
4532         memset(n, 0, sizeof(NODE));
4533         n->type = type;
4534         n->re_cnt = 1;
4535
4536         if (type == Node_regex) {
4537                 n->re_reg = make_regexp(exp->stptr, exp->stlen, false, true, false);
4538                 if (n->re_reg == NULL) {
4539                         freenode(n);
4540                         return NULL;
4541                 }
4542                 n->re_exp = exp;
4543                 n->re_flags = CONSTANT;
4544         }
4545         return n;
4546 }
4547
4548
4549 /* mk_rexp --- make a regular expression constant */
4550
4551 static NODE *
4552 mk_rexp(INSTRUCTION *list)
4553 {
4554         INSTRUCTION *ip;
4555
4556         ip = list->nexti;
4557         if (ip == list->lasti && ip->opcode == Op_match_rec)
4558                 ip->opcode = Op_push_re;
4559         else {
4560                 ip = instruction(Op_push_re);
4561                 ip->memory = make_regnode(Node_dynregex, NULL);
4562                 ip->nexti = list->lasti->nexti;
4563                 list->lasti->nexti = ip;
4564                 list->lasti = ip;
4565         }
4566         return ip->memory;
4567 }
4568
4569 #ifndef NO_LINT
4570 /* isnoeffect --- when used as a statement, has no side effects */
4571
4572 static int
4573 isnoeffect(OPCODE type)
4574 {
4575         switch (type) {
4576         case Op_times:
4577         case Op_times_i:
4578         case Op_quotient:
4579         case Op_quotient_i:
4580         case Op_mod:
4581         case Op_mod_i:
4582         case Op_plus:
4583         case Op_plus_i:
4584         case Op_minus:
4585         case Op_minus_i:
4586         case Op_subscript:
4587         case Op_concat:
4588         case Op_exp:
4589         case Op_exp_i:
4590         case Op_unary_minus:
4591         case Op_field_spec:
4592         case Op_and_final:
4593         case Op_or_final:
4594         case Op_equal:
4595         case Op_notequal:
4596         case Op_less:
4597         case Op_greater:
4598         case Op_leq:
4599         case Op_geq:
4600         case Op_match:
4601         case Op_nomatch:
4602         case Op_match_rec:
4603         case Op_not:
4604         case Op_in_array:
4605                 return true;
4606         default:
4607                 break;  /* keeps gcc -Wall happy */
4608         }
4609
4610         return false;
4611 }
4612 #endif /* NO_LINT */
4613
4614
4615 /* make_assignable --- make this operand an assignable one if posiible */
4616
4617 static INSTRUCTION *
4618 make_assignable(INSTRUCTION *ip)
4619 {
4620         switch (ip->opcode) {
4621         case Op_push:
4622                 ip->opcode = Op_push_lhs;
4623                 return ip;
4624         case Op_field_spec:
4625                 ip->opcode = Op_field_spec_lhs;
4626                 return ip;
4627         case Op_subscript:
4628                 ip->opcode = Op_subscript_lhs;
4629                 return ip;
4630         default:
4631                 break;  /* keeps gcc -Wall happy */
4632         }
4633         return NULL;
4634 }
4635
4636 /* stopme --- for debugging */
4637
4638 NODE *
4639 stopme(int nargs ATTRIBUTE_UNUSED)
4640 {
4641         return make_number(0.0);
4642 }
4643
4644 /* dumpintlstr --- write out an initial .po file entry for the string */
4645
4646 static void
4647 dumpintlstr(const char *str, size_t len)
4648 {
4649         char *cp;
4650
4651         /* See the GNU gettext distribution for details on the file format */
4652
4653         if (source != NULL) {
4654                 /* ala the gettext sources, remove leading `./'s */
4655                 for (cp = source; cp[0] == '.' && cp[1] == '/'; cp += 2)
4656                         continue;
4657                 printf("#: %s:%d\n", cp, sourceline);
4658         }
4659
4660         printf("msgid ");
4661         pp_string_fp(fprintf, stdout, str, len, '"', true);
4662         putchar('\n');
4663         printf("msgstr \"\"\n\n");
4664         fflush(stdout);
4665 }
4666
4667 /* dumpintlstr2 --- write out an initial .po file entry for the string and its plural */
4668
4669 static void
4670 dumpintlstr2(const char *str1, size_t len1, const char *str2, size_t len2)
4671 {
4672         char *cp;
4673
4674         /* See the GNU gettext distribution for details on the file format */
4675
4676         if (source != NULL) {
4677                 /* ala the gettext sources, remove leading `./'s */
4678                 for (cp = source; cp[0] == '.' && cp[1] == '/'; cp += 2)
4679                         continue;
4680                 printf("#: %s:%d\n", cp, sourceline);
4681         }
4682
4683         printf("msgid ");
4684         pp_string_fp(fprintf, stdout, str1, len1, '"', true);
4685         putchar('\n');
4686         printf("msgid_plural ");
4687         pp_string_fp(fprintf, stdout, str2, len2, '"', true);
4688         putchar('\n');
4689         printf("msgstr[0] \"\"\nmsgstr[1] \"\"\n\n");
4690         fflush(stdout);
4691 }
4692
4693 /* mk_binary --- instructions for binary operators */
4694
4695 static INSTRUCTION *
4696 mk_binary(INSTRUCTION *s1, INSTRUCTION *s2, INSTRUCTION *op)
4697 {
4698         INSTRUCTION *ip1,*ip2;
4699         AWKNUM res;
4700
4701         ip2 = s2->nexti;
4702         if (s2->lasti == ip2 && ip2->opcode == Op_push_i) {
4703         /* do any numeric constant folding */
4704                 ip1 = s1->nexti;
4705                 if (do_optimize
4706                                 && ip1 == s1->lasti && ip1->opcode == Op_push_i
4707                                 && (ip1->memory->flags & (MPFN|MPZN|STRCUR|STRING)) == 0
4708                                 && (ip2->memory->flags & (MPFN|MPZN|STRCUR|STRING)) == 0
4709                 ) {
4710                         NODE *n1 = ip1->memory, *n2 = ip2->memory;
4711                         res = force_number(n1)->numbr;
4712                         (void) force_number(n2);
4713                         switch (op->opcode) {
4714                         case Op_times:
4715                                 res *= n2->numbr;
4716                                 break;
4717                         case Op_quotient:
4718                                 if (n2->numbr == 0.0) {
4719                                         /* don't fatalize, allow parsing rest of the input */
4720                                         error_ln(op->source_line, _("division by zero attempted"));
4721                                         goto regular;
4722                                 }
4723
4724                                 res /= n2->numbr;
4725                                 break;
4726                         case Op_mod:
4727                                 if (n2->numbr == 0.0) {
4728                                         /* don't fatalize, allow parsing rest of the input */
4729                                         error_ln(op->source_line, _("division by zero attempted in `%%'"));
4730                                         goto regular;
4731                                 }
4732 #ifdef HAVE_FMOD
4733                                 res = fmod(res, n2->numbr);
4734 #else   /* ! HAVE_FMOD */
4735                                 (void) modf(res / n2->numbr, &res);
4736                                 res = n1->numbr - res * n2->numbr;
4737 #endif  /* ! HAVE_FMOD */
4738                                 break;
4739                         case Op_plus:
4740                                 res += n2->numbr;
4741                                 break;
4742                         case Op_minus:
4743                                 res -= n2->numbr;
4744                                 break;
4745                         case Op_exp:
4746                                 res = calc_exp(res, n2->numbr);
4747                                 break;
4748                         default:
4749                                 goto regular;
4750                         }
4751
4752                         op->opcode = Op_push_i;
4753                         op->memory = make_number(res);
4754                         unref(n1);
4755                         unref(n2);
4756                         bcfree(ip1);
4757                         bcfree(ip2);
4758                         bcfree(s1);
4759                         bcfree(s2);
4760                         return list_create(op);
4761                 } else {
4762                 /* do basic arithmetic optimisation */
4763                 /* convert (Op_push_i Node_val) + (Op_plus) to (Op_plus_i Node_val) */
4764                         switch (op->opcode) {
4765                         case Op_times:
4766                                 op->opcode = Op_times_i;
4767                                 break;
4768                         case Op_quotient:
4769                                 op->opcode = Op_quotient_i;
4770                                 break;
4771                         case Op_mod:
4772                                 op->opcode = Op_mod_i;
4773                                 break;
4774                         case Op_plus:
4775                                 op->opcode = Op_plus_i;
4776                                 break;
4777                         case Op_minus:
4778                                 op->opcode = Op_minus_i;
4779                                 break;
4780                         case Op_exp:
4781                                 op->opcode = Op_exp_i;
4782                                 break;
4783                         default:
4784                                 goto regular;
4785                         }       
4786
4787                         op->memory = ip2->memory;
4788                         bcfree(ip2);
4789                         bcfree(s2);     /* Op_list */
4790                         return list_append(s1, op);
4791                 }
4792         }
4793
4794 regular:
4795         /* append lists s1, s2 and add `op' bytecode */
4796         (void) list_merge(s1, s2);
4797         return list_append(s1, op);
4798 }
4799
4800 /* mk_boolean --- instructions for boolean and, or */
4801  
4802 static INSTRUCTION *
4803 mk_boolean(INSTRUCTION *left, INSTRUCTION *right, INSTRUCTION *op)
4804 {
4805         INSTRUCTION *tp;
4806         OPCODE opc, final_opc;
4807
4808         opc = op->opcode;               /* Op_and or Op_or */
4809         final_opc = (opc == Op_or) ? Op_or_final : Op_and_final;
4810
4811         add_lint(right, LINT_assign_in_cond);
4812
4813         tp = left->lasti;
4814
4815         if (tp->opcode != final_opc) {  /* x || y */
4816                 list_append(right, instruction(final_opc));
4817                 add_lint(left, LINT_assign_in_cond);
4818                 (void) list_append(left, op);
4819                 left->lasti->target_jmp = right->lasti;
4820
4821                 /* NB: target_stmt points to previous Op_and(Op_or) in a chain;
4822                  *     target_stmt only used in the parser (see below).
4823                  */
4824
4825                 left->lasti->target_stmt = left->lasti;
4826                 right->lasti->target_stmt = left->lasti;
4827         } else {                /* optimization for x || y || z || ... */
4828                 INSTRUCTION *ip;
4829                 
4830                 op->opcode = final_opc;
4831                 (void) list_append(right, op);
4832                 op->target_stmt = tp;
4833                 tp->opcode = opc;
4834                 tp->target_jmp = op;
4835
4836                 /* update jump targets */
4837                 for (ip = tp->target_stmt; ; ip = ip->target_stmt) {
4838                         assert(ip->opcode == opc);
4839                         assert(ip->target_jmp == tp);
4840                         /* if (ip->opcode == opc &&  ip->target_jmp == tp) */
4841                         ip->target_jmp = op;
4842                         if (ip->target_stmt == ip)
4843                                 break;
4844                 }
4845         }
4846
4847         return list_merge(left, right);
4848 }
4849
4850 /* mk_condition --- if-else and conditional */
4851
4852 static INSTRUCTION *
4853 mk_condition(INSTRUCTION *cond, INSTRUCTION *ifp, INSTRUCTION *true_branch,
4854                 INSTRUCTION *elsep, INSTRUCTION *false_branch)
4855 {
4856         /*
4857          *    ----------------
4858          *       cond
4859          *    ----------------
4860          * t: [Op_jmp_false f ]
4861          *    ----------------
4862          *       true_branch
4863          *
4864          *    ----------------
4865          *    [Op_jmp y]
4866          *    ---------------- 
4867          * f:
4868          *      false_branch
4869          *    ----------------
4870          * y: [Op_no_op]
4871          *    ----------------
4872          */
4873
4874         INSTRUCTION *ip;
4875
4876         if (false_branch == NULL) {
4877                 false_branch = list_create(instruction(Op_no_op));
4878                 if (elsep != NULL) {            /* else { } */
4879                         if (do_pretty_print)
4880                                 (void) list_prepend(false_branch, elsep);
4881                         else
4882                                 bcfree(elsep);
4883                 }
4884         } else {
4885                 /* assert(elsep != NULL); */
4886
4887                 /* avoid a series of no_op's: if .. else if .. else if .. */
4888                 if (false_branch->lasti->opcode != Op_no_op)
4889                         (void) list_append(false_branch, instruction(Op_no_op));
4890                 if (do_pretty_print) {
4891                         (void) list_prepend(false_branch, elsep);
4892                         false_branch->nexti->branch_end = false_branch->lasti;
4893                         (void) list_prepend(false_branch, instruction(Op_exec_count));
4894                 } else
4895                         bcfree(elsep);
4896         }
4897
4898         (void) list_prepend(false_branch, instruction(Op_jmp));
4899         false_branch->nexti->target_jmp = false_branch->lasti;
4900
4901         add_lint(cond, LINT_assign_in_cond);
4902         ip = list_append(cond, instruction(Op_jmp_false));
4903         ip->lasti->target_jmp = false_branch->nexti->nexti;
4904
4905         if (do_pretty_print) {
4906                 (void) list_prepend(ip, ifp);
4907                 (void) list_append(ip, instruction(Op_exec_count));
4908                 ip->nexti->branch_if = ip->lasti;
4909                 ip->nexti->branch_else = false_branch->nexti;
4910         } else
4911                 bcfree(ifp);
4912
4913         if (true_branch != NULL)
4914                 list_merge(ip, true_branch);
4915         return list_merge(ip, false_branch);
4916 }
4917
4918 enum defline { FIRST_LINE, LAST_LINE };
4919
4920 /* find_line -- find the first(last) line in a list of (pattern) instructions */
4921
4922 static int
4923 find_line(INSTRUCTION *pattern, enum defline what)
4924 {
4925         INSTRUCTION *ip;
4926         int lineno = 0;
4927
4928         for (ip = pattern->nexti; ip; ip = ip->nexti) {
4929                 if (what == LAST_LINE) {
4930                         if (ip->source_line > lineno)
4931                                 lineno = ip->source_line;
4932                 } else {        /* FIRST_LINE */
4933                         if (ip->source_line > 0
4934                                         && (lineno == 0 || ip->source_line < lineno))
4935                                 lineno = ip->source_line;
4936                 }
4937                 if (ip == pattern->lasti)
4938                         break;
4939         }
4940         assert(lineno > 0);
4941         return lineno;
4942 }
4943
4944 /* append_rule --- pattern-action instructions */
4945
4946 static INSTRUCTION *
4947 append_rule(INSTRUCTION *pattern, INSTRUCTION *action)
4948 {
4949         /*
4950          *    ----------------
4951          *       pattern
4952          *    ----------------
4953          *    [Op_jmp_false f ]
4954          *    ----------------
4955          *       action
4956          *    ----------------
4957          * f: [Op_no_op       ]
4958          *    ----------------
4959          */
4960
4961         INSTRUCTION *rp;
4962         INSTRUCTION *tp;
4963         INSTRUCTION *ip;
4964
4965         if (rule != Rule) {
4966                 rp = pattern;
4967                 if (do_pretty_print)
4968                         (void) list_append(action, instruction(Op_no_op));
4969                 (rp + 1)->firsti = action->nexti;
4970                 (rp + 1)->lasti = action->lasti;
4971                 (rp + 2)->first_line = pattern->source_line;
4972                 (rp + 2)->last_line = lastline;
4973                 ip = list_prepend(action, rp);
4974
4975         } else {
4976                 rp = bcalloc(Op_rule, 3, 0);
4977                 rp->in_rule = Rule;
4978                 rp->source_file = source;
4979                 tp = instruction(Op_no_op);
4980
4981                 if (pattern == NULL) {
4982                         /* assert(action != NULL); */
4983                         if (do_pretty_print)
4984                                 (void) list_prepend(action, instruction(Op_exec_count));
4985                         (rp + 1)->firsti = action->nexti;
4986                         (rp + 1)->lasti = tp;
4987                         (rp + 2)->first_line = firstline;
4988                         (rp + 2)->last_line = lastline;
4989                         rp->source_line = firstline;
4990                         ip = list_prepend(list_append(action, tp), rp);
4991                 } else {
4992                         (void) list_append(pattern, instruction(Op_jmp_false));
4993                         pattern->lasti->target_jmp = tp;
4994                         (rp + 2)->first_line = find_line(pattern, FIRST_LINE);
4995                         rp->source_line = (rp + 2)->first_line;
4996                         if (action == NULL) {
4997                                 (rp + 2)->last_line = find_line(pattern, LAST_LINE);
4998                                 action = list_create(instruction(Op_K_print_rec));
4999                                 if (do_pretty_print)
5000                                         (void) list_prepend(action, instruction(Op_exec_count));
5001                         } else
5002                                 (rp + 2)->last_line = lastline;
5003
5004                         if (do_pretty_print) {
5005                                 (void) list_prepend(pattern, instruction(Op_exec_count));
5006                                 (void) list_prepend(action, instruction(Op_exec_count));
5007                         }
5008                         (rp + 1)->firsti = action->nexti;
5009                         (rp + 1)->lasti = tp;
5010                         ip = list_append(
5011                                         list_merge(list_prepend(pattern, rp),
5012                                                 action),
5013                                         tp);
5014                 }
5015
5016         }
5017
5018         list_append(rule_list, rp + 1);
5019
5020         if (rule_block[rule] == NULL)
5021                 rule_block[rule] = ip;
5022         else
5023                 (void) list_merge(rule_block[rule], ip);
5024         
5025         return rule_block[rule];
5026 }
5027
5028 /* mk_assignment --- assignment bytecodes */
5029
5030 static INSTRUCTION *
5031 mk_assignment(INSTRUCTION *lhs, INSTRUCTION *rhs, INSTRUCTION *op)
5032 {
5033         INSTRUCTION *tp;
5034         INSTRUCTION *ip;
5035
5036         tp = lhs->lasti;
5037         switch (tp->opcode) {
5038         case Op_field_spec:
5039                 tp->opcode = Op_field_spec_lhs;
5040                 break;
5041         case Op_subscript:
5042                 tp->opcode = Op_subscript_lhs;
5043                 break;
5044         case Op_push:
5045         case Op_push_array:
5046                 tp->opcode = Op_push_lhs; 
5047                 break;
5048         case Op_field_assign:
5049                 yyerror(_("cannot assign a value to the result of a field post-increment expression"));
5050                 break;
5051         default:
5052                 yyerror(_("invalid target of assignment (opcode %s)"),
5053                                 opcode2str(tp->opcode));
5054                 break;
5055         }
5056
5057         tp->do_reference = (op->opcode != Op_assign);   /* check for uninitialized reference */
5058
5059         if (rhs != NULL)
5060                 ip = list_merge(rhs, lhs);
5061         else
5062                 ip = lhs;
5063
5064         (void) list_append(ip, op);
5065
5066         if (tp->opcode == Op_push_lhs
5067                         && tp->memory->type == Node_var
5068                         && tp->memory->var_assign
5069         ) {
5070                 tp->do_reference = false; /* no uninitialized reference checking
5071                                            * for a special variable.
5072                                            */
5073                 (void) list_append(ip, instruction(Op_var_assign));
5074                 ip->lasti->assign_var = tp->memory->var_assign;
5075         } else if (tp->opcode == Op_field_spec_lhs) {
5076                 (void) list_append(ip, instruction(Op_field_assign));
5077                 ip->lasti->field_assign = (Func_ptr) 0;
5078                 tp->target_assign = ip->lasti;
5079         } else if (tp->opcode == Op_subscript_lhs) {
5080                 (void) list_append(ip, instruction(Op_subscript_assign));
5081         }
5082
5083         return ip;
5084 }
5085
5086 /* optimize_assignment --- peephole optimization for assignment */
5087
5088 static INSTRUCTION *
5089 optimize_assignment(INSTRUCTION *exp)
5090 {
5091         INSTRUCTION *i1, *i2, *i3;
5092
5093         /*
5094          * Optimize assignment statements array[subs] = x; var = x; $n = x;
5095          * string concatenation of the form s = s t.
5096          *
5097          * 1) Array element assignment array[subs] = x:
5098          *   Replaces Op_push_array + Op_subscript_lhs + Op_assign + Op_pop
5099          *   with single instruction Op_store_sub.
5100          *       Limitation: 1 dimension and sub is simple var/value.
5101          * 
5102          * 2) Simple variable assignment var = x:
5103          *   Replaces Op_push_lhs + Op_assign + Op_pop with Op_store_var.
5104          *
5105          * 3) Field assignment $n = x:
5106          *   Replaces Op_field_spec_lhs + Op_assign + Op_field_assign + Op_pop
5107          *   with Op_store_field.
5108          *
5109          * 4) Optimization for string concatenation:
5110          *   For cases like x = x y, uses realloc to include y in x;
5111          *   also eliminates instructions Op_push_lhs and Op_pop.
5112          */
5113
5114         /*
5115          * N.B.: do not append Op_pop instruction to the returned
5116          * instruction list if optimized. None of these
5117          * optimized instructions pushes the r-value of assignment
5118          * onto the runtime stack.
5119          */
5120
5121         i2 = NULL;
5122         i1 = exp->lasti;
5123
5124         if (   i1->opcode != Op_assign
5125             && i1->opcode != Op_field_assign) 
5126                 return list_append(exp, instruction(Op_pop));
5127
5128         for (i2 = exp->nexti; i2 != i1; i2 = i2->nexti) {
5129                 switch (i2->opcode) {
5130                 case Op_concat:
5131                         if (i2->nexti->opcode == Op_push_lhs    /* l.h.s is a simple variable */
5132                                 && (i2->concat_flag & CSVAR)        /* 1st exp in r.h.s is a simple variable;
5133                                                                      * see Op_concat in the grammer above.
5134                                                                      */
5135                                 && i2->nexti->memory == exp->nexti->memory       /* and the same as in l.h.s */
5136                                 && i2->nexti->nexti == i1
5137                                 && i1->opcode == Op_assign
5138                         ) {
5139                                 /* s = s ... optimization */
5140
5141                                 /* avoid stuff like x = x (x = y) or x = x gsub(/./, "b", x);
5142                                  * check for l-value reference to this variable in the r.h.s.
5143                                  * Also, avoid function calls in general to guard against
5144                                  * global variable assignment.
5145                                  */
5146
5147                                 for (i3 = exp->nexti->nexti; i3 != i2; i3 = i3->nexti) {
5148                                         if ((i3->opcode == Op_push_lhs && i3->memory == i2->nexti->memory)
5149                                                         || i3->opcode == Op_func_call)
5150                                                 return list_append(exp, instruction(Op_pop)); /* no optimization */
5151                                 }
5152
5153                                 /* remove the variable from r.h.s */
5154                                 i3 = exp->nexti;
5155                                 exp->nexti = i3->nexti;
5156                                 bcfree(i3);
5157
5158                                 if (--i2->expr_count == 1)      /* one less expression in Op_concat */
5159                                         i2->opcode = Op_no_op;
5160
5161                                 i3 = i2->nexti;
5162                                 assert(i3->opcode == Op_push_lhs);
5163                                 i3->opcode = Op_assign_concat;  /* change Op_push_lhs to Op_assign_concat */
5164                                 i3->nexti = NULL;
5165                                 bcfree(i1);          /* Op_assign */
5166                                 exp->lasti = i3;     /* update Op_list */
5167                                 return exp;
5168                         }
5169                         break;
5170
5171                 case Op_field_spec_lhs:
5172                         if (i2->nexti->opcode == Op_assign
5173                                         && i2->nexti->nexti == i1
5174                                         && i1->opcode == Op_field_assign
5175                         ) {
5176                                 /* $n = .. */
5177                                 i2->opcode = Op_store_field;
5178                                 bcfree(i2->nexti);  /* Op_assign */
5179                                 i2->nexti = NULL;
5180                                 bcfree(i1);          /* Op_field_assign */
5181                                 exp->lasti = i2;    /* update Op_list */
5182                                 return exp;
5183                         }
5184                         break;
5185
5186                 case Op_push_array:
5187                         if (i2->nexti->nexti->opcode == Op_subscript_lhs) {
5188                                 i3 = i2->nexti->nexti;
5189                                 if (i3->sub_count == 1
5190                                                 && i3->nexti == i1
5191                                                 && i1->opcode == Op_assign
5192                                 ) {
5193                                         /* array[sub] = .. */
5194                                         i3->opcode = Op_store_sub;
5195                                         i3->memory = i2->memory;
5196                                         i3->expr_count = 1;  /* sub_count shadows memory,
5197                                           * so use expr_count instead.
5198                                                           */
5199                                         i3->nexti = NULL;
5200                                         i2->opcode = Op_no_op;                                  
5201                                         bcfree(i1);          /* Op_assign */
5202                                         exp->lasti = i3;     /* update Op_list */
5203                                         return exp;
5204                                 }
5205                         }
5206                         break;
5207
5208                 case Op_push_lhs:
5209                         if (i2->nexti == i1
5210                                         && i1->opcode == Op_assign
5211                         ) {
5212                                 /* var = .. */
5213                                 i2->opcode = Op_store_var;
5214                                 i2->nexti = NULL;
5215                                 bcfree(i1);          /* Op_assign */
5216                                 exp->lasti = i2;     /* update Op_list */
5217
5218                                 i3 = exp->nexti;
5219                                 if (i3->opcode == Op_push_i
5220                                         && (i3->memory->flags & INTLSTR) == 0
5221                                         && i3->nexti == i2
5222                                 ) {
5223                                         /* constant initializer */ 
5224                                         i2->initval = i3->memory;
5225                                         bcfree(i3);
5226                                         exp->nexti = i2;
5227                                 } else
5228                                         i2->initval = NULL;
5229
5230                                 return exp;
5231                         }
5232                         break;
5233
5234                 default:
5235                         break;
5236                 }
5237         }
5238
5239         /* no optimization  */
5240         return list_append(exp, instruction(Op_pop));
5241 }
5242
5243
5244 /* mk_getline --- make instructions for getline */
5245
5246 static INSTRUCTION *
5247 mk_getline(INSTRUCTION *op, INSTRUCTION *var, INSTRUCTION *redir, int redirtype)
5248 {
5249         INSTRUCTION *ip;
5250         INSTRUCTION *tp;
5251         INSTRUCTION *asgn = NULL;
5252
5253         /*
5254          *  getline [var] < [file]
5255          *
5256          *  [ file (simp_exp)]
5257          *  [ [ var ] ]
5258          *  [ Op_K_getline_redir|NULL|redir_type|into_var]
5259          *  [ [var_assign] ] 
5260          *
5261          */
5262
5263         if (redir == NULL) {
5264                 int sline = op->source_line;
5265                 bcfree(op);
5266                 op = bcalloc(Op_K_getline, 2, sline);
5267                 (op + 1)->target_endfile = ip_endfile;
5268                 (op + 1)->target_beginfile = ip_beginfile;      
5269         }
5270
5271         if (var != NULL) {
5272                 tp = make_assignable(var->lasti);
5273                 assert(tp != NULL);
5274
5275                 /* check if we need after_assign bytecode */
5276                 if (tp->opcode == Op_push_lhs
5277                                 && tp->memory->type == Node_var
5278                                 && tp->memory->var_assign
5279                 ) {
5280                         asgn = instruction(Op_var_assign);
5281                         asgn->assign_ctxt = op->opcode;
5282                         asgn->assign_var = tp->memory->var_assign;
5283                 } else if (tp->opcode == Op_field_spec_lhs) {
5284                         asgn = instruction(Op_field_assign);
5285                         asgn->assign_ctxt = op->opcode;
5286                         asgn->field_assign = (Func_ptr) 0;   /* determined at run time */
5287                         tp->target_assign = asgn;
5288                 } else if (tp->opcode == Op_subscript_lhs) {
5289                         asgn = instruction(Op_subscript_assign);
5290                         asgn->assign_ctxt = op->opcode;
5291                 }
5292
5293                 if (redir != NULL) {
5294                         ip = list_merge(redir, var);
5295                         (void) list_append(ip, op);
5296                 } else
5297                         ip = list_append(var, op);
5298         } else if (redir != NULL)
5299                 ip = list_append(redir, op);
5300         else
5301                 ip = list_create(op);
5302         op->into_var = (var != NULL);
5303         op->redir_type = (redir != NULL) ? redirtype : redirect_none;
5304
5305         return (asgn == NULL ? ip : list_append(ip, asgn));
5306 }
5307
5308
5309 /* mk_for_loop --- for loop bytecodes */
5310
5311 static INSTRUCTION *
5312 mk_for_loop(INSTRUCTION *forp, INSTRUCTION *init, INSTRUCTION *cond,
5313                                 INSTRUCTION *incr, INSTRUCTION *body)
5314 {
5315         /*
5316          *   ------------------------
5317          *        init                 (may be NULL)
5318          *   ------------------------
5319          * x:
5320          *        cond                 (Op_no_op if NULL)
5321          *   ------------------------
5322          *    [ Op_jmp_false tb      ]
5323          *   ------------------------
5324          *        body                 (may be NULL)
5325          *   ------------------------
5326          * tc: 
5327          *    incr                      (may be NULL)
5328          *    [ Op_jmp x             ] 
5329          *   ------------------------
5330          * tb:[ Op_no_op             ] 
5331          */
5332
5333         INSTRUCTION *ip, *tbreak, *tcont;
5334         INSTRUCTION *jmp;
5335         INSTRUCTION *pp_cond;
5336         INSTRUCTION *ret;
5337
5338         tbreak = instruction(Op_no_op);
5339
5340         if (cond != NULL) {
5341                 add_lint(cond, LINT_assign_in_cond);
5342                 pp_cond = cond->nexti;
5343                 ip = cond;
5344                 (void) list_append(ip, instruction(Op_jmp_false));
5345                 ip->lasti->target_jmp = tbreak;
5346         } else {
5347                 pp_cond = instruction(Op_no_op);
5348                 ip = list_create(pp_cond);
5349         }
5350
5351         if (init != NULL)
5352                 ip = list_merge(init, ip);
5353
5354         if (do_pretty_print) {
5355                 (void) list_append(ip, instruction(Op_exec_count));
5356                 (forp + 1)->forloop_cond = pp_cond;
5357                 (forp + 1)->forloop_body = ip->lasti;
5358         }
5359
5360         if (body != NULL)
5361                 (void) list_merge(ip, body);
5362
5363         jmp = instruction(Op_jmp);
5364         jmp->target_jmp = pp_cond;
5365         if (incr == NULL)
5366                 tcont = jmp;
5367         else {
5368                 tcont = incr->nexti;
5369                 (void) list_merge(ip, incr);
5370         }
5371
5372         (void) list_append(ip, jmp);
5373         ret = list_append(ip, tbreak);
5374         fix_break_continue(ret, tbreak, tcont);
5375
5376         if (do_pretty_print) {
5377                 forp->target_break = tbreak;
5378                 forp->target_continue = tcont;
5379                 ret = list_prepend(ret, forp);
5380         } /* else
5381                         forp is NULL */
5382
5383         return ret;
5384 }
5385
5386 /* add_lint --- add lint warning bytecode if needed */
5387
5388 static void
5389 add_lint(INSTRUCTION *list, LINTTYPE linttype)
5390 {
5391 #ifndef NO_LINT
5392         INSTRUCTION *ip;
5393
5394         switch (linttype) {
5395         case LINT_assign_in_cond:
5396                 ip = list->lasti;
5397                 if (ip->opcode == Op_var_assign || ip->opcode == Op_field_assign) {
5398                         assert(ip != list->nexti);
5399                         for (ip = list->nexti; ip->nexti != list->lasti; ip = ip->nexti)
5400                                 ;
5401                 }
5402
5403                 if (ip->opcode == Op_assign || ip->opcode == Op_assign_concat) {
5404                         list_append(list, instruction(Op_lint));
5405                         list->lasti->lint_type = linttype;
5406                 }
5407                 break;
5408
5409         case LINT_no_effect:
5410                 if (list->lasti->opcode == Op_pop && list->nexti != list->lasti) {
5411                         for (ip = list->nexti; ip->nexti != list->lasti; ip = ip->nexti)
5412                                 ;
5413
5414                         if (do_lint) {          /* compile-time warning */
5415                                 if (isnoeffect(ip->opcode))
5416                                         lintwarn_ln(ip->source_line, ("statement may have no effect"));
5417                         }
5418
5419                         if (ip->opcode == Op_push) {            /* run-time warning */
5420                                 list_append(list, instruction(Op_lint));
5421                                 list->lasti->lint_type = linttype;
5422                         }
5423                 }
5424                 break;
5425
5426         default:
5427                 break;
5428         }
5429 #endif
5430 }
5431
5432 /* mk_expression_list --- list of bytecode lists */
5433
5434 static INSTRUCTION *
5435 mk_expression_list(INSTRUCTION *list, INSTRUCTION *s1)
5436 {
5437         INSTRUCTION *r;
5438
5439         /* we can't just combine all bytecodes, since we need to
5440          * process individual expressions for a few builtins in snode() (-:
5441          */
5442         
5443         /* -- list of lists     */
5444         /* [Op_list| ... ]------
5445          *                       |
5446          * [Op_list| ... ]   --  |
5447          *  ...               |  |
5448          *  ...       <-------   |
5449          * [Op_list| ... ]   --  |
5450          *  ...               |  |
5451          *  ...               |  |
5452          *  ...       <------- --
5453          */
5454
5455         assert(s1 != NULL && s1->opcode == Op_list);
5456         if (list == NULL) {
5457                 list = instruction(Op_list);
5458                 list->nexti = s1;
5459                 list->lasti = s1->lasti;
5460                 return list;
5461         }
5462
5463         /* append expression to the end of the list */
5464
5465         r = list->lasti;
5466         r->nexti = s1;
5467         list->lasti = s1->lasti;
5468         return list;
5469 }
5470
5471 /* count_expressions --- fixup expression_list from mk_expression_list.
5472  *                       returns no of expressions in list. isarg is true
5473  *                       for function arguments.
5474  */
5475
5476 static int
5477 count_expressions(INSTRUCTION **list, bool isarg)
5478 {
5479         INSTRUCTION *expr;
5480         INSTRUCTION *r = NULL;
5481         int count = 0;
5482
5483         if (*list == NULL)      /* error earlier */
5484                 return 0;
5485
5486         for (expr = (*list)->nexti; expr; ) {
5487                 INSTRUCTION *t1, *t2;
5488                 t1 = expr->nexti;
5489                 t2 = expr->lasti;
5490                 if (isarg && t1 == t2 && t1->opcode == Op_push)
5491                         t1->opcode = Op_push_param;
5492                 if (++count == 1)
5493                         r = expr;
5494                 else
5495                         (void) list_merge(r, expr);
5496                 expr = t2->nexti;
5497         }
5498  
5499         assert(count > 0);
5500         if (! isarg && count > max_args)
5501                 max_args = count;
5502         bcfree(*list);
5503         *list = r;
5504         return count;
5505 }
5506
5507 /* fix_break_continue --- fix up break & continue codes in loop bodies */
5508
5509 static void
5510 fix_break_continue(INSTRUCTION *list, INSTRUCTION *b_target, INSTRUCTION *c_target)
5511 {
5512         INSTRUCTION *ip;
5513
5514         list->lasti->nexti = NULL;      /* just to make sure */
5515
5516         for (ip = list->nexti; ip != NULL; ip = ip->nexti) {
5517                 switch (ip->opcode) {
5518                 case Op_K_break:
5519                         if (ip->target_jmp == NULL)
5520                                 ip->target_jmp = b_target;
5521                         break;
5522
5523                 case Op_K_continue:
5524                         if (ip->target_jmp == NULL)
5525                                 ip->target_jmp = c_target;
5526                         break;
5527
5528                 default:
5529                         /* this is to keep the compiler happy. sheesh. */
5530                         break;
5531                 }
5532         }
5533 }
5534
5535 static inline INSTRUCTION *
5536 list_create(INSTRUCTION *x)
5537 {
5538         INSTRUCTION *l;
5539
5540         l = instruction(Op_list);
5541         l->nexti = x;
5542         l->lasti = x;
5543         return l;
5544 }
5545
5546 static inline INSTRUCTION *
5547 list_append(INSTRUCTION *l, INSTRUCTION *x)
5548 {
5549 #ifdef GAWKDEBUG
5550         if (l->opcode != Op_list)
5551                 cant_happen();
5552 #endif
5553         l->lasti->nexti = x;
5554         l->lasti = x;
5555         return l;
5556 }
5557
5558 static inline INSTRUCTION *
5559 list_prepend(INSTRUCTION *l, INSTRUCTION *x)
5560 {
5561 #ifdef GAWKDEBUG
5562         if (l->opcode != Op_list)
5563                 cant_happen();
5564 #endif
5565         x->nexti = l->nexti;
5566         l->nexti = x;
5567         return l;
5568 }
5569
5570 static inline INSTRUCTION *
5571 list_merge(INSTRUCTION *l1, INSTRUCTION *l2)
5572 {
5573 #ifdef GAWKDEBUG
5574         if (l1->opcode != Op_list)
5575                 cant_happen();
5576         if (l2->opcode != Op_list)
5577                 cant_happen();
5578 #endif
5579         l1->lasti->nexti = l2->nexti;
5580         l1->lasti = l2->lasti;
5581         bcfree(l2);
5582         return l1;
5583 }
5584
5585 /* See if name is a special token. */
5586
5587 int
5588 check_special(const char *name)
5589 {
5590         int low, high, mid;
5591         int i;
5592 #if 'a' == 0x81 /* it's EBCDIC */
5593         static bool did_sort = false;
5594
5595         if (! did_sort) {
5596                 qsort((void *) tokentab,
5597                                 sizeof(tokentab) / sizeof(tokentab[0]),
5598                                 sizeof(tokentab[0]), tokcompare);
5599                 did_sort = true;
5600         }
5601 #endif
5602
5603         low = 0;
5604         high = (sizeof(tokentab) / sizeof(tokentab[0])) - 1;
5605         while (low <= high) {
5606                 mid = (low + high) / 2;
5607                 i = *name - tokentab[mid].operator[0];
5608                 if (i == 0)
5609                         i = strcmp(name, tokentab[mid].operator);
5610
5611                 if (i < 0)              /* token < mid */
5612                         high = mid - 1;
5613                 else if (i > 0)         /* token > mid */
5614                         low = mid + 1;
5615                 else {
5616                         if ((do_traditional && (tokentab[mid].flags & GAWKX))
5617                                         || (do_posix && (tokentab[mid].flags & NOT_POSIX)))
5618                                 return -1;
5619                         return mid;
5620                 }
5621         }
5622         return -1;
5623 }
5624
5625 /*
5626  * This provides a private version of functions that act like VMS's
5627  * variable-length record filesystem, where there was a bug on
5628  * certain source files.
5629  */
5630
5631 static FILE *fp = NULL;
5632
5633 /* read_one_line --- return one input line at a time. mainly for debugging. */
5634
5635 static ssize_t
5636 read_one_line(int fd, void *buffer, size_t count)
5637 {
5638         char buf[BUFSIZ];
5639
5640         /* Minor potential memory leak here. Too bad. */
5641         if (fp == NULL) {
5642                 fp = fdopen(fd, "r");
5643                 if (fp == NULL) {
5644                         fprintf(stderr, "ugh. fdopen: %s\n", strerror(errno));
5645                         gawk_exit(EXIT_FAILURE);
5646                 }
5647         }
5648
5649         if (fgets(buf, sizeof buf, fp) == NULL)
5650                 return 0;
5651
5652         memcpy(buffer, buf, strlen(buf));
5653         return strlen(buf);
5654 }
5655
5656 /* one_line_close --- close the open file being read with read_one_line() */
5657
5658 static int
5659 one_line_close(int fd)
5660 {
5661         int ret;
5662
5663         if (fp == NULL || fd != fileno(fp))
5664                 fatal("debugging read/close screwed up!");
5665
5666         ret = fclose(fp);
5667         fp = NULL;
5668         return ret;
5669 }
5670
5671