Introduced -devel and -extras subpackages for gawk
[platform/upstream/gawk.git] / awkgram.y
1 /*
2  * awkgram.y --- yacc/bison parser
3  */
4
5 /* 
6  * Copyright (C) 1986, 1988, 1989, 1991-2012 the Free Software Foundation, Inc.
7  * 
8  * This file is part of GAWK, the GNU implementation of the
9  * AWK Programming Language.
10  * 
11  * GAWK is free software; you can redistribute it and/or modify
12  * it under the terms of the GNU General Public License as published by
13  * the Free Software Foundation; either version 3 of the License, or
14  * (at your option) any later version.
15  * 
16  * GAWK is distributed in the hope that it will be useful,
17  * but WITHOUT ANY WARRANTY; without even the implied warranty of
18  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
19  * GNU General Public License for more details.
20  * 
21  * You should have received a copy of the GNU General Public License
22  * along with this program; if not, write to the Free Software
23  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA
24  */
25
26 %{
27 #ifdef GAWKDEBUG
28 #define YYDEBUG 12
29 #endif
30
31 #include "awk.h"
32
33 #if defined(__STDC__) && __STDC__ < 1   /* VMS weirdness, maybe elsewhere */
34 #define signed /**/
35 #endif
36
37 static void yyerror(const char *m, ...) ATTRIBUTE_PRINTF_1;
38 static void error_ln(int line, const char *m, ...) ATTRIBUTE_PRINTF_2;
39 static void lintwarn_ln(int line, const char *m, ...) ATTRIBUTE_PRINTF_2;
40 static void warning_ln(int line, const char *m, ...) ATTRIBUTE_PRINTF_2;
41 static char *get_src_buf(void);
42 static int yylex(void);
43 int     yyparse(void); 
44 static INSTRUCTION *snode(INSTRUCTION *subn, INSTRUCTION *op);
45 static int func_install(INSTRUCTION *fp, INSTRUCTION *def);
46 static void pop_params(NODE *params);
47 static NODE *make_param(char *pname);
48 static NODE *mk_rexp(INSTRUCTION *exp);
49 static void append_param(char *pname);
50 static int dup_parms(INSTRUCTION *fp, NODE *func);
51 static void param_sanity(INSTRUCTION *arglist);
52 static int parms_shadow(INSTRUCTION *pc, int *shadow);
53 static int isnoeffect(OPCODE type);
54 static INSTRUCTION *make_assignable(INSTRUCTION *ip);
55 static void dumpintlstr(const char *str, size_t len);
56 static void dumpintlstr2(const char *str1, size_t len1, const char *str2, size_t len2);
57 static int isarray(NODE *n);
58 static int include_source(INSTRUCTION *file);
59 static void next_sourcefile(void);
60 static char *tokexpand(void);
61
62 #define instruction(t)  bcalloc(t, 1, 0)
63
64 static INSTRUCTION *mk_program(void);
65 static INSTRUCTION *append_rule(INSTRUCTION *pattern, INSTRUCTION *action);
66 static INSTRUCTION *mk_condition(INSTRUCTION *cond, INSTRUCTION *ifp, INSTRUCTION *true_branch,
67                 INSTRUCTION *elsep,     INSTRUCTION *false_branch);
68 static INSTRUCTION *mk_expression_list(INSTRUCTION *list, INSTRUCTION *s1);
69 static INSTRUCTION *mk_for_loop(INSTRUCTION *forp, INSTRUCTION *init, INSTRUCTION *cond,
70                 INSTRUCTION *incr, INSTRUCTION *body);
71 static void fix_break_continue(INSTRUCTION *list, INSTRUCTION *b_target, INSTRUCTION *c_target);
72 static INSTRUCTION *mk_binary(INSTRUCTION *s1, INSTRUCTION *s2, INSTRUCTION *op);
73 static INSTRUCTION *mk_boolean(INSTRUCTION *left, INSTRUCTION *right, INSTRUCTION *op);
74 static INSTRUCTION *mk_assignment(INSTRUCTION *lhs, INSTRUCTION *rhs, INSTRUCTION *op);
75 static INSTRUCTION *mk_getline(INSTRUCTION *op, INSTRUCTION *opt_var, INSTRUCTION *redir, int redirtype);
76 static NODE *make_regnode(int type, NODE *exp);
77 static int count_expressions(INSTRUCTION **list, int isarg);
78 static INSTRUCTION *optimize_assignment(INSTRUCTION *exp);
79 static void add_lint(INSTRUCTION *list, LINTTYPE linttype);
80
81 enum defref { FUNC_DEFINE, FUNC_USE };
82 static void func_use(const char *name, enum defref how);
83 static void check_funcs(void);
84 static void free_bcpool(INSTRUCTION *pl);
85
86 static ssize_t read_one_line(int fd, void *buffer, size_t count);
87 static int one_line_close(int fd);
88
89 static void (*install_func)(char *) = NULL;
90
91 static int want_source = FALSE;
92 static int want_regexp;         /* lexical scanning kludge */
93 static int can_return;          /* parsing kludge */
94 static int rule = 0;
95
96 const char *const ruletab[] = {
97         "?",
98         "BEGIN",
99         "Rule",
100         "END",
101         "BEGINFILE",
102         "ENDFILE",
103 };
104
105 static int in_print = FALSE;    /* lexical scanning kludge for print */
106 static int in_parens = 0;       /* lexical scanning kludge for print */
107 static int sub_counter = 0;     /* array dimension counter for use in delete */
108 static char *lexptr = NULL;             /* pointer to next char during parsing */
109 static char *lexend;
110 static char *lexptr_begin;      /* keep track of where we were for error msgs */
111 static char *lexeme;            /* beginning of lexeme for debugging */
112 static int lexeof;                      /* seen EOF for current source? */  
113 static char *thisline = NULL;
114 static int in_braces = 0;       /* count braces for firstline, lastline in an 'action' */
115 static int lastline = 0;
116 static int firstline = 0;
117 static SRCFILE *sourcefile = NULL;      /* current program source */
118 static int lasttok = 0;
119 static int eof_warned = FALSE;  /* GLOBAL: want warning for each file */
120 static int break_allowed;       /* kludge for break */
121 static int continue_allowed;    /* kludge for continue */
122
123
124 #define END_FILE        -1000
125 #define END_SRC         -2000
126
127 #define YYDEBUG_LEXER_TEXT (lexeme)
128 static int param_counter;
129 static NODE *func_params;       /* list of parameters for the current function */
130 static char *tokstart = NULL;
131 static char *tok = NULL;
132 static char *tokend;
133 static int errcount = 0;
134
135 static NODE *symbol_list;
136 extern void destroy_symbol(char *name); 
137
138 static long func_count;         /* total number of functions */
139
140 #define HASHSIZE        1021    /* this constant only used here */
141 NODE *variables[HASHSIZE];
142 static int var_count;           /* total number of global variables */
143
144 extern char *source;
145 extern int sourceline;
146 extern SRCFILE *srcfiles;
147 extern INSTRUCTION *rule_list;
148 extern int max_args;
149
150 static INSTRUCTION *rule_block[sizeof(ruletab)];
151
152 static INSTRUCTION *ip_rec;
153 static INSTRUCTION *ip_newfile;
154 static INSTRUCTION *ip_atexit = NULL;
155 static INSTRUCTION *ip_end;
156 static INSTRUCTION *ip_endfile;
157 static INSTRUCTION *ip_beginfile;
158
159 static inline INSTRUCTION *list_create(INSTRUCTION *x);
160 static inline INSTRUCTION *list_append(INSTRUCTION *l, INSTRUCTION *x);
161 static inline INSTRUCTION *list_prepend(INSTRUCTION *l, INSTRUCTION *x);
162 static inline INSTRUCTION *list_merge(INSTRUCTION *l1, INSTRUCTION *l2);
163
164 extern double fmod(double x, double y);
165 /*
166  * This string cannot occur as a real awk identifier.
167  * Use it as a special token to make function parsing
168  * uniform, but if it's seen, don't install the function.
169  * e.g.
170  *      function split(x) { return x }
171  *      function x(a) { return a }
172  * should only produce one error message, and not core dump.
173  */
174 static char builtin_func[] = "@builtin";
175
176 #define YYSTYPE INSTRUCTION *
177 %}
178
179 %token FUNC_CALL NAME REGEXP FILENAME
180 %token YNUMBER YSTRING
181 %token RELOP IO_OUT IO_IN
182 %token ASSIGNOP ASSIGN MATCHOP CONCAT_OP
183 %token SUBSCRIPT
184 %token LEX_BEGIN LEX_END LEX_IF LEX_ELSE LEX_RETURN LEX_DELETE
185 %token LEX_SWITCH LEX_CASE LEX_DEFAULT LEX_WHILE LEX_DO LEX_FOR LEX_BREAK LEX_CONTINUE
186 %token LEX_PRINT LEX_PRINTF LEX_NEXT LEX_EXIT LEX_FUNCTION
187 %token LEX_BEGINFILE LEX_ENDFILE 
188 %token LEX_GETLINE LEX_NEXTFILE
189 %token LEX_IN
190 %token LEX_AND LEX_OR INCREMENT DECREMENT
191 %token LEX_BUILTIN LEX_LENGTH
192 %token LEX_EOF
193 %token LEX_INCLUDE LEX_EVAL
194 %token NEWLINE
195
196 /* Lowest to highest */
197 %right ASSIGNOP ASSIGN SLASH_BEFORE_EQUAL
198 %right '?' ':'
199 %left LEX_OR
200 %left LEX_AND
201 %left LEX_GETLINE
202 %nonassoc LEX_IN
203 %left FUNC_CALL LEX_BUILTIN LEX_LENGTH
204 %nonassoc ','
205 %left MATCHOP
206 %nonassoc RELOP '<' '>' IO_IN IO_OUT
207 %left CONCAT_OP
208 %left YSTRING YNUMBER
209 %left '+' '-'
210 %left '*' '/' '%'
211 %right '!' UNARY
212 %right '^'
213 %left INCREMENT DECREMENT
214 %left '$'
215 %left '(' ')'
216 %%
217
218 program
219         : /* empty */
220         | program rule
221           {
222                 rule = 0;
223                 yyerrok;
224           }
225         | program nls
226         | program LEX_EOF
227           {
228                 next_sourcefile();
229           }
230         | program error
231           {
232                 rule = 0;
233                 /*
234                  * If errors, give up, don't produce an infinite
235                  * stream of syntax error messages.
236                  */
237                 /* yyerrok; */
238           }
239         ;
240
241 rule
242         : pattern action
243           {
244                 (void) append_rule($1, $2);
245           }
246         | pattern statement_term
247           {
248                 if (rule != Rule) {
249                         msg(_("%s blocks must have an action part"), ruletab[rule]);
250                         errcount++;
251                 } else if ($1 == NULL) {
252                         msg(_("each rule must have a pattern or an action part"));
253                         errcount++;
254                 } else          /* pattern rule with non-empty pattern */
255                         (void) append_rule($1, NULL);
256           }
257         | function_prologue action
258           {
259                 can_return = FALSE;
260                 if ($1 && func_install($1, $2) < 0)
261                         YYABORT;
262                 func_params = NULL;
263                 yyerrok;
264           }
265         | '@' LEX_INCLUDE source statement_term
266           {
267                 want_source = FALSE;
268                 yyerrok;
269           }
270         ;
271
272 source
273         : FILENAME
274           {
275                 if (include_source($1) < 0)
276                         YYABORT;
277                 efree($1->lextok);
278                 bcfree($1);
279                 $$ = NULL;
280           }
281         | FILENAME error
282           { $$ = NULL; }
283         | error
284           { $$ = NULL; }
285         ;
286
287 pattern
288         : /* empty */
289           {     $$ = NULL; rule = Rule; }
290         | exp
291           {     $$ = $1; rule = Rule; }
292         | exp ',' opt_nls exp
293           {
294                 INSTRUCTION *tp;
295
296                 add_lint($1, LINT_assign_in_cond);
297                 add_lint($4, LINT_assign_in_cond);
298
299                 tp = instruction(Op_no_op);
300                 list_prepend($1, bcalloc(Op_line_range, !!do_profiling + 1, 0));
301                 $1->nexti->triggered = FALSE;
302                 $1->nexti->target_jmp = $4->nexti;
303
304                 list_append($1, instruction(Op_cond_pair));
305                 $1->lasti->line_range = $1->nexti;
306                 $1->lasti->target_jmp = tp;
307
308                 list_append($4, instruction(Op_cond_pair));
309                 $4->lasti->line_range = $1->nexti;
310                 $4->lasti->target_jmp = tp;
311                 if (do_profiling) {
312                         ($1->nexti + 1)->condpair_left = $1->lasti;
313                         ($1->nexti + 1)->condpair_right = $4->lasti;
314                 }
315                 $$ = list_append(list_merge($1, $4), tp);
316                 rule = Rule;
317           }
318         | LEX_BEGIN
319           {
320                 static int begin_seen = 0;
321                 if (do_lint_old && ++begin_seen == 2)
322                         warning_ln($1->source_line,
323                                 _("old awk does not support multiple `BEGIN' or `END' rules"));
324
325                 $1->in_rule = rule = BEGIN;
326                 $1->source_file = source;
327                 $$ = $1;
328           }
329         | LEX_END
330           {
331                 static int end_seen = 0;
332                 if (do_lint_old && ++end_seen == 2)
333                         warning_ln($1->source_line,
334                                 _("old awk does not support multiple `BEGIN' or `END' rules"));
335
336                 $1->in_rule = rule = END;
337                 $1->source_file = source;
338                 $$ = $1;
339           }
340         | LEX_BEGINFILE
341           {
342                 $1->in_rule = rule = BEGINFILE;
343                 $1->source_file = source;
344                 $$ = $1;
345           }
346         | LEX_ENDFILE
347           {
348                 $1->in_rule = rule = ENDFILE;
349                 $1->source_file = source;
350                 $$ = $1;
351           }
352         ;
353
354 action
355         : l_brace statements r_brace opt_semi opt_nls
356           {
357                 if ($2 == NULL)
358                         $$ = list_create(instruction(Op_no_op));
359                 else
360                         $$ = $2;
361           }
362         ;
363
364 func_name
365         : NAME
366           { $$ = $1; }
367         | FUNC_CALL
368           { $$ = $1; }
369         | lex_builtin
370           {
371                 yyerror(_("`%s' is a built-in function, it cannot be redefined"),
372                         tokstart);
373                 $1->opcode = Op_symbol; /* Op_symbol instead of Op_token so that
374                                          * free_bc_internal does not try to free it
375                                          */
376                 $1->lextok = builtin_func;
377                 $$ = $1;
378                 /* yyerrok; */
379           }
380         | '@' LEX_EVAL
381           { $$ = $2; }
382         ;
383
384 lex_builtin
385         : LEX_BUILTIN
386         | LEX_LENGTH
387         ;
388                 
389 function_prologue
390         : LEX_FUNCTION
391           {
392                 param_counter = 0;
393                 func_params = NULL;
394           }
395                 func_name '(' opt_param_list r_paren opt_nls
396                 {
397                         NODE *t;
398
399                         $1->source_file = source;
400                         t = make_param($3->lextok);
401                         $3->lextok = NULL;
402                         bcfree($3);
403                         t->flags |= FUNC;
404                         t->rnode = func_params;
405                         func_params = t;
406                         $$ = $1;
407                         can_return = TRUE;
408                         /* check for duplicate parameter names */
409                         if (dup_parms($1, t))
410                                 errcount++;
411                 }
412         ;
413
414 regexp
415         /*
416          * In this rule, want_regexp tells yylex that the next thing
417          * is a regexp so it should read up to the closing slash.
418          */
419         : a_slash
420                 { ++want_regexp; }
421           REGEXP        /* The terminating '/' is consumed by yylex(). */
422                 {
423                   NODE *n, *exp;
424                   char *re;
425                   size_t len;
426
427                   re = $3->lextok;
428                   len = strlen(re);
429                   if (do_lint) {
430                         if (len == 0)
431                                 lintwarn_ln($3->source_line,
432                                         _("regexp constant `//' looks like a C++ comment, but is not"));
433                         else if ((re)[0] == '*' && (re)[len-1] == '*')
434                                 /* possible C comment */
435                                 lintwarn_ln($3->source_line,
436                                         _("regexp constant `/%s/' looks like a C comment, but is not"), re);
437                   }
438
439                   exp = make_str_node(re, len, ALREADY_MALLOCED);
440                   n = make_regnode(Node_regex, exp);
441                   if (n == NULL) {
442                         unref(exp);
443                         YYABORT;
444                   }
445                   $$ = $3;
446                   $$->opcode = Op_match_rec;
447                   $$->memory = n;
448                 }
449         ;
450
451 a_slash
452         : '/'
453           { bcfree($1); }
454         | SLASH_BEFORE_EQUAL
455         ;
456
457 statements
458         : /* empty */
459           {     $$ = NULL; }
460         | statements statement
461           {
462                 if ($2 == NULL)
463                         $$ = $1;
464                 else {
465                         add_lint($2, LINT_no_effect);
466                         if ($1 == NULL)
467                                 $$ = $2;
468                         else
469                                 $$ = list_merge($1, $2);
470                 }
471             yyerrok;
472           }
473         | statements error
474           {     $$ = NULL; }
475         ;
476
477 statement_term
478         : nls
479         | semi opt_nls
480         ;
481
482 statement
483         : semi opt_nls
484           { $$ = NULL; }
485         | l_brace statements r_brace
486           { $$ = $2; }
487         | if_statement
488           {
489                 if (do_profiling)
490                         $$ = list_prepend($1, instruction(Op_exec_count));
491                 else
492                         $$ = $1;
493           }
494         | LEX_SWITCH '(' exp r_paren opt_nls l_brace case_statements opt_nls r_brace
495           {
496                 INSTRUCTION *dflt, *curr = NULL, *cexp, *cstmt;
497                 INSTRUCTION *ip, *nextc, *tbreak;
498                 const char **case_values = NULL;
499                 int maxcount = 128;
500                 int case_count = 0;
501                 int i;
502
503                 tbreak = instruction(Op_no_op); 
504                 cstmt = list_create(tbreak);
505                 cexp = list_create(instruction(Op_pop));
506                 dflt = instruction(Op_jmp);
507                 dflt->target_jmp = tbreak;      /* if no case match and no explicit default */
508
509                 if ($7 != NULL) {
510                         curr = $7->nexti;
511                         bcfree($7);     /* Op_list */
512                 } /*  else
513                                 curr = NULL; */
514
515                 for(; curr != NULL; curr = nextc) {
516                         INSTRUCTION *caseexp = curr->case_exp;
517                         INSTRUCTION *casestmt = curr->case_stmt;
518
519                         nextc = curr->nexti;
520                         if (curr->opcode == Op_K_case) {
521                                 if (caseexp->opcode == Op_push_i) {
522                                         /* a constant scalar */
523                                         char *caseval;
524                                         caseval = force_string(caseexp->memory)->stptr;
525                                         for (i = 0; i < case_count; i++) {
526                                                 if (strcmp(caseval, case_values[i]) == 0)
527                                                         error_ln(curr->source_line,
528                                                                 _("duplicate case values in switch body: %s"), caseval);
529                                         }
530  
531                                         if (case_values == NULL)
532                                                 emalloc(case_values, const char **, sizeof(char *) * maxcount, "statement");
533                                         else if (case_count >= maxcount) {
534                                                 maxcount += 128;
535                                                 erealloc(case_values, const char **, sizeof(char*) * maxcount, "statement");
536                                         }
537                                         case_values[case_count++] = caseval;
538                                 } else {
539                                         /* match a constant regex against switch expression. */
540                                         (curr + 1)->match_exp = TRUE;
541                                 }
542                                 curr->stmt_start = casestmt->nexti;
543                                 curr->stmt_end  = casestmt->lasti;
544                                 (void) list_prepend(cexp, curr);
545                                 (void) list_prepend(cexp, caseexp);
546                         } else {
547                                 if (dflt->target_jmp != tbreak)
548                                         error_ln(curr->source_line,
549                                                 _("duplicate `default' detected in switch body"));
550                                 else
551                                         dflt->target_jmp = casestmt->nexti;
552
553                                 if (do_profiling) {
554                                         curr->stmt_start = casestmt->nexti;
555                                         curr->stmt_end = casestmt->lasti;
556                                         (void) list_prepend(cexp, curr);
557                                 } else
558                                         bcfree(curr);
559                         }
560
561                         cstmt = list_merge(casestmt, cstmt);
562                 }
563
564                 if (case_values != NULL)
565                         efree(case_values);
566
567                 ip = $3;
568                 if (do_profiling) {
569                         (void) list_prepend(ip, $1);
570                         (void) list_prepend(ip, instruction(Op_exec_count));
571                         $1->target_break = tbreak;
572                         ($1 + 1)->switch_start = cexp->nexti;
573                         ($1 + 1)->switch_end = cexp->lasti;
574                 }/* else
575                                 $1 is NULL */
576
577                 (void) list_append(cexp, dflt);
578                 (void) list_merge(ip, cexp);
579                 $$ = list_merge(ip, cstmt);
580
581                 break_allowed--;                        
582                 fix_break_continue(ip, tbreak, NULL);
583           }
584         | LEX_WHILE '(' exp r_paren opt_nls statement
585           { 
586                 /*
587                  *    -----------------
588                  * tc:
589                  *         cond
590                  *    -----------------
591                  *    [Op_jmp_false tb   ]
592                  *    -----------------   
593                  *         body
594                  *    -----------------
595                  *    [Op_jmp      tc    ]
596                  * tb:[Op_no_op          ]
597                  */
598
599                 INSTRUCTION *ip, *tbreak, *tcont;
600
601                 tbreak = instruction(Op_no_op);
602                 add_lint($3, LINT_assign_in_cond);
603                 tcont = $3->nexti;
604                 ip = list_append($3, instruction(Op_jmp_false));
605                 ip->lasti->target_jmp = tbreak;
606
607                 if (do_profiling) {
608                         (void) list_append(ip, instruction(Op_exec_count));
609                         $1->target_break = tbreak;
610                         $1->target_continue = tcont;
611                         ($1 + 1)->while_body = ip->lasti;
612                         (void) list_prepend(ip, $1);
613                 }/* else
614                                 $1 is NULL */
615
616                 if ($6 != NULL)
617                         (void) list_merge(ip, $6);
618                 (void) list_append(ip, instruction(Op_jmp));
619                 ip->lasti->target_jmp = tcont;
620                 $$ = list_append(ip, tbreak);
621
622                 break_allowed--;
623                 continue_allowed--;
624                 fix_break_continue(ip, tbreak, tcont);
625           }
626         | LEX_DO opt_nls statement LEX_WHILE '(' exp r_paren opt_nls
627           {
628                 /*
629                  *    -----------------
630                  * z:
631                  *         body
632                  *    -----------------
633                  * tc: 
634                  *         cond
635                  *    -----------------
636                  *    [Op_jmp_true | z  ]
637                  * tb:[Op_no_op         ]
638                  */
639
640                 INSTRUCTION *ip, *tbreak, *tcont;
641
642                 tbreak = instruction(Op_no_op);
643                 tcont = $6->nexti;
644                 add_lint($6, LINT_assign_in_cond);
645                 if ($3 != NULL)
646                         ip = list_merge($3, $6);
647                 else
648                         ip = list_prepend($6, instruction(Op_no_op));
649                 if (do_profiling)
650                         (void) list_prepend(ip, instruction(Op_exec_count));
651                 (void) list_append(ip, instruction(Op_jmp_true));
652                 ip->lasti->target_jmp = ip->nexti;
653                 $$ = list_append(ip, tbreak);
654
655                 break_allowed--;
656                 continue_allowed--;
657                 fix_break_continue(ip, tbreak, tcont);
658
659                 if (do_profiling) {
660                         $1->target_break = tbreak;
661                         $1->target_continue = tcont;
662                         ($1 + 1)->doloop_cond = tcont;
663                         $$ = list_prepend(ip, $1);
664                         bcfree($4);
665                 } /* else
666                                 $1 and $4 are NULLs */
667           }
668         | LEX_FOR '(' NAME LEX_IN simple_variable r_paren opt_nls statement
669           {
670                 INSTRUCTION *ip;
671                 char *var_name = $3->lextok;
672
673                 if ($8 != NULL
674                                 && $8->lasti->opcode == Op_K_delete
675                                 && $8->lasti->expr_count == 1
676                                 && $8->nexti->opcode == Op_push
677                                 && ($8->nexti->memory->type != Node_var || !($8->nexti->memory->var_update))
678                                 && strcmp($8->nexti->memory->vname, var_name) == 0
679                 ) {
680                 
681                 /* Efficiency hack.  Recognize the special case of
682                  *
683                  *      for (iggy in foo)
684                  *              delete foo[iggy]
685                  *
686                  * and treat it as if it were
687                  *
688                  *      delete foo
689                  *
690                  * Check that the body is a `delete a[i]' statement,
691                  * and that both the loop var and array names match.
692                  */              
693                         NODE *arr = NULL;
694
695                         ip = $8->nexti->nexti; 
696                         if ($5->nexti->opcode == Op_push && $5->lasti == $5->nexti)
697                                 arr = $5->nexti->memory;
698                         if (arr != NULL
699                                         && ip->opcode == Op_no_op
700                                         && ip->nexti->opcode == Op_push_array
701                                         && strcmp(ip->nexti->memory->vname, arr->vname) == 0
702                                         && ip->nexti->nexti == $8->lasti
703                         ) {
704                                 (void) make_assignable($8->nexti);
705                                 $8->lasti->opcode = Op_K_delete_loop;
706                                 $8->lasti->expr_count = 0;
707                                 if ($1 != NULL)
708                                         bcfree($1);
709                                 efree(var_name);
710                                 bcfree($3);
711                                 bcfree($4);
712                                 bcfree($5);
713                                 $$ = $8;
714                         } else
715                                 goto regular_loop;
716                 } else {
717                         INSTRUCTION *tbreak, *tcont;
718
719         /*    [ Op_push_array a       ]
720          *    [ Op_arrayfor_init | ib ]
721          * ic:[ Op_arrayfor_incr | ib ] 
722          *    [ Op_var_assign if any  ]
723          *
724          *              body
725          *
726          *    [Op_jmp | ic            ]
727          * ib:[Op_arrayfor_final      ]
728          */
729 regular_loop:
730                         ip = $5;
731                         ip->nexti->opcode = Op_push_array;
732
733                         tbreak = instruction(Op_arrayfor_final);
734                         $4->opcode = Op_arrayfor_incr;
735                         $4->array_var = variable(var_name, Node_var);
736                         $4->target_jmp = tbreak;
737                         tcont = $4;
738                         $3->opcode = Op_arrayfor_init;
739                         $3->target_jmp = tbreak;
740                         (void) list_append(ip, $3);
741
742                         if (do_profiling) {
743                                 $1->opcode = Op_K_arrayfor;
744                                 $1->target_continue = tcont;
745                                 $1->target_break = tbreak;
746                                 (void) list_append(ip, $1);
747                         } /* else
748                                         $1 is NULL */
749
750                         /* add update_FOO instruction if necessary */ 
751                         if ($4->array_var->type == Node_var && $4->array_var->var_update) {
752                                 (void) list_append(ip, instruction(Op_var_update));
753                                 ip->lasti->update_var = $4->array_var->var_update;
754                         }
755                         (void) list_append(ip, $4);
756
757                         /* add set_FOO instruction if necessary */
758                         if ($4->array_var->type == Node_var && $4->array_var->var_assign) {
759                                 (void) list_append(ip, instruction(Op_var_assign));
760                                 ip->lasti->assign_var = $4->array_var->var_assign;
761                         }
762
763                         if (do_profiling) {
764                                 (void) list_append(ip, instruction(Op_exec_count));
765                                 ($1 + 1)->forloop_cond = $4;
766                                 ($1 + 1)->forloop_body = ip->lasti; 
767                         }
768
769                         if ($8 != NULL)
770                                 (void) list_merge(ip, $8);
771
772                         (void) list_append(ip, instruction(Op_jmp));
773                         ip->lasti->target_jmp = $4;
774                         $$ = list_append(ip, tbreak);
775                         fix_break_continue(ip, tbreak, tcont);
776                 } 
777
778                 break_allowed--;
779                 continue_allowed--;
780           }
781         | LEX_FOR '(' opt_simple_stmt semi opt_nls exp semi opt_nls opt_simple_stmt r_paren opt_nls statement
782           {
783                 $$ = mk_for_loop($1, $3, $6, $9, $12);
784
785                 break_allowed--;
786                 continue_allowed--;
787           }
788         | LEX_FOR '(' opt_simple_stmt semi opt_nls semi opt_nls opt_simple_stmt r_paren opt_nls statement
789           {
790                 $$ = mk_for_loop($1, $3, (INSTRUCTION *) NULL, $8, $11);
791
792                 break_allowed--;
793                 continue_allowed--;
794           }
795         | non_compound_stmt
796           {
797                 if (do_profiling)
798                         $$ = list_prepend($1, instruction(Op_exec_count));
799                 else
800                         $$ = $1;
801           }
802         ;
803
804 non_compound_stmt
805         : LEX_BREAK statement_term
806           { 
807                 if (! break_allowed)
808                         error_ln($1->source_line,
809                                 _("`break' is not allowed outside a loop or switch"));
810                 $1->target_jmp = NULL;
811                 $$ = list_create($1);
812
813           }
814         | LEX_CONTINUE statement_term
815           {
816                 if (! continue_allowed)
817                         error_ln($1->source_line,
818                                 _("`continue' is not allowed outside a loop"));
819                 $1->target_jmp = NULL;
820                 $$ = list_create($1);
821
822           }
823         | LEX_NEXT statement_term
824           {
825                 /* if inside function (rule = 0), resolve context at run-time */
826                 if (rule && rule != Rule)
827                         error_ln($1->source_line,
828                                 _("`next' used in %s action"), ruletab[rule]);
829                 $1->target_jmp = ip_rec;
830                 $$ = list_create($1);
831           }
832         | LEX_NEXTFILE statement_term
833           {
834                 if (do_traditional)
835                         error_ln($1->source_line,
836                                 _("`nextfile' is a gawk extension"));
837
838                 /* if inside function (rule = 0), resolve context at run-time */
839                 if (rule == BEGIN || rule == END || rule == ENDFILE)
840                         error_ln($1->source_line,
841                                 _("`nextfile' used in %s action"), ruletab[rule]);
842
843                 $1->target_newfile = ip_newfile;
844                 $1->target_endfile = ip_endfile;
845                 $$ = list_create($1);
846           }
847         | LEX_EXIT opt_exp statement_term
848           {
849                 /* Initialize the two possible jump targets, the actual target
850                  * is resolved at run-time. 
851                  */
852                 $1->target_end = ip_end;        /* first instruction in end_block */
853                 $1->target_atexit = ip_atexit;  /* cleanup and go home */
854
855                 if ($2 == NULL) {
856                         $$ = list_create($1);
857                         (void) list_prepend($$, instruction(Op_push_i));
858                         $$->nexti->memory = Nnull_string;
859                 } else
860                         $$ = list_append($2, $1);
861           }
862         | LEX_RETURN
863           {
864                 if (! can_return)
865                         yyerror(_("`return' used outside function context"));
866           } opt_exp statement_term {
867                 if ($3 == NULL) {
868                         $$ = list_create($1);
869                         (void) list_prepend($$, instruction(Op_push_i));
870                         $$->nexti->memory = Nnull_string;
871                 } else
872                         $$ = list_append($3, $1);
873           }
874         | simple_stmt statement_term
875         ;
876
877         /*
878          * A simple_stmt exists to satisfy a constraint in the POSIX
879          * grammar allowing them to occur as the 1st and 3rd parts
880          * in a `for (...;...;...)' loop.  This is a historical oddity
881          * inherited from Unix awk, not at all documented in the AK&W
882          * awk book.  We support it, as this was reported as a bug.
883          * We don't bother to document it though. So there.
884          */
885 simple_stmt
886         : print { in_print = TRUE; in_parens = 0; } print_expression_list output_redir
887           {
888                 /*
889                  * Optimization: plain `print' has no expression list, so $3 is null.
890                  * If $3 is NULL or is a bytecode list for $0 use Op_K_print_rec,
891                  * which is faster for these two cases.
892                  */
893
894                 if ($1->opcode == Op_K_print &&
895                                 ($3 == NULL
896                                         || ($3->lasti->opcode == Op_field_spec
897                                                 && $3->nexti->nexti->nexti == $3->lasti
898                                                 && $3->nexti->nexti->opcode == Op_push_i
899                                                 && $3->nexti->nexti->memory->type == Node_val
900                                                 && $3->nexti->nexti->memory->numbr == 0.0)
901                                 )
902                 ) {
903                         static short warned = FALSE;
904                         /*   -----------------
905                          *      output_redir
906                          *    [ redirect exp ]
907                          *   -----------------
908                          *     expression_list
909                          *   ------------------
910                          *    [Op_K_print_rec | NULL | redir_type | expr_count]
911                          */
912
913                         if ($3 != NULL) {
914                                 bcfree($3->lasti);                              /* Op_field_spec */
915                                 $3->nexti->nexti->memory->flags &= ~PERM;
916                                 $3->nexti->nexti->memory->flags |= MALLOC;                      
917                                 unref($3->nexti->nexti->memory);        /* Node_val */
918                                 bcfree($3->nexti->nexti);               /* Op_push_i */
919                                 bcfree($3->nexti);                              /* Op_list */
920                                 bcfree($3);                                             /* Op_list */
921                         } else {
922                                 if (do_lint && (rule == BEGIN || rule == END) && ! warned) {
923                                         warned = TRUE;
924                                         lintwarn_ln($1->source_line,
925                 _("plain `print' in BEGIN or END rule should probably be `print \"\"'"));
926                                 }
927                         }
928
929                         $1->expr_count = 0;
930                         $1->opcode = Op_K_print_rec;
931                         if ($4 == NULL) {    /* no redircetion */
932                                 $1->redir_type = 0;
933                                 $$ = list_create($1);
934                         } else {
935                                 INSTRUCTION *ip;
936                                 ip = $4->nexti;
937                                 $1->redir_type = ip->redir_type;
938                                 $4->nexti = ip->nexti;
939                                 bcfree(ip);
940                                 $$ = list_append($4, $1);
941                         }
942                 } else {
943                         /*   -----------------
944                          *    [ output_redir    ]
945                          *    [ redirect exp    ]
946                          *   -----------------
947                          *    [ expression_list ]
948                          *   ------------------
949                          *    [$1 | NULL | redir_type | expr_count]
950                          *
951                          */
952                          
953                         if ($4 == NULL) {               /* no redirection */
954                                 if ($3 == NULL) {       /* printf without arg */
955                                         $1->expr_count = 0;
956                                         $1->redir_type = 0;
957                                         $$ = list_create($1);
958                                 } else {
959                                         INSTRUCTION *t = $3;
960                                         $1->expr_count = count_expressions(&t, FALSE);
961                                         $1->redir_type = 0;
962                                         $$ = list_append(t, $1);
963                                 }
964                         } else {
965                                 INSTRUCTION *ip;
966                                 ip = $4->nexti;
967                                 $1->redir_type = ip->redir_type;
968                                 $4->nexti = ip->nexti;
969                                 bcfree(ip);
970                                 if ($3 == NULL) {
971                                         $1->expr_count = 0;
972                                         $$ = list_append($4, $1);
973                                 } else {
974                                         INSTRUCTION *t = $3;
975                                         $1->expr_count = count_expressions(&t, FALSE);
976                                         $$ = list_append(list_merge($4, t), $1);
977                                 }
978                         }
979                 }
980           }
981
982         | LEX_DELETE NAME { sub_counter = 0; } delete_subscript_list
983           {
984                 char *arr = $2->lextok;
985
986                 $2->opcode = Op_push_array;
987                 $2->memory = variable(arr, Node_var_new);
988
989                 if ($4 == NULL) {
990                         static short warned = FALSE;
991
992                         if (do_lint && ! warned) {
993                                 warned = TRUE;
994                                 lintwarn_ln($1->source_line,
995                                         _("`delete array' is a gawk extension"));
996                         }
997                         if (do_traditional)
998                                 error_ln($1->source_line,
999                                         _("`delete array' is a gawk extension"));
1000                         $1->expr_count = 0;
1001                         $$ = list_append(list_create($2), $1);
1002                 } else {
1003                         $1->expr_count = sub_counter;
1004                         $$ = list_append(list_append($4, $2), $1);
1005                 }
1006           }     
1007         | LEX_DELETE '(' NAME ')'
1008                   /*
1009                    * this is for tawk compatibility. maybe the warnings
1010                    * should always be done.
1011                    */
1012           {
1013                 static short warned = FALSE;
1014                 char *arr = $3->lextok;
1015
1016                 if (do_lint && ! warned) {
1017                         warned = TRUE;
1018                         lintwarn_ln($1->source_line,
1019                                 _("`delete(array)' is a non-portable tawk extension"));
1020                 }
1021                 if (do_traditional) {
1022                         error_ln($1->source_line,
1023                                 _("`delete array' is a gawk extension"));
1024                 }
1025                 $3->memory = variable(arr, Node_var_new);
1026                 $3->opcode = Op_push_array;
1027                 $1->expr_count = 0;
1028                 $$ = list_append(list_create($3), $1);
1029           }
1030         | exp
1031           {     $$ = optimize_assignment($1); }
1032         ;
1033
1034 opt_simple_stmt
1035         : /* empty */
1036           { $$ = NULL; }
1037         | simple_stmt
1038           { $$ = $1; }
1039         ;
1040
1041 case_statements
1042         : /* empty */
1043           { $$ = NULL; }
1044         | case_statements case_statement
1045           {
1046                 if ($1 == NULL)
1047                         $$ = list_create($2);
1048                 else
1049                         $$ = list_prepend($1, $2);
1050           }
1051         | case_statements error
1052           { $$ = NULL; }
1053         ;
1054
1055 case_statement
1056         : LEX_CASE case_value colon opt_nls statements
1057           {
1058                 INSTRUCTION *casestmt = $5;
1059                 if ($5 == NULL)
1060                         casestmt = list_create(instruction(Op_no_op));  
1061                 if (do_profiling)
1062                         (void) list_prepend(casestmt, instruction(Op_exec_count));
1063                 $1->case_exp = $2;
1064                 $1->case_stmt = casestmt;
1065                 bcfree($3);
1066                 $$ = $1;
1067           }
1068         | LEX_DEFAULT colon opt_nls statements
1069           {
1070                 INSTRUCTION *casestmt = $4;
1071                 if ($4 == NULL)
1072                         casestmt = list_create(instruction(Op_no_op));
1073                 if (do_profiling)
1074                         (void) list_prepend(casestmt, instruction(Op_exec_count));
1075                 bcfree($2);
1076                 $1->case_stmt = casestmt;
1077                 $$ = $1;
1078           }
1079         ;
1080
1081 case_value
1082         : YNUMBER
1083           {     $$ = $1; }
1084         | '-' YNUMBER    %prec UNARY
1085           { 
1086                 $2->memory->numbr = -(force_number($2->memory));
1087                 bcfree($1);
1088                 $$ = $2;
1089           }
1090         | '+' YNUMBER    %prec UNARY
1091           {
1092                 bcfree($1);
1093                 $$ = $2;
1094           }
1095         | YSTRING 
1096           {     $$ = $1; }
1097         | regexp  
1098           {
1099                 $1->opcode = Op_push_re;
1100                 $$ = $1;
1101           }
1102         ;
1103
1104 print
1105         : LEX_PRINT
1106           { $$ = $1; }
1107         | LEX_PRINTF
1108           { $$ = $1; }
1109         ;
1110
1111         /*
1112          * Note: ``print(x)'' is already parsed by the first rule,
1113          * so there is no good in covering it by the second one too.
1114          */
1115 print_expression_list
1116         : opt_expression_list
1117         | '(' expression_list r_paren
1118           {
1119                 $$ = $2;
1120           }
1121         ;
1122
1123 output_redir
1124         : /* empty */
1125           {
1126                 in_print = FALSE;
1127                 in_parens = 0;
1128                 $$ = NULL;
1129           }
1130         | IO_OUT { in_print = FALSE; in_parens = 0; } common_exp
1131           {
1132                 if ($1->redir_type == redirect_twoway
1133                         && $3->lasti->opcode == Op_K_getline_redir
1134                                 && $3->lasti->redir_type == redirect_twoway)
1135                         yyerror(_("multistage two-way pipelines don't work"));
1136                 $$ = list_prepend($3, $1);
1137           }
1138         ;
1139
1140 if_statement
1141         : LEX_IF '(' exp r_paren opt_nls statement
1142           {
1143                 $$ = mk_condition($3, $1, $6, NULL, NULL);
1144           }
1145         | LEX_IF '(' exp r_paren opt_nls statement
1146              LEX_ELSE opt_nls statement
1147           {
1148                 $$ = mk_condition($3, $1, $6, $7, $9);
1149           }
1150         ;
1151
1152 nls
1153         : NEWLINE
1154         | nls NEWLINE
1155         ;
1156
1157 opt_nls
1158         : /* empty */
1159         | nls
1160         ;
1161
1162 input_redir
1163         : /* empty */
1164           { $$ = NULL; }
1165         | '<' simp_exp
1166           {
1167                 bcfree($1);
1168                 $$ = $2;
1169           }
1170         ;
1171
1172 opt_param_list
1173         : /* empty */
1174         | param_list
1175         ;
1176
1177 param_list
1178         : NAME
1179           {
1180                 append_param($1->lextok);
1181                 $1->lextok = NULL;
1182                 bcfree($1);
1183           }
1184         | param_list comma NAME
1185           {
1186                 append_param($3->lextok);
1187                 $3->lextok = NULL;
1188                 bcfree($3);
1189                 yyerrok;
1190           }
1191         | error
1192           { /* func_params = NULL; */ }
1193         | param_list error
1194           { /* func_params = NULL; */ }
1195         | param_list comma error
1196           { /* func_params = NULL; */ }
1197         ;
1198
1199 /* optional expression, as in for loop */
1200 opt_exp
1201         : /* empty */
1202           { $$ = NULL; }
1203         | exp
1204           { $$ = $1; }
1205         ;
1206
1207 opt_expression_list
1208         : /* empty */
1209           { $$ = NULL; }
1210         | expression_list
1211           { $$ = $1; }
1212         ;
1213
1214 expression_list
1215         : exp
1216           {     $$ = mk_expression_list(NULL, $1); }
1217         | expression_list comma exp
1218           {
1219                 $$ = mk_expression_list($1, $3);
1220                 yyerrok;
1221           }
1222         | error
1223           { $$ = NULL; }
1224         | expression_list error
1225           { $$ = NULL; }
1226         | expression_list error exp
1227           { $$ = NULL; }
1228         | expression_list comma error
1229           { $$ = NULL; }
1230         ;
1231
1232 /* Expressions, not including the comma operator.  */
1233 exp
1234         : variable assign_operator exp %prec ASSIGNOP
1235           {
1236                 if (do_lint && $3->lasti->opcode == Op_match_rec)
1237                         lintwarn_ln($2->source_line,
1238                                 _("regular expression on right of assignment"));
1239                 $$ = mk_assignment($1, $3, $2);
1240           }
1241         | exp LEX_AND exp
1242           {     $$ = mk_boolean($1, $3, $2); }
1243         | exp LEX_OR exp
1244           {     $$ = mk_boolean($1, $3, $2); }
1245         | exp MATCHOP exp
1246           {
1247                 if ($1->lasti->opcode == Op_match_rec)
1248                         warning_ln($2->source_line,
1249                                 _("regular expression on left of `~' or `!~' operator"));
1250
1251                 if ($3->lasti == $3->nexti && $3->nexti->opcode == Op_match_rec) {
1252                         $2->memory = $3->nexti->memory;
1253                         bcfree($3->nexti);      /* Op_match_rec */
1254                         bcfree($3);                     /* Op_list */
1255                         $$ = list_append($1, $2);
1256                 } else {
1257                         $2->memory = make_regnode(Node_dynregex, NULL);
1258                         $$ = list_append(list_merge($1, $3), $2);
1259                 }
1260           }
1261         | exp LEX_IN simple_variable
1262           {
1263                 if (do_lint_old)
1264                   warning_ln($2->source_line,
1265                                 _("old awk does not support the keyword `in' except after `for'"));
1266                 $3->nexti->opcode = Op_push_array;
1267                 $2->opcode = Op_in_array;
1268                 $2->expr_count = 1;
1269                 $$ = list_append(list_merge($1, $3), $2);
1270           }
1271         | exp a_relop exp %prec RELOP
1272           {
1273                 if (do_lint && $3->lasti->opcode == Op_match_rec)
1274                         lintwarn_ln($2->source_line,
1275                                 _("regular expression on right of comparison"));
1276                 $$ = list_append(list_merge($1, $3), $2);
1277           }
1278         | exp '?' exp ':' exp
1279           { $$ = mk_condition($1, $2, $3, $4, $5); }
1280         | common_exp
1281           { $$ = $1; }
1282         ;
1283
1284 assign_operator
1285         : ASSIGN
1286           { $$ = $1; }
1287         | ASSIGNOP
1288           { $$ = $1; }
1289         | SLASH_BEFORE_EQUAL ASSIGN   /* `/=' */
1290           {     
1291                 $2->opcode = Op_assign_quotient;
1292                 $$ = $2;
1293           }
1294         ;
1295
1296 relop_or_less
1297         : RELOP
1298           { $$ = $1; }
1299         | '<'
1300           { $$ = $1; }
1301         ;
1302
1303 a_relop
1304         : relop_or_less
1305           { $$ = $1; }
1306         | '>'
1307           { $$ = $1; }
1308         ;
1309
1310 common_exp
1311         : simp_exp
1312           { $$ = $1; }
1313         | simp_exp_nc
1314           { $$ = $1; }
1315         | common_exp simp_exp %prec CONCAT_OP
1316           {
1317                 int count = 2;
1318                 int is_simple_var = FALSE;
1319
1320                 if ($1->lasti->opcode == Op_concat) {
1321                         /* multiple (> 2) adjacent strings optimization */
1322                         is_simple_var = ($1->lasti->concat_flag & CSVAR);
1323                         count = $1->lasti->expr_count + 1;
1324                         $1->lasti->opcode = Op_no_op;
1325                 } else {
1326                         is_simple_var = ($1->nexti->opcode == Op_push
1327                                                 && $1->lasti == $1->nexti); /* first exp. is a simple
1328                                                                              * variable?; kludge for use
1329                                                                              * in Op_assign_concat.
1330                                                                              */
1331                 }
1332
1333                 if (do_optimize > 1
1334                                 && $1->nexti == $1->lasti && $1->nexti->opcode == Op_push_i
1335                                 && $2->nexti == $2->lasti && $2->nexti->opcode == Op_push_i
1336                 ) {
1337                         NODE *n1 = $1->nexti->memory;
1338                         NODE *n2 = $2->nexti->memory;
1339                         size_t nlen;
1340
1341                         (void) force_string(n1);
1342                         (void) force_string(n2);
1343                         nlen = n1->stlen + n2->stlen;
1344                         erealloc(n1->stptr, char *, nlen + 2, "constant fold");
1345                         memcpy(n1->stptr + n1->stlen, n2->stptr, n2->stlen);
1346                         n1->stlen = nlen;
1347                         n1->stptr[nlen] = '\0';
1348                         n1->flags &= ~(NUMCUR|NUMBER);
1349                         n1->flags |= (STRING|STRCUR);
1350
1351                         n2->flags &= ~PERM;
1352                         n2->flags |= MALLOC;
1353                         unref(n2);
1354                         bcfree($2->nexti);
1355                         bcfree($2);
1356                         $$ = $1;
1357                 } else {
1358                         $$ = list_append(list_merge($1, $2), instruction(Op_concat));
1359                         $$->lasti->concat_flag = (is_simple_var ? CSVAR : 0);
1360                         $$->lasti->expr_count = count;
1361                         if (count > max_args)
1362                                 max_args = count;
1363                 }
1364           }
1365         ;
1366
1367 simp_exp
1368         : non_post_simp_exp
1369         /* Binary operators in order of decreasing precedence.  */
1370         | simp_exp '^' simp_exp
1371           { $$ = mk_binary($1, $3, $2); }
1372         | simp_exp '*' simp_exp
1373           { $$ = mk_binary($1, $3, $2); }
1374         | simp_exp '/' simp_exp
1375           { $$ = mk_binary($1, $3, $2); }
1376         | simp_exp '%' simp_exp
1377           { $$ = mk_binary($1, $3, $2); }
1378         | simp_exp '+' simp_exp
1379           { $$ = mk_binary($1, $3, $2); }
1380         | simp_exp '-' simp_exp
1381           { $$ = mk_binary($1, $3, $2); }
1382         | LEX_GETLINE opt_variable input_redir
1383           {
1384                 /*
1385                  * In BEGINFILE/ENDFILE, allow `getline var < file'
1386                  */
1387
1388                 if (rule == BEGINFILE || rule == ENDFILE) {
1389                         if ($2 != NULL && $3 != NULL)
1390                                 ;        /* all  ok */
1391                         else {
1392                                 if ($2 != NULL)
1393                                         error_ln($1->source_line,
1394                                                 _("`getline var' invalid inside `%s' rule"), ruletab[rule]);
1395                                 else
1396                                         error_ln($1->source_line,
1397                                                 _("`getline' invalid inside `%s' rule"), ruletab[rule]);
1398                         }
1399                 }
1400                 if (do_lint && rule == END && $3 == NULL)
1401                         lintwarn_ln($1->source_line,
1402                                 _("non-redirected `getline' undefined inside END action"));
1403                 $$ = mk_getline($1, $2, $3, redirect_input);
1404           }
1405         | variable INCREMENT
1406           {
1407                 $2->opcode = Op_postincrement;
1408                 $$ = mk_assignment($1, NULL, $2);
1409           }
1410         | variable DECREMENT
1411           {
1412                 $2->opcode = Op_postdecrement;
1413                 $$ = mk_assignment($1, NULL, $2);
1414           }
1415         | '(' expression_list r_paren LEX_IN simple_variable
1416           {
1417                 if (do_lint_old) {
1418                     warning_ln($4->source_line,
1419                                 _("old awk does not support the keyword `in' except after `for'"));
1420                     warning_ln($4->source_line,
1421                                 _("old awk does not support multidimensional arrays"));
1422                 }
1423                 $5->nexti->opcode = Op_push_array;
1424                 $4->opcode = Op_in_array;
1425                 if ($2 == NULL) {       /* error */
1426                         errcount++;
1427                         $4->expr_count = 0;
1428                         $$ = list_merge($5, $4);
1429                 } else {
1430                         INSTRUCTION *t = $2;
1431                         $4->expr_count = count_expressions(&t, FALSE);
1432                         $$ = list_append(list_merge(t, $5), $4);
1433                 }
1434           }
1435         ;
1436
1437 /* Expressions containing "| getline" lose the ability to be on the
1438    right-hand side of a concatenation. */
1439 simp_exp_nc
1440         : common_exp IO_IN LEX_GETLINE opt_variable
1441                 {
1442                   $$ = mk_getline($3, $4, $1, $2->redir_type);
1443                   bcfree($2);
1444                 }
1445         /* Binary operators in order of decreasing precedence.  */
1446         | simp_exp_nc '^' simp_exp
1447           { $$ = mk_binary($1, $3, $2); }
1448         | simp_exp_nc '*' simp_exp
1449           { $$ = mk_binary($1, $3, $2); }
1450         | simp_exp_nc '/' simp_exp
1451           { $$ = mk_binary($1, $3, $2); }
1452         | simp_exp_nc '%' simp_exp
1453           { $$ = mk_binary($1, $3, $2); }
1454         | simp_exp_nc '+' simp_exp
1455           { $$ = mk_binary($1, $3, $2); }
1456         | simp_exp_nc '-' simp_exp
1457           { $$ = mk_binary($1, $3, $2); }
1458         ;
1459
1460 non_post_simp_exp
1461         : regexp
1462           {
1463                 $$ = list_create($1);
1464           }
1465         | '!' simp_exp %prec UNARY
1466           {
1467                 if ($2->opcode == Op_match_rec) {
1468                         $2->opcode = Op_nomatch;
1469                         $1->opcode = Op_push_i;
1470                         $1->memory = mk_number(0.0, (PERM|NUMCUR|NUMBER));      
1471                         $$ = list_append(list_append(list_create($1),
1472                                                                 instruction(Op_field_spec)), $2);
1473                 } else {
1474                         if (do_optimize > 1 && $2->nexti == $2->lasti
1475                                                         && $2->nexti->opcode == Op_push_i
1476                         ) {
1477                                 NODE *n = $2->nexti->memory;
1478                                 if ((n->flags & (STRCUR|STRING)) != 0) {
1479                                         n->numbr = (AWKNUM) (n->stlen == 0);
1480                                         n->flags &= ~(STRCUR|STRING);
1481                                         n->flags |= (NUMCUR|NUMBER);
1482                                         efree(n->stptr);
1483                                         n->stptr = NULL;
1484                                         n->stlen = 0;
1485                                 } else
1486                                         n->numbr = (AWKNUM) (n->numbr == 0.0);
1487                                 bcfree($1);
1488                                 $$ = $2;
1489                         } else {
1490                                 $1->opcode = Op_not;
1491                                 add_lint($2, LINT_assign_in_cond);
1492                                 $$ = list_append($2, $1);
1493                         }
1494                 }
1495            }
1496         | '(' exp r_paren
1497           { $$ = $2; }
1498         | LEX_BUILTIN '(' opt_expression_list r_paren
1499           {
1500                 $$ = snode($3, $1);
1501                 if ($$ == NULL)
1502                         YYABORT;
1503           }
1504         | LEX_LENGTH '(' opt_expression_list r_paren
1505           {
1506                 $$ = snode($3, $1);
1507                 if ($$ == NULL)
1508                         YYABORT;
1509           }
1510         | LEX_LENGTH
1511           {
1512                 static short warned1 = FALSE;
1513
1514                 if (do_lint && ! warned1) {
1515                         warned1 = TRUE;
1516                         lintwarn_ln($1->source_line,
1517                                 _("call of `length' without parentheses is not portable"));
1518                 }
1519                 $$ = snode(NULL, $1);
1520                 if ($$ == NULL)
1521                         YYABORT;
1522           }
1523         | func_call
1524         | variable
1525         | INCREMENT variable
1526           {
1527                 $1->opcode = Op_preincrement;
1528                 $$ = mk_assignment($2, NULL, $1);
1529           }
1530         | DECREMENT variable
1531           {
1532                 $1->opcode = Op_predecrement;
1533                 $$ = mk_assignment($2, NULL, $1);
1534           }
1535         | YNUMBER
1536           {
1537                 $$ = list_create($1);
1538           }
1539         | YSTRING
1540           {
1541                 $$ = list_create($1);
1542           }
1543         | '-' simp_exp    %prec UNARY
1544           {
1545                 if ($2->lasti->opcode == Op_push_i
1546                                 && ($2->lasti->memory->flags & (STRCUR|STRING)) == 0) {
1547                         $2->lasti->memory->numbr = -(force_number($2->lasti->memory));
1548                         $$ = $2;
1549                         bcfree($1);
1550                 } else {
1551                         $1->opcode = Op_unary_minus;
1552                         $$ = list_append($2, $1);
1553                 }
1554           }
1555         | '+' simp_exp    %prec UNARY
1556           {
1557             /*
1558              * was: $$ = $2
1559              * POSIX semantics: force a conversion to numeric type
1560              */
1561                 $1->opcode = Op_plus_i;
1562                 $1->memory = mk_number((AWKNUM) 0.0, (PERM|NUMCUR|NUMBER));
1563                 $$ = list_append($2, $1);
1564           }
1565         ;
1566
1567 func_call
1568         : direct_func_call
1569           {
1570                 func_use($1->lasti->func_name, FUNC_USE);
1571                 $$ = $1;
1572           }
1573         | '@' direct_func_call
1574           {
1575                 /* indirect function call */
1576                 INSTRUCTION *f, *t;
1577                 char *name;
1578                 NODE *indirect_var;
1579                 static short warned = FALSE;
1580                 const char *msg = _("indirect function calls are a gawk extension");
1581
1582                 if (do_traditional || do_posix)
1583                         yyerror("%s", msg);
1584                 else if (do_lint && ! warned) {
1585                         warned = TRUE;
1586                         lintwarn("%s", msg);
1587                 }
1588                 
1589                 f = $2->lasti;
1590                 f->opcode = Op_indirect_func_call;
1591                 name = estrdup(f->func_name, strlen(f->func_name));
1592                 if (is_std_var(name))
1593                         yyerror(_("can not use special variable `%s' for indirect function call"), name);
1594                 indirect_var = variable(name, Node_var_new);
1595                 t = instruction(Op_push);
1596                 t->memory = indirect_var;
1597
1598                 /* prepend indirect var instead of appending to arguments (opt_expression_list),
1599                  * and pop it off in setup_frame (eval.c) (left to right evaluation order); Test case:
1600                  *              f = "fun"
1601                  *              @f(f="real_fun")
1602                  */
1603
1604                 $$ = list_prepend($2, t);
1605           }
1606         ;
1607
1608 direct_func_call
1609         : FUNC_CALL '(' opt_expression_list r_paren
1610           {
1611                 param_sanity($3);
1612                 $1->opcode = Op_func_call;
1613                 $1->func_body = NULL;
1614                 if ($3 == NULL) {       /* no argument or error */
1615                         ($1 + 1)->expr_count = 0;
1616                         $$ = list_create($1);
1617                 } else {
1618                         INSTRUCTION *t = $3;
1619                         ($1 + 1)->expr_count = count_expressions(&t, TRUE); 
1620                         $$ = list_append(t, $1);
1621                 }
1622           }
1623         ;
1624
1625 opt_variable
1626         : /* empty */
1627           { $$ = NULL; }
1628         | variable
1629           { $$ = $1; }
1630         ;
1631
1632 delete_subscript_list
1633         : /* empty */
1634           { $$ = NULL; }
1635         | delete_subscript SUBSCRIPT
1636           { $$ = $1; }
1637         ;
1638
1639 delete_subscript
1640         : delete_exp_list
1641           {     $$ = $1; }
1642         | delete_subscript delete_exp_list
1643           {
1644                 $$ = list_merge($1, $2);
1645           }
1646         ;
1647
1648 delete_exp_list
1649         : bracketed_exp_list
1650           {
1651                 INSTRUCTION *ip = $1->lasti; 
1652                 int count = ip->sub_count;      /* # of SUBSEP-seperated expressions */
1653                 if (count > 1) {
1654                         /* change Op_subscript or Op_sub_array to Op_concat */
1655                         ip->opcode = Op_concat;
1656                         ip->concat_flag = CSUBSEP;
1657                         ip->expr_count = count;
1658                 } else
1659                         ip->opcode = Op_no_op;
1660                 sub_counter++;  /* count # of dimensions */
1661                 $$ = $1;
1662           }
1663         ;
1664
1665 bracketed_exp_list
1666         : '[' expression_list ']'
1667           {
1668                 INSTRUCTION *t = $2;
1669                 if ($2 == NULL) {
1670                         error_ln($3->source_line,
1671                                 _("invalid subscript expression"));
1672                         /* install Null string as subscript. */
1673                         t = list_create(instruction(Op_push_i));
1674                         t->nexti->memory = Nnull_string;
1675                         $3->sub_count = 1;                      
1676                 } else
1677                         $3->sub_count = count_expressions(&t, FALSE);
1678                 $$ = list_append(t, $3);
1679           }
1680         ;
1681
1682 subscript
1683         : bracketed_exp_list
1684           {     $$ = $1; }
1685         | subscript bracketed_exp_list
1686           {
1687                 $$ = list_merge($1, $2);
1688           }
1689         ;
1690
1691 subscript_list
1692         : subscript SUBSCRIPT
1693           { $$ = $1; }
1694         ;
1695
1696 simple_variable
1697         : NAME
1698           {
1699                 char *var_name = $1->lextok;
1700
1701                 $1->opcode = Op_push;
1702                 $1->memory = variable(var_name, Node_var_new);
1703                 $$ = list_create($1);
1704           }
1705         | NAME subscript_list
1706           {
1707                 NODE *n;
1708
1709                 char *arr = $1->lextok;
1710                 if ((n = lookup(arr)) != NULL && ! isarray(n))
1711                         yyerror(_("use of non-array as array"));
1712                 $1->memory = variable(arr, Node_var_new);
1713                 $1->opcode = Op_push_array;
1714                 $$ = list_prepend($2, $1);
1715           }
1716         ;
1717
1718 variable
1719         : simple_variable
1720           {
1721                 INSTRUCTION *ip = $1->nexti;
1722                 if (ip->opcode == Op_push
1723                                 && ip->memory->type == Node_var
1724                                 && ip->memory->var_update
1725                 ) {
1726                         $$ = list_prepend($1, instruction(Op_var_update));
1727                         $$->nexti->update_var = ip->memory->var_update;
1728                 } else
1729                         $$ = $1;
1730           }
1731         | '$' non_post_simp_exp opt_incdec
1732           {
1733                 $$ = list_append($2, $1);
1734                 if ($3 != NULL)
1735                   mk_assignment($2, NULL, $3);
1736           }
1737         ;
1738
1739 opt_incdec
1740         : INCREMENT
1741           {
1742                 $1->opcode = Op_postincrement;
1743           }
1744         | DECREMENT
1745           {
1746                 $1->opcode = Op_postdecrement;
1747           }
1748         | /* empty */   { $$ = NULL; }
1749         ;
1750
1751 l_brace
1752         : '{' opt_nls
1753         ;
1754
1755 r_brace
1756         : '}' opt_nls   { yyerrok; }
1757         ;
1758
1759 r_paren
1760         : ')' { yyerrok; }
1761         ;
1762
1763 opt_semi
1764         : /* empty */
1765         | semi
1766         ;
1767
1768 semi
1769         : ';'   { yyerrok; }
1770         ;
1771
1772 colon
1773         : ':'   { $$ = $1; yyerrok; }
1774         ;
1775
1776 comma
1777         : ',' opt_nls   { yyerrok; }
1778         ;
1779 %%
1780
1781 struct token {
1782         const char *operator;   /* text to match */
1783         OPCODE value;                   /*  type */
1784         int class;                              /* lexical class */
1785         unsigned flags;                 /* # of args. allowed and compatability */
1786 #       define  ARGS    0xFF    /* 0, 1, 2, 3 args allowed (any combination */
1787 #       define  A(n)    (1<<(n))
1788 #       define  VERSION_MASK    0xFF00  /* old awk is zero */
1789 #       define  NOT_OLD         0x0100  /* feature not in old awk */
1790 #       define  NOT_POSIX       0x0200  /* feature not in POSIX */
1791 #       define  GAWKX           0x0400  /* gawk extension */
1792 #       define  RESX            0x0800  /* Bell Labs Research extension */
1793 #       define  BREAK           0x1000  /* break allowed inside */
1794 #       define  CONTINUE        0x2000  /* continue allowed inside */
1795         
1796         NODE *(*ptr)(int);      /* function that implements this keyword */
1797 };
1798
1799 #if 'a' == 0x81 /* it's EBCDIC */
1800 /* tokcompare --- lexicographically compare token names for sorting */
1801
1802 static int
1803 tokcompare(const void *l, const void *r)
1804 {
1805         struct token *lhs, *rhs;
1806
1807         lhs = (struct token *) l;
1808         rhs = (struct token *) r;
1809
1810         return strcmp(lhs->operator, rhs->operator);
1811 }
1812 #endif
1813
1814 /*
1815  * Tokentab is sorted ASCII ascending order, so it can be binary searched.
1816  * See check_special(), which sorts the table on EBCDIC systems.
1817  * Function pointers come from declarations in awk.h.
1818  */
1819
1820 static const struct token tokentab[] = {
1821 {"BEGIN",       Op_rule,         LEX_BEGIN,     0,              0},
1822 {"BEGINFILE",   Op_rule,         LEX_BEGINFILE, GAWKX,          0},
1823 {"END",         Op_rule,         LEX_END,       0,              0},
1824 {"ENDFILE",             Op_rule,         LEX_ENDFILE,   GAWKX,          0},
1825 #ifdef ARRAYDEBUG
1826 {"adump",       Op_builtin,    LEX_BUILTIN,     GAWKX|A(1),     do_adump},
1827 #endif
1828 {"and",         Op_builtin,    LEX_BUILTIN,     GAWKX|A(2),     do_and},
1829 {"asort",       Op_builtin,      LEX_BUILTIN,   GAWKX|A(1)|A(2)|A(3),   do_asort},
1830 {"asorti",      Op_builtin,      LEX_BUILTIN,   GAWKX|A(1)|A(2)|A(3),   do_asorti},
1831 {"atan2",       Op_builtin,      LEX_BUILTIN,   NOT_OLD|A(2),   do_atan2},
1832 {"bindtextdomain",      Op_builtin,      LEX_BUILTIN,   GAWKX|A(1)|A(2),        do_bindtextdomain},
1833 {"break",       Op_K_break,      LEX_BREAK,     0,              0},
1834 {"case",        Op_K_case,       LEX_CASE,      GAWKX,          0},
1835 {"close",       Op_builtin,      LEX_BUILTIN,   NOT_OLD|A(1)|A(2),      do_close},
1836 {"compl",       Op_builtin,    LEX_BUILTIN,     GAWKX|A(1),     do_compl},
1837 {"continue",    Op_K_continue, LEX_CONTINUE,    0,              0},
1838 {"cos",         Op_builtin,      LEX_BUILTIN,   NOT_OLD|A(1),   do_cos},
1839 {"dcgettext",   Op_builtin,      LEX_BUILTIN,   GAWKX|A(1)|A(2)|A(3),   do_dcgettext},
1840 {"dcngettext",  Op_builtin,      LEX_BUILTIN,   GAWKX|A(1)|A(2)|A(3)|A(4)|A(5), do_dcngettext},
1841 {"default",     Op_K_default,    LEX_DEFAULT,   GAWKX,          0},
1842 {"delete",      Op_K_delete,     LEX_DELETE,    NOT_OLD,        0},
1843 {"do",          Op_K_do,         LEX_DO,        NOT_OLD|BREAK|CONTINUE, 0},
1844 {"else",        Op_K_else,       LEX_ELSE,      0,              0},
1845 {"eval",        Op_symbol,       LEX_EVAL,      0,              0},
1846 {"exit",        Op_K_exit,       LEX_EXIT,      0,              0},
1847 {"exp",         Op_builtin,      LEX_BUILTIN,   A(1),           do_exp},
1848 {"extension",   Op_builtin,      LEX_BUILTIN,   GAWKX|A(2),     do_ext},
1849 {"fflush",      Op_builtin,      LEX_BUILTIN,   RESX|A(0)|A(1), do_fflush},
1850 {"for",         Op_K_for,        LEX_FOR,       BREAK|CONTINUE, 0},
1851 {"func",        Op_func, LEX_FUNCTION,  NOT_POSIX|NOT_OLD,      0},
1852 {"function",Op_func, LEX_FUNCTION,      NOT_OLD,        0},
1853 {"gensub",      Op_sub_builtin,  LEX_BUILTIN,   GAWKX|A(3)|A(4), 0},
1854 {"getline",     Op_K_getline_redir,      LEX_GETLINE,   NOT_OLD,        0},
1855 {"gsub",        Op_sub_builtin,  LEX_BUILTIN,   NOT_OLD|A(2)|A(3), 0},
1856 {"if",          Op_K_if,         LEX_IF,        0,              0},
1857 {"in",          Op_symbol,       LEX_IN,        0,              0},
1858 {"include",  Op_symbol,  LEX_INCLUDE,   GAWKX,  0},
1859 {"index",       Op_builtin,      LEX_BUILTIN,   A(2),           do_index},
1860 {"int",         Op_builtin,      LEX_BUILTIN,   A(1),           do_int},
1861 {"isarray",     Op_builtin,      LEX_BUILTIN,   GAWKX|A(1),     do_isarray},
1862 {"length",      Op_builtin,      LEX_LENGTH,    A(0)|A(1),      do_length},
1863 {"log",         Op_builtin,      LEX_BUILTIN,   A(1),           do_log},
1864 {"lshift",      Op_builtin,    LEX_BUILTIN,     GAWKX|A(2),     do_lshift},
1865 {"match",       Op_builtin,      LEX_BUILTIN,   NOT_OLD|A(2)|A(3), do_match},
1866 {"mktime",      Op_builtin,      LEX_BUILTIN,   GAWKX|A(1),     do_mktime},
1867 {"next",        Op_K_next,       LEX_NEXT,      0,              0},
1868 {"nextfile",    Op_K_nextfile, LEX_NEXTFILE,    GAWKX,          0},
1869 {"or",          Op_builtin,    LEX_BUILTIN,     GAWKX|A(2),     do_or},
1870 {"patsplit",    Op_builtin,    LEX_BUILTIN,     GAWKX|A(2)|A(3)|A(4), do_patsplit},
1871 {"print",       Op_K_print,      LEX_PRINT,     0,              0},
1872 {"printf",      Op_K_printf,     LEX_PRINTF,    0,              0},
1873 {"rand",        Op_builtin,      LEX_BUILTIN,   NOT_OLD|A(0),   do_rand},
1874 {"return",      Op_K_return,     LEX_RETURN,    NOT_OLD,        0},
1875 {"rshift",      Op_builtin,    LEX_BUILTIN,     GAWKX|A(2),     do_rshift},
1876 {"sin",         Op_builtin,      LEX_BUILTIN,   NOT_OLD|A(1),   do_sin},
1877 {"split",       Op_builtin,      LEX_BUILTIN,   A(2)|A(3)|A(4), do_split},
1878 {"sprintf",     Op_builtin,      LEX_BUILTIN,   0,              do_sprintf},
1879 {"sqrt",        Op_builtin,      LEX_BUILTIN,   A(1),           do_sqrt},
1880 {"srand",       Op_builtin,      LEX_BUILTIN,   NOT_OLD|A(0)|A(1), do_srand},
1881 #if defined(GAWKDEBUG) || defined(ARRAYDEBUG) /* || ... */
1882 {"stopme",      Op_builtin,    LEX_BUILTIN,     GAWKX|A(0),     stopme},
1883 #endif
1884 {"strftime",    Op_builtin,      LEX_BUILTIN,   GAWKX|A(0)|A(1)|A(2)|A(3), do_strftime},
1885 {"strtonum",    Op_builtin,    LEX_BUILTIN,     GAWKX|A(1),     do_strtonum},
1886 {"sub",         Op_sub_builtin,  LEX_BUILTIN,   NOT_OLD|A(2)|A(3), 0},
1887 {"substr",      Op_builtin,      LEX_BUILTIN,   A(2)|A(3),      do_substr},
1888 {"switch",      Op_K_switch,     LEX_SWITCH,    GAWKX|BREAK,    0},
1889 {"system",      Op_builtin,      LEX_BUILTIN,   NOT_OLD|A(1),   do_system},
1890 {"systime",     Op_builtin,      LEX_BUILTIN,   GAWKX|A(0),     do_systime},
1891 {"tolower",     Op_builtin,      LEX_BUILTIN,   NOT_OLD|A(1),   do_tolower},
1892 {"toupper",     Op_builtin,      LEX_BUILTIN,   NOT_OLD|A(1),   do_toupper},
1893 {"while",       Op_K_while,      LEX_WHILE,     BREAK|CONTINUE, 0},
1894 {"xor",         Op_builtin,    LEX_BUILTIN,     GAWKX|A(2),     do_xor},
1895 };
1896
1897 #if MBS_SUPPORT
1898 /* Variable containing the current shift state.  */
1899 static mbstate_t cur_mbstate;
1900 /* Ring buffer containing current characters.  */
1901 #define MAX_CHAR_IN_RING_BUFFER 8
1902 #define RING_BUFFER_SIZE (MAX_CHAR_IN_RING_BUFFER * MB_LEN_MAX)
1903 static char cur_char_ring[RING_BUFFER_SIZE];
1904 /* Index for ring buffers.  */
1905 static int cur_ring_idx;
1906 /* This macro means that last nextc() return a singlebyte character
1907    or 1st byte of a multibyte character.  */
1908 #define nextc_is_1stbyte (cur_char_ring[cur_ring_idx] == 1)
1909 #else /* MBS_SUPPORT */
1910 /* a dummy */
1911 #define nextc_is_1stbyte 1
1912 #endif /* MBS_SUPPORT */
1913
1914 /* getfname --- return name of a builtin function (for pretty printing) */
1915
1916 const char *
1917 getfname(NODE *(*fptr)(int))
1918 {
1919         int i, j;
1920
1921         j = sizeof(tokentab) / sizeof(tokentab[0]);
1922         /* linear search, no other way to do it */
1923         for (i = 0; i < j; i++) 
1924                 if (tokentab[i].ptr == fptr)
1925                         return tokentab[i].operator;
1926
1927         return NULL;
1928 }
1929
1930 /* print_included_from --- print `Included from ..' file names and locations */
1931
1932 static void
1933 print_included_from()
1934 {
1935         int saveline, line;
1936         SRCFILE *s;
1937
1938         /* suppress current file name, line # from `.. included from ..' msgs */ 
1939         saveline = sourceline;
1940         sourceline = 0;
1941
1942         for (s = sourcefile; s != NULL && s->stype == SRC_INC; ) {
1943                 s = s->next;
1944                 if (s == NULL || s->fd <= INVALID_HANDLE)
1945                         continue;
1946                 line = s->srclines;
1947
1948                 /* if last token is NEWLINE, line number is off by 1. */
1949                 if (s->lasttok == NEWLINE)
1950                         line--;
1951                 msg("%s %s:%d%c",
1952                         s->prev == sourcefile ? "In file included from"
1953                                                                   : "                 from",
1954                         (s->stype == SRC_INC ||
1955                                  s->stype == SRC_FILE) ? s->src : "cmd. line",
1956                         line,
1957                         s->stype == SRC_INC ? ',' : ':'
1958                 );
1959         }
1960         sourceline = saveline;
1961 }
1962
1963 /* warning_ln --- print a warning message with location */
1964
1965 static void
1966 warning_ln(int line, const char *mesg, ...)
1967 {
1968         va_list args;
1969         int saveline;
1970
1971         saveline = sourceline;
1972         sourceline = line;
1973         print_included_from();
1974         va_start(args, mesg);
1975         err(_("warning: "), mesg, args);
1976         va_end(args);
1977         sourceline = saveline;
1978 }
1979
1980 /* lintwarn_ln --- print a lint warning and location */
1981
1982 static void
1983 lintwarn_ln(int line, const char *mesg, ...)
1984 {
1985         va_list args;
1986         int saveline;
1987
1988         saveline = sourceline;
1989         sourceline = line;
1990         print_included_from();
1991         va_start(args, mesg);
1992         if (lintfunc == r_fatal)
1993                 err(_("fatal: "), mesg, args);
1994         else
1995                 err(_("warning: "), mesg, args);
1996         va_end(args);
1997         sourceline = saveline;
1998         if (lintfunc == r_fatal)
1999                 gawk_exit(EXIT_FATAL);
2000 }
2001
2002 /* error_ln --- print an error message and location */
2003
2004 static void
2005 error_ln(int line, const char *m, ...)
2006 {
2007         va_list args;
2008         int saveline;
2009
2010         saveline = sourceline;
2011         sourceline = line;
2012         print_included_from();
2013         errcount++;
2014         va_start(args, m);
2015         err("error: ", m, args);
2016         va_end(args);
2017         sourceline = saveline;
2018 }
2019
2020 /* yyerror --- print a syntax error message, show where */
2021
2022 static void
2023 yyerror(const char *m, ...)
2024 {
2025         va_list args;
2026         const char *mesg = NULL;
2027         char *bp, *cp;
2028         char *scan;
2029         char *buf;
2030         int count;
2031         static char end_of_file_line[] = "(END OF FILE)";
2032         char save;
2033
2034         print_included_from();
2035
2036         errcount++;
2037         /* Find the current line in the input file */
2038         if (lexptr && lexeme) {
2039                 if (thisline == NULL) {
2040                         cp = lexeme;
2041                         if (*cp == '\n') {
2042                                 cp--;
2043                                 mesg = _("unexpected newline or end of string");
2044                         }
2045                         for (; cp != lexptr_begin && *cp != '\n'; --cp)
2046                                 continue;
2047                         if (*cp == '\n')
2048                                 cp++;
2049                         thisline = cp;
2050                 }
2051                 /* NL isn't guaranteed */
2052                 bp = lexeme;
2053                 while (bp < lexend && *bp && *bp != '\n')
2054                         bp++;
2055         } else {
2056                 thisline = end_of_file_line;
2057                 bp = thisline + strlen(thisline);
2058         }
2059
2060         /*
2061          * Saving and restoring *bp keeps valgrind happy,
2062          * since the guts of glibc uses strlen, even though
2063          * we're passing an explict precision. Sigh.
2064          *
2065          * 8/2003: We may not need this anymore.
2066          */
2067         save = *bp;
2068         *bp = '\0';
2069
2070         msg("%.*s", (int) (bp - thisline), thisline);
2071
2072         *bp = save;
2073         va_start(args, m);
2074         if (mesg == NULL)
2075                 mesg = m;
2076
2077         count = (bp - thisline) + strlen(mesg) + 2 + 1;
2078         emalloc(buf, char *, count, "yyerror");
2079
2080         bp = buf;
2081
2082         if (lexptr != NULL) {
2083                 scan = thisline;
2084                 while (scan < lexeme)
2085                         if (*scan++ == '\t')
2086                                 *bp++ = '\t';
2087                         else
2088                                 *bp++ = ' ';
2089                 *bp++ = '^';
2090                 *bp++ = ' ';
2091         }
2092         strcpy(bp, mesg);
2093         err("", buf, args);
2094         va_end(args);
2095         efree(buf);
2096 }
2097
2098 /* mk_program --- create a single list of instructions */
2099
2100 static INSTRUCTION *
2101 mk_program()
2102 {
2103         INSTRUCTION *cp, *tmp;
2104
2105 #define begin_block         rule_block[BEGIN]
2106 #define end_block           rule_block[END]
2107 #define prog_block          rule_block[Rule]
2108 #define beginfile_block     rule_block[BEGINFILE]
2109 #define endfile_block       rule_block[ENDFILE]
2110
2111         if (end_block == NULL)
2112                 end_block = list_create(ip_end);
2113         else
2114                 (void) list_prepend(end_block, ip_end);
2115
2116         if (! in_main_context()) {
2117                 if (begin_block != NULL && prog_block != NULL)
2118                         cp = list_merge(begin_block, prog_block);
2119                 else
2120                         cp = (begin_block != NULL) ? begin_block : prog_block;
2121
2122                 if (cp != NULL)
2123                         (void) list_merge(cp, end_block);
2124                 else
2125                         cp = end_block;
2126
2127                 (void) list_append(cp, instruction(Op_stop));
2128                 goto out;
2129         }
2130
2131         if (endfile_block == NULL)
2132                 endfile_block = list_create(ip_endfile);
2133         else {
2134                 ip_rec->has_endfile = TRUE;
2135                 (void) list_prepend(endfile_block, ip_endfile);
2136         }
2137
2138         if (beginfile_block == NULL)
2139                 beginfile_block = list_create(ip_beginfile);
2140         else
2141                 (void) list_prepend(beginfile_block, ip_beginfile);
2142
2143         if (prog_block == NULL) {
2144                 if (end_block->nexti == end_block->lasti
2145                                 && beginfile_block->nexti == beginfile_block->lasti 
2146                                 && endfile_block->nexti == endfile_block->lasti
2147                 ) {
2148                         /* no pattern-action and (real) end, beginfile or endfile blocks */
2149                         bcfree(ip_rec);
2150                         bcfree(ip_newfile);
2151                         ip_rec = ip_newfile = NULL;
2152
2153                         list_append(beginfile_block, instruction(Op_after_beginfile));
2154                         (void) list_append(endfile_block, instruction(Op_after_endfile));
2155
2156                         if (begin_block == NULL)     /* no program at all */
2157                                 cp = end_block;
2158                         else
2159                                 cp = list_merge(begin_block, end_block);
2160                         (void) list_append(cp, ip_atexit);
2161                         (void) list_append(cp, instruction(Op_stop));
2162
2163                         /* append beginfile_block and endfile_block for sole use
2164                          * in getline without redirection (Op_K_getline).
2165                          */
2166
2167                         (void) list_merge(cp, beginfile_block);
2168                         (void) list_merge(cp, endfile_block);
2169
2170                         goto out;
2171
2172                 } else {
2173                         /* install a do-nothing prog block */
2174                         prog_block = list_create(instruction(Op_no_op));
2175                 }
2176         }
2177
2178         (void) list_append(endfile_block, instruction(Op_after_endfile));
2179         (void) list_prepend(prog_block, ip_rec);
2180         (void) list_append(prog_block, instruction(Op_jmp));
2181         prog_block->lasti->target_jmp = ip_rec;
2182                 
2183         list_append(beginfile_block, instruction(Op_after_beginfile));
2184
2185         cp = list_merge(beginfile_block, prog_block);
2186         (void) list_prepend(cp, ip_newfile);
2187         (void) list_merge(cp, endfile_block);
2188         (void) list_merge(cp, end_block);
2189         if (begin_block != NULL)
2190                 cp = list_merge(begin_block, cp);
2191
2192         (void) list_append(cp, ip_atexit);
2193         (void) list_append(cp, instruction(Op_stop));
2194
2195 out:
2196         /* delete the Op_list, not needed */
2197         tmp = cp->nexti;
2198         bcfree(cp);
2199         return tmp;
2200
2201 #undef begin_block
2202 #undef end_block
2203 #undef prog_block
2204 #undef beginfile_block
2205 #undef endfile_block 
2206 }
2207
2208 /* parse_program --- read in the program and convert into a list of instructions */
2209
2210 int
2211 parse_program(INSTRUCTION **pcode)
2212 {
2213         int ret;
2214
2215         /* pre-create non-local jump targets
2216          * ip_end (Op_no_op) -- used as jump target for `exit'
2217          * outside an END block.
2218          */
2219         ip_end = instruction(Op_no_op);
2220
2221         if (! in_main_context())
2222                 ip_newfile = ip_rec = ip_atexit = ip_beginfile = ip_endfile = NULL;
2223         else {
2224                 ip_endfile = instruction(Op_no_op);
2225                 ip_beginfile = instruction(Op_no_op);
2226                 ip_rec = instruction(Op_get_record); /* target for `next', also ip_newfile */
2227                 ip_newfile = bcalloc(Op_newfile, 2, 0); /* target for `nextfile' */
2228                 ip_newfile->target_jmp = ip_end;
2229                 ip_newfile->target_endfile = ip_endfile;
2230                 (ip_newfile + 1)->target_get_record = ip_rec;
2231                 ip_rec->target_newfile = ip_newfile;
2232                 ip_atexit = instruction(Op_atexit);     /* target for `exit' in END block */
2233         }
2234
2235         sourcefile = srcfiles->next;
2236         lexeof = FALSE;
2237         lexptr = NULL;
2238         lasttok = 0;
2239         memset(rule_block, 0, sizeof(ruletab) * sizeof(INSTRUCTION *));
2240         errcount = 0;
2241         tok = tokstart != NULL ? tokstart : tokexpand();
2242
2243         ret = yyparse();
2244         *pcode = mk_program();
2245
2246         /* avoid false source indications */
2247         source = NULL;
2248         sourceline = 0;
2249         if (ret == 0)   /* avoid spurious warning if parser aborted with YYABORT */
2250                 check_funcs();
2251
2252         return (ret || errcount);
2253 }
2254
2255 /* do_add_srcfile --- add one item to srcfiles */
2256
2257 static SRCFILE *
2258 do_add_srcfile(int stype, char *src, char *path, SRCFILE *thisfile)
2259 {
2260         SRCFILE *s;
2261
2262         emalloc(s, SRCFILE *, sizeof(SRCFILE), "do_add_srcfile");
2263         memset(s, 0, sizeof(SRCFILE));
2264         s->src = estrdup(src, strlen(src));
2265         s->fullpath = path;
2266         s->stype = stype;
2267         s->fd = INVALID_HANDLE;
2268         s->next = thisfile;
2269         s->prev = thisfile->prev;
2270         thisfile->prev->next = s;
2271         thisfile->prev = s;
2272         return s;
2273 }
2274
2275 /* add_srcfile --- add one item to srcfiles after checking if
2276  *                              a source file exists and not already in list.
2277  */
2278
2279 SRCFILE *
2280 add_srcfile(int stype, char *src, SRCFILE *thisfile, int *already_included, int *errcode)
2281 {
2282         SRCFILE *s;
2283         struct stat sbuf;
2284         char *path;
2285         int errno_val = 0;
2286
2287         if (already_included)
2288                 *already_included = FALSE;
2289         if (errcode)
2290                 *errcode = 0;
2291         if (stype == SRC_CMDLINE || stype == SRC_STDIN)
2292                 return do_add_srcfile(stype, src, NULL, thisfile);
2293
2294         path = find_source(src, &sbuf, &errno_val);
2295         if (path == NULL) {
2296                 if (errcode) {
2297                         *errcode = errno_val;
2298                         return NULL;
2299                 }
2300                 fatal(_("can't open source file `%s' for reading (%s)"),
2301                                 src, errno_val ? strerror(errno_val) : _("reason unknown"));
2302         }
2303
2304         for (s = srcfiles->next; s != srcfiles; s = s->next) {
2305                 if ((s->stype == SRC_FILE || s->stype == SRC_INC)
2306                                 && files_are_same(path, s)
2307                 ) {
2308                         if (do_lint) {
2309                                 int line = sourceline;
2310                                 /* Kludge: the line number may be off for `@include file'.
2311                                  * Since, this function is also used for '-f file' in main.c,
2312                                  * sourceline > 1 check ensures that the call is at
2313                                  * parse time.
2314                                  */
2315                                 if (sourceline > 1 && lasttok == NEWLINE)
2316                                         line--;
2317                                 lintwarn_ln(line, _("already included source file `%s'"), src);
2318                         }
2319                         efree(path);
2320                         if (already_included)
2321                                 *already_included = TRUE;
2322                         return NULL;
2323                 }
2324         }
2325
2326         s = do_add_srcfile(stype, src, path, thisfile);
2327         s->sbuf = sbuf;
2328         s->mtime = sbuf.st_mtime;
2329         return s;
2330 }
2331
2332 /* include_source --- read program from source included using `@include' */
2333
2334 static int
2335 include_source(INSTRUCTION *file)
2336 {
2337         SRCFILE *s;
2338         char *src = file->lextok;
2339         int errcode;
2340         int already_included;
2341
2342         if (do_traditional || do_posix) {
2343                 error_ln(file->source_line, _("@include is a gawk extension"));
2344                 return -1;
2345         }
2346
2347         if (strlen(src) == 0) {
2348                 if (do_lint)
2349                         lintwarn_ln(file->source_line, _("empty filename after @include"));
2350                 return 0;
2351         }
2352
2353         s = add_srcfile(SRC_INC, src, sourcefile, &already_included, &errcode);
2354         if (s == NULL) {
2355                 if (already_included)
2356                         return 0;
2357                 error_ln(file->source_line,
2358                         _("can't open source file `%s' for reading (%s)"),
2359                         src, errcode ? strerror(errcode) : _("reason unknown"));
2360                 return -1;
2361         }
2362
2363         /* save scanner state for the current sourcefile */
2364         sourcefile->srclines = sourceline;
2365         sourcefile->lexptr = lexptr;
2366         sourcefile->lexend = lexend;
2367         sourcefile->lexptr_begin = lexptr_begin;        
2368         sourcefile->lexeme = lexeme;
2369         sourcefile->lasttok = lasttok;
2370
2371         /* included file becomes the current source */ 
2372         sourcefile = s;
2373         lexptr = NULL;
2374         sourceline = 0;
2375         source = NULL;
2376         lasttok = 0;
2377         lexeof = FALSE;
2378         eof_warned = FALSE;
2379         return 0;
2380 }
2381
2382 /* next_sourcefile --- read program from the next source in srcfiles */
2383
2384 static void
2385 next_sourcefile()
2386 {
2387         static int (*closefunc)(int fd) = NULL;
2388
2389         if (closefunc == NULL) {
2390                 char *cp = getenv("AWKREADFUNC");
2391
2392                 /* If necessary, one day, test value for different functions.  */
2393                 if (cp == NULL)
2394                         closefunc = close;
2395                 else
2396                         closefunc = one_line_close;
2397         }
2398
2399         /*
2400          * This won't be true if there's an invalid character in
2401          * the source file or source string (e.g., user typo).
2402          * Previous versions of gawk did not core dump in such a
2403          * case.
2404          *
2405          * assert(lexeof == TRUE);
2406          */
2407         lexeof = FALSE;
2408         eof_warned = FALSE;
2409         sourcefile->srclines = sourceline;      /* total no of lines in current file */
2410         if (sourcefile->fd > INVALID_HANDLE) {
2411                 if (sourcefile->fd != fileno(stdin))  /* safety */
2412                         (*closefunc)(sourcefile->fd);
2413                 sourcefile->fd = INVALID_HANDLE;
2414         }
2415         if (sourcefile->buf != NULL) {
2416                 efree(sourcefile->buf);
2417                 sourcefile->buf = NULL;
2418                 sourcefile->lexptr_begin = NULL;
2419         }
2420
2421         sourcefile = sourcefile->next;
2422         if (sourcefile == srcfiles)
2423                 return;
2424
2425         if (sourcefile->lexptr_begin != NULL) {
2426                 /* resume reading from already opened file (postponed to process '@include') */
2427                 lexptr = sourcefile->lexptr;
2428                 lexend = sourcefile->lexend;
2429                 lasttok = sourcefile->lasttok;
2430                 lexptr_begin = sourcefile->lexptr_begin;
2431                 lexeme = sourcefile->lexeme;
2432                 sourceline = sourcefile->srclines;
2433                 source = sourcefile->src;
2434         } else {
2435                 lexptr = NULL;
2436                 sourceline = 0;
2437                 source = NULL;
2438                 lasttok = 0;
2439         }
2440 }
2441
2442 /* get_src_buf --- read the next buffer of source program */
2443
2444 static char *
2445 get_src_buf()
2446 {
2447         int n;
2448         char *scan;
2449         int newfile;
2450         int savelen;
2451         struct stat sbuf;
2452
2453         /*
2454          * No argument prototype on readfunc on purpose,
2455          * avoids problems with some ancient systems where
2456          * the types of arguments to read() aren't up to date.
2457          */
2458         static ssize_t (*readfunc)() = 0;
2459
2460         if (readfunc == NULL) {
2461                 char *cp = getenv("AWKREADFUNC");
2462
2463                 /* If necessary, one day, test value for different functions.  */
2464                 if (cp == NULL)
2465                         /*
2466                          * cast is to remove warnings on systems with
2467                          * different return types for read.
2468                          */
2469                         readfunc = ( ssize_t(*)() ) read;
2470                 else
2471                         readfunc = read_one_line;
2472         }
2473
2474         newfile = FALSE;
2475         if (sourcefile == srcfiles)
2476                 return NULL;
2477
2478         if (sourcefile->stype == SRC_CMDLINE) {
2479                 if (sourcefile->bufsize == 0) {
2480                         sourcefile->bufsize = strlen(sourcefile->src);
2481                         lexptr = lexptr_begin = lexeme = sourcefile->src;
2482                         lexend = lexptr + sourcefile->bufsize;
2483                         sourceline = 1;
2484                         if (sourcefile->bufsize == 0) {
2485                                 /*
2486                                  * Yet Another Special case:
2487                                  *      gawk '' /path/name
2488                                  * Sigh.
2489                                  */
2490                                 static short warned = FALSE;
2491
2492                                 if (do_lint && ! warned) {
2493                                         warned = TRUE;
2494                                         lintwarn(_("empty program text on command line"));
2495                                 }
2496                                 lexeof = TRUE;
2497                         }
2498                 } else if (sourcefile->buf == NULL  && *(lexptr-1) != '\n') {
2499                         /*
2500                          * The following goop is to ensure that the source
2501                          * ends with a newline and that the entire current
2502                          * line is available for error messages.
2503                          */
2504                         int offset;
2505                         char *buf;
2506
2507                         offset = lexptr - lexeme;
2508                         for (scan = lexeme; scan > lexptr_begin; scan--)
2509                                 if (*scan == '\n') {
2510                                         scan++;
2511                                         break;
2512                                 }
2513                         savelen = lexptr - scan;
2514                         emalloc(buf, char *, savelen + 1, "get_src_buf");
2515                         memcpy(buf, scan, savelen);
2516                         thisline = buf;
2517                         lexptr = buf + savelen;
2518                         *lexptr = '\n';
2519                         lexeme = lexptr - offset;
2520                         lexptr_begin = buf;
2521                         lexend = lexptr + 1;
2522                         sourcefile->buf = buf;
2523                 } else
2524                         lexeof = TRUE;
2525                 return lexptr;
2526         }
2527
2528         if (sourcefile->fd <= INVALID_HANDLE) {
2529                 int fd;
2530                 int l;
2531
2532                 source = sourcefile->src;
2533                 if (source == NULL)
2534                         return NULL;
2535                 fd = srcopen(sourcefile);
2536                 if (fd <= INVALID_HANDLE) {
2537                         char *in;
2538
2539                         /* suppress file name and line no. in error mesg */
2540                         in = source;
2541                         source = NULL;
2542                         error(_("can't open source file `%s' for reading (%s)"),
2543                                 in, strerror(errno));
2544                         errcount++;
2545                         lexeof = TRUE;
2546                         return sourcefile->src;
2547                 }
2548
2549                 sourcefile->fd = fd;
2550                 l = optimal_bufsize(fd, &sbuf);
2551                 /*
2552                  * Make sure that something silly like
2553                  *      AWKBUFSIZE=8 make check
2554                  * works ok.
2555                  */
2556 #define A_DECENT_BUFFER_SIZE    128
2557                 if (l < A_DECENT_BUFFER_SIZE)
2558                         l = A_DECENT_BUFFER_SIZE;
2559 #undef A_DECENT_BUFFER_SIZE
2560                 sourcefile->bufsize = l;
2561                 newfile = TRUE;
2562                 emalloc(sourcefile->buf, char *, sourcefile->bufsize, "get_src_buf");
2563                 lexptr = lexptr_begin = lexeme = sourcefile->buf;
2564                 savelen = 0;
2565                 sourceline = 1;
2566                 thisline = NULL;
2567         } else {
2568                 /*
2569                  * Here, we retain the current source line in the beginning of the buffer.
2570                  */
2571                 int offset;
2572                 for (scan = lexeme; scan > lexptr_begin; scan--)
2573                         if (*scan == '\n') {
2574                                 scan++;
2575                                 break;
2576                         }
2577
2578                 savelen = lexptr - scan;
2579                 offset = lexptr - lexeme;
2580
2581                 if (savelen > 0) {
2582                         /*
2583                          * Need to make sure we have room left for reading new text;
2584                          * grow the buffer (by doubling, an arbitrary choice), if the retained line
2585                          * takes up more than a certain percentage (50%, again an arbitrary figure)
2586                          * of the available space.
2587                          */
2588
2589                         if (savelen > sourcefile->bufsize / 2) { /* long line or token  */
2590                                 sourcefile->bufsize *= 2;
2591                                 erealloc(sourcefile->buf, char *, sourcefile->bufsize, "get_src_buf");
2592                                 scan = sourcefile->buf + (scan - lexptr_begin);
2593                                 lexptr_begin = sourcefile->buf;
2594                         }
2595
2596                         thisline = lexptr_begin;
2597                         memmove(thisline, scan, savelen);
2598                         lexptr = thisline + savelen;
2599                         lexeme = lexptr - offset;
2600                 } else {
2601                         savelen = 0;
2602                         lexptr = lexeme = lexptr_begin;
2603                         thisline = NULL;
2604                 }
2605         }
2606
2607         n = (*readfunc)(sourcefile->fd, lexptr, sourcefile->bufsize - savelen);
2608         if (n == -1) {
2609                 error(_("can't read sourcefile `%s' (%s)"),
2610                                 source, strerror(errno));
2611                 errcount++;
2612                 lexeof = TRUE;
2613         } else {
2614                 lexend = lexptr + n;
2615                 if (n == 0) {
2616                         static short warned = FALSE;
2617                         if (do_lint && newfile && ! warned){
2618                                 warned = TRUE;
2619                                 sourceline = 0;
2620                                 lintwarn(_("source file `%s' is empty"), source);
2621                         }
2622                         lexeof = TRUE;
2623                 }
2624         }
2625         return sourcefile->buf;
2626 }
2627
2628 /* tokadd --- add a character to the token buffer */
2629
2630 #define tokadd(x) (*tok++ = (x), tok == tokend ? tokexpand() : tok)
2631
2632 /* tokexpand --- grow the token buffer */
2633
2634 static char *
2635 tokexpand()
2636 {
2637         static int toksize;
2638         int tokoffset;
2639                         
2640         if (tokstart != NULL) {
2641                 tokoffset = tok - tokstart;
2642                 toksize *= 2;
2643                 erealloc(tokstart, char *, toksize, "tokexpand");
2644                 tok = tokstart + tokoffset;
2645         } else {
2646                 toksize = 60;
2647                 emalloc(tokstart, char *, toksize, "tokexpand");
2648                 tok = tokstart;
2649         }
2650         tokend = tokstart + toksize;
2651         return tok;
2652 }
2653
2654 /* nextc --- get the next input character */
2655
2656 #if MBS_SUPPORT
2657
2658 static int
2659 nextc(void)
2660 {
2661         if (gawk_mb_cur_max > 1) {
2662 again:
2663                 if (lexeof)
2664                         return END_FILE;
2665                 if (lexptr == NULL || lexptr >= lexend) {
2666                         if (get_src_buf())
2667                                 goto again;
2668                         return END_SRC;
2669                 }
2670
2671                 /* Update the buffer index.  */
2672                 cur_ring_idx = (cur_ring_idx == RING_BUFFER_SIZE - 1)? 0 :
2673                         cur_ring_idx + 1;
2674
2675                 /* Did we already check the current character?  */
2676                 if (cur_char_ring[cur_ring_idx] == 0) {
2677                         /* No, we need to check the next character on the buffer.  */
2678                         int idx, work_ring_idx = cur_ring_idx;
2679                         mbstate_t tmp_state;
2680                         size_t mbclen;
2681         
2682                         for (idx = 0 ; lexptr + idx < lexend ; idx++) {
2683                                 tmp_state = cur_mbstate;
2684                                 mbclen = mbrlen(lexptr, idx + 1, &tmp_state);
2685
2686                                 if (mbclen == 1 || mbclen == (size_t)-1 || mbclen == 0) {
2687                                         /* It is a singlebyte character, non-complete multibyte
2688                                            character or EOF.  We treat it as a singlebyte
2689                                            character.  */
2690                                         cur_char_ring[work_ring_idx] = 1;
2691                                         break;
2692                                 } else if (mbclen == (size_t)-2) {
2693                                         /* It is not a complete multibyte character.  */
2694                                         cur_char_ring[work_ring_idx] = idx + 1;
2695                                 } else {
2696                                         /* mbclen > 1 */
2697                                         cur_char_ring[work_ring_idx] = mbclen;
2698                                         break;
2699                                 }
2700                                 work_ring_idx = (work_ring_idx == RING_BUFFER_SIZE - 1)?
2701                                         0 : work_ring_idx + 1;
2702                         }
2703                         cur_mbstate = tmp_state;
2704
2705                         /* Put a mark on the position on which we write next character.  */
2706                         work_ring_idx = (work_ring_idx == RING_BUFFER_SIZE - 1)?
2707                                 0 : work_ring_idx + 1;
2708                         cur_char_ring[work_ring_idx] = 0;
2709                 }
2710
2711                 return (int) (unsigned char) *lexptr++;
2712         } else {
2713                 do {
2714                         if (lexeof)
2715                                 return END_FILE;
2716                         if (lexptr && lexptr < lexend)
2717                                         return ((int) (unsigned char) *lexptr++);
2718                 } while (get_src_buf());
2719                 return END_SRC;
2720         }
2721 }
2722
2723 #else /* MBS_SUPPORT */
2724
2725 int
2726 nextc()
2727 {
2728         do {
2729                 if (lexeof)
2730                         return END_FILE;
2731                 if (lexptr && lexptr < lexend)
2732                         return ((int) (unsigned char) *lexptr++);
2733         } while (get_src_buf());
2734         return END_SRC;
2735 }
2736
2737 #endif /* MBS_SUPPORT */
2738
2739 /* pushback --- push a character back on the input */
2740
2741 static inline void
2742 pushback(void)
2743 {
2744 #if MBS_SUPPORT
2745         if (gawk_mb_cur_max > 1)
2746                 cur_ring_idx = (cur_ring_idx == 0)? RING_BUFFER_SIZE - 1 :
2747                         cur_ring_idx - 1;
2748 #endif
2749         (! lexeof && lexptr && lexptr > lexptr_begin ? lexptr-- : lexptr);
2750 }
2751
2752
2753 /* allow_newline --- allow newline after &&, ||, ? and : */
2754
2755 static void
2756 allow_newline(void)
2757 {
2758         int c;
2759
2760         for (;;) {
2761                 c = nextc();
2762                 if (c == END_FILE) {
2763                         pushback();
2764                         break;
2765                 }
2766                 if (c == '#') {
2767                         while ((c = nextc()) != '\n' && c != END_FILE)
2768                                 continue;
2769                         if (c == END_FILE) {
2770                                 pushback();
2771                                 break;
2772                         }
2773                 }
2774                 if (c == '\n')
2775                         sourceline++;
2776                 if (! isspace(c)) {
2777                         pushback();
2778                         break;
2779                 }
2780         }
2781 }
2782
2783 /* newline_eof --- return newline or EOF as needed and adjust variables */
2784
2785 /*
2786  * This routine used to be a macro, however GCC 4.6.2 warned about
2787  * the result of a computation not being used.  Converting to a function
2788  * removes the warnings.
2789  */
2790
2791 static int newline_eof()
2792 {
2793         /* NB: a newline at end does not start a source line. */
2794         if (lasttok != NEWLINE) {
2795                 pushback();
2796                 if (do_lint && ! eof_warned) {
2797                         lintwarn(_("source file does not end in newline"));
2798                         eof_warned = TRUE;
2799                 }
2800                 sourceline++;
2801                 return NEWLINE;
2802         }
2803
2804         sourceline--;
2805         eof_warned = FALSE;
2806         return LEX_EOF;
2807 }
2808
2809 /* yylex --- Read the input and turn it into tokens. */
2810
2811 static int
2812 yylex(void)
2813 {
2814         int c;
2815         int seen_e = FALSE;             /* These are for numbers */
2816         int seen_point = FALSE;
2817         int esc_seen;           /* for literal strings */
2818         int mid;
2819         static int did_newline = FALSE;
2820         char *tokkey;
2821         int inhex = FALSE;
2822         int intlstr = FALSE;
2823
2824 #define GET_INSTRUCTION(op) bcalloc(op, 1, sourceline)
2825
2826 #define NEWLINE_EOF newline_eof()
2827
2828         yylval = (INSTRUCTION *) NULL;
2829         if (lasttok == SUBSCRIPT) {
2830                 lasttok = 0;
2831                 return SUBSCRIPT;
2832         }
2833  
2834         if (lasttok == LEX_EOF)         /* error earlier in current source, must give up !! */
2835                 return 0;
2836
2837         c = nextc();
2838         if (c == END_SRC)
2839                 return 0;
2840         if (c == END_FILE)
2841                 return lasttok = NEWLINE_EOF;
2842         pushback();
2843
2844 #if defined __EMX__
2845         /*
2846          * added for OS/2's extproc feature of cmd.exe
2847          * (like #! in BSD sh)
2848          */
2849         if (strncasecmp(lexptr, "extproc ", 8) == 0) {
2850                 while (*lexptr && *lexptr != '\n')
2851                         lexptr++;
2852         }
2853 #endif
2854
2855         lexeme = lexptr;
2856         thisline = NULL;
2857         if (want_regexp) {
2858                 int in_brack = 0;       /* count brackets, [[:alnum:]] allowed */
2859                 /*
2860                  * Counting brackets is non-trivial. [[] is ok,
2861                  * and so is [\]], with a point being that /[/]/ as a regexp
2862                  * constant has to work.
2863                  *
2864                  * Do not count [ or ] if either one is preceded by a \.
2865                  * A `[' should be counted if
2866                  *  a) it is the first one so far (in_brack == 0)
2867                  *  b) it is the `[' in `[:'
2868                  * A ']' should be counted if not preceded by a \, since
2869                  * it is either closing `:]' or just a plain list.
2870                  * According to POSIX, []] is how you put a ] into a set.
2871                  * Try to handle that too.
2872                  *
2873                  * The code for \ handles \[ and \].
2874                  */
2875
2876                 want_regexp = FALSE;
2877                 tok = tokstart;
2878                 for (;;) {
2879                         c = nextc();
2880
2881                         if (gawk_mb_cur_max == 1 || nextc_is_1stbyte) switch (c) {
2882                         case '[':
2883                                 /* one day check for `.' and `=' too */
2884                                 if (nextc() == ':' || in_brack == 0)
2885                                         in_brack++;
2886                                 pushback();
2887                                 break;
2888                         case ']':
2889                                 if (tokstart[0] == '['
2890                                     && (tok == tokstart + 1
2891                                         || (tok == tokstart + 2
2892                                             && tokstart[1] == '^')))
2893                                         /* do nothing */;
2894                                 else
2895                                         in_brack--;
2896                                 break;
2897                         case '\\':
2898                                 if ((c = nextc()) == END_FILE) {
2899                                         pushback();
2900                                         yyerror(_("unterminated regexp ends with `\\' at end of file"));
2901                                         goto end_regexp; /* kludge */
2902                                 } else if (c == '\n') {
2903                                         sourceline++;
2904                                         continue;
2905                                 } else {
2906                                         tokadd('\\');
2907                                         tokadd(c);
2908                                         continue;
2909                                 }
2910                                 break;
2911                         case '/':       /* end of the regexp */
2912                                 if (in_brack > 0)
2913                                         break;
2914 end_regexp:
2915                                 yylval = GET_INSTRUCTION(Op_token);
2916                                 yylval->lextok = estrdup(tokstart, tok - tokstart);
2917                                 if (do_lint) {
2918                                         int peek = nextc();
2919
2920                                         pushback();
2921                                         if (peek == 'i' || peek == 's') {
2922                                                 if (source)
2923                                                         lintwarn(
2924                                                 _("%s: %d: tawk regex modifier `/.../%c' doesn't work in gawk"),
2925                                                                 source, sourceline, peek);
2926                                                 else
2927                                                         lintwarn(
2928                                                 _("tawk regex modifier `/.../%c' doesn't work in gawk"),
2929                                                                 peek);
2930                                         }
2931                                 }
2932                                 return lasttok = REGEXP;
2933                         case '\n':
2934                                 pushback();
2935                                 yyerror(_("unterminated regexp"));
2936                                 goto end_regexp;        /* kludge */
2937                         case END_FILE:
2938                                 pushback();
2939                                 yyerror(_("unterminated regexp at end of file"));
2940                                 goto end_regexp;        /* kludge */
2941                         }
2942                         tokadd(c);
2943                 }
2944         }
2945 retry:
2946
2947         /* skipping \r is a hack, but windows is just too pervasive. sigh. */
2948         while ((c = nextc()) == ' ' || c == '\t' || c == '\r')
2949                 continue;
2950
2951         lexeme = lexptr ? lexptr - 1 : lexptr;
2952         thisline = NULL;
2953         tok = tokstart;
2954
2955 #if MBS_SUPPORT
2956         if (gawk_mb_cur_max == 1 || nextc_is_1stbyte)
2957 #endif
2958         switch (c) {
2959         case END_SRC:
2960                 return 0;
2961
2962         case END_FILE:
2963                 return lasttok = NEWLINE_EOF;
2964
2965         case '\n':
2966                 sourceline++;
2967                 return lasttok = NEWLINE;
2968
2969         case '#':               /* it's a comment */
2970                 while ((c = nextc()) != '\n') {
2971                         if (c == END_FILE)
2972                                 return lasttok = NEWLINE_EOF;
2973                 }
2974                 sourceline++;
2975                 return lasttok = NEWLINE;
2976
2977         case '@':
2978                 return lasttok = '@';
2979
2980         case '\\':
2981 #ifdef RELAXED_CONTINUATION
2982                 /*
2983                  * This code puports to allow comments and/or whitespace
2984                  * after the `\' at the end of a line used for continuation.
2985                  * Use it at your own risk. We think it's a bad idea, which
2986                  * is why it's not on by default.
2987                  */
2988                 if (! do_traditional) {
2989                         /* strip trailing white-space and/or comment */
2990                         while ((c = nextc()) == ' ' || c == '\t' || c == '\r')
2991                                 continue;
2992                         if (c == '#') {
2993                                 static short warned = FALSE;
2994
2995                                 if (do_lint && ! warned) {
2996                                         warned = TRUE;
2997                                         lintwarn(
2998                 _("use of `\\ #...' line continuation is not portable"));
2999                                 }
3000                                 while ((c = nextc()) != '\n')
3001                                         if (c == END_FILE)
3002                                                 break;
3003                         }
3004                         pushback();
3005                 }
3006 #endif /* RELAXED_CONTINUATION */
3007                 c = nextc();
3008                 if (c == '\r')  /* allow MS-DOS files. bleah */
3009                         c = nextc();
3010                 if (c == '\n') {
3011                         sourceline++;
3012                         goto retry;
3013                 } else {
3014                         yyerror(_("backslash not last character on line"));
3015                         return lasttok = LEX_EOF;
3016                 }
3017                 break;
3018
3019         case ':':
3020         case '?':
3021                 yylval = GET_INSTRUCTION(Op_cond_exp);
3022                 if (! do_posix)
3023                         allow_newline();
3024                 return lasttok = c;
3025
3026                 /*
3027                  * in_parens is undefined unless we are parsing a print
3028                  * statement (in_print), but why bother with a check?
3029                  */
3030         case ')':
3031                 in_parens--;
3032                 return lasttok = c;
3033
3034         case '(':       
3035                 in_parens++;
3036                 return lasttok = c;
3037         case '$':
3038                 yylval = GET_INSTRUCTION(Op_field_spec);
3039                 return lasttok = c;
3040         case '{':
3041                 if (++in_braces == 1)
3042                         firstline = sourceline;
3043         case ';':
3044         case ',':
3045         case '[':
3046                         return lasttok = c;
3047         case ']':
3048                 c = nextc();
3049                 pushback();
3050                 if (c == '[') {
3051                         yylval = GET_INSTRUCTION(Op_sub_array);
3052                         lasttok = ']';
3053                 } else {
3054                         yylval = GET_INSTRUCTION(Op_subscript);
3055                         lasttok = SUBSCRIPT;    /* end of subscripts */
3056                 }
3057                 return ']';
3058
3059         case '*':
3060                 if ((c = nextc()) == '=') {
3061                         yylval = GET_INSTRUCTION(Op_assign_times);
3062                         return lasttok = ASSIGNOP;
3063                 } else if (do_posix) {
3064                         pushback();
3065                         yylval = GET_INSTRUCTION(Op_times);
3066                         return lasttok = '*';
3067                 } else if (c == '*') {
3068                         /* make ** and **= aliases for ^ and ^= */
3069                         static int did_warn_op = FALSE, did_warn_assgn = FALSE;
3070
3071                         if (nextc() == '=') {
3072                                 if (! did_warn_assgn) {
3073                                         did_warn_assgn = TRUE;
3074                                         if (do_lint)
3075                                                 lintwarn(_("POSIX does not allow operator `**='"));
3076                                         if (do_lint_old)
3077                                                 warning(_("old awk does not support operator `**='"));
3078                                 }
3079                                 yylval = GET_INSTRUCTION(Op_assign_exp);
3080                                 return ASSIGNOP;
3081                         } else {
3082                                 pushback();
3083                                 if (! did_warn_op) {
3084                                         did_warn_op = TRUE;
3085                                         if (do_lint)
3086                                                 lintwarn(_("POSIX does not allow operator `**'"));
3087                                         if (do_lint_old)
3088                                                 warning(_("old awk does not support operator `**'"));
3089                                 }
3090                                 yylval = GET_INSTRUCTION(Op_exp);
3091                                 return lasttok = '^';
3092                         }
3093                 }
3094                 pushback();
3095                 yylval = GET_INSTRUCTION(Op_times);
3096                 return lasttok = '*';
3097
3098         case '/':
3099                 if (nextc() == '=') {
3100                         pushback();
3101                         return lasttok = SLASH_BEFORE_EQUAL;
3102                 }
3103                 pushback();
3104                 yylval = GET_INSTRUCTION(Op_quotient);
3105                 return lasttok = '/';
3106
3107         case '%':
3108                 if (nextc() == '=') {
3109                         yylval = GET_INSTRUCTION(Op_assign_mod);
3110                         return lasttok = ASSIGNOP;
3111                 }
3112                 pushback();
3113                 yylval = GET_INSTRUCTION(Op_mod);
3114                 return lasttok = '%';
3115
3116         case '^':
3117         {
3118                 static int did_warn_op = FALSE, did_warn_assgn = FALSE;
3119
3120                 if (nextc() == '=') {
3121                         if (do_lint_old && ! did_warn_assgn) {
3122                                 did_warn_assgn = TRUE;
3123                                 warning(_("operator `^=' is not supported in old awk"));
3124                         }
3125                         yylval = GET_INSTRUCTION(Op_assign_exp);
3126                         return lasttok = ASSIGNOP;
3127                 }
3128                 pushback();
3129                 if (do_lint_old && ! did_warn_op) {
3130                         did_warn_op = TRUE;
3131                         warning(_("operator `^' is not supported in old awk"));
3132                 }
3133                 yylval = GET_INSTRUCTION(Op_exp);       
3134                 return lasttok = '^';
3135         }
3136
3137         case '+':
3138                 if ((c = nextc()) == '=') {
3139                         yylval = GET_INSTRUCTION(Op_assign_plus);
3140                         return lasttok = ASSIGNOP;
3141                 }
3142                 if (c == '+') {
3143                         yylval = GET_INSTRUCTION(Op_symbol);
3144                         return lasttok = INCREMENT;
3145                 }
3146                 pushback();
3147                 yylval = GET_INSTRUCTION(Op_plus);
3148                 return lasttok = '+';
3149
3150         case '!':
3151                 if ((c = nextc()) == '=') {
3152                         yylval = GET_INSTRUCTION(Op_notequal);
3153                         return lasttok = RELOP;
3154                 }
3155                 if (c == '~') {
3156                         yylval = GET_INSTRUCTION(Op_nomatch);
3157                         return lasttok = MATCHOP;
3158                 }
3159                 pushback();
3160                 yylval = GET_INSTRUCTION(Op_symbol);
3161                 return lasttok = '!';
3162
3163         case '<':
3164                 if (nextc() == '=') {
3165                         yylval = GET_INSTRUCTION(Op_leq);
3166                         return lasttok = RELOP;
3167                 }
3168                 yylval = GET_INSTRUCTION(Op_less);
3169                 pushback();
3170                 return lasttok = '<';
3171
3172         case '=':
3173                 if (nextc() == '=') {
3174                         yylval = GET_INSTRUCTION(Op_equal);
3175                         return lasttok = RELOP;
3176                 }
3177                 yylval = GET_INSTRUCTION(Op_assign);
3178                 pushback();
3179                 return lasttok = ASSIGN;
3180
3181         case '>':
3182                 if ((c = nextc()) == '=') {
3183                         yylval = GET_INSTRUCTION(Op_geq);
3184                         return lasttok = RELOP;
3185                 } else if (c == '>') {
3186                         yylval = GET_INSTRUCTION(Op_symbol);
3187                         yylval->redir_type = redirect_append;
3188                         return lasttok = IO_OUT;
3189                 }
3190                 pushback();
3191                 if (in_print && in_parens == 0) {
3192                         yylval = GET_INSTRUCTION(Op_symbol);
3193                         yylval->redir_type = redirect_output;
3194                         return lasttok = IO_OUT;
3195                 }
3196                 yylval = GET_INSTRUCTION(Op_greater);
3197                 return lasttok = '>';
3198
3199         case '~':
3200                 yylval = GET_INSTRUCTION(Op_match);
3201                 return lasttok = MATCHOP;
3202
3203         case '}':
3204                 /*
3205                  * Added did newline stuff.  Easier than
3206                  * hacking the grammar.
3207                  */
3208                 if (did_newline) {
3209                         did_newline = FALSE;
3210                         if (--in_braces == 0)
3211                                 lastline = sourceline;
3212                         return lasttok = c;
3213                 }
3214                 did_newline++;
3215                 --lexptr;       /* pick up } next time */
3216                 return lasttok = NEWLINE;
3217
3218         case '"':
3219         string:
3220                 esc_seen = FALSE;
3221                 while ((c = nextc()) != '"') {
3222                         if (c == '\n') {
3223                                 pushback();
3224                                 yyerror(_("unterminated string"));
3225                                 return lasttok = LEX_EOF;
3226                         }
3227                         if ((gawk_mb_cur_max == 1 || nextc_is_1stbyte) &&
3228                             c == '\\') {
3229                                 c = nextc();
3230                                 if (c == '\n') {
3231                                         sourceline++;
3232                                         continue;
3233                                 }
3234                                 esc_seen = TRUE;
3235                                 if (! want_source || c != '"')
3236                                         tokadd('\\');
3237                         }
3238                         if (c == END_FILE) {
3239                                 pushback();
3240                                 yyerror(_("unterminated string"));
3241                                 return lasttok = LEX_EOF;
3242                         }
3243                         tokadd(c);
3244                 }
3245                 yylval = GET_INSTRUCTION(Op_token);
3246                 if (want_source) {
3247                         yylval->lextok = estrdup(tokstart, tok - tokstart);
3248                         return lasttok = FILENAME;
3249                 }
3250                 
3251                 yylval->opcode = Op_push_i;
3252                 yylval->memory = make_str_node(tokstart,
3253                                                 tok - tokstart, esc_seen ? SCAN : 0);
3254                 yylval->memory->flags &= ~MALLOC;
3255                 yylval->memory->flags |= PERM;
3256                 if (intlstr) {
3257                         yylval->memory->flags |= INTLSTR;
3258                         intlstr = FALSE;
3259                         if (do_intl)
3260                                 dumpintlstr(yylval->memory->stptr, yylval->memory->stlen);
3261                 }
3262                 return lasttok = YSTRING;
3263
3264         case '-':
3265                 if ((c = nextc()) == '=') {
3266                         yylval = GET_INSTRUCTION(Op_assign_minus);
3267                         return lasttok = ASSIGNOP;
3268                 }
3269                 if (c == '-') {
3270                         yylval = GET_INSTRUCTION(Op_symbol);
3271                         return lasttok = DECREMENT;
3272                 }
3273                 pushback();
3274                 yylval = GET_INSTRUCTION(Op_minus);
3275                 return lasttok = '-';
3276
3277         case '.':
3278                 c = nextc();
3279                 pushback();
3280                 if (! isdigit(c))
3281                         return lasttok = '.';
3282                 else
3283                         c = '.';
3284                 /* FALL THROUGH */
3285         case '0':
3286         case '1':
3287         case '2':
3288         case '3':
3289         case '4':
3290         case '5':
3291         case '6':
3292         case '7':
3293         case '8':
3294         case '9':
3295                 /* It's a number */
3296                 for (;;) {
3297                         int gotnumber = FALSE;
3298
3299                         tokadd(c);
3300                         switch (c) {
3301                         case 'x':
3302                         case 'X':
3303                                 if (do_traditional)
3304                                         goto done;
3305                                 if (tok == tokstart + 2) {
3306                                         int peek = nextc();
3307
3308                                         if (isxdigit(peek)) {
3309                                                 inhex = TRUE;
3310                                                 pushback();     /* following digit */
3311                                         } else {
3312                                                 pushback();     /* x or X */
3313                                                 goto done;
3314                                         }
3315                                 }
3316                                 break;
3317                         case '.':
3318                                 /* period ends exponent part of floating point number */
3319                                 if (seen_point || seen_e) {
3320                                         gotnumber = TRUE;
3321                                         break;
3322                                 }
3323                                 seen_point = TRUE;
3324                                 break;
3325                         case 'e':
3326                         case 'E':
3327                                 if (inhex)
3328                                         break;
3329                                 if (seen_e) {
3330                                         gotnumber = TRUE;
3331                                         break;
3332                                 }
3333                                 seen_e = TRUE;
3334                                 if ((c = nextc()) == '-' || c == '+') {
3335                                         int c2 = nextc();
3336
3337                                         if (isdigit(c2)) {
3338                                                 tokadd(c);
3339                                                 tokadd(c2);
3340                                         } else {
3341                                                 pushback();     /* non-digit after + or - */
3342                                                 pushback();     /* + or - */
3343                                                 pushback();     /* e or E */
3344                                         }
3345                                 } else if (! isdigit(c)) {
3346                                         pushback();     /* character after e or E */
3347                                         pushback();     /* e or E */
3348                                 } else {
3349                                         pushback();     /* digit */
3350                                 }
3351                                 break;
3352                         case 'a':
3353                         case 'A':
3354                         case 'b':
3355                         case 'B':
3356                         case 'c':
3357                         case 'C':
3358                         case 'D':
3359                         case 'd':
3360                         case 'f':
3361                         case 'F':
3362                                 if (do_traditional || ! inhex)
3363                                         goto done;
3364                                 /* fall through */
3365                         case '0':
3366                         case '1':
3367                         case '2':
3368                         case '3':
3369                         case '4':
3370                         case '5':
3371                         case '6':
3372                         case '7':
3373                         case '8':
3374                         case '9':
3375                                 break;
3376                         default:
3377                         done:
3378                                 gotnumber = TRUE;
3379                         }
3380                         if (gotnumber)
3381                                 break;
3382                         c = nextc();
3383                 }
3384                 pushback();
3385
3386                 tokadd('\0');
3387                 yylval = GET_INSTRUCTION(Op_push_i);
3388                 if (! do_traditional && isnondecimal(tokstart, FALSE)) {
3389                         if (do_lint) {
3390                                 if (isdigit((unsigned char) tokstart[1]))       /* not an 'x' or 'X' */
3391                                         lintwarn("numeric constant `%.*s' treated as octal",
3392                                                 (int) strlen(tokstart)-1, tokstart);
3393                                 else if (tokstart[1] == 'x' || tokstart[1] == 'X')
3394                                         lintwarn("numeric constant `%.*s' treated as hexadecimal",
3395                                                 (int) strlen(tokstart)-1, tokstart);
3396                         }
3397                         yylval->memory = mk_number(nondec2awknum(tokstart, strlen(tokstart)),
3398                                                                                         PERM|NUMCUR|NUMBER);
3399                 } else
3400                         yylval->memory = mk_number(atof(tokstart), PERM|NUMCUR|NUMBER);
3401                 return lasttok = YNUMBER;
3402
3403         case '&':
3404                 if ((c = nextc()) == '&') {
3405                         yylval = GET_INSTRUCTION(Op_and);
3406                         allow_newline();
3407                         return lasttok = LEX_AND;
3408                 }
3409                 pushback();
3410                 yylval = GET_INSTRUCTION(Op_symbol);
3411                 return lasttok = '&';
3412
3413         case '|':
3414                 if ((c = nextc()) == '|') {
3415                         yylval = GET_INSTRUCTION(Op_or);
3416                         allow_newline();
3417                         return lasttok = LEX_OR;
3418                 } else if (! do_traditional && c == '&') {
3419                         yylval = GET_INSTRUCTION(Op_symbol);
3420                         yylval->redir_type = redirect_twoway;
3421                         return lasttok = (in_print && in_parens == 0 ? IO_OUT : IO_IN);
3422                 }
3423                 pushback();
3424                 if (in_print && in_parens == 0) {
3425                         yylval = GET_INSTRUCTION(Op_symbol);
3426                         yylval->redir_type = redirect_pipe;
3427                         return lasttok = IO_OUT;
3428                 } else {
3429                         yylval = GET_INSTRUCTION(Op_symbol);
3430                         yylval->redir_type = redirect_pipein;
3431                         return lasttok = IO_IN;
3432                 }
3433         }
3434
3435         if (c != '_' && ! isalpha(c)) {
3436                 yyerror(_("invalid char '%c' in expression"), c);
3437                 return lasttok = LEX_EOF;
3438         }
3439
3440         /*
3441          * Lots of fog here.  Consider:
3442          *
3443          * print "xyzzy"$_"foo"
3444          *
3445          * Without the check for ` lasttok != '$' ', this is parsed as
3446          *
3447          * print "xxyzz" $(_"foo")
3448          *
3449          * With the check, it is "correctly" parsed as three
3450          * string concatenations.  Sigh.  This seems to be
3451          * "more correct", but this is definitely one of those
3452          * occasions where the interactions are funny.
3453          */
3454         if (! do_traditional && c == '_' && lasttok != '$') {
3455                 if ((c = nextc()) == '"') {
3456                         intlstr = TRUE;
3457                         goto string;
3458                 }
3459                 pushback();
3460                 c = '_';
3461         }
3462
3463         /* it's some type of name-type-thing.  Find its length. */
3464         tok = tokstart;
3465         while (c != END_FILE && is_identchar(c)) {
3466                 tokadd(c);
3467                 c = nextc();
3468         }
3469         tokadd('\0');
3470         pushback();
3471
3472         /* See if it is a special token. */
3473         if ((mid = check_special(tokstart)) >= 0) {
3474                 static int warntab[sizeof(tokentab) / sizeof(tokentab[0])];
3475                 int class = tokentab[mid].class;
3476
3477                 if ((class == LEX_INCLUDE || class == LEX_EVAL)
3478                                 && lasttok != '@')
3479                         goto out;
3480
3481                 if (do_lint) {
3482                         if ((tokentab[mid].flags & GAWKX) && ! (warntab[mid] & GAWKX)) {
3483                                 lintwarn(_("`%s' is a gawk extension"),
3484                                         tokentab[mid].operator);
3485                                 warntab[mid] |= GAWKX;
3486                         }
3487                         if ((tokentab[mid].flags & RESX) && ! (warntab[mid] & RESX)) {
3488                                 lintwarn(_("`%s' is a Bell Labs extension"),
3489                                         tokentab[mid].operator);
3490                                 warntab[mid] |= RESX;
3491                         }
3492                         if ((tokentab[mid].flags & NOT_POSIX) && ! (warntab[mid] & NOT_POSIX)) {
3493                                 lintwarn(_("POSIX does not allow `%s'"),
3494                                         tokentab[mid].operator);
3495                                 warntab[mid] |= NOT_POSIX;
3496                         }
3497                 }
3498                 if (do_lint_old && (tokentab[mid].flags & NOT_OLD)
3499                                  && ! (warntab[mid] & NOT_OLD)
3500                 ) {
3501                         warning(_("`%s' is not supported in old awk"),
3502                                         tokentab[mid].operator);
3503                         warntab[mid] |= NOT_OLD;
3504                 }
3505
3506                 if (tokentab[mid].flags & BREAK)
3507                         break_allowed++;
3508                 if (tokentab[mid].flags & CONTINUE)
3509                         continue_allowed++;
3510
3511                 switch (class) {
3512                 case LEX_INCLUDE:
3513                         want_source = TRUE;
3514                         break;
3515                 case LEX_EVAL:
3516                         if (in_main_context())
3517                                 goto out;
3518                         emalloc(tokkey, char *, tok - tokstart + 1, "yylex");
3519                         tokkey[0] = '@';
3520                         memcpy(tokkey + 1, tokstart, tok - tokstart);
3521                         yylval = GET_INSTRUCTION(Op_token);
3522                         yylval->lextok = tokkey;
3523                         break;
3524
3525                 case LEX_FUNCTION:
3526                 case LEX_BEGIN:
3527                 case LEX_END:
3528                 case LEX_BEGINFILE:
3529                 case LEX_ENDFILE:               
3530                         yylval = bcalloc(tokentab[mid].value, 3, sourceline);
3531                         break;
3532
3533                 case LEX_FOR:
3534                 case LEX_WHILE:
3535                 case LEX_DO:
3536                 case LEX_SWITCH:
3537                         if (! do_profiling)
3538                                 return lasttok = class;
3539                         /* fall through */
3540                 case LEX_CASE:
3541                         yylval = bcalloc(tokentab[mid].value, 2, sourceline);
3542                         break;
3543
3544                 default:
3545                         yylval = GET_INSTRUCTION(tokentab[mid].value);
3546                         if (class == LEX_BUILTIN || class == LEX_LENGTH)
3547                                 yylval->builtin_idx = mid;
3548                         break;
3549                 }
3550                 return lasttok = class;
3551         }
3552 out:
3553         tokkey = estrdup(tokstart, tok - tokstart);
3554         if (*lexptr == '(') {
3555                 yylval = bcalloc(Op_token, 2, sourceline);
3556                 yylval->lextok = tokkey;        
3557                 return lasttok = FUNC_CALL;
3558         } else {
3559                 static short goto_warned = FALSE;
3560
3561                 yylval = GET_INSTRUCTION(Op_token);
3562                 yylval->lextok = tokkey;
3563
3564 #define SMART_ALECK     1
3565                 if (SMART_ALECK && do_lint
3566                     && ! goto_warned && strcasecmp(tokkey, "goto") == 0) {
3567                         goto_warned = TRUE;
3568                         lintwarn(_("`goto' considered harmful!\n"));
3569                 }
3570                 return lasttok = NAME;
3571         }
3572
3573 #undef GET_INSTRUCTION
3574 #undef NEWLINE_EOF
3575 }
3576
3577 /* mk_symbol --- allocates a symbol for the symbol table. */
3578
3579 NODE *
3580 mk_symbol(NODETYPE type, NODE *value)
3581 {
3582         NODE *r;
3583
3584         getnode(r);
3585         r->type = type;
3586         r->flags = MALLOC;
3587         r->lnode = value;
3588         r->rnode = NULL;
3589         r->parent_array = NULL;
3590         r->var_assign = (Func_ptr) 0;
3591         return r;
3592 }
3593
3594 /* snode --- instructions for builtin functions. Checks for arg. count
3595              and supplies defaults where possible. */
3596
3597 static INSTRUCTION *
3598 snode(INSTRUCTION *subn, INSTRUCTION *r)
3599 {
3600         INSTRUCTION *arg;
3601         INSTRUCTION *ip;
3602         NODE *n;
3603         int nexp = 0;
3604         int args_allowed;
3605         int idx = r->builtin_idx;
3606
3607         if (subn != NULL) {
3608                 INSTRUCTION *tp;
3609                 for (tp = subn->nexti; tp; tp = tp->nexti) {
3610                         tp = tp->lasti;
3611                         nexp++;
3612                 }
3613                 assert(nexp > 0);
3614         }               
3615
3616         /* check against how many args. are allowed for this builtin */
3617         args_allowed = tokentab[idx].flags & ARGS;
3618         if (args_allowed && (args_allowed & A(nexp)) == 0) {
3619                 yyerror(_("%d is invalid as number of arguments for %s"),
3620                                 nexp, tokentab[idx].operator);
3621                 return NULL;
3622         }
3623
3624         /* special processing for sub, gsub and gensub */
3625
3626         if (tokentab[idx].value == Op_sub_builtin) {
3627                 const char *operator = tokentab[idx].operator;
3628
3629                 r->sub_flags = 0;
3630
3631                 arg = subn->nexti;              /* first arg list */
3632                 (void) mk_rexp(arg);
3633
3634                 if (strcmp(operator, "gensub") != 0) {
3635                         /* sub and gsub */
3636
3637                         if (strcmp(operator, "gsub") == 0)
3638                                 r->sub_flags |= GSUB;
3639
3640                         arg = arg->lasti->nexti;        /* 2nd arg list */
3641                         if (nexp == 2) {
3642                                 INSTRUCTION *expr;
3643
3644                                 expr = list_create(instruction(Op_push_i));
3645                                 expr->nexti->memory = mk_number((AWKNUM) 0.0, (PERM|NUMCUR|NUMBER));
3646                                 (void) mk_expression_list(subn,
3647                                                 list_append(expr, instruction(Op_field_spec)));
3648                         }
3649
3650                         arg = arg->lasti->nexti;        /* third arg list */
3651                         ip = arg->lasti;
3652                         if (ip->opcode == Op_push_i) {
3653                                 if (do_lint)
3654                                         lintwarn(_("%s: string literal as last arg of substitute has no effect"),
3655                                                 operator);
3656                                 r->sub_flags |= LITERAL;
3657                         } else {
3658                                 if (make_assignable(ip) == NULL)
3659                                         yyerror(_("%s third parameter is not a changeable object"),
3660                                                 operator);
3661                                 else
3662                                         ip->do_reference = TRUE;
3663                         }
3664
3665                         r->expr_count = count_expressions(&subn, FALSE);
3666                         ip = subn->lasti;
3667
3668                         (void) list_append(subn, r);
3669
3670                         /* add after_assign code */
3671                         if (ip->opcode == Op_push_lhs && ip->memory->type == Node_var && ip->memory->var_assign) {
3672                                 (void) list_append(subn, instruction(Op_var_assign));
3673                                 subn->lasti->assign_ctxt = Op_sub_builtin;
3674                                 subn->lasti->assign_var = ip->memory->var_assign;
3675                         } else if (ip->opcode == Op_field_spec_lhs) {
3676                                 (void) list_append(subn, instruction(Op_field_assign));
3677                                 subn->lasti->assign_ctxt = Op_sub_builtin;
3678                                 subn->lasti->field_assign = (Func_ptr) 0;
3679                                 ip->target_assign = subn->lasti;
3680                         }
3681                         return subn;    
3682
3683                 } else {
3684                         /* gensub */
3685
3686                         r->sub_flags |= GENSUB;
3687                         if (nexp == 3) {
3688                                 ip = instruction(Op_push_i);
3689                                 ip->memory = mk_number((AWKNUM) 0.0, (PERM|NUMCUR|NUMBER));
3690                                 (void) mk_expression_list(subn,
3691                                                 list_append(list_create(ip), instruction(Op_field_spec)));
3692                         }
3693
3694                         r->expr_count = count_expressions(&subn, FALSE);
3695                         return list_append(subn, r);
3696                 }
3697         }
3698
3699         r->builtin = tokentab[idx].ptr;
3700
3701         /* special case processing for a few builtins */
3702
3703         if (r->builtin == do_length) {
3704                 if (nexp == 0) {                
3705                     /* no args. Use $0 */
3706
3707                         INSTRUCTION *list;
3708                         r->expr_count = 1;                      
3709                         list = list_create(r);
3710                         (void) list_prepend(list, instruction(Op_field_spec));
3711                         (void) list_prepend(list, instruction(Op_push_i));
3712                         list->nexti->memory = mk_number((AWKNUM) 0.0, (PERM|NUMCUR|NUMBER));
3713                         return list; 
3714                 } else {
3715                         arg = subn->nexti;
3716                         if (arg->nexti == arg->lasti && arg->nexti->opcode == Op_push)
3717                                 arg->nexti->opcode = Op_push_arg;       /* argument may be array */
3718                 }
3719         } else if (r->builtin == do_isarray) {
3720                 arg = subn->nexti;
3721                 if (arg->nexti == arg->lasti && arg->nexti->opcode == Op_push)
3722                         arg->nexti->opcode = Op_push_arg;       /* argument may be array */
3723         } else if (r->builtin == do_match) {
3724                 static short warned = FALSE;
3725
3726                 arg = subn->nexti->lasti->nexti;        /* 2nd arg list */
3727                 (void) mk_rexp(arg);
3728
3729                 if (nexp == 3) {        /* 3rd argument there */
3730                         if (do_lint && ! warned) {
3731                                 warned = TRUE;
3732                                 lintwarn(_("match: third argument is a gawk extension"));
3733                         }
3734                         if (do_traditional) {
3735                                 yyerror(_("match: third argument is a gawk extension"));
3736                                 return NULL;
3737                         }
3738
3739                         arg = arg->lasti->nexti;        /* third arg list */
3740                         ip = arg->lasti;
3741                         if (/*ip == arg->nexti  && */ ip->opcode == Op_push)
3742                                 ip->opcode = Op_push_array;
3743                 }
3744         } else if (r->builtin == do_split) {
3745                 arg = subn->nexti->lasti->nexti;        /* 2nd arg list */
3746                 ip = arg->lasti;
3747                 if (ip->opcode == Op_push)
3748                         ip->opcode = Op_push_array;
3749                 if (nexp == 2) {
3750                         INSTRUCTION *expr;
3751                         expr = list_create(instruction(Op_push));
3752                         expr->nexti->memory = FS_node;
3753                         (void) mk_expression_list(subn, expr);
3754                 }
3755                 arg = arg->lasti->nexti;
3756                 n = mk_rexp(arg);
3757                 if (nexp == 2)
3758                         n->re_flags |= FS_DFLT;
3759                 if (nexp == 4) {
3760                         arg = arg->lasti->nexti;
3761                         ip = arg->lasti;
3762                         if (ip->opcode == Op_push)
3763                                 ip->opcode = Op_push_array;
3764                 }
3765         } else if (r->builtin == do_patsplit) {
3766                 arg = subn->nexti->lasti->nexti;        /* 2nd arg list */
3767                 ip = arg->lasti;
3768                 if (ip->opcode == Op_push)
3769                         ip->opcode = Op_push_array;
3770                 if (nexp == 2) {
3771                         INSTRUCTION *expr;
3772                         expr = list_create(instruction(Op_push));
3773                         expr->nexti->memory = FPAT_node;
3774                         (void) mk_expression_list(subn, expr);
3775                 }
3776                 arg = arg->lasti->nexti;
3777                 n = mk_rexp(arg);
3778                 if (nexp == 4) {
3779                         arg = arg->lasti->nexti;
3780                         ip = arg->lasti;
3781                         if (ip->opcode == Op_push)
3782                                 ip->opcode = Op_push_array;
3783                 }
3784         } else if (r->builtin == do_close) {
3785                 static short warned = FALSE;
3786                 if (nexp == 2) {
3787                         if (do_lint && ! warned) {
3788                                 warned = TRUE;
3789                                 lintwarn(_("close: second argument is a gawk extension"));
3790                         }
3791                         if (do_traditional) {
3792                                 yyerror(_("close: second argument is a gawk extension"));
3793                                 return NULL;
3794                         }
3795                 }
3796         } else if (do_intl                                      /* --gen-po */
3797                         && r->builtin == do_dcgettext           /* dcgettext(...) */
3798                         && subn->nexti->lasti->opcode == Op_push_i      /* 1st arg is constant */
3799                         && (subn->nexti->lasti->memory->flags & STRCUR) != 0) { /* it's a string constant */
3800                 /* ala xgettext, dcgettext("some string" ...) dumps the string */
3801                 NODE *str = subn->nexti->lasti->memory;
3802
3803                 if ((str->flags & INTLSTR) != 0)
3804                         warning(_("use of dcgettext(_\"...\") is incorrect: remove leading underscore"));
3805                         /* don't dump it, the lexer already did */
3806                 else
3807                         dumpintlstr(str->stptr, str->stlen);
3808         } else if (do_intl                                      /* --gen-po */
3809                         && r->builtin == do_dcngettext          /* dcngettext(...) */
3810                         && subn->nexti->lasti->opcode == Op_push_i      /* 1st arg is constant */
3811                         && (subn->nexti->lasti->memory->flags & STRCUR) != 0    /* it's a string constant */
3812                         && subn->nexti->lasti->nexti->lasti->opcode == Op_push_i        /* 2nd arg is constant too */
3813                         && (subn->nexti->lasti->nexti->lasti->memory->flags & STRCUR) != 0) {   /* it's a string constant */
3814                 /* ala xgettext, dcngettext("some string", "some plural" ...) dumps the string */
3815                 NODE *str1 = subn->nexti->lasti->memory;
3816                 NODE *str2 = subn->nexti->lasti->nexti->lasti->memory;
3817
3818                 if (((str1->flags | str2->flags) & INTLSTR) != 0)
3819                         warning(_("use of dcngettext(_\"...\") is incorrect: remove leading underscore"));
3820                 else
3821                         dumpintlstr2(str1->stptr, str1->stlen, str2->stptr, str2->stlen);
3822         } else if (r->builtin == do_asort || r->builtin == do_asorti) {
3823                 arg = subn->nexti;      /* 1st arg list */
3824                 ip = arg->lasti;
3825                 if (ip->opcode == Op_push)
3826                         ip->opcode = Op_push_array;
3827                 if (nexp >= 2) {
3828                         arg = ip->nexti;
3829                         ip = arg->lasti;
3830                         if (ip->opcode == Op_push)
3831                                 ip->opcode = Op_push_array;
3832                 }
3833         }
3834 #ifdef ARRAYDEBUG
3835         else if (r->builtin == do_adump) {
3836                 ip = subn->nexti->lasti;
3837                 if (ip->opcode == Op_push)
3838                         ip->opcode = Op_push_array;
3839         }
3840 #endif          
3841
3842         if (subn != NULL) {
3843                 r->expr_count = count_expressions(&subn, FALSE);
3844                 return list_append(subn, r);
3845         }
3846
3847         r->expr_count = 0;
3848         return list_create(r);
3849 }
3850
3851 /* append_param --- append PNAME to the list of parameters
3852  *                  for the current function.
3853  */
3854
3855 static void
3856 append_param(char *pname)
3857 {
3858         static NODE *savetail = NULL;
3859         NODE *p;
3860
3861         p = make_param(pname);
3862         if (func_params == NULL) {
3863                 func_params = p;
3864                 savetail = p;
3865         } else if (savetail != NULL) {
3866                 savetail->rnode = p;
3867                 savetail = p;
3868         }
3869 }
3870
3871 /* dup_parms --- return TRUE if there are duplicate parameters */
3872
3873 static int
3874 dup_parms(INSTRUCTION *fp, NODE *func)
3875 {
3876         NODE *np;
3877         const char *fname, **names;
3878         int count, i, j, dups;
3879         NODE *params;
3880
3881         if (func == NULL)       /* error earlier */
3882                 return TRUE;
3883
3884         fname = func->param;
3885         count = func->param_cnt;
3886         params = func->rnode;
3887
3888         if (count == 0)         /* no args, no problem */
3889                 return FALSE;
3890
3891         if (params == NULL)     /* error earlier */
3892                 return TRUE;
3893
3894         emalloc(names, const char **, count * sizeof(char *), "dup_parms");
3895
3896         i = 0;
3897         for (np = params; np != NULL; np = np->rnode) {
3898                 if (np->param == NULL) { /* error earlier, give up, go home */
3899                         efree(names);
3900                         return TRUE;
3901                 }
3902                 names[i++] = np->param;
3903         }
3904
3905         dups = 0;
3906         for (i = 1; i < count; i++) {
3907                 for (j = 0; j < i; j++) {
3908                         if (strcmp(names[i], names[j]) == 0) {
3909                                 dups++;
3910                                 error_ln(fp->source_line,
3911         _("function `%s': parameter #%d, `%s', duplicates parameter #%d"),
3912                                         fname, i + 1, names[j], j+1);
3913                         }
3914                 }
3915         }
3916
3917         efree(names);
3918         return (dups > 0 ? TRUE : FALSE);
3919 }
3920
3921 /* parms_shadow --- check if parameters shadow globals */
3922
3923 static int
3924 parms_shadow(INSTRUCTION *pc, int *shadow)
3925 {
3926         int pcount, i;
3927         int ret = FALSE;
3928         NODE *func;
3929         char *fname;
3930
3931         func = pc->func_body;
3932         fname = func->lnode->param;
3933         
3934 #if 0   /* can't happen, already exited if error ? */
3935         if (fname == NULL || func == NULL)      /* error earlier */
3936                 return FALSE;
3937 #endif
3938
3939         pcount = func->lnode->param_cnt;
3940
3941         if (pcount == 0)                /* no args, no problem */
3942                 return 0;
3943
3944         source = pc->source_file;
3945         sourceline = pc->source_line;
3946         /*
3947          * Use warning() and not lintwarn() so that can warn
3948          * about all shadowed parameters.
3949          */
3950         for (i = 0; i < pcount; i++) {
3951                 if (lookup(func->parmlist[i]) != NULL) {
3952                         warning(
3953         _("function `%s': parameter `%s' shadows global variable"),
3954                                         fname, func->parmlist[i]);
3955                         ret = TRUE;
3956                 }
3957         }
3958
3959         *shadow |= ret;
3960         return 0;
3961 }
3962
3963
3964 /*
3965  * install_symbol:
3966  * Install a name in the symbol table, even if it is already there.
3967  * Caller must check against redefinition if that is desired. 
3968  */
3969
3970
3971 NODE *
3972 install_symbol(char *name, NODE *value)
3973 {
3974         NODE *hp;
3975         size_t len;
3976         int bucket;
3977
3978         if (install_func)
3979                 (*install_func)(name);
3980
3981         var_count++;
3982         len = strlen(name);
3983         bucket = hash(name, len, (unsigned long) HASHSIZE, NULL);
3984         getnode(hp);
3985         hp->type = Node_hashnode;
3986         hp->hnext = variables[bucket];
3987         variables[bucket] = hp;
3988         hp->hlength = len;
3989         hp->hvalue = value;
3990         hp->hname = name;
3991         hp->hvalue->vname = name;
3992         return hp->hvalue;
3993 }
3994
3995 /* lookup --- find the most recent hash node for name installed by install_symbol */
3996
3997 NODE *
3998 lookup(const char *name)
3999 {
4000         NODE *bucket;
4001         size_t len;
4002
4003         len = strlen(name);
4004         for (bucket = variables[hash(name, len, (unsigned long) HASHSIZE, NULL)];
4005                         bucket != NULL; bucket = bucket->hnext)
4006                 if (bucket->hlength == len && strncmp(bucket->hname, name, len) == 0)
4007                         return bucket->hvalue;
4008         return NULL;
4009 }
4010
4011 /* sym_comp --- compare two symbol (variable or function) names */
4012
4013 static int
4014 sym_comp(const void *v1, const void *v2)
4015 {
4016         const NODE *const *npp1, *const *npp2;
4017         const NODE *n1, *n2;
4018         int minlen;
4019
4020         npp1 = (const NODE *const *) v1;
4021         npp2 = (const NODE *const *) v2;
4022         n1 = *npp1;
4023         n2 = *npp2;
4024
4025         if (n1->hlength > n2->hlength)
4026                 minlen = n1->hlength;
4027         else
4028                 minlen = n2->hlength;
4029
4030         return strncmp(n1->hname, n2->hname, minlen);
4031 }
4032
4033 /* valinfo --- dump var info */
4034
4035 void
4036 valinfo(NODE *n, int (*print_func)(FILE *, const char *, ...), FILE *fp)
4037 {
4038         if (n == Nnull_string)
4039                 print_func(fp, "uninitialized scalar\n");
4040         else if (n->flags & STRING) {
4041                 pp_string_fp(print_func, fp, n->stptr, n->stlen, '"', FALSE);
4042                 print_func(fp, "\n");
4043         } else if (n->flags & NUMBER)
4044                 print_func(fp, "%.17g\n", n->numbr);
4045         else if (n->flags & STRCUR) {
4046                 pp_string_fp(print_func, fp, n->stptr, n->stlen, '"', FALSE);
4047                 print_func(fp, "\n");
4048         } else if (n->flags & NUMCUR)
4049                 print_func(fp, "%.17g\n", n->numbr);
4050         else
4051                 print_func(fp, "?? flags %s\n", flags2str(n->flags));
4052 }
4053
4054 /* get_varlist --- list of global variables */
4055
4056 NODE **
4057 get_varlist()
4058 {
4059         int i, j;
4060         NODE **table;
4061         NODE *p;
4062
4063         emalloc(table, NODE **, (var_count + 1) * sizeof(NODE *), "get_varlist");
4064         update_global_values();
4065         for (i = j = 0; i < HASHSIZE; i++)
4066                 for (p = variables[i]; p != NULL; p = p->hnext)
4067                         table[j++] = p;
4068         assert(j == var_count);
4069
4070         /* Shazzam! */
4071         qsort(table, j, sizeof(NODE *), sym_comp);
4072
4073         table[j] = NULL;
4074         return table;
4075 }
4076
4077 /* print_vars --- print names and values of global variables */ 
4078
4079 void
4080 print_vars(int (*print_func)(FILE *, const char *, ...), FILE *fp)
4081 {
4082         int i;
4083         NODE **table;
4084         NODE *p;
4085
4086         table = get_varlist();
4087         for (i = 0; (p = table[i]) != NULL; i++) {
4088                 if (p->hvalue->type == Node_func)
4089                         continue;
4090                 print_func(fp, "%.*s: ", (int) p->hlength, p->hname);
4091                 if (p->hvalue->type == Node_var_array)
4092                         print_func(fp, "array, %ld elements\n", p->hvalue->table_size);
4093                 else if (p->hvalue->type == Node_var_new)
4094                         print_func(fp, "untyped variable\n");
4095                 else if (p->hvalue->type == Node_var)
4096                         valinfo(p->hvalue->var_value, print_func, fp);
4097         }
4098         efree(table);
4099 }
4100
4101 /* dump_vars --- dump the symbol table */
4102
4103 void
4104 dump_vars(const char *fname)
4105 {
4106         FILE *fp;
4107
4108         if (fname == NULL)
4109                 fp = stderr;
4110         else if ((fp = fopen(fname, "w")) == NULL) {
4111                 warning(_("could not open `%s' for writing (%s)"), fname, strerror(errno));
4112                 warning(_("sending variable list to standard error"));
4113                 fp = stderr;
4114         }
4115
4116         print_vars(fprintf, fp);
4117         if (fp != stderr && fclose(fp) != 0)
4118                 warning(_("%s: close failed (%s)"), fname, strerror(errno));
4119 }
4120
4121 /* release_all_vars --- free all variable memory */
4122
4123 void
4124 release_all_vars()
4125 {
4126         int i;
4127         NODE *p, *next;
4128         
4129         for (i = 0; i < HASHSIZE; i++) {
4130                 for (p = variables[i]; p != NULL; p = next) {
4131                         next = p->hnext;
4132
4133                         if (p->hvalue->type == Node_func)
4134                                 continue;
4135                         else if (p->hvalue->type == Node_var_array)
4136                                 assoc_clear(p->hvalue);
4137                         else if (p->hvalue->type != Node_var_new)
4138                                 unref(p->hvalue->var_value);
4139
4140                         efree(p->hname);
4141                         freenode(p->hvalue);
4142                         freenode(p);
4143                 }
4144         }                                                                    
4145 }
4146
4147 /* dump_funcs --- print all functions */
4148
4149 void
4150 dump_funcs()
4151 {
4152         if (func_count <= 0)
4153                 return;
4154
4155         (void) foreach_func((int (*)(INSTRUCTION *, void *)) pp_func, TRUE, (void *) 0);
4156 }
4157
4158 /* shadow_funcs --- check all functions for parameters that shadow globals */
4159
4160 void
4161 shadow_funcs()
4162 {
4163         static int calls = 0;
4164         int shadow = FALSE;
4165
4166         if (func_count <= 0)
4167                 return;
4168
4169         if (calls++ != 0)
4170                 fatal(_("shadow_funcs() called twice!"));
4171
4172         (void) foreach_func((int (*)(INSTRUCTION *, void *)) parms_shadow, TRUE, &shadow);
4173
4174         /* End with fatal if the user requested it.  */
4175         if (shadow && lintfunc != warning)
4176                 lintwarn(_("there were shadowed variables."));
4177 }
4178
4179 /*
4180  * func_install:
4181  * check if name is already installed;  if so, it had better have Null value,
4182  * in which case def is added as the value. Otherwise, install name with def
4183  * as value. 
4184  *
4185  * Extra work, build up and save a list of the parameter names in a table
4186  * and hang it off params->parmlist. This is used to set the `vname' field
4187  * of each function parameter during a function call. See eval.c.
4188  */
4189
4190 static int
4191 func_install(INSTRUCTION *func, INSTRUCTION *def)
4192 {
4193         NODE *params;
4194         NODE *r, *n, *thisfunc, *hp;
4195         char **pnames = NULL;
4196         char *fname;
4197         int pcount = 0;
4198         int i;
4199
4200         params = func_params;
4201
4202         /* check for function foo(foo) { ... }.  bleah. */
4203         for (n = params->rnode; n != NULL; n = n->rnode) {
4204                 if (strcmp(n->param, params->param) == 0) {
4205                         error_ln(func->source_line,
4206                                 _("function `%s': can't use function name as parameter name"), params->param);
4207                         return -1;
4208                 } else if (is_std_var(n->param)) {
4209                         error_ln(func->source_line,
4210                                 _("function `%s': can't use special variable `%s' as a function parameter"),
4211                                         params->param, n->param);
4212                         return -1;
4213                 }
4214         }
4215
4216         thisfunc = NULL;        /* turn off warnings */
4217
4218         fname = params->param;
4219         /* symbol table management */
4220         hp = remove_symbol(params->param);  /* remove function name out of symbol table */ 
4221         if (hp != NULL)
4222                 freenode(hp);
4223         r = lookup(fname);
4224         if (r != NULL) {
4225                 error_ln(func->source_line,
4226                          _("function name `%s' previously defined"), fname);
4227                 return -1;
4228         } else if (fname == builtin_func)       /* not a valid function name */
4229                 goto remove_params;
4230
4231         /* add an implicit return at end;
4232          * also used by 'return' command in debugger
4233          */
4234       
4235         (void) list_append(def, instruction(Op_push_i));
4236         def->lasti->memory = Nnull_string;
4237         (void) list_append(def, instruction(Op_K_return));
4238
4239         if (do_profiling)
4240                 (void) list_prepend(def, instruction(Op_exec_count));
4241
4242         /* func->opcode is Op_func */
4243         (func + 1)->firsti = def->nexti;
4244         (func + 1)->lasti = def->lasti;
4245         (func + 2)->first_line = func->source_line;
4246         (func + 2)->last_line = lastline;
4247
4248         func->nexti = def->nexti;
4249         bcfree(def);
4250
4251         (void) list_append(rule_list, func + 1);        /* debugging */
4252
4253         /* install the function */
4254         thisfunc = mk_symbol(Node_func, params);
4255         (void) install_symbol(fname, thisfunc);
4256         thisfunc->code_ptr = func;
4257         func->func_body = thisfunc;
4258
4259         for (n = params->rnode; n != NULL; n = n->rnode)
4260                 pcount++;
4261
4262         if (pcount != 0) {
4263                 emalloc(pnames, char **, (pcount + 1) * sizeof(char *), "func_install");
4264                 for (i = 0, n = params->rnode; i < pcount; i++, n = n->rnode)
4265                         pnames[i] = n->param;
4266                 pnames[pcount] = NULL;
4267         }
4268         thisfunc->parmlist = pnames;
4269
4270         /* update lint table info */
4271         func_use(fname, FUNC_DEFINE);
4272
4273         func_count++;   /* used in profiler / pretty printer */
4274
4275 remove_params:
4276         /* remove params from symbol table */
4277         pop_params(params->rnode);
4278         return 0;
4279 }
4280
4281 /* remove_symbol --- remove a variable from the symbol table */
4282
4283 NODE *
4284 remove_symbol(char *name)
4285 {
4286         NODE *bucket, **save;
4287         size_t len;
4288
4289         len = strlen(name);
4290         save = &(variables[hash(name, len, (unsigned long) HASHSIZE, NULL)]);
4291         for (bucket = *save; bucket != NULL; bucket = bucket->hnext) {
4292                 if (len == bucket->hlength && strncmp(bucket->hname, name, len) == 0) {
4293                         var_count--;
4294                         *save = bucket->hnext;
4295                         return bucket;
4296                 }
4297                 save = &(bucket->hnext);
4298         }
4299         return NULL;
4300 }
4301
4302 /* pop_params --- remove list of function parameters from symbol table */
4303
4304 /*
4305  * pop parameters out of the symbol table. do this in reverse order to
4306  * avoid reading freed memory if there were duplicated parameters.
4307  */
4308 static void
4309 pop_params(NODE *params)
4310 {
4311         NODE *hp;
4312         if (params == NULL)
4313                 return;
4314         pop_params(params->rnode);
4315         hp = remove_symbol(params->param);
4316         if (hp != NULL)
4317                 freenode(hp);
4318 }
4319
4320 /* make_param --- make NAME into a function parameter */
4321
4322 static NODE *
4323 make_param(char *name)
4324 {
4325         NODE *r;
4326
4327         getnode(r);
4328         r->type = Node_param_list;
4329         r->rnode = NULL;
4330         r->param_cnt = param_counter++;
4331         return (install_symbol(name, r));
4332 }
4333
4334 static struct fdesc {
4335         char *name;
4336         short used;
4337         short defined;
4338         struct fdesc *next;
4339 } *ftable[HASHSIZE];
4340
4341 /* func_use --- track uses and definitions of functions */
4342
4343 static void
4344 func_use(const char *name, enum defref how)
4345 {
4346         struct fdesc *fp;
4347         int len;
4348         int ind;
4349
4350         len = strlen(name);
4351         ind = hash(name, len, HASHSIZE, NULL);
4352
4353         for (fp = ftable[ind]; fp != NULL; fp = fp->next) {
4354                 if (strcmp(fp->name, name) == 0) {
4355                         if (how == FUNC_DEFINE)
4356                                 fp->defined++;
4357                         else
4358                                 fp->used++;
4359                         return;
4360                 }
4361         }
4362
4363         /* not in the table, fall through to allocate a new one */
4364
4365         emalloc(fp, struct fdesc *, sizeof(struct fdesc), "func_use");
4366         memset(fp, '\0', sizeof(struct fdesc));
4367         emalloc(fp->name, char *, len + 1, "func_use");
4368         strcpy(fp->name, name);
4369         if (how == FUNC_DEFINE)
4370                 fp->defined++;
4371         else
4372                 fp->used++;
4373         fp->next = ftable[ind];
4374         ftable[ind] = fp;
4375 }
4376
4377 /* check_funcs --- verify functions that are called but not defined */
4378
4379 static void
4380 check_funcs()
4381 {
4382         struct fdesc *fp, *next;
4383         int i;
4384
4385         if (! in_main_context())
4386                 goto free_mem;
4387  
4388         for (i = 0; i < HASHSIZE; i++) {
4389                 for (fp = ftable[i]; fp != NULL; fp = fp->next) {
4390 #ifdef REALLYMEAN
4391                         /* making this the default breaks old code. sigh. */
4392                         if (fp->defined == 0) {
4393                                 error(
4394                 _("function `%s' called but never defined"), fp->name);
4395                                 errcount++;
4396                         }
4397 #else
4398                         if (do_lint && fp->defined == 0)
4399                                 lintwarn(
4400                 _("function `%s' called but never defined"), fp->name);
4401 #endif
4402                         if (do_lint && fp->used == 0) {
4403                                 lintwarn(_("function `%s' defined but never called directly"),
4404                                         fp->name);
4405                         }
4406                 }
4407         }
4408
4409 free_mem:
4410         /* now let's free all the memory */
4411         for (i = 0; i < HASHSIZE; i++) {
4412                 for (fp = ftable[i]; fp != NULL; fp = next) {
4413                         next = fp->next;
4414                         efree(fp->name);
4415                         efree(fp);
4416                 }
4417                 ftable[i] = NULL;
4418         }
4419 }
4420
4421 /* param_sanity --- look for parameters that are regexp constants */
4422
4423 static void
4424 param_sanity(INSTRUCTION *arglist)
4425 {
4426         INSTRUCTION *argl, *arg;
4427         int i = 1;
4428
4429         if (arglist == NULL)
4430                 return;
4431         for (argl = arglist->nexti; argl; ) {
4432                 arg = argl->lasti;
4433                 if (arg->opcode == Op_match_rec)
4434                         warning_ln(arg->source_line,
4435                                 _("regexp constant for parameter #%d yields boolean value"), i);
4436                 argl = arg->nexti;
4437                 i++;
4438         }
4439 }
4440
4441 /* foreach_func --- execute given function for each awk function in symbol table. */
4442
4443 int
4444 foreach_func(int (*pfunc)(INSTRUCTION *, void *), int sort, void *data)
4445 {
4446         int i, j;
4447         NODE *p;
4448         int ret = 0;
4449
4450         if (sort) {
4451                 NODE **tab;
4452
4453                 /*
4454                  * Walk through symbol table counting functions.
4455                  * Could be more than func_count if there are
4456                  * extension functions.
4457                  */
4458                 for (i = j = 0; i < HASHSIZE; i++) {
4459                         for (p = variables[i]; p != NULL; p = p->hnext) {
4460                                 if (p->hvalue->type == Node_func) {
4461                                         j++;
4462                                 }
4463                         }
4464                 }
4465
4466                 if (j == 0)
4467                         return 0;
4468
4469                 emalloc(tab, NODE **, j * sizeof(NODE *), "foreach_func");
4470
4471                 /* now walk again, copying info */
4472                 for (i = j = 0; i < HASHSIZE; i++) {
4473                         for (p = variables[i]; p != NULL; p = p->hnext) {
4474                                 if (p->hvalue->type == Node_func) {
4475                                         tab[j] = p;
4476                                         j++;
4477                                 }
4478                         }
4479                 }
4480
4481                 /* Shazzam! */
4482                 qsort(tab, j, sizeof(NODE *), sym_comp);
4483
4484                 for (i = 0; i < j; i++) {
4485                         if ((ret = pfunc(tab[i]->hvalue->code_ptr, data)) != 0)
4486                                 break;
4487                 }
4488
4489                 efree(tab);
4490                 return ret;
4491         }
4492
4493         /* unsorted */
4494         for (i = 0; i < HASHSIZE; i++) {
4495                 for (p = variables[i]; p != NULL; p = p->hnext) {
4496                         if (p->hvalue->type == Node_func
4497                                         && (ret = pfunc(p->hvalue->code_ptr, data)) != 0)
4498                                 return ret;
4499                 }
4500         }
4501         return 0;
4502 }
4503
4504 /* deferred variables --- those that are only defined if needed. */
4505
4506 /*
4507  * Is there any reason to use a hash table for deferred variables?  At the
4508  * moment, there are only 1 to 3 such variables, so it may not be worth
4509  * the overhead.  If more modules start using this facility, it should
4510  * probably be converted into a hash table.
4511  */
4512
4513 static struct deferred_variable {
4514         NODE *(*load_func)(void);
4515         struct deferred_variable *next;
4516         char name[1];   /* variable-length array */
4517 } *deferred_variables;
4518
4519 /* register_deferred_variable --- add a var name and loading function to the list */
4520
4521 void
4522 register_deferred_variable(const char *name, NODE *(*load_func)(void))
4523 {
4524         struct deferred_variable *dv;
4525         size_t sl = strlen(name);
4526
4527         emalloc(dv, struct deferred_variable *, sizeof(*dv)+sl,
4528                 "register_deferred_variable");
4529         dv->load_func = load_func;
4530         dv->next = deferred_variables;
4531         memcpy(dv->name, name, sl+1);
4532         deferred_variables = dv;
4533 }
4534
4535 /* variable --- make sure NAME is in the symbol table */
4536
4537 NODE *
4538 variable(char *name, NODETYPE type)
4539 {
4540         NODE *r;
4541
4542         if ((r = lookup(name)) != NULL) {
4543                 if (r->type == Node_func) {
4544                         error(_("function `%s' called with space between name and `(',\nor used as a variable or an array"),
4545                                 r->vname);
4546                         errcount++;
4547                         r->type = Node_var_new; /* continue parsing instead of exiting */
4548                 }
4549         } else {
4550                 /* not found */
4551                 struct deferred_variable *dv;
4552
4553                 for (dv = deferred_variables; TRUE; dv = dv->next) {
4554                         if (dv == NULL) {
4555                         /*
4556                          * This is the only case in which we may not free the string.
4557                          */
4558                                 if (type == Node_var)
4559                                         r = mk_symbol(type, Nnull_string);
4560                                 else
4561                                         r = mk_symbol(type, (NODE *) NULL);
4562                                 return install_symbol(name, r);
4563                         }
4564                         if (strcmp(name, dv->name) == 0) {
4565                                 r = (*dv->load_func)();
4566                                 break;
4567                         }
4568                 }
4569         }
4570         efree(name);
4571         return r;
4572 }
4573
4574 /* make_regnode --- make a regular expression node */
4575
4576 static NODE *
4577 make_regnode(int type, NODE *exp)
4578 {
4579         NODE *n;
4580
4581         getnode(n);
4582         memset(n, 0, sizeof(NODE));
4583         n->type = type;
4584         n->re_cnt = 1;
4585
4586         if (type == Node_regex) {
4587                 n->re_reg = make_regexp(exp->stptr, exp->stlen, FALSE, TRUE, FALSE);
4588                 if (n->re_reg == NULL) {
4589                         freenode(n);
4590                         return NULL;
4591                 }
4592                 n->re_exp = exp;
4593                 n->re_flags = CONSTANT;
4594         }
4595         return n;
4596 }
4597
4598
4599 /* mk_rexp --- make a regular expression constant */
4600
4601 static NODE *
4602 mk_rexp(INSTRUCTION *list)
4603 {
4604         INSTRUCTION *ip;
4605
4606         ip = list->nexti;
4607         if (ip == list->lasti && ip->opcode == Op_match_rec)
4608                 ip->opcode = Op_push_re;
4609         else {
4610                 ip = instruction(Op_push_re);
4611                 ip->memory = make_regnode(Node_dynregex, NULL);
4612                 ip->nexti = list->lasti->nexti;
4613                 list->lasti->nexti = ip;
4614                 list->lasti = ip;
4615         }
4616         return ip->memory;
4617 }
4618
4619 /* isnoeffect --- when used as a statement, has no side effects */
4620
4621 static int
4622 isnoeffect(OPCODE type)
4623 {
4624         switch (type) {
4625         case Op_times:
4626         case Op_times_i:
4627         case Op_quotient:
4628         case Op_quotient_i:
4629         case Op_mod:
4630         case Op_mod_i:
4631         case Op_plus:
4632         case Op_plus_i:
4633         case Op_minus:
4634         case Op_minus_i:
4635         case Op_subscript:
4636         case Op_concat:
4637         case Op_exp:
4638         case Op_exp_i:
4639         case Op_unary_minus:
4640         case Op_field_spec:
4641         case Op_and_final:
4642         case Op_or_final:
4643         case Op_equal:
4644         case Op_notequal:
4645         case Op_less:
4646         case Op_greater:
4647         case Op_leq:
4648         case Op_geq:
4649         case Op_match:
4650         case Op_nomatch:
4651         case Op_match_rec:
4652         case Op_not:
4653         case Op_in_array:
4654                 return TRUE;
4655         default:
4656                 break;  /* keeps gcc -Wall happy */
4657         }
4658
4659         return FALSE;
4660 }
4661
4662 /* make_assignable --- make this operand an assignable one if posiible */
4663
4664 static INSTRUCTION *
4665 make_assignable(INSTRUCTION *ip)
4666 {
4667         switch (ip->opcode) {
4668         case Op_push:
4669                 if (ip->memory->type == Node_param_list
4670                                 && (ip->memory->flags & FUNC) != 0)
4671                         return NULL;
4672                 ip->opcode = Op_push_lhs;
4673                 return ip;
4674         case Op_field_spec:
4675                 ip->opcode = Op_field_spec_lhs;
4676                 return ip;
4677         case Op_subscript:
4678                 ip->opcode = Op_subscript_lhs;
4679                 return ip;
4680         default:
4681                 break;  /* keeps gcc -Wall happy */
4682         }
4683         return NULL;
4684 }
4685
4686 /* stopme --- for debugging */
4687
4688 NODE *
4689 stopme(int nargs ATTRIBUTE_UNUSED)
4690 {
4691         return (NODE *) 0;
4692 }
4693
4694 /* dumpintlstr --- write out an initial .po file entry for the string */
4695
4696 static void
4697 dumpintlstr(const char *str, size_t len)
4698 {
4699         char *cp;
4700
4701         /* See the GNU gettext distribution for details on the file format */
4702
4703         if (source != NULL) {
4704                 /* ala the gettext sources, remove leading `./'s */
4705                 for (cp = source; cp[0] == '.' && cp[1] == '/'; cp += 2)
4706                         continue;
4707                 printf("#: %s:%d\n", cp, sourceline);
4708         }
4709
4710         printf("msgid ");
4711         pp_string_fp(fprintf, stdout, str, len, '"', TRUE);
4712         putchar('\n');
4713         printf("msgstr \"\"\n\n");
4714         fflush(stdout);
4715 }
4716
4717 /* dumpintlstr2 --- write out an initial .po file entry for the string and its plural */
4718
4719 static void
4720 dumpintlstr2(const char *str1, size_t len1, const char *str2, size_t len2)
4721 {
4722         char *cp;
4723
4724         /* See the GNU gettext distribution for details on the file format */
4725
4726         if (source != NULL) {
4727                 /* ala the gettext sources, remove leading `./'s */
4728                 for (cp = source; cp[0] == '.' && cp[1] == '/'; cp += 2)
4729                         continue;
4730                 printf("#: %s:%d\n", cp, sourceline);
4731         }
4732
4733         printf("msgid ");
4734         pp_string_fp(fprintf, stdout, str1, len1, '"', TRUE);
4735         putchar('\n');
4736         printf("msgid_plural ");
4737         pp_string_fp(fprintf, stdout, str2, len2, '"', TRUE);
4738         putchar('\n');
4739         printf("msgstr[0] \"\"\nmsgstr[1] \"\"\n\n");
4740         fflush(stdout);
4741 }
4742
4743 /* isarray --- can this type be subscripted? */
4744
4745 static int
4746 isarray(NODE *n)
4747 {
4748         switch (n->type) {
4749         case Node_var_new:
4750         case Node_var_array:
4751                 return TRUE;
4752         case Node_param_list:
4753                 return (n->flags & FUNC) == 0;
4754         case Node_array_ref:
4755                 cant_happen();
4756                 break;
4757         default:
4758                 break;  /* keeps gcc -Wall happy */
4759         }
4760
4761         return FALSE;
4762 }
4763
4764 /* mk_binary --- instructions for binary operators */
4765
4766 static INSTRUCTION *
4767 mk_binary(INSTRUCTION *s1, INSTRUCTION *s2, INSTRUCTION *op)
4768 {
4769         INSTRUCTION *ip1,*ip2;
4770         AWKNUM res;
4771
4772         ip2 = s2->nexti;
4773         if (s2->lasti == ip2 && ip2->opcode == Op_push_i) {
4774         /* do any numeric constant folding */
4775                 ip1 = s1->nexti;
4776                 if (do_optimize > 1
4777                                 && ip1 == s1->lasti && ip1->opcode == Op_push_i
4778                                 && (ip1->memory->flags & (STRCUR|STRING)) == 0
4779                                 && (ip2->memory->flags & (STRCUR|STRING)) == 0
4780                 ) {
4781                         NODE *n1 = ip1->memory, *n2 = ip2->memory;
4782                         res = force_number(n1);
4783                         (void) force_number(n2);
4784                         switch (op->opcode) {
4785                         case Op_times:
4786                                 res *= n2->numbr;
4787                                 break;
4788                         case Op_quotient:
4789                                 if (n2->numbr == 0.0) {
4790                                         /* don't fatalize, allow parsing rest of the input */
4791                                         error_ln(op->source_line, _("division by zero attempted"));
4792                                         goto regular;
4793                                 }
4794
4795                                 res /= n2->numbr;
4796                                 break;
4797                         case Op_mod:
4798                                 if (n2->numbr == 0.0) {
4799                                         /* don't fatalize, allow parsing rest of the input */
4800                                         error_ln(op->source_line, _("division by zero attempted in `%%'"));
4801                                         goto regular;
4802                                 }
4803 #ifdef HAVE_FMOD
4804                                 res = fmod(res, n2->numbr);
4805 #else   /* ! HAVE_FMOD */
4806                                 (void) modf(res / n2->numbr, &res);
4807                                 res = n1->numbr - res * n2->numbr;
4808 #endif  /* ! HAVE_FMOD */
4809                                 break;
4810                         case Op_plus:
4811                                 res += n2->numbr;
4812                                 break;
4813                         case Op_minus:
4814                                 res -= n2->numbr;
4815                                 break;
4816                         case Op_exp:
4817                                 res = calc_exp(res, n2->numbr);
4818                                 break;
4819                         default:
4820                                 goto regular;
4821                         }
4822
4823                         op->opcode = Op_push_i;
4824                         op->memory = mk_number(res, (PERM|NUMCUR|NUMBER));
4825                         n1->flags &= ~PERM;
4826                         n1->flags |= MALLOC;
4827                         n2->flags &= ~PERM;
4828                         n2->flags |= MALLOC;
4829                         unref(n1);
4830                         unref(n2);
4831                         bcfree(ip1);
4832                         bcfree(ip2);
4833                         bcfree(s1);
4834                         bcfree(s2);
4835                         return list_create(op);
4836                 } else {
4837                 /* do basic arithmetic optimisation */
4838                 /* convert (Op_push_i Node_val) + (Op_plus) to (Op_plus_i Node_val) */
4839                         switch (op->opcode) {
4840                         case Op_times:
4841                                 op->opcode = Op_times_i;
4842                                 break;
4843                         case Op_quotient:
4844                                 op->opcode = Op_quotient_i;
4845                                 break;
4846                         case Op_mod:
4847                                 op->opcode = Op_mod_i;
4848                                 break;
4849                         case Op_plus:
4850                                 op->opcode = Op_plus_i;
4851                                 break;
4852                         case Op_minus:
4853                                 op->opcode = Op_minus_i;
4854                                 break;
4855                         case Op_exp:
4856                                 op->opcode = Op_exp_i;
4857                                 break;
4858                         default:
4859                                 goto regular;
4860                         }       
4861
4862                         op->memory = ip2->memory;
4863                         bcfree(ip2);
4864                         bcfree(s2);     /* Op_list */
4865                         return list_append(s1, op);
4866                 }
4867         }
4868
4869 regular:
4870         /* append lists s1, s2 and add `op' bytecode */
4871         (void) list_merge(s1, s2);
4872         return list_append(s1, op);
4873 }
4874
4875 /* mk_boolean --- instructions for boolean and, or */
4876  
4877 static INSTRUCTION *
4878 mk_boolean(INSTRUCTION *left, INSTRUCTION *right, INSTRUCTION *op)
4879 {
4880         INSTRUCTION *tp;
4881         OPCODE opc, final_opc;
4882
4883         opc = op->opcode;               /* Op_and or Op_or */
4884         final_opc = (opc == Op_or) ? Op_or_final : Op_and_final;
4885
4886         add_lint(right, LINT_assign_in_cond);
4887
4888         tp = left->lasti;
4889
4890         if (tp->opcode != final_opc) {  /* x || y */
4891                 list_append(right, instruction(final_opc));
4892                 add_lint(left, LINT_assign_in_cond);
4893                 (void) list_append(left, op);
4894                 left->lasti->target_jmp = right->lasti;
4895
4896                 /* NB: target_stmt points to previous Op_and(Op_or) in a chain;
4897                  *     target_stmt only used in the parser (see below).
4898                  */
4899
4900                 left->lasti->target_stmt = left->lasti;
4901                 right->lasti->target_stmt = left->lasti;
4902         } else {                /* optimization for x || y || z || ... */
4903                 INSTRUCTION *ip;
4904                 
4905                 op->opcode = final_opc;
4906                 (void) list_append(right, op);
4907                 op->target_stmt = tp;
4908                 tp->opcode = opc;
4909                 tp->target_jmp = op;
4910
4911                 /* update jump targets */
4912                 for (ip = tp->target_stmt; ; ip = ip->target_stmt) {
4913                         assert(ip->opcode == opc);
4914                         assert(ip->target_jmp == tp);
4915                         /* if (ip->opcode == opc &&  ip->target_jmp == tp) */
4916                         ip->target_jmp = op;
4917                         if (ip->target_stmt == ip)
4918                                 break;
4919                 }
4920         }
4921
4922         return list_merge(left, right);
4923 }
4924
4925 /* mk_condition --- if-else and conditional */
4926
4927 static INSTRUCTION *
4928 mk_condition(INSTRUCTION *cond, INSTRUCTION *ifp, INSTRUCTION *true_branch,
4929                 INSTRUCTION *elsep, INSTRUCTION *false_branch)
4930 {
4931         /*
4932          *    ----------------
4933          *       cond
4934          *    ----------------
4935          * t: [Op_jmp_false f ]
4936          *    ----------------
4937          *       true_branch
4938          *
4939          *    ----------------
4940          *    [Op_jmp y]
4941          *    ---------------- 
4942          * f:
4943          *      false_branch
4944          *    ----------------
4945          * y: [Op_no_op]
4946          *    ----------------
4947          */
4948
4949         INSTRUCTION *ip;
4950
4951         if (false_branch == NULL) {
4952                 false_branch = list_create(instruction(Op_no_op));
4953                 if (elsep != NULL) {            /* else { } */
4954                         if (do_profiling)
4955                                 (void) list_prepend(false_branch, elsep);
4956                         else
4957                                 bcfree(elsep);
4958                 }
4959         } else {
4960                 /* assert(elsep != NULL); */
4961
4962                 /* avoid a series of no_op's: if .. else if .. else if .. */
4963                 if (false_branch->lasti->opcode != Op_no_op)
4964                         (void) list_append(false_branch, instruction(Op_no_op));
4965                 if (do_profiling) {
4966                         (void) list_prepend(false_branch, elsep);
4967                         false_branch->nexti->branch_end = false_branch->lasti;
4968                         (void) list_prepend(false_branch, instruction(Op_exec_count));
4969                 } else
4970                         bcfree(elsep);
4971         }
4972
4973         (void) list_prepend(false_branch, instruction(Op_jmp));
4974         false_branch->nexti->target_jmp = false_branch->lasti;
4975
4976         add_lint(cond, LINT_assign_in_cond);
4977         ip = list_append(cond, instruction(Op_jmp_false));
4978         ip->lasti->target_jmp = false_branch->nexti->nexti;
4979
4980         if (do_profiling) {
4981                 (void) list_prepend(ip, ifp);
4982                 (void) list_append(ip, instruction(Op_exec_count));
4983                 ip->nexti->branch_if = ip->lasti;
4984                 ip->nexti->branch_else = false_branch->nexti;
4985         } else
4986                 bcfree(ifp);
4987
4988         if (true_branch != NULL)
4989                 list_merge(ip, true_branch);
4990         return list_merge(ip, false_branch);
4991 }
4992
4993 enum defline { FIRST_LINE, LAST_LINE };
4994
4995 /* find_line -- find the first(last) line in a list of (pattern) instructions */
4996
4997 static int
4998 find_line(INSTRUCTION *pattern, enum defline what)
4999 {
5000         INSTRUCTION *ip;
5001         int lineno = 0;
5002
5003         for (ip = pattern->nexti; ip; ip = ip->nexti) {
5004                 if (what == LAST_LINE) {
5005                         if (ip->source_line > lineno)
5006                                 lineno = ip->source_line;
5007                 } else {        /* FIRST_LINE */
5008                         if (ip->source_line > 0
5009                                         && (lineno == 0 || ip->source_line < lineno))
5010                                 lineno = ip->source_line;
5011                 }
5012                 if (ip == pattern->lasti)
5013                         break;
5014         }
5015         assert(lineno > 0);
5016         return lineno;
5017 }
5018
5019 /* append_rule --- pattern-action instructions */
5020
5021 static INSTRUCTION *
5022 append_rule(INSTRUCTION *pattern, INSTRUCTION *action)
5023 {
5024         /*
5025          *    ----------------
5026          *       pattern
5027          *    ----------------
5028          *    [Op_jmp_false f ]
5029          *    ----------------
5030          *       action
5031          *    ----------------
5032          * f: [Op_no_op       ]
5033          *    ----------------
5034          */
5035
5036         INSTRUCTION *rp;
5037         INSTRUCTION *tp;
5038         INSTRUCTION *ip;
5039
5040         if (rule != Rule) {
5041                 rp = pattern;
5042                 if (do_profiling)
5043                         (void) list_append(action, instruction(Op_no_op));
5044                 (rp + 1)->firsti = action->nexti;
5045                 (rp + 1)->lasti = action->lasti;
5046                 (rp + 2)->first_line = pattern->source_line;
5047                 (rp + 2)->last_line = lastline;
5048                 ip = list_prepend(action, rp);
5049
5050         } else {
5051                 rp = bcalloc(Op_rule, 3, 0);
5052                 rp->in_rule = Rule;
5053                 rp->source_file = source;
5054                 tp = instruction(Op_no_op);
5055
5056                 if (pattern == NULL) {
5057                         /* assert(action != NULL); */
5058                         if (do_profiling)
5059                                 (void) list_prepend(action, instruction(Op_exec_count));
5060                         (rp + 1)->firsti = action->nexti;
5061                         (rp + 1)->lasti = tp;
5062                         (rp + 2)->first_line = firstline;
5063                         (rp + 2)->last_line = lastline;
5064                         rp->source_line = firstline;
5065                         ip = list_prepend(list_append(action, tp), rp);
5066                 } else {
5067                         (void) list_append(pattern, instruction(Op_jmp_false));
5068                         pattern->lasti->target_jmp = tp;
5069                         (rp + 2)->first_line = find_line(pattern, FIRST_LINE);
5070                         rp->source_line = (rp + 2)->first_line;
5071                         if (action == NULL) {
5072                                 (rp + 2)->last_line = find_line(pattern, LAST_LINE);
5073                                 action = list_create(instruction(Op_K_print_rec));
5074                                 if (do_profiling)
5075                                         (void) list_prepend(action, instruction(Op_exec_count));
5076                         } else
5077                                 (rp + 2)->last_line = lastline;
5078
5079                         if (do_profiling) {
5080                                 (void) list_prepend(pattern, instruction(Op_exec_count));
5081                                 (void) list_prepend(action, instruction(Op_exec_count));
5082                         }
5083                         (rp + 1)->firsti = action->nexti;
5084                         (rp + 1)->lasti = tp;
5085                         ip = list_append(
5086                                         list_merge(list_prepend(pattern, rp),
5087                                                 action),
5088                                         tp);
5089                 }
5090
5091         }
5092
5093         list_append(rule_list, rp + 1);
5094
5095         if (rule_block[rule] == NULL)
5096                 rule_block[rule] = ip;
5097         else
5098                 (void) list_merge(rule_block[rule], ip);
5099         
5100         return rule_block[rule];
5101 }
5102
5103 /* mk_assignment --- assignment bytecodes */
5104
5105 static INSTRUCTION *
5106 mk_assignment(INSTRUCTION *lhs, INSTRUCTION *rhs, INSTRUCTION *op)
5107 {
5108         INSTRUCTION *tp;
5109         INSTRUCTION *ip;
5110
5111         tp = lhs->lasti;
5112         switch (tp->opcode) {
5113         case Op_field_spec:
5114                 tp->opcode = Op_field_spec_lhs;
5115                 break;
5116         case Op_subscript:
5117                 tp->opcode = Op_subscript_lhs;
5118                 break;
5119         case Op_push:
5120         case Op_push_array:
5121                 tp->opcode = Op_push_lhs; 
5122                 break;
5123         default:
5124                 cant_happen();
5125         }
5126
5127         tp->do_reference = (op->opcode != Op_assign);   /* check for uninitialized reference */
5128
5129         if (rhs != NULL)
5130                 ip = list_merge(rhs, lhs);
5131         else
5132                 ip = lhs;
5133
5134         (void) list_append(ip, op);
5135
5136         if (tp->opcode == Op_push_lhs
5137                         && tp->memory->type == Node_var
5138                         && tp->memory->var_assign
5139         ) {
5140                 tp->do_reference = FALSE; /* no uninitialized reference checking
5141                                            * for a special variable.
5142                                            */
5143                 (void) list_append(ip, instruction(Op_var_assign));
5144                 ip->lasti->assign_var = tp->memory->var_assign;
5145         } else if (tp->opcode == Op_field_spec_lhs) {
5146                 (void) list_append(ip, instruction(Op_field_assign));
5147                 ip->lasti->field_assign = (Func_ptr) 0;
5148                 tp->target_assign = ip->lasti;
5149         }
5150
5151         return ip;
5152 }
5153
5154 /* optimize_assignment --- peephole optimization for assignment */
5155
5156 static INSTRUCTION *
5157 optimize_assignment(INSTRUCTION *exp)
5158 {
5159         INSTRUCTION *i1;
5160         INSTRUCTION *i2;
5161         INSTRUCTION *i3;
5162
5163         /*
5164          * Optimize assignment statements array[subs] = x; var = x; $n = x;
5165          * string concatenation of the form s = s t.
5166          *
5167          * 1) Array element assignment array[subs] = x:
5168          *   Replaces Op_push_array + Op_subscript_lhs + Op_assign + Op_pop
5169          *   with single instruction Op_store_sub.
5170          *       Limitation: 1 dimension and sub is simple var/value.
5171          * 
5172          * 2) Simple variable assignment var = x:
5173          *   Replaces Op_push_lhs + Op_assign + Op_pop with Op_store_var.
5174          *
5175          * 3) Field assignment $n = x:
5176          *   Replaces Op_field_spec_lhs + Op_assign + Op_field_assign + Op_pop
5177          *   with Op_store_field.
5178          *
5179          * 4) Optimization for string concatenation:
5180          *   For cases like x = x y, uses realloc to include y in x;
5181          *   also eliminates instructions Op_push_lhs and Op_pop.
5182          */
5183
5184         /*
5185          * N.B.: do not append Op_pop instruction to the returned
5186          * instruction list if optimized. None of these
5187          * optimized instructions pushes the r-value of assignment
5188          * onto the runtime stack.
5189          */
5190
5191         i2 = NULL;
5192         i1 = exp->lasti;
5193
5194         if (   ! do_optimize
5195             || (   i1->opcode != Op_assign
5196                 && i1->opcode != Op_field_assign)
5197         )
5198                 return list_append(exp, instruction(Op_pop));
5199
5200         for (i2 = exp->nexti; i2 != i1; i2 = i2->nexti) {
5201                 switch (i2->opcode) {
5202                 case Op_concat:
5203                         if (i2->nexti->opcode == Op_push_lhs    /* l.h.s is a simple variable */
5204                                 && (i2->concat_flag & CSVAR)        /* 1st exp in r.h.s is a simple variable;
5205                                                                      * see Op_concat in the grammer above.
5206                                                                      */
5207                                 && i2->nexti->memory == exp->nexti->memory       /* and the same as in l.h.s */
5208                                 && i2->nexti->nexti == i1
5209                                 && i1->opcode == Op_assign
5210                         ) {
5211                                 /* s = s ... optimization */
5212
5213                                 /* avoid stuff like x = x (x = y) or x = x gsub(/./, "b", x);
5214                                  * check for l-value reference to this variable in the r.h.s.
5215                                  * Also, avoid function calls in general to guard against
5216                                  * global variable assignment.
5217                                  */
5218
5219                                 for (i3 = exp->nexti->nexti; i3 != i2; i3 = i3->nexti) {
5220                                         if ((i3->opcode == Op_push_lhs && i3->memory == i2->nexti->memory)
5221                                                         || i3->opcode == Op_func_call)
5222                                                 return list_append(exp, instruction(Op_pop)); /* no optimization */
5223                                 }
5224
5225                                 /* remove the variable from r.h.s */
5226                                 i3 = exp->nexti;
5227                                 exp->nexti = i3->nexti;
5228                                 bcfree(i3);
5229
5230                                 if (--i2->expr_count == 1)      /* one less expression in Op_concat */
5231                                         i2->opcode = Op_no_op;
5232
5233                                 i3 = i2->nexti;
5234                                 assert(i3->opcode == Op_push_lhs);
5235                                 i3->opcode = Op_assign_concat;  /* change Op_push_lhs to Op_assign_concat */
5236                                 i3->nexti = NULL;
5237                                 bcfree(i1);          /* Op_assign */
5238                                 exp->lasti = i3;     /* update Op_list */
5239                                 return exp;
5240                         }
5241                         break;
5242
5243                 case Op_field_spec_lhs:
5244                         if (i2->nexti->opcode == Op_assign
5245                                         && i2->nexti->nexti == i1
5246                                         && i1->opcode == Op_field_assign
5247                         ) {
5248                                 /* $n = .. */
5249                                 i2->opcode = Op_store_field;
5250                                 bcfree(i2->nexti);  /* Op_assign */
5251                                 i2->nexti = NULL;
5252                                 bcfree(i1);          /* Op_field_assign */
5253                                 exp->lasti = i2;    /* update Op_list */
5254                                 return exp;
5255                         }
5256                         break;
5257
5258                 case Op_push_array:
5259                         if (i2->nexti->nexti->opcode == Op_subscript_lhs) {
5260                                 i3 = i2->nexti->nexti;
5261                                 if (i3->sub_count == 1
5262                                                 && i3->nexti == i1
5263                                                 && i1->opcode == Op_assign
5264                                 ) {
5265                                         /* array[sub] = .. */
5266                                         i3->opcode = Op_store_sub;
5267                                         i3->memory = i2->memory;
5268                                         i3->expr_count = 1;  /* sub_count shadows memory,
5269                                           * so use expr_count instead.
5270                                                           */
5271                                         i3->nexti = NULL;
5272                                         i2->opcode = Op_no_op;                                  
5273                                         bcfree(i1);          /* Op_assign */
5274                                         exp->lasti = i3;     /* update Op_list */
5275                                         return exp;
5276                                 }
5277                         }
5278                         break;
5279
5280                 case Op_push_lhs:
5281                         if (i2->nexti == i1
5282                                                 && i1->opcode == Op_assign
5283                         ) {
5284                                 /* var = .. */
5285                                 i2->opcode = Op_store_var;
5286                                 i2->nexti = NULL;
5287                                 bcfree(i1);          /* Op_assign */
5288                                 exp->lasti = i2;     /* update Op_list */
5289                                 return exp;
5290                         }
5291                         break;
5292
5293                 default:
5294                         break;
5295                 }
5296         }
5297
5298         /* no optimization  */
5299         return list_append(exp, instruction(Op_pop));
5300 }
5301
5302
5303 /* mk_getline --- make instructions for getline */
5304
5305 static INSTRUCTION *
5306 mk_getline(INSTRUCTION *op, INSTRUCTION *var, INSTRUCTION *redir, int redirtype)
5307 {
5308         INSTRUCTION *ip;
5309         INSTRUCTION *tp;
5310         INSTRUCTION *asgn = NULL;
5311
5312         /*
5313          *  getline [var] < [file]
5314          *
5315          *  [ file (simp_exp)]
5316          *  [ [ var ] ]
5317          *  [ Op_K_getline_redir|NULL|redir_type|into_var]
5318          *  [ [var_assign] ] 
5319          *
5320          */
5321
5322         if (redir == NULL) {
5323                 int sline = op->source_line;
5324                 bcfree(op);
5325                 op = bcalloc(Op_K_getline, 2, sline);
5326                 (op + 1)->target_endfile = ip_endfile;
5327                 (op + 1)->target_beginfile = ip_beginfile;      
5328         }
5329
5330         if (var != NULL) {
5331                 tp = make_assignable(var->lasti);
5332                 assert(tp != NULL);
5333
5334                 /* check if we need after_assign bytecode */
5335                 if (tp->opcode == Op_push_lhs
5336                                 && tp->memory->type == Node_var
5337                                 && tp->memory->var_assign
5338                 ) {
5339                         asgn = instruction(Op_var_assign);
5340                         asgn->assign_ctxt = op->opcode;
5341                         asgn->assign_var = tp->memory->var_assign;
5342                 } else if (tp->opcode == Op_field_spec_lhs) {
5343                         asgn = instruction(Op_field_assign);
5344                         asgn->assign_ctxt = op->opcode;
5345                         asgn->field_assign = (Func_ptr) 0;   /* determined at run time */
5346                         tp->target_assign = asgn;
5347                 }
5348                 if (redir != NULL) {
5349                         ip = list_merge(redir, var);
5350                         (void) list_append(ip, op);
5351                 } else
5352                         ip = list_append(var, op);
5353         } else if (redir != NULL)
5354                 ip = list_append(redir, op);
5355         else
5356                 ip = list_create(op);
5357         op->into_var = (var != NULL);
5358         op->redir_type = (redir != NULL) ? redirtype : 0;
5359
5360         return (asgn == NULL ? ip : list_append(ip, asgn));
5361 }
5362
5363
5364 /* mk_for_loop --- for loop bytecodes */
5365
5366 static INSTRUCTION *
5367 mk_for_loop(INSTRUCTION *forp, INSTRUCTION *init, INSTRUCTION *cond,
5368                                 INSTRUCTION *incr, INSTRUCTION *body)
5369 {
5370         /*
5371          *   ------------------------
5372          *        init                 (may be NULL)
5373          *   ------------------------
5374          * x:
5375          *        cond                 (Op_no_op if NULL)
5376          *   ------------------------
5377          *    [ Op_jmp_false tb      ]
5378          *   ------------------------
5379          *        body                 (may be NULL)
5380          *   ------------------------
5381          * tc: 
5382          *    incr                      (may be NULL)
5383          *    [ Op_jmp x             ] 
5384          *   ------------------------
5385          * tb:[ Op_no_op             ] 
5386          */
5387
5388         INSTRUCTION *ip, *tbreak, *tcont;
5389         INSTRUCTION *jmp;
5390         INSTRUCTION *pp_cond;
5391         INSTRUCTION *ret;
5392
5393         tbreak = instruction(Op_no_op);
5394
5395         if (cond != NULL) {
5396                 add_lint(cond, LINT_assign_in_cond);
5397                 pp_cond = cond->nexti;
5398                 ip = cond;
5399                 (void) list_append(ip, instruction(Op_jmp_false));
5400                 ip->lasti->target_jmp = tbreak;
5401         } else {
5402                 pp_cond = instruction(Op_no_op);
5403                 ip = list_create(pp_cond);
5404         }
5405
5406         if (init != NULL)
5407                 ip = list_merge(init, ip);
5408
5409         if (do_profiling) {
5410                 (void) list_append(ip, instruction(Op_exec_count));
5411                 (forp + 1)->forloop_cond = pp_cond;
5412                 (forp + 1)->forloop_body = ip->lasti;
5413         }
5414
5415         if (body != NULL)
5416                 (void) list_merge(ip, body);
5417
5418         jmp = instruction(Op_jmp);
5419         jmp->target_jmp = pp_cond;
5420         if (incr == NULL)
5421                 tcont = jmp;
5422         else {
5423                 tcont = incr->nexti;
5424                 (void) list_merge(ip, incr);
5425         }
5426
5427         (void) list_append(ip, jmp);
5428         ret = list_append(ip, tbreak);
5429         fix_break_continue(ret, tbreak, tcont);
5430
5431         if (do_profiling) {
5432                 forp->target_break = tbreak;
5433                 forp->target_continue = tcont;
5434                 ret = list_prepend(ret, forp);
5435         } /* else
5436                         forp is NULL */
5437
5438         return ret;
5439 }
5440
5441 /* add_lint --- add lint warning bytecode if needed */
5442
5443 static void
5444 add_lint(INSTRUCTION *list, LINTTYPE linttype)
5445 {
5446 #ifndef NO_LINT
5447         INSTRUCTION *ip;
5448
5449         switch (linttype) {
5450         case LINT_assign_in_cond:
5451                 ip = list->lasti;
5452                 if (ip->opcode == Op_var_assign || ip->opcode == Op_field_assign) {
5453                         assert(ip != list->nexti);
5454                         for (ip = list->nexti; ip->nexti != list->lasti; ip = ip->nexti)
5455                                 ;
5456                 }
5457
5458                 if (ip->opcode == Op_assign || ip->opcode == Op_assign_concat) {
5459                         list_append(list, instruction(Op_lint));
5460                         list->lasti->lint_type = linttype;
5461                 }
5462                 break;
5463
5464         case LINT_no_effect:
5465                 if (list->lasti->opcode == Op_pop && list->nexti != list->lasti) {
5466                         for (ip = list->nexti; ip->nexti != list->lasti; ip = ip->nexti)
5467                                 ;
5468
5469                         if (do_lint) {          /* compile-time warning */
5470                                 if (isnoeffect(ip->opcode))
5471                                         lintwarn_ln(ip->source_line, ("statement may have no effect"));
5472                         }
5473
5474                         if (ip->opcode == Op_push) {            /* run-time warning */
5475                                 list_append(list, instruction(Op_lint));
5476                                 list->lasti->lint_type = linttype;
5477                         }
5478                 }
5479                 break;
5480
5481         default:
5482                 break;
5483         }
5484 #endif
5485 }
5486
5487 /* mk_expression_list --- list of bytecode lists */
5488
5489 static INSTRUCTION *
5490 mk_expression_list(INSTRUCTION *list, INSTRUCTION *s1)
5491 {
5492         INSTRUCTION *r;
5493
5494         /* we can't just combine all bytecodes, since we need to
5495          * process individual expressions for a few builtins in snode() (-:
5496          */
5497         
5498         /* -- list of lists     */
5499         /* [Op_list| ... ]------
5500          *                       |
5501          * [Op_list| ... ]   --  |
5502          *  ...               |  |
5503          *  ...       <-------   |
5504          * [Op_list| ... ]   --  |
5505          *  ...               |  |
5506          *  ...               |  |
5507          *  ...       <------- --
5508          */
5509
5510         assert(s1 != NULL && s1->opcode == Op_list);
5511         if (list == NULL) {
5512                 list = instruction(Op_list);
5513                 list->nexti = s1;
5514                 list->lasti = s1->lasti;
5515                 return list;
5516         }
5517
5518         /* append expression to the end of the list */
5519
5520         r = list->lasti;
5521         r->nexti = s1;
5522         list->lasti = s1->lasti;
5523         return list;
5524 }
5525
5526 /* count_expressions --- fixup expression_list from mk_expression_list.
5527  *                       returns no of expressions in list. isarg is true
5528  *                       for function arguments.
5529  */
5530
5531 static int
5532 count_expressions(INSTRUCTION **list, int isarg)
5533 {
5534         INSTRUCTION *expr;
5535         INSTRUCTION *r = NULL;
5536         int count = 0;
5537
5538         if (*list == NULL)      /* error earlier */
5539                 return 0;
5540
5541         for (expr = (*list)->nexti; expr; ) {
5542                 INSTRUCTION *t1, *t2;
5543                 t1 = expr->nexti;
5544                 t2 = expr->lasti;
5545                 if (isarg && t1 == t2 && t1->opcode == Op_push)
5546                         t1->opcode = Op_push_param;
5547                 if (++count == 1)
5548                         r = expr;
5549                 else
5550                         (void) list_merge(r, expr);
5551                 expr = t2->nexti;
5552         }
5553  
5554         assert(count > 0);
5555         if (! isarg && count > max_args)
5556                 max_args = count;
5557         bcfree(*list);
5558         *list = r;
5559         return count;
5560 }
5561
5562 /* fix_break_continue --- fix up break & continue codes in loop bodies */
5563
5564 static void
5565 fix_break_continue(INSTRUCTION *list, INSTRUCTION *b_target, INSTRUCTION *c_target)
5566 {
5567         INSTRUCTION *ip;
5568
5569         list->lasti->nexti = NULL;      /* just to make sure */
5570
5571         for (ip = list->nexti; ip != NULL; ip = ip->nexti) {
5572                 switch (ip->opcode) {
5573                 case Op_K_break:
5574                         if (ip->target_jmp == NULL)
5575                                 ip->target_jmp = b_target;
5576                         break;
5577
5578                 case Op_K_continue:
5579                         if (ip->target_jmp == NULL)
5580                                 ip->target_jmp = c_target;
5581                         break;
5582
5583                 default:
5584                         /* this is to keep the compiler happy. sheesh. */
5585                         break;
5586                 }
5587         }
5588 }
5589
5590
5591 /* append_symbol --- append symbol to the list of symbols
5592  *                  installed in the symbol table.
5593  */
5594
5595 void
5596 append_symbol(char *name)
5597 {
5598         NODE *hp;
5599
5600         /* N.B.: func_install removes func name and reinstalls it;
5601          * and we get two entries for it here!. destroy_symbol()
5602          * will find and destroy the Node_func which is what we want.
5603          */
5604
5605         getnode(hp);
5606         hp->hname = name;       /* shallow copy */
5607         hp->hnext = symbol_list->hnext;
5608         symbol_list->hnext = hp;
5609 }
5610
5611 /* release_symbol --- free symbol list and optionally remove symbol from symbol table */
5612
5613 void
5614 release_symbols(NODE *symlist, int keep_globals)
5615 {
5616         NODE *hp, *n;
5617
5618         for (hp = symlist->hnext; hp != NULL; hp = n) {
5619                 if (! keep_globals) {
5620                         /* destroys globals, function, and params
5621                          * if still in symbol table and not removed by func_install
5622                          * due to parse error.
5623                          */
5624                         destroy_symbol(hp->hname);
5625                 }
5626                 n = hp->hnext;
5627                 freenode(hp);
5628         }
5629         symlist->hnext = NULL;
5630 }
5631
5632 /* destroy_symbol --- remove a symbol from symbol table
5633 *                     and free all associated memory.
5634 */
5635
5636 void
5637 destroy_symbol(char *name)
5638 {
5639         NODE *symbol, *hp;
5640
5641         symbol = lookup(name);
5642         if (symbol == NULL)
5643                 return;
5644
5645         if (symbol->type == Node_func) {
5646                 char **varnames;
5647                 NODE *func, *n;
5648                                 
5649                 func = symbol;
5650                 varnames = func->parmlist;
5651                 if (varnames != NULL)
5652                         efree(varnames);
5653
5654                 /* function parameters of type Node_param_list */                               
5655                 for (n = func->lnode->rnode; n != NULL; ) {
5656                         NODE *np;
5657                         np = n->rnode;
5658                         efree(n->param);
5659                         freenode(n);
5660                         n = np;
5661                 }               
5662                 freenode(func->lnode);
5663                 func_count--;
5664
5665         } else if (symbol->type == Node_var_array)
5666                 assoc_clear(symbol);
5667         else if (symbol->type == Node_var) 
5668                 unref(symbol->var_value);
5669
5670         /* remove from symbol table */
5671         hp = remove_symbol(name);
5672         efree(hp->hname);
5673         freenode(hp->hvalue);
5674         freenode(hp);
5675 }
5676
5677 #define pool_size       d.dl
5678 #define freei           x.xi
5679 static INSTRUCTION *pool_list;
5680 static AWK_CONTEXT *curr_ctxt = NULL;
5681
5682 /* new_context --- create a new execution context. */
5683
5684 AWK_CONTEXT *
5685 new_context()
5686 {
5687         AWK_CONTEXT *ctxt;
5688
5689         emalloc(ctxt, AWK_CONTEXT *, sizeof(AWK_CONTEXT), "new_context");
5690         memset(ctxt, 0, sizeof(AWK_CONTEXT));
5691         ctxt->srcfiles.next = ctxt->srcfiles.prev = &ctxt->srcfiles;
5692         ctxt->rule_list.opcode = Op_list;
5693         ctxt->rule_list.lasti = &ctxt->rule_list;
5694         return ctxt;
5695 }
5696
5697 /* set_context --- change current execution context. */
5698
5699 static void
5700 set_context(AWK_CONTEXT *ctxt)
5701 {
5702         pool_list = &ctxt->pools;
5703         symbol_list = &ctxt->symbols;
5704         srcfiles = &ctxt->srcfiles;
5705         rule_list = &ctxt->rule_list;
5706         install_func = ctxt->install_func;
5707         curr_ctxt = ctxt;
5708 }
5709
5710 /*
5711  * push_context:
5712  *
5713  * Switch to the given context after saving the current one. The set
5714  * of active execution contexts forms a stack; the global or main context
5715  * is at the bottom of the stack.
5716  */
5717
5718 void
5719 push_context(AWK_CONTEXT *ctxt)
5720 {
5721         ctxt->prev = curr_ctxt;
5722         /* save current source and sourceline */
5723         if (curr_ctxt != NULL) {
5724                 curr_ctxt->sourceline = sourceline;
5725                 curr_ctxt->source = source;
5726         }
5727         sourceline = 0;
5728         source = NULL;
5729         set_context(ctxt);
5730 }
5731
5732 /* pop_context --- switch to previous execution context. */ 
5733
5734 void
5735 pop_context()
5736 {
5737         AWK_CONTEXT *ctxt;
5738
5739         assert(curr_ctxt != NULL);
5740         ctxt = curr_ctxt->prev;
5741         /* restore source and sourceline */
5742         sourceline = ctxt->sourceline;
5743         source = ctxt->source;
5744         set_context(ctxt);
5745 }
5746
5747 /* in_main_context --- are we in the main context ? */
5748
5749 int
5750 in_main_context()
5751 {
5752         assert(curr_ctxt != NULL);
5753         return (curr_ctxt->prev == NULL);
5754 }
5755
5756 /* free_context --- free context structure and related data. */ 
5757
5758 void
5759 free_context(AWK_CONTEXT *ctxt, int keep_globals)
5760 {
5761         SRCFILE *s, *sn;
5762
5763         if (ctxt == NULL)
5764                 return;
5765
5766         assert(curr_ctxt != ctxt);
5767
5768         /* free all code including function codes */
5769         free_bcpool(&ctxt->pools);
5770         /* free symbols */
5771         release_symbols(&ctxt->symbols, keep_globals);
5772         /* free srcfiles */
5773         for (s = &ctxt->srcfiles; s != &ctxt->srcfiles; s = sn) {
5774                 sn = s->next;
5775                 if (s->stype != SRC_CMDLINE && s->stype != SRC_STDIN)
5776                         efree(s->fullpath);
5777                 efree(s->src);
5778                 efree(s);
5779         }
5780         efree(ctxt);
5781 }
5782
5783 /* free_bc_internal --- free internal memory of an instruction. */ 
5784
5785 static void
5786 free_bc_internal(INSTRUCTION *cp)
5787 {
5788         NODE *m;
5789
5790         switch(cp->opcode) {
5791         case Op_func_call:
5792                 if (cp->func_name != NULL
5793                                 && cp->func_name != builtin_func
5794                 )
5795                         efree(cp->func_name);
5796                 break;
5797         case Op_push_re:
5798         case Op_match_rec:
5799         case Op_match:
5800         case Op_nomatch:
5801                 m = cp->memory;
5802                 if (m->re_reg != NULL)
5803                         refree(m->re_reg);
5804                 if (m->re_exp != NULL)
5805                         unref(m->re_exp);
5806                 if (m->re_text != NULL)
5807                         unref(m->re_text);
5808                 freenode(m);
5809                 break;                  
5810         case Op_token:  /* token lost during error recovery in yyparse */
5811                 if (cp->lextok != NULL)
5812                         efree(cp->lextok);
5813                 break;
5814         case Op_illegal:
5815                 cant_happen();
5816         default:
5817                 break;  
5818         }
5819 }
5820
5821
5822 /* INSTR_CHUNK must be > largest code size (3) */
5823 #define INSTR_CHUNK 127
5824
5825 /* bcfree --- deallocate instruction */
5826
5827 void
5828 bcfree(INSTRUCTION *cp)
5829 {
5830         cp->opcode = 0;
5831         cp->nexti = pool_list->freei;
5832         pool_list->freei = cp;
5833 }       
5834
5835 /* bcalloc --- allocate a new instruction */
5836
5837 INSTRUCTION *
5838 bcalloc(OPCODE op, int size, int srcline)
5839 {
5840         INSTRUCTION *cp;
5841
5842         if (size > 1) {
5843                 /* wide instructions Op_rule, Op_func_call .. */
5844                 emalloc(cp, INSTRUCTION *, (size + 1) * sizeof(INSTRUCTION), "bcalloc");
5845                 cp->pool_size = size;
5846                 cp->nexti = pool_list->nexti;
5847                 pool_list->nexti = cp++;
5848         } else {
5849                 INSTRUCTION *pool;
5850
5851                 pool = pool_list->freei;
5852                 if (pool == NULL) {
5853                         INSTRUCTION *last;
5854                         emalloc(cp, INSTRUCTION *, (INSTR_CHUNK + 1) * sizeof(INSTRUCTION), "bcalloc");
5855
5856                         cp->pool_size = INSTR_CHUNK;
5857                         cp->nexti = pool_list->nexti;
5858                         pool_list->nexti = cp;
5859                         pool = ++cp;
5860                         last = &pool[INSTR_CHUNK - 1];
5861                         for (; cp <= last; cp++) {
5862                                 cp->opcode = 0;
5863                                 cp->nexti = cp + 1;
5864                         }
5865                         --cp;
5866                         cp->nexti = NULL;
5867                 }
5868                 cp = pool;
5869                 pool_list->freei = cp->nexti;
5870         }
5871
5872         memset(cp, 0, size * sizeof(INSTRUCTION));
5873         cp->opcode = op;
5874         cp->source_line = srcline;
5875         return cp;
5876 }
5877
5878 /* free_bcpool --- free list of instruction memory pools */
5879
5880 static void
5881 free_bcpool(INSTRUCTION *pl)
5882 {
5883         INSTRUCTION *pool, *tmp;
5884
5885         for (pool = pl->nexti; pool != NULL; pool = tmp) {
5886                 INSTRUCTION *cp, *last;
5887                 long psiz;
5888                 psiz = pool->pool_size;
5889                 if (psiz == INSTR_CHUNK)
5890                         last = pool + psiz;
5891                 else
5892                         last = pool + 1;
5893                 for (cp = pool + 1; cp <= last ; cp++) {
5894                         if (cp->opcode != 0)
5895                                 free_bc_internal(cp);
5896                 }
5897                 tmp = pool->nexti;
5898                 efree(pool);
5899         }
5900         memset(pl, 0, sizeof(INSTRUCTION));
5901 }
5902
5903
5904 static inline INSTRUCTION *
5905 list_create(INSTRUCTION *x)
5906 {
5907         INSTRUCTION *l;
5908
5909         l = instruction(Op_list);
5910         l->nexti = x;
5911         l->lasti = x;
5912         return l;
5913 }
5914
5915 static inline INSTRUCTION *
5916 list_append(INSTRUCTION *l, INSTRUCTION *x)
5917 {
5918 #ifdef GAWKDEBUG
5919         if (l->opcode != Op_list)
5920                 cant_happen();
5921 #endif
5922         l->lasti->nexti = x;
5923         l->lasti = x;
5924         return l;
5925 }
5926
5927 static inline INSTRUCTION *
5928 list_prepend(INSTRUCTION *l, INSTRUCTION *x)
5929 {
5930 #ifdef GAWKDEBUG
5931         if (l->opcode != Op_list)
5932                 cant_happen();
5933 #endif
5934         x->nexti = l->nexti;
5935         l->nexti = x;
5936         return l;
5937 }
5938
5939 static inline INSTRUCTION *
5940 list_merge(INSTRUCTION *l1, INSTRUCTION *l2)
5941 {
5942 #ifdef GAWKDEBUG
5943         if (l1->opcode != Op_list)
5944                 cant_happen();
5945         if (l2->opcode != Op_list)
5946                 cant_happen();
5947 #endif
5948         l1->lasti->nexti = l2->nexti;
5949         l1->lasti = l2->lasti;
5950         bcfree(l2);
5951         return l1;
5952 }
5953
5954 /* See if name is a special token. */
5955
5956 int
5957 check_special(const char *name)
5958 {
5959         int low, high, mid;
5960         int i;
5961 #if 'a' == 0x81 /* it's EBCDIC */
5962         static int did_sort = FALSE;
5963
5964         if (! did_sort) {
5965                 qsort((void *) tokentab,
5966                                 sizeof(tokentab) / sizeof(tokentab[0]),
5967                                 sizeof(tokentab[0]), tokcompare);
5968                 did_sort = TRUE;
5969         }
5970 #endif
5971
5972         low = 0;
5973         high = (sizeof(tokentab) / sizeof(tokentab[0])) - 1;
5974         while (low <= high) {
5975                 mid = (low + high) / 2;
5976                 i = *name - tokentab[mid].operator[0];
5977                 if (i == 0)
5978                         i = strcmp(name, tokentab[mid].operator);
5979
5980                 if (i < 0)              /* token < mid */
5981                         high = mid - 1;
5982                 else if (i > 0)         /* token > mid */
5983                         low = mid + 1;
5984                 else {
5985                         if ((do_traditional && (tokentab[mid].flags & GAWKX))
5986                                         || (do_posix && (tokentab[mid].flags & NOT_POSIX)))
5987                                 return -1;
5988                         return mid;
5989                 }
5990         }
5991         return -1;
5992 }
5993
5994 /*
5995  * This provides a private version of functions that act like VMS's
5996  * variable-length record filesystem, where there was a bug on
5997  * certain source files.
5998  */
5999
6000 static FILE *fp = NULL;
6001
6002 /* read_one_line --- return one input line at a time. mainly for debugging. */
6003
6004 static ssize_t
6005 read_one_line(int fd, void *buffer, size_t count)
6006 {
6007         char buf[BUFSIZ];
6008
6009         /* Minor potential memory leak here. Too bad. */
6010         if (fp == NULL) {
6011                 fp = fdopen(fd, "r");
6012                 if (fp == NULL) {
6013                         fprintf(stderr, "ugh. fdopen: %s\n", strerror(errno));
6014                         gawk_exit(EXIT_FAILURE);
6015                 }
6016         }
6017
6018         if (fgets(buf, sizeof buf, fp) == NULL)
6019                 return 0;
6020
6021         memcpy(buffer, buf, strlen(buf));
6022         return strlen(buf);
6023 }
6024
6025 /* one_line_close --- close the open file being read with read_one_line() */
6026
6027 static int
6028 one_line_close(int fd)
6029 {
6030         int ret;
6031
6032         if (fp == NULL || fd != fileno(fp))
6033                 fatal("debugging read/close screwed up!");
6034
6035         ret = fclose(fp);
6036         fp = NULL;
6037         return ret;
6038 }
6039