2 * eval.c - gawk bytecode interpreter
6 * Copyright (C) 1986, 1988, 1989, 1991-2013 the Free Software Foundation, Inc.
8 * This file is part of GAWK, the GNU implementation of the
9 * AWK Programming Language.
11 * GAWK is free software; you can redistribute it and/or modify
12 * it under the terms of the GNU General Public License as published by
13 * the Free Software Foundation; either version 3 of the License, or
14 * (at your option) any later version.
16 * GAWK is distributed in the hope that it will be useful,
17 * but WITHOUT ANY WARRANTY; without even the implied warranty of
18 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
19 * GNU General Public License for more details.
21 * You should have received a copy of the GNU General Public License
22 * along with this program; if not, write to the Free Software
23 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
28 extern void after_beginfile(IOBUF **curfile);
29 extern double pow(double x, double y);
30 extern double modf(double x, double *yp);
31 extern double fmod(double x, double y);
32 NODE **fcall_list = NULL;
35 IOBUF *curfile = NULL; /* current data file */
38 int (*interpret)(INSTRUCTION *);
39 #define MAX_EXEC_HOOKS 10
40 static int num_exec_hook = 0;
41 static Func_pre_exec pre_execute[MAX_EXEC_HOOKS];
42 static Func_post_exec post_execute = NULL;
44 extern void frame_popped();
51 static NODE *node_Boolean[2];
53 /* This rather ugly macro is for VMS C */
57 #define C(c) ((char)c)
59 * This table is used by the regexp routines to do case independent
60 * matching. Basically, every ascii character maps to itself, except
61 * uppercase letters map to lower case ones. This table has 256
62 * entries, for ISO 8859-1. Note also that if the system this
63 * is compiled on doesn't use 7-bit ascii, casetable[] should not be
64 * defined to the linker, so gawk should not load.
66 * Do NOT make this array static, it is used in several spots, not
70 * This table is also used for IGNORECASE for == and !=, and index().
71 * Although with GLIBC, we could use tolower() everywhere and RE_ICASE
72 * for the regex matcher, precomputing this table once gives us a
73 * performance improvement. I also think it's better for portability
74 * to non-GLIBC systems. All the world is not (yet :-) GNU/Linux.
76 #if 'a' == 97 /* it's ascii */
78 '\000', '\001', '\002', '\003', '\004', '\005', '\006', '\007',
79 '\010', '\011', '\012', '\013', '\014', '\015', '\016', '\017',
80 '\020', '\021', '\022', '\023', '\024', '\025', '\026', '\027',
81 '\030', '\031', '\032', '\033', '\034', '\035', '\036', '\037',
82 /* ' ' '!' '"' '#' '$' '%' '&' ''' */
83 '\040', '\041', '\042', '\043', '\044', '\045', '\046', '\047',
84 /* '(' ')' '*' '+' ',' '-' '.' '/' */
85 '\050', '\051', '\052', '\053', '\054', '\055', '\056', '\057',
86 /* '0' '1' '2' '3' '4' '5' '6' '7' */
87 '\060', '\061', '\062', '\063', '\064', '\065', '\066', '\067',
88 /* '8' '9' ':' ';' '<' '=' '>' '?' */
89 '\070', '\071', '\072', '\073', '\074', '\075', '\076', '\077',
90 /* '@' 'A' 'B' 'C' 'D' 'E' 'F' 'G' */
91 '\100', '\141', '\142', '\143', '\144', '\145', '\146', '\147',
92 /* 'H' 'I' 'J' 'K' 'L' 'M' 'N' 'O' */
93 '\150', '\151', '\152', '\153', '\154', '\155', '\156', '\157',
94 /* 'P' 'Q' 'R' 'S' 'T' 'U' 'V' 'W' */
95 '\160', '\161', '\162', '\163', '\164', '\165', '\166', '\167',
96 /* 'X' 'Y' 'Z' '[' '\' ']' '^' '_' */
97 '\170', '\171', '\172', '\133', '\134', '\135', '\136', '\137',
98 /* '`' 'a' 'b' 'c' 'd' 'e' 'f' 'g' */
99 '\140', '\141', '\142', '\143', '\144', '\145', '\146', '\147',
100 /* 'h' 'i' 'j' 'k' 'l' 'm' 'n' 'o' */
101 '\150', '\151', '\152', '\153', '\154', '\155', '\156', '\157',
102 /* 'p' 'q' 'r' 's' 't' 'u' 'v' 'w' */
103 '\160', '\161', '\162', '\163', '\164', '\165', '\166', '\167',
104 /* 'x' 'y' 'z' '{' '|' '}' '~' */
105 '\170', '\171', '\172', '\173', '\174', '\175', '\176', '\177',
108 C('\200'), C('\201'), C('\202'), C('\203'), C('\204'), C('\205'), C('\206'), C('\207'),
109 C('\210'), C('\211'), C('\212'), C('\213'), C('\214'), C('\215'), C('\216'), C('\217'),
110 C('\220'), C('\221'), C('\222'), C('\223'), C('\224'), C('\225'), C('\226'), C('\227'),
111 C('\230'), C('\231'), C('\232'), C('\233'), C('\234'), C('\235'), C('\236'), C('\237'),
112 C('\240'), C('\241'), C('\242'), C('\243'), C('\244'), C('\245'), C('\246'), C('\247'),
113 C('\250'), C('\251'), C('\252'), C('\253'), C('\254'), C('\255'), C('\256'), C('\257'),
114 C('\260'), C('\261'), C('\262'), C('\263'), C('\264'), C('\265'), C('\266'), C('\267'),
115 C('\270'), C('\271'), C('\272'), C('\273'), C('\274'), C('\275'), C('\276'), C('\277'),
116 C('\340'), C('\341'), C('\342'), C('\343'), C('\344'), C('\345'), C('\346'), C('\347'),
117 C('\350'), C('\351'), C('\352'), C('\353'), C('\354'), C('\355'), C('\356'), C('\357'),
118 C('\360'), C('\361'), C('\362'), C('\363'), C('\364'), C('\365'), C('\366'), C('\327'),
119 C('\370'), C('\371'), C('\372'), C('\373'), C('\374'), C('\375'), C('\376'), C('\337'),
120 C('\340'), C('\341'), C('\342'), C('\343'), C('\344'), C('\345'), C('\346'), C('\347'),
121 C('\350'), C('\351'), C('\352'), C('\353'), C('\354'), C('\355'), C('\356'), C('\357'),
122 C('\360'), C('\361'), C('\362'), C('\363'), C('\364'), C('\365'), C('\366'), C('\367'),
123 C('\370'), C('\371'), C('\372'), C('\373'), C('\374'), C('\375'), C('\376'), C('\377'),
125 #elif 'a' == 0x81 /* it's EBCDIC */
127 /*00 NU SH SX EX PF HT LC DL */
128 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07,
129 /*08 SM VT FF CR SO SI */
130 0x08, 0x09, 0x0A, 0x0B, 0x0C, 0x0D, 0x0E, 0x0F,
131 /*10 DE D1 D2 TM RS NL BS IL */
132 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17,
133 /*18 CN EM CC C1 FS GS RS US */
134 0x18, 0x19, 0x1A, 0x1B, 0x1C, 0x1D, 0x1E, 0x1F,
135 /*20 DS SS FS BP LF EB EC */
136 0x20, 0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27,
137 /*28 SM C2 EQ AK BL */
138 0x28, 0x29, 0x2A, 0x2B, 0x2C, 0x2D, 0x2E, 0x2F,
139 /*30 SY PN RS UC ET */
140 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37,
142 0x38, 0x39, 0x3A, 0x3B, 0x3C, 0x3D, 0x3E, 0x3F,
144 0x40, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47,
145 /*48 CENT . < ( + | */
146 0x48, 0x49, 0x4A, 0x4B, 0x4C, 0x4D, 0x4E, 0x4F,
148 0x50, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57,
150 0x58, 0x59, 0x5A, 0x5B, 0x5C, 0x5D, 0x5E, 0x5F,
152 0x60, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67,
154 0x68, 0x69, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F,
156 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77,
157 /*78 ` : # @ ' = " */
158 0x78, 0x79, 0x7A, 0x7B, 0x7C, 0x7D, 0x7E, 0x7F,
159 /*80 a b c d e f g */
160 0x80, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87,
162 0x88, 0x89, 0x8A, 0x8B, 0x8C, 0x8D, 0x8E, 0x8F,
163 /*90 j k l m n o p */
164 0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97,
166 0x98, 0x99, 0x9A, 0x9B, 0x9C, 0x9D, 0x9E, 0x9F,
167 /*A0 ~ s t u v w x */
168 0xA0, 0xA1, 0xA2, 0xA3, 0xA4, 0xA5, 0xA6, 0xA7,
170 0xA8, 0xA9, 0xAA, 0xAB, 0xAC, 0xAD, 0xAE, 0xAF,
172 0xB0, 0xB1, 0xB2, 0xB3, 0xB4, 0xB5, 0xB6, 0xB7,
174 0xB8, 0xB9, 0xBA, 0xBB, 0xBC, 0xBD, 0xBE, 0xBF,
175 /*C0 { A B C D E F G */
176 0xC0, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87,
178 0x88, 0x89, 0xCA, 0xCB, 0xCC, 0xCD, 0xCE, 0xCF,
179 /*D0 } J K L M N O P */
180 0xD0, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97,
182 0x98, 0x99, 0xDA, 0xDB, 0xDC, 0xDD, 0xDE, 0xDF,
183 /*E0 \ S T U V W X */
184 0xE0, 0xE1, 0xA2, 0xA3, 0xA4, 0xA5, 0xA6, 0xA7,
186 0xA8, 0xA9, 0xEA, 0xEB, 0xEC, 0xED, 0xEE, 0xEF,
187 /*F0 0 1 2 3 4 5 6 7 */
188 0xF0, 0xF1, 0xF2, 0xF3, 0xF4, 0xF5, 0xF6, 0xF7,
190 0xF8, 0xF9, 0xFA, 0xFB, 0xFC, 0xFD, 0xFE, 0xFF
193 #include "You lose. You will need a translation table for your character set."
198 /* load_casetable --- for a non-ASCII locale, redo the table */
203 #if defined(LC_CTYPE)
206 static bool loaded = false;
208 if (loaded || do_traditional)
212 cp = setlocale(LC_CTYPE, NULL);
214 /* this is not per standard, but it's pretty safe */
215 if (cp == NULL || strcmp(cp, "C") == 0 || strcmp(cp, "POSIX") == 0)
219 for (i = 0200; i <= 0377; i++) {
220 if (isalpha(i) && islower(i) && i != toupper(i))
221 casetable[i] = toupper(i);
228 * This table maps node types to strings for debugging.
229 * KEEP IN SYNC WITH awk.h!!!!
232 static const char *const nodetypes[] = {
251 "Node_final --- this should never appear",
257 * This table maps Op codes to strings.
258 * KEEP IN SYNC WITH awk.h!!!!
261 static struct optypetab {
265 { "Op_illegal", NULL },
266 { "Op_times", " * " },
267 { "Op_times_i", " * " },
268 { "Op_quotient", " / " },
269 { "Op_quotient_i", " / " },
271 { "Op_mod_i", " % " },
272 { "Op_plus", " + " },
273 { "Op_plus_i", " + " },
274 { "Op_minus", " - " },
275 { "Op_minus_i", " - " },
277 { "Op_exp_i", " ^ " },
278 { "Op_concat", " " },
279 { "Op_line_range", NULL },
280 { "Op_cond_pair", ", " },
281 { "Op_subscript", "[]" },
282 { "Op_sub_array", "[]" },
283 { "Op_preincrement", "++" },
284 { "Op_predecrement", "--" },
285 { "Op_postincrement", "++" },
286 { "Op_postdecrement", "--" },
287 { "Op_unary_minus", "-" },
288 { "Op_field_spec", "$" },
290 { "Op_assign", " = " },
291 { "Op_store_var", " = " },
292 { "Op_store_sub", " = " },
293 { "Op_store_field", " = " },
294 { "Op_assign_times", " *= " },
295 { "Op_assign_quotient", " /= " },
296 { "Op_assign_mod", " %= " },
297 { "Op_assign_plus", " += " },
298 { "Op_assign_minus", " -= " },
299 { "Op_assign_exp", " ^= " },
300 { "Op_assign_concat", " " },
301 { "Op_and", " && " },
302 { "Op_and_final", NULL },
304 { "Op_or_final", NULL },
305 { "Op_equal", " == " },
306 { "Op_notequal", " != " },
307 { "Op_less", " < " },
308 { "Op_greater", " > " },
309 { "Op_leq", " <= " },
310 { "Op_geq", " >= " },
311 { "Op_match", " ~ " },
312 { "Op_match_rec", NULL },
313 { "Op_nomatch", " !~ " },
315 { "Op_K_case", "case" },
316 { "Op_K_default", "default" },
317 { "Op_K_break", "break" },
318 { "Op_K_continue", "continue" },
319 { "Op_K_print", "print" },
320 { "Op_K_print_rec", "print" },
321 { "Op_K_printf", "printf" },
322 { "Op_K_next", "next" },
323 { "Op_K_exit", "exit" },
324 { "Op_K_return", "return" },
325 { "Op_K_delete", "delete" },
326 { "Op_K_delete_loop", NULL },
327 { "Op_K_getline_redir", "getline" },
328 { "Op_K_getline", "getline" },
329 { "Op_K_nextfile", "nextfile" },
330 { "Op_builtin", NULL },
331 { "Op_sub_builtin", NULL },
332 { "Op_ext_builtin", NULL },
333 { "Op_old_ext_builtin", NULL }, /* temporary */
334 { "Op_in_array", " in " },
335 { "Op_func_call", NULL },
336 { "Op_indirect_func_call", NULL },
338 { "Op_push_arg", NULL },
339 { "Op_push_i", NULL },
340 { "Op_push_re", NULL },
341 { "Op_push_array", NULL },
342 { "Op_push_param", NULL },
343 { "Op_push_lhs", NULL },
344 { "Op_subscript_lhs", "[]" },
345 { "Op_field_spec_lhs", "$" },
346 { "Op_no_op", NULL },
349 { "Op_jmp_true", NULL },
350 { "Op_jmp_false", NULL },
351 { "Op_get_record", NULL },
352 { "Op_newfile", NULL },
353 { "Op_arrayfor_init", NULL },
354 { "Op_arrayfor_incr", NULL },
355 { "Op_arrayfor_final", NULL },
356 { "Op_var_update", NULL },
357 { "Op_var_assign", NULL },
358 { "Op_field_assign", NULL },
359 { "Op_subscript_assign", NULL },
360 { "Op_after_beginfile", NULL },
361 { "Op_after_endfile", NULL },
363 { "Op_exec_count", NULL },
364 { "Op_breakpoint", NULL },
366 { "Op_atexit", NULL },
368 { "Op_token", NULL },
369 { "Op_symbol", NULL },
372 { "Op_K_for", "for" },
373 { "Op_K_arrayfor", "for" },
374 { "Op_K_while", "while" },
375 { "Op_K_switch", "switch" },
377 { "Op_K_else", "else" },
378 { "Op_K_function", "function" },
379 { "Op_cond_exp", NULL },
380 { "Op_final --- this should never appear", NULL },
384 /* nodetype2str --- convert a node type into a printable value */
387 nodetype2str(NODETYPE type)
391 if (type >= Node_illegal && type <= Node_final)
392 return nodetypes[(int) type];
394 sprintf(buf, _("unknown nodetype %d"), (int) type);
398 /* opcode2str --- convert a opcode type into a printable value */
401 opcode2str(OPCODE op)
403 if (op >= Op_illegal && op < Op_final)
404 return optypes[(int) op].desc;
405 fatal(_("unknown opcode %d"), (int) op);
412 if (op >= Op_illegal && op < Op_final) {
413 if (optypes[(int) op].operator != NULL)
414 return optypes[(int) op].operator;
416 fatal(_("opcode %s not an operator or keyword"),
417 optypes[(int) op].desc);
419 fatal(_("unknown opcode %d"), (int) op);
424 /* flags2str --- make a flags value readable */
427 flags2str(int flagval)
429 static const struct flagtab values[] = {
430 { MALLOC, "MALLOC" },
431 { STRING, "STRING" },
432 { STRCUR, "STRCUR" },
433 { NUMCUR, "NUMCUR" },
434 { NUMBER, "NUMBER" },
435 { MAYBE_NUM, "MAYBE_NUM" },
437 { INTLSTR, "INTLSTR" },
438 { NUMINT, "NUMINT" },
439 { INTIND, "INTIND" },
440 { WSTRCUR, "WSTRCUR" },
443 { NULL_FIELD, "NULL_FIELD" },
444 { ARRAYMAXED, "ARRAYMAXED" },
445 { HALFHAT, "HALFHAT" },
446 { XARRAY, "XARRAY" },
450 return genflags2str(flagval, values);
453 /* genflags2str --- general routine to convert a flag value to a string */
456 genflags2str(int flagval, const struct flagtab *tab)
458 static char buffer[BUFSIZ];
460 int i, space_left, space_needed;
464 for (i = 0; tab[i].name != NULL; i++) {
465 if ((flagval & tab[i].val) != 0) {
467 * note the trick, we want 1 or 0 for whether we need
470 space_needed = (strlen(tab[i].name) + (sp != buffer));
471 if (space_left <= space_needed)
472 fatal(_("buffer overflow in genflags2str"));
478 strcpy(sp, tab[i].name);
480 space_left -= strlen(sp);
489 /* posix_compare --- compare strings using strcoll */
492 posix_compare(NODE *s1, NODE *s2)
498 save1 = s1->stptr[s1->stlen];
499 s1->stptr[s1->stlen] = '\0';
501 save2 = s2->stptr[s2->stlen];
502 s2->stptr[s2->stlen] = '\0';
504 if (gawk_mb_cur_max == 1) {
505 if (strlen(s1->stptr) == s1->stlen && strlen(s2->stptr) == s2->stlen)
506 ret = strcoll(s1->stptr, s2->stptr);
512 if (s1->stlen < s2->stlen)
517 b1[1] = b2[1] = '\0';
518 for (i = ret = 0, p1 = s1->stptr, p2 = s2->stptr;
523 ret = strcoll(b1, b2);
527 * Either worked through the strings or ret != 0.
528 * In either case, ret will be the right thing to return.
533 /* Similar logic, using wide characters */
534 (void) force_wstring(s1);
535 (void) force_wstring(s2);
537 if (wcslen(s1->wstptr) == s1->wstlen && wcslen(s2->wstptr) == s2->wstlen)
538 ret = wcscoll(s1->wstptr, s2->wstptr);
540 wchar_t b1[2], b2[2];
544 if (s1->wstlen < s2->wstlen)
549 b1[1] = b2[1] = L'\0';
550 for (i = ret = 0, p1 = s1->wstptr, p2 = s2->wstptr;
555 ret = wcscoll(b1, b2);
559 * Either worked through the strings or ret != 0.
560 * In either case, ret will be the right thing to return.
565 s1->stptr[s1->stlen] = save1;
566 s2->stptr[s2->stlen] = save2;
571 /* cmp_nodes --- compare two nodes, returning negative, 0, positive */
574 cmp_nodes(NODE *t1, NODE *t2)
583 if ((t1->flags & MAYBE_NUM) != 0)
584 (void) force_number(t1);
585 if ((t2->flags & MAYBE_NUM) != 0)
586 (void) force_number(t2);
587 if ((t1->flags & INTIND) != 0)
588 t1 = force_string(t1);
589 if ((t2->flags & INTIND) != 0)
590 t2 = force_string(t2);
592 if ((t1->flags & NUMBER) != 0 && (t2->flags & NUMBER) != 0)
593 return cmp_numbers(t1, t2);
595 (void) force_string(t1);
596 (void) force_string(t2);
600 if (len1 == 0 || len2 == 0)
604 return posix_compare(t1, t2);
606 l = (ldiff <= 0 ? len1 : len2);
608 const unsigned char *cp1 = (const unsigned char *) t1->stptr;
609 const unsigned char *cp2 = (const unsigned char *) t2->stptr;
612 if (gawk_mb_cur_max > 1) {
613 ret = strncasecmpmbs((const unsigned char *) cp1,
614 (const unsigned char *) cp2, l);
617 /* Could use tolower() here; see discussion above. */
618 for (ret = 0; l-- > 0 && ret == 0; cp1++, cp2++)
619 ret = casetable[*cp1] - casetable[*cp2];
621 ret = memcmp(t1->stptr, t2->stptr, l);
623 ret = ret == 0 ? ldiff : ret;
627 /* push_frame --- push a frame NODE onto stack */
632 static long max_fcall;
634 /* NB: frame numbering scheme as in GDB. frame_ptr => frame #0. */
637 if (fcall_list == NULL) {
639 emalloc(fcall_list, NODE **, (max_fcall + 1) * sizeof(NODE *), "push_frame");
640 } else if (fcall_count == max_fcall) {
642 erealloc(fcall_list, NODE **, (max_fcall + 1) * sizeof(NODE *), "push_frame");
646 memmove(fcall_list + 2, fcall_list + 1, (fcall_count - 1) * sizeof(NODE *));
651 /* pop_frame --- pop off a frame NODE*/
657 memmove(fcall_list + 1, fcall_list + 2, (fcall_count - 1) * sizeof(NODE *));
659 assert(fcall_count >= 0);
665 /* dump_fcall_stack --- print a backtrace of the awk function calls */
668 dump_fcall_stack(FILE *fp)
671 long i = 0, j, k = 0;
673 if (fcall_count == 0)
675 fprintf(fp, _("\n\t# Function Call Stack:\n\n"));
678 func = frame_ptr->func_node;
679 for (j = 0; j <= frame_ptr->num_tail_calls; j++)
680 fprintf(fp, "\t# %3ld. %s\n", k++, func->vname);
682 /* outer frames except main */
683 for (i = 1; i < fcall_count; i++) {
686 for (j = 0; j <= f->num_tail_calls; j++)
687 fprintf(fp, "\t# %3ld. %s\n", k++, func->vname);
690 fprintf(fp, "\t# %3ld. -- main --\n", k);
694 /* set_IGNORECASE --- update IGNORECASE as appropriate */
699 static bool warned = false;
700 NODE *n = IGNORECASE_node->var_value;
702 if ((do_lint || do_traditional) && ! warned) {
704 lintwarn(_("`IGNORECASE' is a gawk extension"));
709 else if ((n->flags & (STRING|STRCUR)) != 0) {
710 if ((n->flags & MAYBE_NUM) == 0) {
711 (void) force_string(n);
712 IGNORECASE = (n->stlen > 0);
714 (void) force_number(n);
715 IGNORECASE = ! iszero(n);
717 } else if ((n->flags & (NUMCUR|NUMBER)) != 0)
718 IGNORECASE = ! iszero(n);
720 IGNORECASE = false; /* shouldn't happen */
722 set_RS(); /* set_RS() calls set_FS() if need be, for us */
725 /* set_BINMODE --- set translation mode (OS/2, DOS, others) */
730 static bool warned = false;
732 NODE *v = BINMODE_node->var_value;
734 if ((do_lint || do_traditional) && ! warned) {
736 lintwarn(_("`BINMODE' is a gawk extension"));
739 BINMODE = TEXT_TRANSLATE;
740 else if ((v->flags & NUMBER) != 0) {
741 (void) force_number(v);
742 BINMODE = get_number_si(v);
743 /* Make sure the value is rational. */
744 if (BINMODE < TEXT_TRANSLATE)
745 BINMODE = TEXT_TRANSLATE;
746 else if (BINMODE > BINMODE_BOTH)
747 BINMODE = BINMODE_BOTH;
748 } else if ((v->flags & STRING) != 0) {
752 * Allow only one of the following:
753 * "0", "1", "2", "3",
754 * "r", "w", "rw", "wr"
755 * ANYTHING ELSE goes to 3. So there.
764 BINMODE = p[0] - '0';
767 BINMODE = BINMODE_INPUT;
770 BINMODE = BINMODE_OUTPUT;
773 BINMODE = BINMODE_BOTH;
781 BINMODE = BINMODE_BOTH;
786 BINMODE = BINMODE_BOTH;
794 lintwarn(_("BINMODE value `%s' is invalid, treated as 3"), p);
798 BINMODE = 3; /* shouldn't happen */
801 /* set_OFS --- update OFS related variables when OFS assigned to */
806 OFS_node->var_value = force_string(OFS_node->var_value);
807 OFS = OFS_node->var_value->stptr;
808 OFSlen = OFS_node->var_value->stlen;
812 /* set_ORS --- update ORS related variables when ORS assigned to */
817 ORS_node->var_value = force_string(ORS_node->var_value);
818 ORS = ORS_node->var_value->stptr;
819 ORSlen = ORS_node->var_value->stlen;
823 /* fmt_ok --- is the conversion format a valid one? */
825 NODE **fmt_list = NULL;
826 static int fmt_ok(NODE *n);
827 static int fmt_index(NODE *n);
832 NODE *tmp = force_string(n);
833 const char *p = tmp->stptr;
835 #if ! defined(PRINTF_HAS_F_FORMAT) || PRINTF_HAS_F_FORMAT != 1
836 static const char float_formats[] = "efgEG";
838 static const char float_formats[] = "efgEFG";
840 #if defined(HAVE_LOCALE_H)
841 static const char flags[] = " +-#'";
843 static const char flags[] = " +-#";
848 while (*p && strchr(flags, *p) != NULL) /* flags */
850 while (*p && isdigit((unsigned char) *p)) /* width - %*.*g is NOT allowed */
852 if (*p == '\0' || (*p != '.' && ! isdigit((unsigned char) *p)))
856 while (*p && isdigit((unsigned char) *p)) /* precision */
858 if (*p == '\0' || strchr(float_formats, *p) == NULL)
865 /* fmt_index --- track values of OFMT and CONVFMT to keep semantics correct */
871 static int fmt_num = 4;
872 static int fmt_hiwater = 0;
874 if (fmt_list == NULL)
875 emalloc(fmt_list, NODE **, fmt_num*sizeof(*fmt_list), "fmt_index");
877 while (ix < fmt_hiwater) {
878 if (cmp_nodes(fmt_list[ix], n) == 0)
883 n->stptr[n->stlen] = '\0';
884 if (do_lint && ! fmt_ok(n))
885 lintwarn(_("bad `%sFMT' specification `%s'"),
886 n == CONVFMT_node->var_value ? "CONV"
887 : n == OFMT_node->var_value ? "O"
890 if (fmt_hiwater >= fmt_num) {
892 erealloc(fmt_list, NODE **, fmt_num * sizeof(*fmt_list), "fmt_index");
894 fmt_list[fmt_hiwater] = dupnode(n);
895 return fmt_hiwater++;
898 /* set_OFMT --- track OFMT correctly */
903 OFMTidx = fmt_index(OFMT_node->var_value);
904 OFMT = fmt_list[OFMTidx]->stptr;
907 /* set_CONVFMT --- track CONVFMT correctly */
912 CONVFMTidx = fmt_index(CONVFMT_node->var_value);
913 CONVFMT = fmt_list[CONVFMTidx]->stptr;
916 /* set_LINT --- update LINT as appropriate */
922 int old_lint = do_lint;
923 NODE *n = LINT_node->var_value;
925 if ((n->flags & (STRING|STRCUR)) != 0) {
926 if ((n->flags & MAYBE_NUM) == 0) {
930 n = force_string(LINT_node->var_value);
934 do_flags |= DO_LINT_ALL;
935 if (lintlen == 5 && strncmp(lintval, "fatal", 5) == 0)
937 else if (lintlen == 7 && strncmp(lintval, "invalid", 7) == 0) {
938 do_flags &= ~ DO_LINT_ALL;
939 do_flags |= DO_LINT_INVALID;
943 do_flags &= ~(DO_LINT_ALL|DO_LINT_INVALID);
947 (void) force_number(n);
949 do_flags |= DO_LINT_ALL;
951 do_flags &= ~(DO_LINT_ALL|DO_LINT_INVALID);
954 } else if ((n->flags & (NUMCUR|NUMBER)) != 0) {
955 (void) force_number(n);
957 do_flags |= DO_LINT_ALL;
959 do_flags &= ~(DO_LINT_ALL|DO_LINT_INVALID);
962 do_flags &= ~(DO_LINT_ALL|DO_LINT_INVALID); /* shouldn't happen */
967 /* explicitly use warning() here, in case lintfunc == r_fatal */
968 if (old_lint != do_lint && old_lint && ! do_lint)
969 warning(_("turning off `--lint' due to assignment to `LINT'"));
971 /* inform plug-in api of change */
973 #endif /* ! NO_LINT */
976 /* set_TEXTDOMAIN --- update TEXTDOMAIN variable when TEXTDOMAIN assigned to */
984 tmp = TEXTDOMAIN_node->var_value = force_string(TEXTDOMAIN_node->var_value);
985 TEXTDOMAIN = tmp->stptr;
987 TEXTDOMAIN[len] = '\0';
989 * Note: don't call textdomain(); this value is for
990 * the awk program, not for gawk itself.
994 /* update_ERRNO_int --- update the value of ERRNO based on argument */
997 update_ERRNO_int(int errcode)
1002 cp = strerror(errcode);
1006 unref(ERRNO_node->var_value);
1007 ERRNO_node->var_value = make_string(cp, strlen(cp));
1010 /* update_ERRNO_string --- update ERRNO */
1013 update_ERRNO_string(const char *string)
1015 unref(ERRNO_node->var_value);
1016 ERRNO_node->var_value = make_string(string, strlen(string));
1019 /* unset_ERRNO --- eliminate the value of ERRNO */
1024 unref(ERRNO_node->var_value);
1025 ERRNO_node->var_value = dupnode(Nnull_string);
1028 /* update_NR --- update the value of NR */
1034 if (is_mpg_number(NR_node->var_value))
1035 (void) mpg_update_var(NR_node);
1038 if (NR_node->var_value->numbr != NR) {
1039 unref(NR_node->var_value);
1040 NR_node->var_value = make_number(NR);
1044 /* update_NF --- update the value of NF */
1051 l = get_number_si(NF_node->var_value);
1052 if (NF == -1 || l != NF) {
1054 (void) get_field(UNLIMITED - 1, NULL); /* parse record */
1055 unref(NF_node->var_value);
1056 NF_node->var_value = make_number(NF);
1060 /* update_FNR --- update the value of FNR */
1066 if (is_mpg_number(FNR_node->var_value))
1067 (void) mpg_update_var(FNR_node);
1070 if (FNR_node->var_value->numbr != FNR) {
1071 unref(FNR_node->var_value);
1072 FNR_node->var_value = make_number(FNR);
1077 NODE *frame_ptr; /* current frame */
1078 STACK_ITEM *stack_ptr = NULL;
1079 STACK_ITEM *stack_bottom;
1080 STACK_ITEM *stack_top;
1081 static unsigned long STACK_SIZE = 256; /* initial size of stack */
1082 int max_args = 0; /* maximum # of arguments to printf, print, sprintf,
1083 * or # of array subscripts, or adjacent strings
1084 * to be concatenated.
1086 NODE **args_array = NULL;
1088 /* grow_stack --- grow the size of runtime stack */
1090 /* N.B. stack_ptr points to the topmost occupied location
1091 * on the stack, not the first free location.
1098 erealloc(stack_bottom, STACK_ITEM *, STACK_SIZE * sizeof(STACK_ITEM), "grow_stack");
1099 stack_top = stack_bottom + STACK_SIZE - 1;
1100 stack_ptr = stack_bottom + STACK_SIZE / 2;
1106 * This returns a POINTER to a node pointer (var's value).
1107 * used to store the var's new value.
1111 r_get_lhs(NODE *n, bool reference)
1113 bool isparam = false;
1115 if (n->type == Node_param_list) {
1117 n = GET_PARAM(n->param_cnt);
1121 case Node_var_array:
1122 fatal(_("attempt to use array `%s' in a scalar context"),
1124 case Node_array_ref:
1125 if (n->orig_array->type == Node_var_array)
1126 fatal(_("attempt to use array `%s' in a scalar context"),
1128 if (n->orig_array->type != Node_var) {
1129 n->orig_array->type = Node_var;
1130 n->orig_array->var_value = Nnull_string;
1135 n->var_value = dupnode(Nnull_string);
1145 if (do_lint && reference && var_uninitialized(n))
1147 _("reference to uninitialized argument `%s'") :
1148 _("reference to uninitialized variable `%s'")),
1150 return & n->var_value;
1154 /* r_get_field --- get the address of a field node */
1156 static inline NODE **
1157 r_get_field(NODE *n, Func_ptr *assign, bool reference)
1165 if ((n->flags & NUMBER) == 0) {
1166 lintwarn(_("attempt to field reference from non-numeric value"));
1168 lintwarn(_("attempt to field reference from null string"));
1172 (void) force_number(n);
1173 field_num = get_number_si(n);
1176 fatal(_("attempt to access field %ld"), field_num);
1178 if (field_num == 0 && field0_valid) { /* short circuit */
1179 lhs = &fields_arr[0];
1181 *assign = reset_record;
1183 lhs = get_field(field_num, assign);
1184 if (do_lint && reference && ((*lhs)->flags & NULL_FIELD) != 0)
1185 lintwarn(_("reference to uninitialized field `$%ld'"),
1192 * calc_exp_posint --- calculate x^n for positive integral n,
1193 * using exponentiation by squaring without recursion.
1197 calc_exp_posint(AWKNUM x, long n)
1210 /* calc_exp --- calculate x1^x2 */
1213 calc_exp(AWKNUM x1, AWKNUM x2)
1217 if ((lx = x2) == x2) { /* integer exponent */
1220 return (lx > 0) ? calc_exp_posint(x1, lx)
1221 : 1.0 / calc_exp_posint(x1, -lx);
1223 return (AWKNUM) pow((double) x1, (double) x2);
1227 /* setup_frame --- setup new frame for function call */
1229 static INSTRUCTION *
1230 setup_frame(INSTRUCTION *pc)
1235 int pcount, arg_count, i, j;
1236 bool tail_optimize = false;
1239 pcount = f->param_cnt;
1241 arg_count = (pc + 1)->expr_count;
1243 /* tail recursion optimization */
1244 tail_optimize = ((pc + 1)->tail_call && do_optimize
1245 && ! do_debug && ! do_profile);
1247 if (tail_optimize) {
1248 /* free local vars of calling frame */
1253 func = frame_ptr->func_node;
1254 for (n = func->param_cnt, sp = frame_ptr->stack; n > 0; n--) {
1256 if (r->type == Node_var) /* local variable */
1257 DEREF(r->var_value);
1258 else if (r->type == Node_var_array) /* local array */
1261 sp = frame_ptr->stack;
1263 } else if (pcount > 0) {
1264 emalloc(sp, NODE **, pcount * sizeof(NODE *), "setup_frame");
1265 memset(sp, 0, pcount * sizeof(NODE *));
1269 /* check for extra args */
1270 if (arg_count > pcount) {
1272 _("function `%s' called with more arguments than declared"),
1276 if (r->type == Node_val)
1278 } while (--arg_count > pcount);
1281 for (i = 0, j = arg_count - 1; i < pcount; i++, j--) {
1286 memset(r, 0, sizeof(NODE));
1290 if (i >= arg_count) {
1291 /* local variable */
1292 r->type = Node_var_new;
1293 r->vname = fp[i].param;
1297 m = PEEK(j); /* arguments in reverse order on runtime stack */
1299 if (m->type == Node_param_list)
1300 m = GET_PARAM(m->param_cnt);
1304 case Node_var_array:
1305 r->type = Node_array_ref;
1306 r->orig_array = r->prev_array = m;
1309 case Node_array_ref:
1310 r->type = Node_array_ref;
1311 r->orig_array = m->orig_array;
1316 /* Untyped (Node_var_new) variable as param became a
1317 * scalar during evaluation of expression for a
1321 r->var_value = dupnode(Nnull_string);
1332 r->vname = fp[i].param;
1335 stack_adj(-arg_count); /* adjust stack pointer */
1337 if (tail_optimize) {
1338 frame_ptr->num_tail_calls++;
1342 if (pc->opcode == Op_indirect_func_call) {
1343 r = POP(); /* indirect var */
1347 frame_ptr->vname = source; /* save current source */
1349 if (do_profile || do_debug)
1350 push_frame(frame_ptr);
1352 /* save current frame in stack */
1355 /* setup new frame */
1357 frame_ptr->type = Node_frame;
1358 frame_ptr->stack = sp;
1359 frame_ptr->prev_frame_size = (stack_ptr - stack_bottom); /* size of the previous stack frame */
1360 frame_ptr->func_node = f;
1361 frame_ptr->num_tail_calls = 0;
1362 frame_ptr->vname = NULL;
1363 frame_ptr->reti = pc; /* on return execute pc->nexti */
1369 /* restore_frame --- clean up the stack and update frame */
1371 static INSTRUCTION *
1372 restore_frame(NODE *fp)
1380 func = frame_ptr->func_node;
1381 n = func->param_cnt;
1382 sp = frame_ptr->stack;
1384 for (; n > 0; n--) {
1386 if (r->type == Node_var) /* local variable */
1387 DEREF(r->var_value);
1388 else if (r->type == Node_var_array) /* local array */
1393 if (frame_ptr->stack != NULL)
1394 efree(frame_ptr->stack);
1395 ri = frame_ptr->reti; /* execution in calling frame
1396 * resumes from ri->nexti.
1398 freenode(frame_ptr);
1399 if (do_profile || do_debug)
1404 /* restore source */
1412 /* free_arrayfor --- free 'for (var in array)' related data */
1415 free_arrayfor(NODE *r)
1417 if (r->for_list != NULL) {
1419 size_t num_elems = r->for_list_size;
1420 NODE **list = r->for_list;
1421 while (num_elems > 0) {
1422 n = list[--num_elems];
1432 * unwind_stack --- pop items off the run-time stack;
1433 * 'n' is the # of items left in the stack.
1437 unwind_stack(long n)
1440 INSTRUCTION *cp = NULL;
1446 sp = stack_bottom + n;
1451 while ((r = POP()) != NULL) {
1454 cp = restore_frame(r);
1462 case Node_instruction:
1467 * Check `exiting' and don't produce an error for
1469 * func _fn0() { exit }
1470 * BEGIN { ARRAY[_fn0()] }
1472 if (in_main_context() && ! exiting)
1473 fatal(_("unwind_stack: unexpected type `%s'"),
1474 nodetype2str(r->type));
1478 * Node_var (e.g: trying to use scalar for array)
1479 * Node_regex/Node_dynregex
1492 /* pop_fcall --- pop off the innermost frame */
1493 #define pop_fcall() unwind_stack(frame_ptr->prev_frame_size)
1495 /* pop the run-time stack */
1496 #define pop_stack() (void) unwind_stack(0)
1500 eval_condition(NODE *t)
1502 if (t == node_Boolean[false])
1505 if (t == node_Boolean[true])
1508 if ((t->flags & MAYBE_NUM) != 0)
1510 else if ((t->flags & INTIND) != 0)
1513 if ((t->flags & NUMBER) != 0)
1516 return (t->stlen != 0);
1519 /* cmp_scalars -- compare two nodes on the stack */
1529 if (t1->type == Node_var_array) {
1531 fatal(_("attempt to use array `%s' in a scalar context"), array_vname(t1));
1533 di = cmp_nodes(t1, t2);
1539 /* op_assign --- assignment operators excluding = */
1542 op_assign(OPCODE op)
1546 AWKNUM x = 0.0, x1, x2;
1548 lhs = POP_ADDRESS();
1550 x1 = force_number(t1)->numbr;
1553 x2 = force_number(t2)->numbr;
1557 case Op_assign_plus:
1560 case Op_assign_minus:
1563 case Op_assign_times:
1566 case Op_assign_quotient:
1567 if (x2 == (AWKNUM) 0) {
1569 fatal(_("division by zero attempted in `/='"));
1574 if (x2 == (AWKNUM) 0) {
1576 fatal(_("division by zero attempted in `%%='"));
1580 #else /* ! HAVE_FMOD */
1581 (void) modf(x1 / x2, &x);
1583 #endif /* ! HAVE_FMOD */
1586 x = calc_exp((double) x1, (double) x2);
1592 if (t1->valref == 1 && t1->flags == (MALLOC|NUMCUR|NUMBER)) {
1597 t1 = *lhs = make_number(x);
1604 /* PUSH_CODE --- push a code onto the runtime stack */
1607 PUSH_CODE(INSTRUCTION *cp)
1611 r->type = Node_instruction;
1616 /* POP_CODE --- pop a code off the runtime stack */
1631 * Implementation of BEGINFILE and ENDFILE requires saving an execution
1632 * state and the ability to return to that state. The state is
1633 * defined by the instruction triggering the BEGINFILE/ENDFILE rule, the
1634 * run-time stack, the rule and the source file. The source line is available in
1635 * the instruction and hence is not considered a part of the execution state.
1639 typedef struct exec_state {
1640 struct exec_state *next;
1642 INSTRUCTION *cptr; /* either getline (Op_K_getline) or the
1643 * implicit "open-file, read-record" loop (Op_newfile).
1646 int rule; /* rule for the INSTRUCTION */
1648 long stack_size; /* For this particular usage, it is sufficient to save
1649 * only the size of the call stack. We do not
1650 * store the actual stack pointer to avoid problems
1651 * in case the stack gets realloc-ed.
1654 const char *source; /* source file for the INSTRUCTION */
1657 static EXEC_STATE exec_state_stack;
1659 /* push_exec_state --- save an execution state on stack */
1662 push_exec_state(INSTRUCTION *cp, int rule, char *src, STACK_ITEM *sp)
1666 emalloc(es, EXEC_STATE *, sizeof(EXEC_STATE), "push_exec_state");
1669 es->stack_size = (sp - stack_bottom) + 1;
1671 es->next = exec_state_stack.next;
1672 exec_state_stack.next = es;
1676 /* pop_exec_state --- pop one execution state off the stack */
1678 static INSTRUCTION *
1679 pop_exec_state(int *rule, char **src, long *sz)
1684 es = exec_state_stack.next;
1691 *src = (char *) es->source;
1693 *sz = es->stack_size;
1694 exec_state_stack.next = es->next;
1700 /* register_exec_hook --- add exec hooks in the interpreter. */
1703 register_exec_hook(Func_pre_exec preh, Func_post_exec posth)
1708 * multiple post-exec hooks aren't supported. post-exec hook is mainly
1709 * for use by the debugger.
1712 if (! preh || (post_execute && posth))
1715 if (num_exec_hook == MAX_EXEC_HOOKS)
1719 * Add to the beginning of the array but do not displace the
1720 * debugger hook if it exists.
1722 if (num_exec_hook > 0) {
1724 if (num_exec_hook > pos)
1725 memmove(pre_execute + pos + 1, pre_execute + pos,
1726 (num_exec_hook - pos) * sizeof (preh));
1728 pre_execute[pos] = preh;
1732 post_execute = posth;
1738 /* interpreter routine when not debugging */
1739 #include "interpret.h"
1741 /* interpreter routine with exec hook(s). Used when debugging and/or with MPFR. */
1742 #define r_interpret h_interpret
1744 #include "interpret.h"
1754 if ((newval = getenv_long("GAWK_STACKSIZE")) > 0)
1755 STACK_SIZE = newval;
1757 emalloc(stack_bottom, STACK_ITEM *, STACK_SIZE * sizeof(STACK_ITEM), "grow_stack");
1758 stack_ptr = stack_bottom - 1;
1759 stack_top = stack_bottom + STACK_SIZE - 1;
1761 /* initialize frame pointer */
1763 frame_ptr->type = Node_frame;
1764 frame_ptr->stack = NULL;
1765 frame_ptr->func_node = NULL; /* in main */
1766 frame_ptr->num_tail_calls = 0;
1767 frame_ptr->vname = NULL;
1769 /* initialize true and false nodes */
1770 node_Boolean[false] = make_number(0.0);
1771 node_Boolean[true] = make_number(1.0);
1772 if (! is_mpg_number(node_Boolean[false])) {
1773 node_Boolean[false]->flags |= NUMINT;
1774 node_Boolean[true]->flags |= NUMINT;
1778 * Select the interpreter routine. The version without
1779 * any exec hook support (r_interpret) is faster by about
1780 * 5%, or more depending on the opcodes.
1783 if (num_exec_hook > 0)
1784 interpret = h_interpret;
1786 interpret = r_interpret;