2 * main.c -- Code generator and main program for gawk.
6 * Copyright (C) 1986, 1988, 1989, 1991-2012 the Free Software Foundation, Inc.
8 * This file is part of GAWK, the GNU implementation of the
9 * AWK Programming Language.
11 * GAWK is free software; you can redistribute it and/or modify
12 * it under the terms of the GNU General Public License as published by
13 * the Free Software Foundation; either version 3 of the License, or
14 * (at your option) any later version.
16 * GAWK is distributed in the hope that it will be useful,
17 * but WITHOUT ANY WARRANTY; without even the implied warranty of
18 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
19 * GNU General Public License for more details.
21 * You should have received a copy of the GNU General Public License
22 * along with this program; if not, write to the Free Software
23 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
26 /* FIX THIS BEFORE EVERY RELEASE: */
27 #define UPDATE_YEAR 2012
36 #define DEFAULT_PROFILE "awkprof.out" /* where to put profile */
37 #define DEFAULT_VARFILE "awkvars.out" /* where to put vars */
39 static const char *varfile = DEFAULT_VARFILE;
40 const char *command_file = NULL; /* debugger commands */
42 static void usage(int exitval, FILE *fp) ATTRIBUTE_NORETURN;
43 static void copyleft(void) ATTRIBUTE_NORETURN;
44 static void cmdline_fs(char *str);
45 static void init_args(int argc0, int argc, const char *argv0, char **argv);
46 static void init_vars(void);
47 static NODE *load_environ(void);
48 static NODE *load_procinfo(void);
49 static RETSIGTYPE catchsig(int sig);
50 #ifdef HAVE_LIBSIGSEGV
51 static int catchsegv(void *fault_address, int serious);
52 static void catchstackoverflow(int emergency, stackoverflow_context_t scp);
54 static void nostalgia(void) ATTRIBUTE_NORETURN;
55 static void version(void) ATTRIBUTE_NORETURN;
56 static void init_fds(void);
57 static void init_groupset(void);
59 static void save_argv(int, char **);
61 /* These nodes store all the special variables AWK uses */
62 NODE *ARGC_node, *ARGIND_node, *ARGV_node, *BINMODE_node, *CONVFMT_node;
63 NODE *ENVIRON_node, *ERRNO_node, *FIELDWIDTHS_node, *FILENAME_node;
64 NODE *FNR_node, *FPAT_node, *FS_node, *IGNORECASE_node, *LINT_node;
65 NODE *NF_node, *NR_node, *OFMT_node, *OFS_node, *ORS_node, *PROCINFO_node;
66 NODE *RLENGTH_node, *RSTART_node, *RS_node, *RT_node, *SUBSEP_node;
67 NODE *TEXTDOMAIN_node;
69 NODE *_r; /* used as temporary in stack macros */
82 * CONVFMT is a convenience pointer for the current number to string format.
83 * We must supply an initial value to avoid recursion problems of
84 * set_CONVFMT -> fmt_index -> r_force_string: gets NULL CONVFMT
87 char *CONVFMT = "%.6g";
89 NODE *Nnull_string; /* The global null string */
91 #if defined(HAVE_LOCALE_H)
92 struct lconv loc; /* current locale */
93 static void init_locale(struct lconv *l);
94 #endif /* defined(HAVE_LOCALE_H) */
96 /* The name the program was invoked under, for error messages */
99 /* A block of AWK code to be run */
100 INSTRUCTION *code_block = NULL;
102 char **d_argv; /* saved argv for debugger restarting */
104 * List of rules and functions with first and last instruction (source_line)
105 * information; used for profiling and debugging.
107 INSTRUCTION *rule_list;
109 int exit_val = EXIT_SUCCESS; /* exit value */
111 #if defined(YYDEBUG) || defined(GAWKDEBUG)
115 SRCFILE *srcfiles; /* source files */
118 * structure to remember variable pre-assignments
121 enum assign_type { PRE_ASSIGN = 1, PRE_ASSIGN_FS } type;
125 static struct pre_assign *preassigns = NULL; /* requested via -v or -F */
126 static long numassigns = -1; /* how many of them */
128 static int disallow_var_assigns = FALSE; /* true for --exec */
130 static void add_preassign(enum assign_type type, char *val);
135 int do_traditional = FALSE; /* no gnu extensions, add traditional weirdnesses */
136 int do_posix = FALSE; /* turn off gnu and unix extensions */
137 int do_lint = FALSE; /* provide warnings about questionable stuff */
138 int do_lint_old = FALSE; /* warn about stuff not in V7 awk */
139 int do_intl = FALSE; /* dump locale-izable strings to stdout */
140 int do_non_decimal_data = FALSE; /* allow octal/hex C style DATA. Use with caution! */
141 int do_nostalgia = FALSE; /* provide a blast from the past */
142 int do_intervals = FALSE; /* allow {...,...} in regexps, see resetup() */
143 int do_profiling = FALSE; /* profile and pretty print the program */
144 int do_dump_vars = FALSE; /* dump all global variables at end */
145 int do_tidy_mem = FALSE; /* release vars when done */
146 int do_optimize = TRUE; /* apply default optimizations */
147 int do_binary = FALSE; /* hands off my data! */
148 int do_sandbox = FALSE; /* sandbox mode - disable 'system' function & redirections */
149 int use_lc_numeric = FALSE; /* obey locale for decimal point */
152 int gawk_mb_cur_max; /* MB_CUR_MAX value, see comment in main() */
155 FILE *output_fp; /* default output for debugger */
156 int output_is_tty = FALSE; /* control flushing of output */
158 /* default format for strftime(), available via PROCINFO */
159 const char def_strftime_format[] = "%a %b %e %H:%M:%S %Z %Y";
161 extern const char *version_string;
163 #if defined (HAVE_GETGROUPS) && defined(NGROUPS_MAX) && NGROUPS_MAX > 0
164 GETGROUPS_T *groupset; /* current group set */
165 int ngroups; /* size of said set */
168 void (*lintfunc)(const char *mesg, ...) = warning;
171 * Note: reserve -D for future use, to merge dgawk into gawk.
172 * Note: reserve -l for future use, for xgawk's -l option.
174 static const struct option optab[] = {
175 { "traditional", no_argument, & do_traditional, 1 },
176 { "lint", optional_argument, NULL, 'L' },
177 { "lint-old", no_argument, & do_lint_old, 1 },
178 { "optimize", no_argument, & do_optimize, 'O' },
179 { "posix", no_argument, & do_posix, 1 },
180 { "command", required_argument, NULL, 'R' },
181 { "nostalgia", no_argument, & do_nostalgia, 1 },
182 { "gen-pot", no_argument, & do_intl, 1 },
183 { "non-decimal-data", no_argument, & do_non_decimal_data, 1 },
184 { "profile", optional_argument, NULL, 'p' },
185 { "copyright", no_argument, NULL, 'C' },
186 { "field-separator", required_argument, NULL, 'F' },
187 { "file", required_argument, NULL, 'f' },
188 { "re-interval", no_argument, & do_intervals, 1 },
189 { "source", required_argument, NULL, 'e' },
190 { "dump-variables", optional_argument, NULL, 'd' },
191 { "assign", required_argument, NULL, 'v' },
192 { "version", no_argument, NULL, 'V' },
193 { "help", no_argument, NULL, 'h' },
194 { "exec", required_argument, NULL, 'E' },
195 { "use-lc-numeric", no_argument, & use_lc_numeric, 1 },
196 { "characters-as-bytes", no_argument, & do_binary, 'b' },
197 { "sandbox", no_argument, & do_sandbox, 1 },
198 #if defined(YYDEBUG) || defined(GAWKDEBUG)
199 { "parsedebug", no_argument, NULL, 'Y' },
201 { NULL, 0, NULL, '\0' }
206 #define do_lint_old 0
209 /* main --- process args, parse program, run it, clean up */
212 main(int argc, char **argv)
215 * The + on the front tells GNU getopt not to rearrange argv.
216 * Note: reserve -D for future use, to merge dgawk into gawk.
217 * Note: reserve -l for future use, for xgawk's -l option.
219 const char *optlist = "+F:f:v:W;m:bcCd::e:E:gh:L:nNOp::PrR:StVY";
220 int stopped_early = FALSE;
227 /* do these checks early */
228 do_tidy_mem = (getenv("TIDYMEM") != NULL);
234 #endif /* HAVE_MTRACE */
235 #endif /* HAVE_MCHECK_H */
237 #if defined(LC_CTYPE)
238 setlocale(LC_CTYPE, "");
240 #if defined(LC_COLLATE)
241 setlocale(LC_COLLATE, "");
243 #if defined(LC_MESSAGES)
244 setlocale(LC_MESSAGES, "");
246 #if defined(LC_NUMERIC) && defined(HAVE_LOCALE_H)
248 * Force the issue here. According to POSIX 2001, decimal
249 * point is used for parsing source code and for command-line
250 * assignments and the locale value for processing input,
251 * number to string conversion, and printing output.
253 * 10/2005 --- see below also; we now only use the locale's
254 * decimal point if do_posix in effect.
257 * This is a mess. We need to get the locale's numeric info for
258 * the thousands separator for the %'d flag.
260 setlocale(LC_NUMERIC, "");
262 setlocale(LC_NUMERIC, "C");
265 setlocale(LC_TIME, "");
270 * In glibc, MB_CUR_MAX is actually a function. This value is
271 * tested *a lot* in many speed-critical places in gawk. Caching
272 * this value once makes a speed difference.
274 gawk_mb_cur_max = MB_CUR_MAX;
275 /* Without MBS_SUPPORT, gawk_mb_cur_max is 1. */
277 /* init the cache for checking bytes if they're characters */
281 (void) bindtextdomain(PACKAGE, LOCALEDIR);
282 (void) textdomain(PACKAGE);
284 (void) signal(SIGFPE, catchsig);
286 (void) signal(SIGBUS, catchsig);
289 (void) sigsegv_install_handler(catchsegv);
290 #define STACK_SIZE (16*1024)
291 emalloc(extra_stack, char *, STACK_SIZE, "main");
292 (void) stackoverflow_install_handler(catchstackoverflow, extra_stack, STACK_SIZE);
295 myname = gawk_name(argv[0]);
296 os_arg_fixup(&argc, &argv); /* emulate redirection, expand wildcards */
299 usage(EXIT_FAILURE, stderr);
301 /* Robustness: check that file descriptors 0, 1, 2 are open */
304 /* init array handling. */
309 /* we do error messages ourselves on invalid options */
312 /* copy argv before getopt gets to it; used to restart the debugger */
313 save_argv(argc, argv);
315 /* initialize global (main) execution context */
316 push_context(new_context());
318 /* option processing. ready, set, go! */
319 for (optopt = 0, old_optind = 1;
320 (c = getopt_long(argc, argv, optlist, optab, NULL)) != EOF;
321 optopt = 0, old_optind = optind) {
327 add_preassign(PRE_ASSIGN_FS, optarg);
331 disallow_var_assigns = TRUE;
335 * Allow multiple -f options.
336 * This makes function libraries real easy.
337 * Most of the magic is in the scanner.
339 * The following is to allow for whitespace at the end
340 * of a #! /bin/gawk line in an executable file
343 if (argv[optind-1] != optarg)
344 while (isspace((unsigned char) *scan))
346 src = (*scan == '\0' ? argv[optind++] : optarg);
347 (void) add_srcfile((src && src[0] == '-' && src[1] == '\0') ?
348 SRC_STDIN : SRC_FILE,
349 src, srcfiles, NULL, NULL);
354 add_preassign(PRE_ASSIGN, optarg);
360 * -mf nnn set # fields, gawk ignores
361 * -mr nnn set record length, ditto
363 * As of at least 10/2007, BWK awk also ignores it.
366 lintwarn(_("`-m[fr]' option irrelevant in gawk"));
367 if (optarg[0] != 'r' && optarg[0] != 'f')
368 warning(_("-m option usage: `-m[fr] nnn'"));
376 do_traditional = TRUE;
385 if (optarg != NULL && optarg[0] != '\0')
390 if (optarg[0] == '\0')
391 warning(_("empty argument to `-e/--source' ignored"));
393 (void) add_srcfile(SRC_CMDLINE, optarg, srcfiles, NULL, NULL);
401 /* write usage to stdout, per GNU coding stds */
402 usage(EXIT_SUCCESS, stdout);
408 if (optarg != NULL) {
409 if (strcmp(optarg, "fatal") == 0)
411 else if (strcmp(optarg, "invalid") == 0)
412 do_lint = LINT_INVALID;
426 do_non_decimal_data = TRUE;
430 use_lc_numeric = TRUE;
440 set_prof_file(optarg);
442 set_prof_file(DEFAULT_PROFILE);
461 case 'W': /* gawk specific options - now in getopt_long */
462 fprintf(stderr, _("%s: option `-W %s' unrecognized, ignored\n"),
468 * getopt_long found an option that sets a variable
469 * instead of returning a letter. Do nothing, just
470 * cycle around for the next one.
476 #if defined(YYDEBUG) || defined(GAWKDEBUG)
482 if (c == 'R' && which_gawk == exe_debugging) {
483 if (optarg[0] != '\0')
484 command_file = optarg;
487 /* if not debugging or dgawk, fall through */
492 * If not posix, an unrecognized option stops argument
493 * processing so that it can go into ARGV for the awk
494 * program to see. This makes use of ``#! /bin/gawk -f''
497 * However, it's never simple. If optopt is set,
498 * an option that requires an argument didn't get the
499 * argument. We care because if opterr is 0, then
500 * getopt_long won't print the error message for us.
503 && (optopt == '\0' || strchr(optlist, optopt) == NULL)) {
505 * can't just do optind--. In case of an
506 * option with >= 2 letters, getopt_long
507 * won't have incremented optind.
510 stopped_early = TRUE;
512 } else if (optopt != '\0') {
513 /* Use POSIX required message format */
515 _("%s: option requires an argument -- %c\n"),
517 usage(EXIT_FAILURE, stderr);
520 let getopt print error message for us */
523 if (c == 'E') /* --exec ends option processing */
531 /* check for POSIXLY_CORRECT environment variable */
532 if (! do_posix && getenv("POSIXLY_CORRECT") != NULL) {
536 _("environment variable `POSIXLY_CORRECT' set: turning on `--posix'"));
540 use_lc_numeric = TRUE;
541 if (do_traditional) /* both on command line */
542 warning(_("`--posix' overrides `--traditional'"));
544 do_traditional = TRUE;
546 * POSIX compliance also implies
547 * no GNU extensions.
551 if (do_traditional && do_non_decimal_data) {
552 do_non_decimal_data = FALSE;
553 warning(_("`--posix'/`--traditional' overrides `--non-decimal-data'"));
556 if (do_lint && os_is_setuid())
557 warning(_("running %s setuid root may be a security problem"), myname);
562 warning(_("`--posix' overrides `--binary'"));
564 gawk_mb_cur_max = 1; /* hands off my data! */
569 * Force profiling if this is pgawk.
570 * Don't bother if the command line already set profiling up.
573 init_profiling(& do_profiling, DEFAULT_PROFILE);
578 /* initialize the null string */
579 Nnull_string = make_string("", 0);
580 Nnull_string->numbr = 0.0;
581 Nnull_string->type = Node_val;
582 Nnull_string->flags = (PERM|STRCUR|STRING|NUMCUR|NUMBER);
585 * Tell the regex routines how they should work.
586 * Do this before initializing variables, since
587 * they could want to do a regexp compile.
593 /* Set up the special variables */
596 /* Set up the field variables */
599 /* Now process the pre-assignments */
600 for (i = 0; i <= numassigns; i++) {
601 if (preassigns[i].type == PRE_ASSIGN)
602 (void) arg_assign(preassigns[i].val, TRUE);
603 else /* PRE_ASSIGN_FS */
604 cmdline_fs(preassigns[i].val);
605 efree(preassigns[i].val);
608 if (preassigns != NULL)
611 if ((BINMODE & 1) != 0)
612 if (os_setbinmode(fileno(stdin), O_BINARY) == -1)
613 fatal(_("can't set binary mode on stdin (%s)"), strerror(errno));
614 if ((BINMODE & 2) != 0) {
615 if (os_setbinmode(fileno(stdout), O_BINARY) == -1)
616 fatal(_("can't set binary mode on stdout (%s)"), strerror(errno));
617 if (os_setbinmode(fileno(stderr), O_BINARY) == -1)
618 fatal(_("can't set binary mode on stderr (%s)"), strerror(errno));
622 setbuf(stdout, (char *) NULL); /* make debugging easier */
624 if (os_isatty(fileno(stdout)))
625 output_is_tty = TRUE;
626 /* No -f or --source options, use next arg */
627 if (srcfiles->next == srcfiles) {
628 if (optind > argc - 1 || stopped_early) /* no args left or no program */
629 usage(EXIT_FAILURE, stderr);
630 (void) add_srcfile(SRC_CMDLINE, argv[optind], srcfiles, NULL, NULL);
634 init_args(optind, argc,
635 do_posix ? argv[0] : myname,
638 #if defined(LC_NUMERIC)
641 * Pre-initing the variables with arg_assign() can change the
642 * locale. Force it to C before parsing the program.
644 setlocale(LC_NUMERIC, "C");
646 /* Read in the program */
647 if (parse_program(&code_block) != 0)
656 if (do_lint && code_block->nexti->opcode == Op_atexit)
657 lintwarn(_("no program text at all!"));
659 init_profiling_signals();
661 #if defined(LC_NUMERIC)
663 * See comment above about using locale's decimal point.
666 * Bitter experience teaches us that most people the world over
667 * use period as the decimal point, not whatever their locale
668 * uses. Thus, only use the locale's decimal point if being
669 * posixly anal-retentive.
672 * Be a little bit kinder. Allow the --use-lc-numeric option
673 * to also use the local decimal point. This avoids the draconian
674 * strictness of POSIX mode if someone just wants to parse their
675 * data using the local decimal point.
678 setlocale(LC_NUMERIC, "");
681 interpret(code_block);
684 dump_prog(code_block);
694 /* keep valgrind happier */
698 exit(exit_val); /* more portable */
699 return exit_val; /* to suppress warnings */
702 /* add_preassign --- add one element to preassigns */
705 add_preassign(enum assign_type type, char *val)
707 static long alloc_assigns; /* for how many are allocated */
713 if (preassigns == NULL) {
714 emalloc(preassigns, struct pre_assign *,
715 INIT_SRC * sizeof(struct pre_assign), "add_preassign");
716 alloc_assigns = INIT_SRC;
717 } else if (numassigns >= alloc_assigns) {
719 erealloc(preassigns, struct pre_assign *,
720 alloc_assigns * sizeof(struct pre_assign), "add_preassigns");
722 preassigns[numassigns].type = type;
723 preassigns[numassigns].val = estrdup(val, strlen(val));
728 /* usage --- print usage information and exit */
731 usage(int exitval, FILE *fp)
733 /* Not factoring out common stuff makes it easier to translate. */
734 fprintf(fp, _("Usage: %s [POSIX or GNU style options] -f progfile [--] file ...\n"),
736 fprintf(fp, _("Usage: %s [POSIX or GNU style options] [--] %cprogram%c file ...\n"),
737 myname, quote, quote);
739 /* GNU long options info. This is too many options. */
741 fputs(_("POSIX options:\t\tGNU long options: (standard)\n"), fp);
742 fputs(_("\t-f progfile\t\t--file=progfile\n"), fp);
743 fputs(_("\t-F fs\t\t\t--field-separator=fs\n"), fp);
744 fputs(_("\t-v var=val\t\t--assign=var=val\n"), fp);
745 fputs(_("Short options:\t\tGNU long options: (extensions)\n"), fp);
746 fputs(_("\t-b\t\t\t--characters-as-bytes\n"), fp);
747 fputs(_("\t-c\t\t\t--traditional\n"), fp);
748 fputs(_("\t-C\t\t\t--copyright\n"), fp);
749 fputs(_("\t-d[file]\t\t--dump-variables[=file]\n"), fp);
750 fputs(_("\t-e 'program-text'\t--source='program-text'\n"), fp);
751 fputs(_("\t-E file\t\t\t--exec=file\n"), fp);
752 fputs(_("\t-g\t\t\t--gen-pot\n"), fp);
753 fputs(_("\t-h\t\t\t--help\n"), fp);
754 fputs(_("\t-L [fatal]\t\t--lint[=fatal]\n"), fp);
755 fputs(_("\t-n\t\t\t--non-decimal-data\n"), fp);
756 fputs(_("\t-N\t\t\t--use-lc-numeric\n"), fp);
757 fputs(_("\t-O\t\t\t--optimize\n"), fp);
758 fputs(_("\t-p[file]\t\t--profile[=file]\n"), fp);
759 fputs(_("\t-P\t\t\t--posix\n"), fp);
760 fputs(_("\t-r\t\t\t--re-interval\n"), fp);
761 if (which_gawk == exe_debugging)
762 fputs(_("\t-R file\t\t\t--command=file\n"), fp);
763 fputs(_("\t-S\t\t\t--sandbox\n"), fp);
764 fputs(_("\t-t\t\t\t--lint-old\n"), fp);
765 fputs(_("\t-V\t\t\t--version\n"), fp);
767 fputs(_("\t-W nostalgia\t\t--nostalgia\n"), fp);
770 fputs(_("\t-Y\t\t--parsedebug\n"), fp);
773 /* This is one string to make things easier on translators. */
774 /* TRANSLATORS: --help output 5 (end)
775 TRANSLATORS: the placeholder indicates the bug-reporting address
776 for this application. Please add _another line_ with the
777 address for translation bugs.
779 fputs(_("\nTo report bugs, see node `Bugs' in `gawk.info', which is\n\
780 section `Reporting Problems and Bugs' in the printed version.\n\n"), fp);
783 fputs(_("gawk is a pattern scanning and processing language.\n\
784 By default it reads standard input and writes standard output.\n\n"), fp);
787 fputs(_("Examples:\n\tgawk '{ sum += $1 }; END { print sum }' file\n\
788 \tgawk -F: '{ print $1 }' /etc/passwd\n"), fp);
794 warning(_("error writing standard output (%s)"), strerror(errno));
801 /* copyleft --- print out the short GNU copyright information */
806 static const char blurb_part1[] =
807 N_("Copyright (C) 1989, 1991-%d Free Software Foundation.\n\
809 This program is free software; you can redistribute it and/or modify\n\
810 it under the terms of the GNU General Public License as published by\n\
811 the Free Software Foundation; either version 3 of the License, or\n\
812 (at your option) any later version.\n\
814 static const char blurb_part2[] =
815 N_("This program is distributed in the hope that it will be useful,\n\
816 but WITHOUT ANY WARRANTY; without even the implied warranty of\n\
817 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the\n\
818 GNU General Public License for more details.\n\
820 static const char blurb_part3[] =
821 N_("You should have received a copy of the GNU General Public License\n\
822 along with this program. If not, see http://www.gnu.org/licenses/.\n");
824 /* multiple blurbs are needed for some brain dead compilers. */
825 printf(_(blurb_part1), UPDATE_YEAR); /* Last update year */
826 fputs(_(blurb_part2), stdout);
827 fputs(_(blurb_part3), stdout);
830 if (ferror(stdout)) {
831 warning(_("error writing standard output (%s)"), strerror(errno));
838 /* cmdline_fs --- set FS from the command line */
841 cmdline_fs(char *str)
845 tmp = &FS_node->var_value;
848 * Only if in full compatibility mode check for the stupid special
849 * case so -F\t works as documented in awk book even though the shell
850 * hands us -Ft. Bleah!
852 * Thankfully, POSIX didn't propagate this "feature".
854 if (str[0] == 't' && str[1] == '\0') {
856 lintwarn(_("-Ft does not set FS to tab in POSIX awk"));
857 if (do_traditional && ! do_posix)
860 *tmp = make_str_node(str, strlen(str), SCAN); /* do process escapes */
864 /* init_args --- set up ARGV from stuff on the command line */
867 init_args(int argc0, int argc, const char *argv0, char **argv)
873 ARGV_node = install_symbol(estrdup("ARGV", 4), mk_symbol(Node_var_array, (NODE *) NULL));
874 tmp = make_number(0.0);
875 aptr = assoc_lookup(ARGV_node, tmp, FALSE);
878 *aptr = make_string(argv0, strlen(argv0));
879 (*aptr)->flags |= MAYBE_NUM;
880 for (i = argc0, j = 1; i < argc; i++, j++) {
881 tmp = make_number((AWKNUM) j);
882 aptr = assoc_lookup(ARGV_node, tmp, FALSE);
885 *aptr = make_string(argv[i], strlen(argv[i]));
886 (*aptr)->flags |= MAYBE_NUM;
889 ARGC_node = install_symbol(estrdup("ARGC", 4),
890 mk_symbol(Node_var, make_number((AWKNUM) j)));
894 * Set all the special variables to their initial values.
895 * Note that some of the variables that have set_FOO routines should
896 * *N*O*T* have those routines called upon initialization, and thus
897 * they have NULL entries in that field. This is notably true of FS
910 #define NO_INSTALL 0x01
911 #define NON_STANDARD 0x02
914 static const struct varinit varinit[] = {
915 {NULL, "ARGC", NULL, 0, NULL, NULL, FALSE, NO_INSTALL },
916 {&ARGIND_node, "ARGIND", NULL, 0, NULL, NULL, FALSE, NON_STANDARD },
917 {NULL, "ARGV", NULL, 0, NULL, NULL, FALSE, NO_INSTALL },
918 {&BINMODE_node, "BINMODE", NULL, 0, NULL, set_BINMODE, FALSE, NON_STANDARD },
919 {&CONVFMT_node, "CONVFMT", "%.6g", 0, NULL, set_CONVFMT,TRUE, 0 },
920 {NULL, "ENVIRON", NULL, 0, NULL, NULL, FALSE, NO_INSTALL },
921 {&ERRNO_node, "ERRNO", "", 0, NULL, NULL, FALSE, NON_STANDARD },
922 {&FIELDWIDTHS_node, "FIELDWIDTHS", "", 0, NULL, set_FIELDWIDTHS, FALSE, NON_STANDARD },
923 {&FILENAME_node, "FILENAME", "", 0, NULL, NULL, FALSE, 0 },
924 {&FNR_node, "FNR", NULL, 0, update_FNR, set_FNR, TRUE, 0 },
925 {&FS_node, "FS", " ", 0, NULL, set_FS, FALSE, 0 },
926 {&FPAT_node, "FPAT", "[^[:space:]]+", 0, NULL, set_FPAT, FALSE, NON_STANDARD },
927 {&IGNORECASE_node, "IGNORECASE", NULL, 0, NULL, set_IGNORECASE, FALSE, NON_STANDARD },
928 {&LINT_node, "LINT", NULL, 0, NULL, set_LINT, FALSE, NON_STANDARD },
929 {&NF_node, "NF", NULL, -1, update_NF, set_NF, FALSE, 0 },
930 {&NR_node, "NR", NULL, 0, update_NR, set_NR, TRUE, 0 },
931 {&OFMT_node, "OFMT", "%.6g", 0, NULL, set_OFMT, TRUE, 0 },
932 {&OFS_node, "OFS", " ", 0, NULL, set_OFS, TRUE, 0 },
933 {&ORS_node, "ORS", "\n", 0, NULL, set_ORS, TRUE, 0 },
934 {NULL, "PROCINFO", NULL, 0, NULL, NULL, FALSE, NO_INSTALL | NON_STANDARD },
935 {&RLENGTH_node, "RLENGTH", NULL, 0, NULL, NULL, FALSE, 0 },
936 {&RS_node, "RS", "\n", 0, NULL, set_RS, TRUE, 0 },
937 {&RSTART_node, "RSTART", NULL, 0, NULL, NULL, FALSE, 0 },
938 {&RT_node, "RT", "", 0, NULL, NULL, FALSE, NON_STANDARD },
939 {&SUBSEP_node, "SUBSEP", "\034", 0, NULL, set_SUBSEP, TRUE, 0 },
940 {&TEXTDOMAIN_node, "TEXTDOMAIN", "messages", 0, NULL, set_TEXTDOMAIN, TRUE, NON_STANDARD },
941 {0, NULL, NULL, 0, NULL, NULL, FALSE, 0 },
944 /* init_vars --- actually initialize everything in the symbol table */
949 const struct varinit *vp;
952 for (vp = varinit; vp->name != NULL; vp++) {
953 if ((vp->flags & NO_INSTALL) != 0)
955 n = mk_symbol(Node_var, vp->strval == NULL
956 ? make_number(vp->numval)
957 : make_string(vp->strval, strlen(vp->strval)));
958 n->var_assign = (Func_ptr) vp->assign;
959 n->var_update = (Func_ptr) vp->update;
961 *(vp->spec) = install_symbol(estrdup(vp->name, strlen(vp->name)), n);
966 /* Set up deferred variables (loaded only when accessed). */
967 if (! do_traditional)
968 register_deferred_variable("PROCINFO", load_procinfo);
969 register_deferred_variable("ENVIRON", load_environ);
972 /* load_environ --- populate the ENVIRON array */
977 #if ! (defined(VMS) && defined(__DECC))
978 extern char **environ;
985 ENVIRON_node = install_symbol(estrdup("ENVIRON", 7),
986 mk_symbol(Node_var_array, (NODE *) NULL));
988 for (i = 0; environ[i] != NULL; i++) {
989 static char nullstr[] = "";
992 val = strchr(var, '=');
997 tmp = make_string(var, strlen(var));
998 aptr = assoc_lookup(ENVIRON_node, tmp, FALSE);
1001 *aptr = make_string(val, strlen(val));
1002 (*aptr)->flags |= MAYBE_NUM;
1004 /* restore '=' so that system() gets a valid environment */
1009 * Put AWKPATH into ENVIRON if it's not there.
1010 * This allows querying it from within awk programs.
1012 tmp = make_string("AWKPATH", 7);
1013 if (! in_array(ENVIRON_node, tmp)) {
1015 * On VMS, environ[] only holds a subset of what getenv() can
1016 * find, so look AWKPATH up before resorting to default path.
1018 val = getenv("AWKPATH");
1021 aptr = assoc_lookup(ENVIRON_node, tmp, FALSE);
1023 *aptr = make_string(val, strlen(val));
1026 return ENVIRON_node;
1029 /* load_procinfo --- populate the PROCINFO array */
1034 #if defined (HAVE_GETGROUPS) && defined(NGROUPS_MAX) && NGROUPS_MAX > 0
1040 PROCINFO_node = install_symbol(estrdup("PROCINFO", 8),
1041 mk_symbol(Node_var_array, (NODE *) NULL));
1043 update_PROCINFO_str("version", VERSION);
1044 update_PROCINFO_str("strftime", def_strftime_format);
1047 #define getpgrp_arg() /* nothing */
1049 #define getpgrp_arg() getpid()
1052 value = getpgrp(getpgrp_arg());
1053 update_PROCINFO_num("pgrpid", value);
1056 * Could put a lot of this into a table, but then there's
1057 * portability problems declaring all the functions. So just
1058 * do it the slow and stupid way. Sigh.
1062 update_PROCINFO_num("pid", value);
1065 update_PROCINFO_num("ppid", value);
1068 update_PROCINFO_num("uid", value);
1071 update_PROCINFO_num("euid", value);
1074 update_PROCINFO_num("gid", value);
1077 update_PROCINFO_num("egid", value);
1079 switch (current_field_sep()) {
1080 case Using_FIELDWIDTHS:
1081 update_PROCINFO_str("FS", "FIELDWIDTHS");
1084 update_PROCINFO_str("FS", "FPAT");
1087 update_PROCINFO_str("FS", "FS");
1090 fatal(_("unknown value for field spec: %d\n"),
1091 current_field_sep());
1096 #if defined (HAVE_GETGROUPS) && defined(NGROUPS_MAX) && NGROUPS_MAX > 0
1097 for (i = 0; i < ngroups; i++) {
1098 sprintf(name, "group%d", i + 1);
1099 value = groupset[i];
1100 update_PROCINFO_num(name, value);
1107 return PROCINFO_node;
1110 /* is_std_var --- return true if a variable is a standard variable */
1113 is_std_var(const char *var)
1115 const struct varinit *vp;
1117 for (vp = varinit; vp->name != NULL; vp++) {
1118 if (strcmp(vp->name, var) == 0) {
1119 if ((do_traditional || do_posix) && (vp->flags & NON_STANDARD) != 0)
1130 /* get_spec_varname --- return the name of a special variable
1131 with the given assign or update routine.
1135 get_spec_varname(Func_ptr fptr)
1137 const struct varinit *vp;
1141 for (vp = varinit; vp->name != NULL; vp++) {
1142 if (vp->assign == fptr || vp->update == fptr)
1149 /* arg_assign --- process a command-line assignment */
1152 arg_assign(char *arg, int initing)
1161 if (! initing && disallow_var_assigns)
1162 return FALSE; /* --exec */
1164 cp = strchr(arg, '=');
1168 return FALSE; /* This is file name, not assignment. */
1171 _("%s: `%s' argument to `-v' not in `var=value' form\n\n"),
1173 usage(EXIT_FAILURE, stderr);
1178 /* avoid false source indications in a fatal message */
1184 /* first check that the variable name has valid syntax */
1186 if (! isalpha((unsigned char) arg[0]) && arg[0] != '_')
1189 for (cp2 = arg+1; *cp2; cp2++)
1190 if (! isalnum((unsigned char) *cp2) && *cp2 != '_') {
1197 fatal(_("`%s' is not a legal variable name"), arg);
1200 lintwarn(_("`%s' is not a variable name, looking for file `%s=%s'"),
1203 if (check_special(arg) >= 0)
1204 fatal(_("cannot use gawk builtin `%s' as variable name"), arg);
1208 if (var != NULL && var->type == Node_func)
1209 fatal(_("cannot use function `%s' as variable name"), arg);
1213 * BWK awk expands escapes inside assignments.
1214 * This makes sense, so we do it too.
1216 it = make_str_node(cp, strlen(cp), SCAN);
1217 it->flags |= MAYBE_NUM;
1220 * See comment above about locale decimal point.
1223 setlocale(LC_NUMERIC, "C");
1224 (void) force_number(it);
1226 setlocale(LC_NUMERIC, "");
1227 #endif /* LC_NUMERIC */
1230 * Since we restore the original text of ARGV later, we need
1231 * to copy the variable-name part; otherwise var->vname would
1232 * hold something like v=abc instead of just v.
1235 cp2 = estrdup(arg, cp - arg); /* var name */
1237 var = variable(cp2, Node_var);
1238 if (var == NULL) /* error */
1240 if (var->type == Node_var && var->var_update)
1242 lhs = get_lhs(var, FALSE);
1245 /* check for set_FOO() routine */
1246 if (var->type == Node_var && var->var_assign)
1251 *--cp = '='; /* restore original text of ARGV */
1256 /* catchsig --- catch signals */
1261 if (sig == SIGFPE) {
1262 fatal(_("floating point exception"));
1263 } else if (sig == SIGSEGV
1268 set_loc(__FILE__, __LINE__);
1269 msg(_("fatal error: internal error"));
1270 /* fatal won't abort() if not compiled for debugging */
1277 #ifdef HAVE_LIBSIGSEGV
1278 /* catchsegv --- for use with libsigsegv */
1281 catchsegv(void *fault_address, int serious)
1283 set_loc(__FILE__, __LINE__);
1284 msg(_("fatal error: internal error: segfault"));
1290 /* catchstackoverflow --- for use with libsigsegv */
1293 catchstackoverflow(int emergency, stackoverflow_context_t scp)
1295 set_loc(__FILE__, __LINE__);
1296 msg(_("fatal error: internal error: stack overflow"));
1301 #endif /* HAVE_LIBSIGSEGV */
1303 /* nostalgia --- print the famous error message and die */
1309 * N.B.: This string is not gettextized, on purpose.
1312 fprintf(stderr, "awk: bailing out near line 1\n");
1317 /* version --- print version message */
1322 printf("%s\n", version_string);
1324 * Per GNU coding standards, print copyright info,
1325 * then exit successfully, do nothing else.
1331 /* init_fds --- check for 0, 1, 2, open on /dev/null if possible */
1339 char const *const opposite_mode[] = {"w", "r", "r"};
1341 /* stderr may not be open, so don't bother with an error message */
1342 for (fd = 0; fd <= 2; fd++) {
1343 if (fstat(fd, &sbuf) < 0) {
1344 #if MAKE_A_HEROIC_EFFORT
1346 lintwarn(_("no pre-opened fd %d"), fd);
1348 newfd = devopen("/dev/null", opposite_mode[fd]);
1349 /* turn off some compiler warnings "set but not used" */
1351 #ifdef MAKE_A_HEROIC_EFFORT
1352 if (do_lint && newfd < 0)
1353 lintwarn(_("could not pre-open /dev/null for fd %d"), fd);
1359 /* init_groupset --- initialize groupset */
1364 #if defined(HAVE_GETGROUPS) && defined(NGROUPS_MAX) && NGROUPS_MAX > 0
1365 #ifdef GETGROUPS_NOT_STANDARD
1366 /* For systems that aren't standards conformant, use old way. */
1367 ngroups = NGROUPS_MAX;
1370 * If called with 0 for both args, return value is
1371 * total number of groups.
1373 ngroups = getgroups(0, NULL);
1375 /* If an error or no groups, just give up and get on with life. */
1379 /* fill in groups */
1380 emalloc(groupset, GETGROUPS_T *, ngroups * sizeof(GETGROUPS_T), "init_groupset");
1382 ngroups = getgroups(ngroups, groupset);
1383 /* same thing here, give up but keep going */
1384 if (ngroups == -1) {
1392 /* estrdup --- duplicate a string */
1395 estrdup(const char *str, size_t len)
1398 emalloc(s, char *, len + 1, "estrdup");
1399 memcpy(s, str, len);
1404 #if defined(HAVE_LOCALE_H)
1406 /* init_locale --- initialize locale info. */
1409 * On some operating systems, the pointers in the struct returned
1410 * by localeconv() can become dangling pointers after a call to
1411 * setlocale(). So we do a deep copy.
1413 * Thanks to KIMURA Koichi <kimura.koichi@canon.co.jp>.
1417 init_locale(struct lconv *l)
1423 l->thousands_sep = estrdup(t->thousands_sep, strlen(t->thousands_sep));
1424 l->decimal_point = estrdup(t->decimal_point, strlen(t->decimal_point));
1425 l->grouping = estrdup(t->grouping, strlen(t->grouping));
1426 l->int_curr_symbol = estrdup(t->int_curr_symbol, strlen(t->int_curr_symbol));
1427 l->currency_symbol = estrdup(t->currency_symbol, strlen(t->currency_symbol));
1428 l->mon_decimal_point = estrdup(t->mon_decimal_point, strlen(t->mon_decimal_point));
1429 l->mon_thousands_sep = estrdup(t->mon_thousands_sep, strlen(t->mon_thousands_sep));
1430 l->mon_grouping = estrdup(t->mon_grouping, strlen(t->mon_grouping));
1431 l->positive_sign = estrdup(t->positive_sign, strlen(t->positive_sign));
1432 l->negative_sign = estrdup(t->negative_sign, strlen(t->negative_sign));
1434 #endif /* defined(HAVE_LOCALE_H) */
1436 /* save_argv --- save argv array */
1439 save_argv(int argc, char **argv)
1443 emalloc(d_argv, char **, (argc + 1) * sizeof(char *), "save_argv");
1444 for (i = 0; i < argc; i++)
1445 d_argv[i] = estrdup(argv[i], strlen(argv[i]));
1446 d_argv[argc] = NULL;
1450 * update_global_values --- make sure the symbol table has correct values.
1451 * Called from the grammar before dumping values.
1455 update_global_values()
1457 const struct varinit *vp;
1459 for (vp = varinit; vp->name; vp++) {
1460 if (vp->update != NULL)