2 /********************************************
4 copyright 1991, Michael D. Brennan
6 This is a source file for mawk, an implementation of
7 the AWK programming language.
9 Mawk is distributed without warranty under the terms of
10 the GNU General Public License, version 2, 1991.
11 ********************************************/
15 * Revision 1.8 1996/07/28 21:47:05 mike
18 * Revision 1.7 1995/06/18 19:42:24 mike
19 * Remove some redundant declarations and add some prototypes
21 * Revision 1.6 1995/06/10 16:57:52 mike
22 * silently exit(0) if no program
23 * always add a '\n' on eof in scan_fillbuff()
25 * Revision 1.5 1995/06/06 00:18:33 mike
26 * change mawk_exit(1) to mawk_exit(2)
28 * Revision 1.4 1994/09/23 00:20:04 mike
29 * minor bug fix: handle \ in eat_nl()
31 * Revision 1.3 1993/07/17 00:45:21 mike
34 * Revision 1.2 1993/07/04 12:52:09 mike
35 * start on autoconfig changes
37 * Revision 1.1.1.1 1993/07/03 18:58:20 mike
40 * Revision 5.6 1993/02/13 21:57:33 mike
43 * Revision 5.5 1993/01/01 21:30:48 mike
44 * split new_STRING() into new_STRING and new_STRING0
46 * Revision 5.4.1.1 1993/01/15 03:33:50 mike
47 * patch3: safer double to int conversion
49 * Revision 5.4 1992/11/29 18:57:50 mike
50 * field expressions convert to long so 16 bit and 32 bit
51 * systems behave the same
53 * Revision 5.3 1992/07/08 15:43:41 brennan
54 * patch2: length returns. I am a wimp
56 * Revision 5.2 1992/02/21 14:16:53 brennan
59 * Revision 5.1 91/12/05 07:56:27 brennan
81 /* static functions: forward declarations of this file's private helpers.
   Each one is written with the PROTO(name, (args)) macro, which is
   defined outside this section of the file. */
82 static void PROTO(scan_fillbuff, (void)) ; /* rev 1.6 log entry: always adds a '\n' on eof */
83 static void PROTO(scan_open, (void)) ; /* opens pfile_name */
84 static int PROTO(slow_next, (void)) ;
85 static void PROTO(eat_comment, (void)) ;
86 static void PROTO(eat_semi_colon, (void)) ;
87 static double PROTO(collect_decimal, (int, int *)) ; /* returns the value; an error code comes back through the int * */
88 static int PROTO(collect_string, (void)) ; /* the caller stores the result in current_token */
89 static int PROTO(collect_RE, (void)) ; /* the caller hands the result to ct_ret() */
92 /*-----------------------------
93 program file management
94 *----------------------------*/
/* program_string holds a copy of the program text when the program is
   given on the command line (scan_init allocates and fills it). */
97 STRING *program_string ;
/* buffer is the zmalloc'ed read buffer (BUFFSZ + 1 bytes) used when the
   program comes from file(s); buffp is the current read position and
   points either into buffer or into program_string->str. */
99 static unsigned char *buffer ;
100 static unsigned char *buffp ;
101 /* unsigned so it works with 8 bit chars */
102 static int program_fd ; /* -1 means the program came from the command line */
103 static int eof_flag ;
/* scan_init: prepare the scanner's input.
   cmdline_program is the program text given on the command line; the
   first group of statements below handles that case, and the else-branch
   handles a program read from file(s). */
106 scan_init(cmdline_program)
107 char *cmdline_program ;
111 program_fd = -1 ; /* command line program */
/* one byte more than the text, so there is room for a trailing '\n' */
112 program_string = new_STRING0(strlen(cmdline_program) + 1) ;
113 strcpy(program_string->str, cmdline_program) ;
114 /* simulate file termination */
/* NOTE(review): this assumes new_STRING0(n) sets ->len to n, so that
   len - 1 is the slot where strcpy() put the NUL -- confirm */
115 program_string->str[program_string->len - 1] = '\n' ;
116 buffp = (unsigned char *) program_string->str ;
119 else /* program from file[s] */
/* the read buffer is BUFFSZ + 1 bytes, i.e. one byte beyond the BUFFSZ
   that is read at a time */
122 buffp = buffer = (unsigned char *) zmalloc(BUFFSZ + 1) ;
126 #ifdef OS2 /* OS/2 "extproc" is similar to #! */
/* NOTE(review): buffp is unsigned char *; strnicmp presumably expects
   char *, so this call may need a cast -- confirm */
127 if (strnicmp(buffp, "extproc ", 8) == 0)
130 eat_nl() ; /* scan to first token */
142 scan_open() /* open pfile_name */
144 if (pfile_name[0] == '-' && pfile_name[1] == 0) /* the name is exactly "-"; the handling of this case is not visible in this section */
148 else if ((program_fd = open(pfile_name, O_RDONLY, 0)) == -1) /* any other name is opened read-only into program_fd */
150 errmsg(errno, "cannot open %s", pfile_name) ; /* open() failed: report it together with the errno value */
158 if (program_fd >= 0) zfree(buffer, BUFFSZ + 1) ;
159 else free_STRING(program_string) ;
161 if (program_fd > 0) close(program_fd) ;
163 /* redefine SPACE as [ \t\n] */
165 scan_code['\n'] = posix_space_flag && rs_shadow.type != SEP_MLR
166 ? SC_UNEXPECTED : SC_SPACE ;
167 scan_code['\f'] = SC_UNEXPECTED ; /*value doesn't matter */
168 scan_code['\013'] = SC_UNEXPECTED ; /* \v not space */
169 scan_code['\r'] = SC_UNEXPECTED ;
172 /*--------------------------------
173 global variables shared by yyparse() and yylex()
174 and used for error messages too
175 *-------------------------------*/
177 int current_token = -1 ; /* most recent token produced by the scanner; starts at -1 */
178 unsigned token_lineno ; /* line number recorded for the current token; updated together with lineno */
179 unsigned compile_error_count ; /* compile errors seen so far; compared against MAX_COMPILE_ERRORS */
180 int NR_flag ; /* are we tracking NR */
183 int print_flag ; /* changes meaning of '>': when set and paren_cnt == 0, '>' is scanned as IO_OUT and '|' as PIPE_OUT */
184 int getline_flag ; /* changes meaning of '<' */
187 /*----------------------------------------
188 file reading functions
189 next() and un_next() are macros in scan.h
191 *---------------------*/
193 static unsigned lineno = 1 ;
201 r = fillbuff(program_fd, (char *) buffer, BUFFSZ) ;
205 /* make sure eof is terminated */
211 /* read one character -- slowly */
223 else if (pfile_list /* open another program file */ )
227 if (program_fd > 0) close(program_fd) ;
229 pfile_name = pfile_list->fname ;
231 pfile_list = pfile_list->link ;
234 token_lineno = lineno = 1 ;
236 else break /* real eof */ ;
239 return *buffp++ ; /* note can un_next() , eof which is zero */
247 while ((c = next()) != '\n' && scan_code[c]) ;
251 /* this is how we handle extra semi-colons that are
252 now allowed to separate pattern-action blocks
254 A proof that they are useless clutter to the language:
260 /* eat one semi-colon on the current line */
264 while (scan_code[c = next()] == SC_SPACE) ;
265 if (c != ';') un_next() ;
269 eat_nl() /* eat all space including newlines */
272 switch (scan_code[next()])
286 /* bug fix - surprised anyone did this,
287 a csh user with backslash dyslexia.(Not a joke)
292 while (scan_code[c = next()] == SC_SPACE) ;
294 token_lineno = ++lineno ;
303 /* can't un_next() twice so deal with it */
306 if( ++compile_error_count == MAX_COMPILE_ERRORS )
324 token_lineno = lineno ;
328 switch (scan_code[c = next()])
346 while (scan_code[c = next()] == SC_SPACE) ;
349 token_lineno = ++lineno ;
353 if (c == 0) ct_ret(EOF) ;
374 string_buff[1] = '+' ;
392 string_buff[1] = '-' ;
409 test1_ret('=', MUL_ASG, MUL) ;
413 static int can_precede_div[] =
414 {DOUBLE, STRING_, RPAREN, ID, D_ID, RE, RBOX, FIELD,
415 GETLINE, INC_or_DEC, -1} ;
417 int *p = can_precede_div ;
421 if (*p == current_token)
423 if (*p != INC_or_DEC) { test1_ret('=', DIV_ASG, DIV) ; }
428 ct_ret(collect_RE()) ;
434 ct_ret(collect_RE()) ;
438 test1_ret('=', MOD_ASG, MOD) ;
441 test1_ret('=', POW_ASG, POW) ;
450 compile_error("extra ')'") ;
464 string_buff[0] = '~' ;
470 test1_ret('=', EQ, ASSIGN) ;
473 if ((c = next()) == '~')
475 string_buff[0] = '!' ;
476 string_buff[1] = '~' ;
481 else if (c == '=') ct_ret(NEQ) ;
487 case SC_LT: /* '<' */
488 if (next() == '=') ct_ret(LTE) ;
498 case SC_GT: /* '>' */
499 if (print_flag && paren_cnt == 0)
502 /* there are 3 types of IO_OUT
503 -- build the error string in string_buff */
504 string_buff[0] = '>' ;
507 yylval.ival = F_APPEND ;
508 string_buff[1] = '>' ;
514 yylval.ival = F_TRUNC ;
517 return current_token = IO_OUT ;
520 test1_ret('=', GTE, GT) ;
532 if (print_flag && paren_cnt == 0)
535 yylval.ival = PIPE_OUT ;
536 string_buff[0] = '|' ;
565 compile_error("extra '}'") ;
571 if ((c = current_token) == NL || c == SEMI_COLON
572 || c == SC_FAKE_SEMI_COLON || c == RBRACE)
574 /* if the brace_cnt is zero , we've completed
575 a pattern action block. If the user insists
576 on adding a semi-colon on the same line
577 we will eat it. Note what we do below:
578 physical law -- conservation of semi-colons */
580 if (brace_cnt == 0) eat_semi_colon() ;
585 /* supply missing semi-colon to statement that
589 current_token = SC_FAKE_SEMI_COLON ;
597 static double double_zero = 0.0 ;
598 static double double_one = 1.0 ;
600 if ((d = collect_decimal(c, &flag)) == 0.0)
602 if (flag) ct_ret(flag) ;
603 else yylval.ptr = (PTR) & double_zero ;
607 yylval.ptr = (PTR) & double_one ;
611 yylval.ptr = (PTR) ZMALLOC(double) ;
612 *(double *) yylval.ptr = d ;
617 case SC_DOLLAR: /* '$' */
622 while (scan_code[c = next()] == SC_SPACE) ;
623 if (scan_code[c] != SC_DIGIT &&
624 scan_code[c] != SC_DOT)
630 /* compute field address at compile time */
631 if ((d = collect_decimal(c, &flag)) == 0.0)
633 if (flag) ct_ret(flag) ; /* an error */
634 else yylval.cp = &field[0] ;
641 "$%g exceeds maximum field(%d)", d, MAX_FIELD) ;
644 yylval.cp = field_ptr((int) d) ;
651 return current_token = collect_string() ;
653 case SC_IDCHAR: /* collect an identifier */
656 (unsigned char *) string_buff + 1 ;
662 (c = scan_code[*p++ = next()]) == SC_IDCHAR ||
668 switch ((stp = find(string_buff))->type)
671 /* check for function call before defined */
674 stp->type = ST_FUNCT ;
675 stp->stval.fbp = (FBLOCK *)
676 zmalloc(sizeof(FBLOCK)) ;
677 stp->stval.fbp->name = stp->name ;
678 stp->stval.fbp->code = (INST *) 0 ;
679 yylval.fbp = stp->stval.fbp ;
680 current_token = FUNCT_ID ;
686 current_token == DOLLAR ? D_ID : ID ;
704 current_token == DOLLAR ? D_ID : ID ;
708 stp->type = ST_ARRAY ;
709 stp->stval.array = new_ARRAY() ;
710 load_environ(stp->stval.array) ;
713 current_token == DOLLAR ? D_ID : ID ;
717 yylval.fbp = stp->stval.fbp ;
718 current_token = FUNCT_ID ;
722 current_token = stp->stval.kw ;
726 yylval.bip = stp->stval.bip ;
727 current_token = BUILTIN ;
732 yylval.bip = stp->stval.bip ;
734 /* check for length alone, this is an ugly
736 while (scan_code[c = next()] == SC_SPACE) ;
739 current_token = c == '(' ? BUILTIN : LENGTH ;
743 yylval.cp = stp->stval.cp ;
744 current_token = FIELD ;
748 bozo("find returned bad st type") ;
750 return current_token ;
755 yylval.ival = c & 0xff ;
758 return 0 ; /* never get here make lint happy */
761 /* collect a decimal constant in string_buff.
762 Returns the value; an error condition is reported by reference through flag */
765 collect_decimal(c, flag)
769 register unsigned char *p = (unsigned char *) string_buff + 1 ; /* the text is gathered in string_buff; p starts at its second byte */
770 unsigned char *endp ; /* receives strtod()'s end-of-parse pointer */
778 if (scan_code[*p++ = next()] != SC_DIGIT)
787 while (scan_code[*p++ = next()] == SC_DIGIT) ; /* the loop also stores the first non-digit, so p[-1] is that character */
794 /* get rest of digits after decimal point */
795 while (scan_code[*p++ = next()] == SC_DIGIT) ;
797 /* check for exponent */
798 if (p[-1] != 'e' && p[-1] != 'E')
803 else /* get the exponent */
805 if (scan_code[*p = next()] != SC_DIGIT &&
806 *p != '-' && *p != '+') /* an exponent must begin with a digit or a sign */
809 *flag = BAD_DECIMAL ;
812 else /* get the rest of the exponent */
815 while (scan_code[*p++ = next()] == SC_DIGIT) ;
821 errno = 0 ; /* check for overflow/underflow */
822 d = strtod(string_buff, (char **) &endp) ;
824 #ifndef STRTOD_UNDERFLOW_ON_ZERO_BUG
/* any errno left by strtod() is reported: a zero result is called
   underflow, anything else overflow */
825 if (errno) compile_error("%s : decimal %sflow", string_buff,
826 d == 0.0 ? "under" : "over") ;
827 #else /* STRTOD_UNDERFLOW_ON_ZERO_BUG is defined -- the sun4 bug */
/* with the bug present an errno accompanied by a zero result is ignored;
   only overflow is reported */
828 if (errno && d != 0.0)
829 compile_error("%s : decimal overflow", string_buff) ;
834 *flag = BAD_DECIMAL ;
840 /*---------- process escape characters ---------------*/
/* hex_val maps a character c in the range 'A'..'f' (index c - 'A') to its
   value as a hex digit: 'A'-'F' and 'a'-'f' give 10..15 and every
   character in between gives 0.  The run of 26 zero entries matches the
   distance between 'F' and 'a' in ASCII. */
842 static char hex_val['f' - 'A' + 1] =
844 10, 11, 12, 13, 14, 15, 0, 0,
845 0, 0, 0, 0, 0, 0, 0, 0,
846 0, 0, 0, 0, 0, 0, 0, 0,
847 0, 0, 0, 0, 0, 0, 0, 0,
848 10, 11, 12, 13, 14, 15} ;
/* true if x is one of the characters '0'..'7' (x is evaluated twice) */
850 #define isoctal(x) ((x)>='0'&&(x)<='7')
/* plain table lookup with no range check: x must lie in 'A'..'f' */
852 #define hex_value(x) hex_val[(x)-'A']
/* true if x is a hex digit: either scan_code[] classifies it as SC_DIGIT,
   or it is a letter in 'A'..'f' whose hex_val entry is non-zero.
   x is used as an index into scan_code[] and is evaluated several times,
   so pass a side-effect-free unsigned char value. */
854 #define ishex(x) (scan_code[x] == SC_DIGIT ||\
855 ('A' <= (x) && (x) <= 'f' && hex_value(x)))
/* both helpers take the address of a char pointer and move that pointer
   forward past the digits they process */
857 static int PROTO(octal, (char **)) ;
858 static int PROTO(hex, (char **)) ;
860 /* process one , two or three octal digits
861 moving a pointer forward by reference */
866 register char *p = *start_p ;
867 register unsigned x ;
872 x = (x << 3) + *p++ - '0' ;
873 if (isoctal(*p)) x = (x << 3) + *p++ - '0' ;
879 /* process one or two hex digits
880 moving a pointer forward by reference */
886 register unsigned char *p = (unsigned char *) *start_p ;
887 register unsigned x ;
890 if (scan_code[*p] == SC_DIGIT) x = *p++ - '0' ;
891 else x = hex_value(*p++) ;
893 if (scan_code[*p] == SC_DIGIT) x = (x << 4) + *p++ - '0' ;
894 else if ('A' <= *p && *p <= 'f' && (t = hex_value(*p)))
900 *start_p = (char *) p ;
910 escape_test[ET_END + 1] =
925 /* process the escape characters in a string, in place. */
931 register char *p, *q ;
941 escape_test[ET_END].in = *++p ; /* sentinel: storing the character in the table's last slot guarantees the search below stops */
943 while (escape_test[i].in != *p) i++ ;
945 if (i != ET_END) /* in table: the match was a real entry, not the sentinel */
948 *q++ = escape_test[i].out ; /* emit the table's replacement character */
950 else if (isoctal(*p)) /* an octal digit */
956 else if (*p == 'x' && ishex(*(unsigned char *) (p + 1))) /* 'x' followed by at least one hex digit */
962 else if (*p == 0) /* can only happen with command line assign */
964 else /* not an escape sequence */
980 register unsigned char *p = (unsigned char *) string_buff ;
982 int e_flag = 0 ; /* on if have an escape char */
985 switch (scan_code[*p++ = next()])
987 case SC_DQUOTE: /* done */
995 case 0: /* unterminated string */
997 "runaway string constant \"%.10s ...",
998 string_buff, token_lineno) ;
1002 if ((c = next()) == '\n')
1007 else if (c == 0) un_next() ;
1021 yylval.ptr = (PTR) new_STRING(
1022 e_flag ? rm_escape(string_buff)
1031 register unsigned char *p = (unsigned char *) string_buff ;
1036 switch (scan_code[*p++ = next()])
1038 case SC_DIV: /* done */
1046 case 0: /* unterminated re */
1048 "runaway regular expression /%.10s ...",
1049 string_buff, token_lineno) ;
1075 /* now we've got the RE, so compile it */
1076 sval = new_STRING(string_buff) ;
1077 yylval.ptr = re_compile(sval) ;