1 /* Small compiler - File input, preprocessing and lexical analysis functions
3 * Copyright (c) ITB CompuPhase, 1997-2003
5 * This software is provided "as-is", without any express or implied warranty.
6 * In no event will the authors be held liable for any damages arising from
7 * the use of this software.
9 * Permission is granted to anyone to use this software for any purpose,
10 * including commercial applications, and to alter it and redistribute it
11 * freely, subject to the following restrictions:
13 * 1. The origin of this software must not be misrepresented; you must not
14 * claim that you wrote the original software. If you use this software in
15 * a product, an acknowledgment in the product documentation would be
16 * appreciated but is not required.
17 * 2. Altered source versions must be plainly marked as such, and must not be
18 * misrepresented as being the original software.
19 * 3. This notice may not be removed or altered from any source distribution.
35 #include "embryo_cc_sc.h"
38 static int match(char *st, int end);
39 static cell litchar(char **lptr, int rawmode);
40 static int alpha(char c);
42 static int icomment; /* currently in multiline comment? */
43 static int iflevel; /* nesting level if #if/#else/#endif */
44 static int skiplevel; /* level at which we started skipping */
45 static int elsedone; /* level at which we have seen an #else */
46 static char term_expr[] = "";
47 static int listline = -1; /* "current line" for the list file */
51 * Uses a LIFO stack to store information. The stack is used by doinclude(),
52 * doswitch() (to hold the state of "swactive") and some other routines.
54 * Porting note: I made the bold assumption that an integer will not be
55 * larger than a pointer (it may be smaller). That is, the stack element
56 * is typedef'ed as a pointer type, but I also store integers on it. See
59 * Global references: stack,stkidx (private to pushstk() and popstk())
61 static stkitem stack[sSTKMAX];
66 if (stkidx >= sSTKMAX)
67 error(102, "parser stack"); /* stack overflow (recursive include?) */
76 return (stkitem) - 1; /* stack is empty */
82 plungequalifiedfile(char *name)
84 static char *extensions[] = { ".inc", ".sma", ".small" };
92 fp = (FILE *) sc_opensrc(name);
93 ext = strchr(name, '\0'); /* save position */
96 /* try to append an extension */
97 strcpy(ext, extensions[ext_idx]);
98 fp = (FILE *) sc_opensrc(name);
100 *ext = '\0'; /* on failure, restore filename */
105 (ext_idx < (int)(sizeof extensions / sizeof extensions[0])));
108 *ext = '\0'; /* restore filename */
111 pushstk((stkitem) inpf);
112 pushstk((stkitem) inpfname); /* pointer to current file name */
113 pushstk((stkitem) curlibrary);
114 pushstk((stkitem) iflevel);
115 assert(skiplevel == 0);
116 pushstk((stkitem) icomment);
117 pushstk((stkitem) fcurrent);
118 pushstk((stkitem) fline);
119 inpfname = strdup(name); /* set name of include file */
121 error(103); /* insufficient memory */
122 inpf = fp; /* set input file pointer to include file */
124 fline = 0; /* set current line number to 0 */
127 setfile(inpfname, fcurrent);
128 listline = -1; /* force a #line directive when changing the file */
129 setactivefile(fcurrent);
134 plungefile(char *name, int try_currentpath, int try_includepaths)
141 result = plungequalifiedfile(name);
143 if (try_includepaths && name[0] != DIRSEP_CHAR)
145 for (i = 0; !result && (ptr = get_path(i)); i++)
149 strncpy(path, ptr, sizeof path);
150 path[sizeof path - 1] = '\0'; /* force '\0' termination */
151 strncat(path, name, sizeof(path) - strlen(path));
152 path[sizeof path - 1] = '\0';
153 result = plungequalifiedfile(path);
160 check_empty(char *lptr)
162 /* verifies that the string contains only whitespace */
163 while (*lptr <= ' ' && *lptr != '\0')
166 error(38); /* extra characters on line */
171 * Gets the name of an include file, pushes the old file on the stack and
172 * sets some options. This routine doesn't use lex(), since lex() doesn't
173 * recognize file names (and directories).
175 * Global references: inpf (altered)
183 char name[PATH_MAX], c;
186 while (*lptr <= ' ' && *lptr != 0) /* skip leading whitespace */
188 if (*lptr == '<' || *lptr == '\"')
190 c = (char)((*lptr == '\"') ? '\"' : '>'); /* termination character */
192 while (*lptr <= ' ' && *lptr != 0) /* skip whitespace after quote */
201 while ((*lptr != c) && (*lptr != '\0') && (i < (int)(sizeof(name) - 1))) /* find the end of the string */
203 while (i > 0 && name[i - 1] <= ' ')
204 i--; /* strip trailing whitespace */
205 assert((i >= 0) && (i < (int)(sizeof(name))));
206 name[i] = '\0'; /* zero-terminate the string */
209 { /* verify correct string termination */
210 error(37); /* invalid string */
214 check_empty(lptr + 1); /* verify that the rest of the line is whitespace */
216 /* Include files between "..." or without quotes are read from the current
217 * directory, or from a list of "include directories". Include files
218 * between <...> are only read from the list of include directories.
220 result = plungefile(name, (c != '>'), TRUE);
222 error(100, name); /* cannot read from ... (fatal error) */
227 * Reads in a new line from the input file pointed to by "inpf". readline()
228 * concatenates lines that end with a \ with the next line. If no more data
229 * can be read from the file, readline() attempts to pop off the previous file
230 * from the stack. If that fails too, it sets "freading" to 0.
232 * Global references: inpf,fline,inpfname,freading,icomment (altered)
240 if (lptr == term_expr)
246 if (!inpf || sc_eofsrc(inpf))
249 error(49); /* invalid line continuation */
250 if (inpf && inpf != inpf_org)
252 i = (int)(long)popstk();
254 { /* All's done; popstk() returns "stack is empty" */
257 /* when there is nothing more to read, the #if/#else stack should
258 * be empty and we should not be in a comment
260 assert(iflevel >= 0);
262 error(1, "#endif", "-end of file-");
264 error(1, "*/", "-end of file-");
268 fcurrent = (int)(long)popstk();
269 icomment = (int)(long)popstk();
270 assert(skiplevel == 0); /* skiplevel was not stored on stack, because it should always be zero at this point */
271 iflevel = (int)(long)popstk();
272 curlibrary = (constvalue *) popstk();
273 free(inpfname); /* return memory allocated for the include file name */
274 inpfname = (char *)popstk();
275 inpf = (FILE *) popstk();
276 setactivefile(fcurrent);
277 listline = -1; /* force a #line directive when changing the file */
281 if (!sc_readsrc(inpf, line, num))
283 *line = '\0'; /* delete line */
288 /* check whether to erase leading spaces */
293 while (*ptr == ' ' || *ptr == '\t')
296 memmove(line, ptr, strlen(ptr) + 1);
299 /* check whether a full line was read */
300 if (!strchr(line, '\n') && !sc_eofsrc(inpf))
301 error(75); /* line too long */
302 /* check if the next line must be concatenated to this line */
303 if ((ptr = strchr(line, '\n')) && ptr > line)
305 assert(*(ptr + 1) == '\0'); /* '\n' should be last in the string */
307 && (*ptr == '\n' || *ptr == ' ' || *ptr == '\t'))
308 ptr--; /* skip trailing whitespace */
312 /* set '\a' at the position of '\\' to make it possible to check
313 * for a line continuation in a single line comment (error 49)
316 *ptr = '\0'; /* erase '\n' (and any trailing whitespace) */
320 line += strlen(line);
324 while (num >= 0 && cont);
329 * Replaces all comments from the line by space characters. It updates
330 * a global variable ("icomment") for multiline comments.
332 * This routine also supports the C++ extension for single line comments.
333 * These comments are started with "//" and end at the end of the line.
335 * Global references: icomment (private to "stripcom")
346 if (*line == '*' && *(line + 1) == '/')
348 icomment = FALSE; /* comment has ended */
349 *line = ' '; /* replace '*' and '/' characters by spaces */
355 if (*line == '/' && *(line + 1) == '*')
356 error(216); /* nested comment */
357 *line = ' '; /* replace comments by spaces */
363 if (*line == '/' && *(line + 1) == '*')
365 icomment = TRUE; /* start comment */
366 *line = ' '; /* replace '/' and '*' characters by spaces */
370 else if (*line == '/' && *(line + 1) == '/')
371 { /* comment to end of line */
372 if (strchr(line, '\a'))
373 error(49); /* invalid line continuation */
374 *line++ = '\n'; /* put "newline" at first slash */
375 *line = '\0'; /* put "zero-terminator" at second slash */
379 if (*line == '\"' || *line == '\'')
380 { /* leave literals unaltered */
381 c = *line; /* ending quote, single or double */
383 while ((*line != c || *(line - 1) == '\\')
386 line += 1; /* skip final quote */
399 * Attempts to interpret a numeric symbol as a boolean value. On success
400 * it returns the number of characters processed (so the line pointer can be
401 * adjusted) and the value is stored in "val". Otherwise it returns 0 and
404 * A boolean value must start with "0b"
407 btoi(cell * val, char *curptr)
413 if (*ptr == '0' && *(ptr + 1) == 'b')
416 while (*ptr == '0' || *ptr == '1' || *ptr == '_')
419 *val = (*val << 1) | (*ptr - '0');
427 if (alphanum(*ptr)) /* number must be delimited by non-alphanumeric char */
430 return (int)(ptr - curptr);
435 * Attempts to interpret a numeric symbol as a decimal value. On success
436 * it returns the number of characters processed and the value is stored in
437 * "val". Otherwise it returns 0 and "val" is garbage.
440 dtoi(cell * val, char *curptr)
446 if (!isdigit(*ptr)) /* should start with digit */
448 while (isdigit(*ptr) || *ptr == '_')
451 *val = (*val * 10) + (*ptr - '0');
454 if (alphanum(*ptr)) /* number must be delimited by non-alphanumerical */
456 if (*ptr == '.' && isdigit(*(ptr + 1)))
457 return 0; /* but a fractional part must not be present */
458 return (int)(ptr - curptr);
463 * Attempts to interpret a numeric symbol as a hexadecimal value. On
464 * success it returns the number of characters processed and the value is
465 * stored in "val". Otherwise it return 0 and "val" is garbage.
468 htoi(cell * val, char *curptr)
474 if (!isdigit(*ptr)) /* should start with digit */
476 if (*ptr == '0' && *(ptr + 1) == 'x')
477 { /* C style hexadecimal notation */
479 while (isxdigit(*ptr) || *ptr == '_')
483 assert(isxdigit(*ptr));
486 *val += (*ptr - '0');
488 *val += (tolower(*ptr) - 'a' + 10);
500 return (int)(ptr - curptr);
530 * Attempts to interpret a numeric symbol as a rational number, either as
531 * IEEE 754 single precision floating point or as a fixed point integer.
532 * On success it returns the number of characters processed and the value is
533 * stored in "val". Otherwise it returns 0 and "val" is unchanged.
535 * Small has stricter definition for floating point numbers than most:
536 * o the value must start with a digit; ".5" is not a valid number, you
538 * o a period must appear in the value, even if an exponent is given; "2e3"
539 * is not a valid number, you should write "2.0e3"
540 * o at least one digit must follow the period; "6." is not a valid number,
541 * you should write "6.0"
544 ftoi(cell * val, char *curptr)
547 double fnum, ffrac, fmult;
548 unsigned long dnum, dbase;
551 assert(rational_digits >= 0 && rational_digits < 9);
552 for (i = 0, dbase = 1; i < rational_digits; i++)
557 if (!isdigit(*ptr)) /* should start with digit */
559 while (isdigit(*ptr) || *ptr == '_')
563 fnum = (fnum * 10.0) + (*ptr - '0');
564 dnum = (dnum * 10L) + (*ptr - '0') * dbase;
569 return 0; /* there must be a period */
571 if (!isdigit(*ptr)) /* there must be at least one digit after the dot */
576 while (isdigit(*ptr) || *ptr == '_')
580 ffrac = (ffrac * 10.0) + (*ptr - '0');
581 fmult = fmult / 10.0;
583 dnum += (*ptr - '0') * dbase;
584 if (dbase == 0L && sc_rationaltag && rational_digits > 0
587 error(222); /* number of digits exceeds rational number precision */
593 fnum += ffrac * fmult; /* form the number so far */
595 { /* optional fractional part */
608 if (!isdigit(*ptr)) /* 'e' should be followed by a digit */
611 while (isdigit(*ptr))
613 exp = (exp * 10) + (*ptr - '0');
617 fmult = pow10(exp * sign);
619 fmult = pow(10, exp * sign);
622 dnum *= (unsigned long)(fmult + 0.5);
625 /* decide how to store the number */
626 if (sc_rationaltag == 0)
628 error(70); /* rational number support was not enabled */
631 else if (rational_digits == 0)
633 float f = (float) fnum;
635 *val = EMBRYO_FLOAT_TO_CELL(f);
637 /* I assume that the C/C++ compiler stores "float" values in IEEE 754
638 * format (as mandated in the ANSI standard). Test this assumption anyway.
641 float test1 = 0.0, test2 = 50.0;
642 Embryo_Cell c1 = EMBRYO_FLOAT_TO_CELL(test1);
643 Embryo_Cell c2 = EMBRYO_FLOAT_TO_CELL(test2);
645 if (c1 != 0x00000000L)
648 "embryo_cc: WARNING! you compiler does not SEEM to interpret IEEE floating\n"
649 "point math as embryo expects. this could be bad.\n"
651 "(float 0.0 != 0x00000000 bitpattern, 0x%08x instead)\n"
653 "this could be an issue with you compiling embryo with gcc 3.2.x that seems\n"
654 "to trigger this sanity check. we are not sure why yet, but gcc 3.3.x works\n"
657 else if (c2 != 0x42480000L)
660 "embryo_cc: WARNING! you compiler does not SEEM to interpret IEEE floating\n"
661 "point math as embryo expects. This could be bad.\n"
663 "(float 50.0 != 0x42480000 bitpattern, 0x%08x instead)\n"
665 "this could be an issue with you compiling embryo with gcc 3.2.x that seems\n"
666 "to trigger this sanity check. we are not sure why yet, but gcc 3.3.x works\n"
678 return (int)(ptr - curptr);
683 * Reads in a number (binary, decimal or hexadecimal). It returns the number
684 * of characters processed or 0 if the symbol couldn't be interpreted as a
685 * number (in this case the argument "val" remains unchanged). This routine
686 * relies on the 'early dropout' implementation of the logical or (||)
689 * Note: the routine doesn't check for a sign (+ or -). The - is checked
690 * for at "hier2()" (in fact, it is viewed as an operator, not as a
691 * sign) and the + is invalid (as in K&R C, and unlike ANSI C).
694 number(cell * val, char *curptr)
699 if ((i = btoi(&value, curptr)) != 0 /* binary? */
700 || (i = htoi(&value, curptr)) != 0 /* hexadecimal? */
701 || (i = dtoi(&value, curptr)) != 0) /* decimal? */
708 return 0; /* else not a number */
713 chrcat(char *str, char chr)
715 str = strchr(str, '\0');
721 preproc_expr(cell * val, int *tag)
728 /* Disable staging; it should be disabled already because
729 * expressions may not be cut off half-way between conditional
730 * compilations. Reset the staging index, but keep the code
733 if (stgget(&index, &code_index))
735 error(57); /* unfinished expression */
736 stgdel(0, code_index);
739 /* append a special symbol to the string, so the expression
740 * analyzer won't try to read a next line when it encounters
743 assert(strlen(pline) < sLINEMAX);
744 term = strchr(pline, '\0');
745 assert(term != NULL);
746 chrcat(pline, PREPROC_TERM); /* the "DEL" code (see SC.H) */
747 result = constexpr(val, tag); /* get value (or 0 on error) */
748 *term = '\0'; /* erase the token (if still present) */
749 lexclr(FALSE); /* clear any "pushed" tokens */
754 * Returns returns a pointer behind the closing quote or to the other
755 * character that caused the input to be ended.
758 getstring(char *dest, int max, char *line)
760 assert(dest != NULL && line != NULL);
762 while (*line <= ' ' && *line != '\0')
763 line++; /* skip whitespace */
766 error(37); /* invalid string */
768 else if (*line == '\0')
773 while (*line != '"' && *line != '\0')
781 lptr++; /* skip closing " */
783 error(37); /* invalid string */
802 * Recognizes the compiler directives. The function returns:
803 * CMD_NONE the line must be processed
804 * CMD_TERM a pending expression must be completed before processing further lines
805 * Other value: the line must be skipped, because:
806 * CMD_CONDFALSE false "#if.." code
807 * CMD_EMPTYLINE line is empty
808 * CMD_INCLUDE the line contains a #include directive
809 * CMD_DEFINE the line contains a #subst directive
810 * CMD_IF the line contains a #if/#else/#endif directive
811 * CMD_DIRECTIVE the line contains some other compiler directive
813 * Global variables: iflevel, skiplevel, elsedone (altered)
825 while (*lptr <= ' ' && *lptr != '\0')
828 return CMD_EMPTYLINE; /* empty line */
830 return skiplevel > 0 ? CMD_CONDFALSE : CMD_NONE; /* it is not a compiler directive */
831 /* compiler directive found */
832 indent_nowarn = TRUE; /* allow loose indentation" */
833 lexclr(FALSE); /* clear any "pushed" tokens */
834 /* on a pending expression, force to return a silent ';' token and force to
837 if (!sc_needsemicolon && stgget(&index, &code_index))
842 tok = lex(&val, &str);
843 ret = skiplevel > 0 ? CMD_CONDFALSE : CMD_DIRECTIVE; /* preset 'ret' to CMD_DIRECTIVE (most common case) */
846 case tpIF: /* conditional compilation */
850 break; /* break out of switch */
851 preproc_expr(&val, NULL); /* get value (or 0 on error) */
858 if (iflevel == 0 && skiplevel == 0)
860 error(26); /* no matching #if */
865 if (elsedone == iflevel)
866 error(60); /* multiple #else directives between #if ... #endif */
868 if (skiplevel == iflevel)
870 else if (skiplevel == 0)
875 #if 0 /* ??? *really* need to use a stack here */
878 if (iflevel == 0 && skiplevel == 0)
880 error(26); /* no matching #if */
883 else if (elsedone == iflevel)
885 error(61); /* #elseif directive may not follow an #else */
890 preproc_expr(&val, NULL); /* get value (or 0 on error) */
892 skiplevel = iflevel; /* we weren't skipping, start skipping now */
894 skiplevel = 0; /* we were skipping, condition is valid -> stop skipping */
895 /* else: we were skipping and condition is invalid -> keep skipping */
902 if (iflevel == 0 && skiplevel == 0)
909 if (skiplevel == iflevel)
911 if (elsedone == iflevel)
912 elsedone = 0; /* ??? actually, should build a stack of #if/#endif and keep
913 * the state whether an #else was seen per nesting level */
918 case tINCLUDE: /* #include directive */
926 char pathname[PATH_MAX];
928 lptr = getstring(pathname, sizeof pathname, lptr);
929 if (pathname[0] != '\0')
932 inpfname = strdup(pathname);
934 error(103); /* insufficient memory */
942 if (lex(&val, &str) != tNUMBER)
943 error(8); /* invalid/non-constant expression */
949 if (skiplevel == 0 && (sc_debug & sCHKBOUNDS) != 0)
951 preproc_expr(&val, NULL); /* get constant expression (or 0 on error) */
953 error(7); /* assertion failed */
960 if (lex(&val, &str) == tSYMBOL)
962 if (strcmp(str, "ctrlchar") == 0)
964 if (lex(&val, &str) != tNUMBER)
965 error(27); /* invalid character constant */
966 sc_ctrlchar = (char)val;
968 else if (strcmp(str, "compress") == 0)
972 preproc_expr(&val, NULL);
973 sc_compress = (int)val; /* switch code packing on/off */
975 else if (strcmp(str, "dynamic") == 0)
977 preproc_expr(&sc_stksize, NULL);
979 else if (strcmp(str, "library") == 0)
981 char name[sNAMEMAX + 1];
983 while (*lptr <= ' ' && *lptr != '\0')
987 lptr = getstring(name, sizeof name, lptr);
994 (i < (int)(sizeof(name))) &&
1000 if (name[0] == '\0')
1006 if (strlen(name) > sEXPMAX)
1007 error(220, name, sEXPMAX); /* exported symbol is truncated */
1008 /* add the name if it does not yet exist in the table */
1009 if (!find_constval(&libname_tab, name, 0))
1011 append_constval(&libname_tab, name, 0, 0);
1014 else if (strcmp(str, "pack") == 0)
1018 preproc_expr(&val, NULL); /* default = packed/unpacked */
1019 sc_packstr = (int)val;
1021 else if (strcmp(str, "rational") == 0)
1023 char name[sNAMEMAX + 1];
1027 /* first gather all information, start with the tag name */
1028 while ((*lptr <= ' ') && (*lptr != '\0'))
1031 (i < (int)(sizeof(name))) &&
1036 /* then the precision (for fixed point arithmetic) */
1037 while (*lptr <= ' ' && *lptr != '\0')
1041 preproc_expr(&digits, NULL);
1042 if (digits <= 0 || digits > 9)
1044 error(68); /* invalid rational number precision */
1050 /* add the tag (make it public) and check the values */
1051 i = sc_addtag(name);
1053 if (sc_rationaltag == 0
1054 || (sc_rationaltag == i
1055 && rational_digits == (int)digits))
1058 rational_digits = (int)digits;
1062 error(69); /* rational number format already set, can only be set once */
1065 else if (strcmp(str, "semicolon") == 0)
1069 preproc_expr(&val, NULL);
1070 sc_needsemicolon = (int)val;
1072 else if (strcmp(str, "tabsize") == 0)
1076 preproc_expr(&val, NULL);
1077 sc_tabsize = (int)val;
1079 else if (strcmp(str, "align") == 0)
1081 sc_alignnext = TRUE;
1083 else if (strcmp(str, "unused") == 0)
1085 char name[sNAMEMAX + 1];
1092 while ((*lptr <= ' ') && (*lptr != '\0'))
1095 (i < (int)(sizeof(name))) &&
1100 /* get the symbol */
1101 sym = findloc(name);
1103 sym = findglb(name);
1106 sym->usage |= uREAD;
1107 if (sym->ident == iVARIABLE
1108 || sym->ident == iREFERENCE
1109 || sym->ident == iARRAY
1110 || sym->ident == iREFARRAY)
1111 sym->usage |= uWRITTEN;
1115 error(17, name); /* undefined symbol */
1117 /* see if a comma follows the name */
1118 while (*lptr <= ' ' && *lptr != '\0')
1120 comma = (*lptr == ',');
1128 error(207); /* unknown #pragma */
1133 error(207); /* unknown #pragma */
1143 assert(inpf != NULL);
1144 if (inpf != inpf_org)
1152 /* write opcode to output file */
1156 while (*lptr <= ' ' && *lptr != '\0')
1158 for (i = 0; i < 40 && (isalpha(*lptr) || *lptr == '.'); i++, lptr++)
1159 name[i] = (char)tolower(*lptr);
1164 code_idx += opcodes(1);
1165 /* write parameter (if any) */
1166 while (*lptr <= ' ' && *lptr != '\0')
1172 tok = lex(&val, &str);
1178 code_idx += opargs(1);
1184 if (!sym || (sym->ident != iFUNCTN
1185 && sym->ident != iREFFUNC
1186 && (sym->usage & uDEFINE) == 0))
1188 error(17, str); /* undefined symbol */
1192 outval(sym->addr, FALSE);
1193 /* mark symbol as "used", unknown whether for read or write */
1194 markusage(sym, uREAD | uWRITTEN);
1195 code_idx += opargs(1);
1201 extern char *sc_tokens[]; /* forward declaration */
1204 sprintf(s2, "%c", (char)tok);
1206 strcpy(s2, sc_tokens[tok - tFIRST]);
1207 error(1, sc_tokens[tSYMBOL - tFIRST], s2);
1217 #if !defined NO_DEFINE
1223 char *pattern, *substitution;
1225 int count, prefixlen;
1228 /* find the pattern to match */
1229 while (*lptr <= ' ' && *lptr != '\0')
1231 start = lptr; /* save starting point of the match pattern */
1233 while (*lptr > ' ' && *lptr != '\0')
1235 litchar(&lptr, FALSE); /* litchar() advances "lptr" and handles escape characters */
1239 /* check pattern to match */
1240 if (!isalpha(*start) && *start != '_')
1242 error(74); /* pattern must start with an alphabetic character */
1245 /* store matched pattern */
1246 pattern = malloc(count + 1);
1248 error(103); /* insufficient memory */
1254 assert(*lptr != '\0');
1255 pattern[count++] = (char)litchar(&lptr, FALSE);
1257 pattern[count] = '\0';
1258 /* special case, erase trailing variable, because it could match anything */
1259 if (count >= 2 && isdigit(pattern[count - 1])
1260 && pattern[count - 2] == '%')
1261 pattern[count - 2] = '\0';
1262 /* find substitution string */
1263 while (*lptr <= ' ' && *lptr != '\0')
1265 start = lptr; /* save starting point of the match pattern */
1268 while (*lptr != '\0')
1270 /* keep position of the start of trailing whitespace */
1285 /* store matched substitution */
1286 substitution = malloc(count + 1); /* +1 for '\0' */
1288 error(103); /* insufficient memory */
1294 assert(*lptr != '\0');
1295 substitution[count++] = *lptr++;
1297 substitution[count] = '\0';
1298 /* check whether the definition already exists */
1299 for (prefixlen = 0, start = pattern;
1300 isalpha(*start) || isdigit(*start) || *start == '_';
1301 prefixlen++, start++)
1303 assert(prefixlen > 0);
1304 if ((def = find_subst(pattern, prefixlen)))
1306 if (strcmp(def->first, pattern) != 0
1307 || strcmp(def->second, substitution) != 0)
1308 error(201, pattern); /* redefinition of macro (non-identical) */
1309 delete_subst(pattern, prefixlen);
1311 /* add the pattern/substitution pair to the list */
1312 assert(pattern[0] != '\0');
1313 insert_subst(pattern, substitution, prefixlen);
1322 if (lex(&val, &str) == tSYMBOL)
1324 if (!delete_subst(str, strlen(str)))
1325 error(17, str); /* undefined symbol */
1329 error(20, str); /* invalid symbol name */
1336 error(31); /* unknown compiler directive */
1337 ret = skiplevel > 0 ? CMD_DIRECTIVE : CMD_NONE; /* line must be processed (if skiplevel==0) */
1342 #if !defined NO_DEFINE
1344 is_startstring(char *string)
1346 if (*string == '\"' || *string == '\'')
1347 return TRUE; /* "..." */
1352 if (*string == '\"' || *string == '\'')
1353 return TRUE; /* !"..." */
1354 if (*string == sc_ctrlchar)
1357 if (*string == '\"' || *string == '\'')
1358 return TRUE; /* !\"..." */
1361 else if (*string == sc_ctrlchar)
1364 if (*string == '\"' || *string == '\'')
1365 return TRUE; /* \"..." */
1369 if (*string == '\"' || *string == '\'')
1370 return TRUE; /* \!"..." */
1378 skipstring(char *string)
1381 int rawstring = FALSE;
1383 while (*string == '!' || *string == sc_ctrlchar)
1385 rawstring = (*string == sc_ctrlchar);
1390 assert(endquote == '\"' || endquote == '\'');
1391 string++; /* skip open quote */
1392 while (*string != endquote && *string != '\0')
1393 litchar(&string, rawstring);
1398 skippgroup(char *string)
1401 char open = *string;
1420 close = '\0'; /* only to avoid a compiler warning */
1424 while (*string != close || nest > 0)
1426 if (*string == open)
1428 else if (*string == close)
1430 else if (is_startstring(string))
1431 string = skipstring(string);
1432 if (*string == '\0')
1440 strdel(char *str, size_t len)
1442 size_t length = strlen(str);
1446 memmove(str, str + len, length - len + 1); /* include EOS byte */
1451 strins(char *dest, char *src, size_t srclen)
1453 size_t destlen = strlen(dest);
1455 assert(srclen <= strlen(src));
1456 memmove(dest + srclen, dest, destlen + 1); /* include EOS byte */
1457 memcpy(dest, src, srclen);
1462 substpattern(char *line, size_t buffersize, char *pattern, char *substitution)
1465 char *p, *s, *e, *args[10];
1466 int match, arg, len;
1468 memset(args, 0, sizeof args);
1470 /* check the length of the prefix */
1471 for (prefixlen = 0, s = pattern; isalpha(*s) || isdigit(*s) || *s == '_';
1474 assert(prefixlen > 0);
1475 assert(strncmp(line, pattern, prefixlen) == 0);
1477 /* pattern prefix matches; match the rest of the pattern, gather
1480 s = line + prefixlen;
1481 p = pattern + prefixlen;
1482 match = TRUE; /* so far, pattern matches */
1483 while (match && *s != '\0' && *p != '\0')
1491 assert(arg >= 0 && arg <= 9);
1492 p++; /* skip parameter id */
1494 /* match the source string up to the character after the digit
1495 * (skipping strings in the process
1498 while (*e != *p && *e != '\0' && *e != '\n')
1500 if (is_startstring(e)) /* skip strings */
1502 else if (strchr("({[", *e)) /* skip parenthized groups */
1505 e++; /* skip non-alphapetic character (or closing quote of
1506 * a string, or the closing paranthese of a group) */
1508 /* store the parameter (overrule any earlier) */
1512 args[arg] = malloc(len + 1);
1514 error(103); /* insufficient memory */
1515 strncpy(args[arg], s, len);
1516 args[arg][len] = '\0';
1517 /* character behind the pattern was matched too */
1522 else if (*e == '\n' && *p == ';' && *(p + 1) == '\0'
1523 && !sc_needsemicolon)
1525 s = e; /* allow a trailing ; in the pattern match to end of line */
1529 assert(*e == '\0' || *e == '\n');
1540 else if (*p == ';' && *(p + 1) == '\0' && !sc_needsemicolon)
1542 /* source may be ';' or end of the line */
1543 while (*s <= ' ' && *s != '\0')
1544 s++; /* skip white space */
1545 if (*s != ';' && *s != '\0')
1547 p++; /* skip the semicolon in the pattern */
1553 /* skip whitespace between two non-alphanumeric characters, except
1554 * for two identical symbols
1556 assert(p > pattern);
1557 if (!alphanum(*p) && *(p - 1) != *p)
1558 while (*s <= ' ' && *s != '\0')
1559 s++; /* skip white space */
1560 ch = litchar(&p, FALSE); /* this increments "p" */
1564 s++; /* this character matches */
1568 if (match && *p == '\0')
1570 /* if the last character to match is an alphanumeric character, the
1571 * current character in the source may not be alphanumeric
1573 assert(p > pattern);
1574 if (alphanum(*(p - 1)) && alphanum(*s))
1580 /* calculate the length of the substituted string */
1581 for (e = substitution, len = 0; *e != '\0'; e++)
1583 if (*e == '%' && isdigit(*(e + 1)))
1585 arg = *(e + 1) - '0';
1586 assert(arg >= 0 && arg <= 9);
1588 len += strlen(args[arg]);
1589 e++; /* skip %, digit is skipped later */
1596 /* check length of the string after substitution */
1597 if (strlen(line) + len - (int)(s - line) > buffersize)
1599 error(75); /* line too long */
1603 /* substitute pattern */
1604 strdel(line, (int)(s - line));
1605 for (e = substitution, s = line; *e != '\0'; e++)
1607 if (*e == '%' && isdigit(*(e + 1)))
1609 arg = *(e + 1) - '0';
1610 assert(arg >= 0 && arg <= 9);
1613 strins(s, args[arg], strlen(args[arg]));
1614 s += strlen(args[arg]);
1616 e++; /* skip %, digit is skipped later */
1627 for (arg = 0; arg < 10; arg++)
1635 substallpatterns(char *line, int buffersize)
1642 while (*start != '\0')
1644 /* find the start of a prefix (skip all non-alphabetic characters),
1647 while (!isalpha(*start) && *start != '_' && *start != '\0')
1650 if (is_startstring(start))
1652 start = skipstring(start);
1654 break; /* abort loop on error */
1656 start++; /* skip non-alphapetic character (or closing quote of a string) */
1659 break; /* abort loop on error */
1660 /* get the prefix (length), look for a matching definition */
1663 while (isalpha(*end) || isdigit(*end) || *end == '_')
1668 assert(prefixlen > 0);
1669 subst = find_subst(start, prefixlen);
1672 /* properly match the pattern and substitute */
1674 (start, buffersize - (start - line), subst->first,
1676 start = end; /* match failed, skip this prefix */
1677 /* match succeeded: do not update "start", because the substitution text
1678 * may be matched by other macros
1683 start = end; /* no macro with this prefix, skip this prefix */
1691 * Reads a line by readline() into "pline" and performs basic preprocessing:
1692 * deleting comments, skipping lines with false "#if.." code and recognizing
1693 * other compiler directives. There is an indirect recursion: lex() calls
1694 * preprocess() if a new line must be read, preprocess() calls command(),
1695 * which at his turn calls lex() to identify the token.
1697 * Global references: lptr (altered)
1699 * freading (referred to only)
1711 stripcom(pline); /* ??? no need for this when reading back from list file (in the second pass) */
1712 lptr = pline; /* set "line pointer" to start of the parsing buffer */
1713 iscommand = command();
1714 if (iscommand != CMD_NONE)
1715 errorset(sRESET); /* reset error flag ("panic mode") on empty line or directive */
1716 #if !defined NO_DEFINE
1717 if (iscommand == CMD_NONE)
1719 assert(lptr != term_expr);
1720 substallpatterns(pline, sLINEMAX);
1721 lptr = pline; /* reset "line pointer" to start of the parsing buffer */
1725 while (iscommand != CMD_NONE && iscommand != CMD_TERM && freading); /* enddo */
1729 unpackedstring(char *lptr, int rawstring)
1731 while (*lptr != '\0')
1733 /* check for doublequotes indicating the end of the string */
1736 /* check whether there's another pair of quotes following.
1737 * If so, paste the two strings together, thus
1738 * "pants""off" becomes "pantsoff"
1740 if (*(lptr + 1) == '\"')
1747 { /* ignore '\a' (which was inserted at a line concatenation) */
1751 stowlit(litchar(&lptr, rawstring)); /* litchar() alters "lptr" */
1753 stowlit(0); /* terminate string */
1758 packedstring(char *lptr, int rawstring)
1763 i = sizeof(ucell) - (charbits / 8); /* start at most significant byte */
1765 while (*lptr != '\0')
1767 /* check for doublequotes indicating the end of the string */
1770 /* check whether there's another pair of quotes following.
1771 * If so, paste the two strings together, thus
1772 * "pants""off" becomes "pantsoff"
1774 if (*(lptr + 1) == '\"')
1781 { /* ignore '\a' (which was inserted at a line concatenation) */
1785 c = litchar(&lptr, rawstring); /* litchar() alters "lptr" */
1786 if (c >= (ucell) (1 << charbits))
1787 error(43); /* character constant exceeds range */
1788 val |= (c << 8 * i);
1794 i = (i + sizeof(ucell) - (charbits / 8)) % sizeof(ucell);
1796 /* save last code; make sure there is at least one terminating zero character */
1797 if (i != (int)(sizeof(ucell) - (charbits / 8)))
1798 stowlit(val); /* at least one zero character in "val" */
1800 stowlit(0); /* add full cell of zeros */
1804 /* lex(lexvalue,lexsym) Lexical Analysis
1806 * lex() first deletes leading white space, then checks for multi-character
1807 * operators, keywords (including most compiler directives), numbers,
1808 * labels, symbols and literals (literal characters are converted to a number
1809 * and are returned as such). If every check fails, the line must contain
1810 * a single-character operator. So, lex() returns this character. In the other
1811 * case (something did match), lex() returns the number of the token. All
1812 * these tokens have been assigned numbers above 255.
1814 * Some tokens have "attributes":
1815 * tNUMBER the value of the number is return in "lexvalue".
1816 * tRATIONAL the value is in IEEE 754 encoding or in fixed point
1817 * encoding in "lexvalue".
1818 * tSYMBOL the first sNAMEMAX characters of the symbol are
1819 * stored in a buffer, a pointer to this buffer is
1820 * returned in "lexsym".
1821 * tLABEL the first sNAMEMAX characters of the label are
1822 * stored in a buffer, a pointer to this buffer is
1823 * returned in "lexsym".
1824 * tSTRING the string is stored in the literal pool, the index
1825 * in the literal pool to this string is stored in
1828 * lex() stores all information (the token found and possibly its attribute)
1829 * in global variables. This allows a token to be examined twice. If "_pushed"
1830 * is true, this information is returned.
1832 * Global references: lptr (altered)
1833 * fline (referred to only)
1834 * litidx (referred to only)
1835 * _lextok, _lexval, _lexstr
1841 static cell _lexval;
1842 static char _lexstr[sLINEMAX + 1];
1843 static int _lexnewline;
1848 stkidx = 0; /* index for pushstk() and popstk() */
1849 iflevel = 0; /* preprocessor: nesting of "#if" */
1850 skiplevel = 0; /* preprocessor: skipping lines or compiling lines */
1851 icomment = FALSE; /* currently not in a multiline comment */
1852 _pushed = FALSE; /* no token pushed back into lex */
1853 _lexnewline = FALSE;
1856 char *sc_tokens[] = {
1857 "*=", "/=", "%=", "+=", "-=", "<<=", ">>>=", ">>=", "&=", "^=", "|=",
1858 "||", "&&", "==", "!=", "<=", ">=", "<<", ">>>", ">>", "++", "--",
1860 "assert", "break", "case", "char", "const", "continue", "default",
1861 "defined", "do", "else", "enum", "exit", "for", "forward", "goto",
1862 "if", "native", "new", "operator", "public", "return", "sizeof",
1863 "sleep", "static", "stock", "switch", "tagof", "while",
1864 "#assert", "#define", "#else", "#emit", "#endif", "#endinput",
1865 "#endscript", "#file", "#if", "#include", "#line", "#pragma", "#undef",
1866 ";", ";", "-integer value-", "-rational value-", "-identifier-",
1867 "-label-", "-string-"
1871 lex(cell * lexvalue, char **lexsym)
1873 int i, toolong, newline, rawstring;
1878 _pushed = FALSE; /* reset "_pushed" flag */
1879 *lexvalue = _lexval;
1884 _lextok = 0; /* preset all values */
1887 *lexvalue = _lexval;
1889 _lexnewline = FALSE;
1893 newline = (lptr == pline); /* does lptr point to start of line buffer */
1894 while (*lptr <= ' ')
1895 { /* delete leading white space */
1898 preprocess(); /* preprocess resets "lptr" */
1901 if (lptr == term_expr) /* special sequence to terminate a pending expression */
1902 return (_lextok = tENDEXPR);
1903 _lexnewline = TRUE; /* set this after preprocess(), because
1904 * preprocess() calls lex() recursively */
1915 for (i = 0; i < (int)(lptr - pline); i++)
1916 if (pline[i] == '\t' && sc_tabsize > 0)
1918 (int)(sc_tabsize - (stmtindent + sc_tabsize) % sc_tabsize);
1925 while (i <= tMIDDLE)
1926 { /* match multi-character operators */
1927 if (match(*tokptr, FALSE))
1936 { /* match reserved words and compiler directives */
1937 if (match(*tokptr, TRUE))
1940 errorset(sRESET); /* reset error flag (clear the "panic mode") */
1947 if ((i = number(&_lexval, lptr)) != 0)
1950 *lexvalue = _lexval;
1953 else if ((i = ftoi(&_lexval, lptr)) != 0)
1955 _lextok = tRATIONAL;
1956 *lexvalue = _lexval;
1959 else if (alpha(*lptr))
1960 { /* symbol or label */
1961 /* Note: only sNAMEMAX characters are significant. The compiler
1962 * generates a warning if a symbol exceeds this length.
1967 while (alphanum(*lptr))
1978 error(200, _lexstr, sNAMEMAX); /* symbol too long, truncated to sNAMEMAX chars */
1979 if (_lexstr[0] == PUBLIC_CHAR && _lexstr[1] == '\0')
1981 _lextok = PUBLIC_CHAR; /* '@' all alone is not a symbol, it is an operator */
1983 else if (_lexstr[0] == '_' && _lexstr[1] == '\0')
1985 _lextok = '_'; /* '_' by itself is not a symbol, it is a placeholder */
1987 if (*lptr == ':' && sc_allowtags && _lextok != PUBLIC_CHAR)
1989 _lextok = tLABEL; /* it wasn't a normal symbol, it was a label/tagname */
1990 lptr += 1; /* skip colon */
1993 else if (*lptr == '\"' || (*lptr == sc_ctrlchar && *(lptr + 1) == '\"'))
1994 { /* unpacked string literal */
1996 rawstring = (*lptr == sc_ctrlchar);
1997 *lexvalue = _lexval = litidx;
1998 lptr += 1; /* skip double quote */
2000 lptr += 1; /* skip "escape" character too */
2002 sc_packstr ? packedstring(lptr, rawstring) : unpackedstring(lptr,
2005 lptr += 1; /* skip final quote */
2007 error(37); /* invalid (non-terminated) string */
2009 else if ((*lptr == '!' && *(lptr + 1) == '\"')
2010 || (*lptr == '!' && *(lptr + 1) == sc_ctrlchar && *(lptr + 2) == '\"')
2011 || (*lptr == sc_ctrlchar && *(lptr + 1) == '!'
2012 && *(lptr + 2) == '\"'))
2013 { /* packed string literal */
2015 rawstring = (*lptr == sc_ctrlchar || *(lptr + 1) == sc_ctrlchar);
2016 *lexvalue = _lexval = litidx;
2017 lptr += 2; /* skip exclamation point and double quote */
2019 lptr += 1; /* skip "escape" character too */
2021 sc_packstr ? unpackedstring(lptr, rawstring) : packedstring(lptr,
2024 lptr += 1; /* skip final quote */
2026 error(37); /* invalid (non-terminated) string */
2028 else if (*lptr == '\'')
2029 { /* character literal */
2030 lptr += 1; /* skip quote */
2032 *lexvalue = _lexval = litchar(&lptr, FALSE);
2034 lptr += 1; /* skip final quote */
2036 error(27); /* invalid character constant (must be one character) */
2038 else if (*lptr == ';')
2039 { /* semicolumn resets "error" flag */
2042 errorset(sRESET); /* reset error flag (clear the "panic mode") */
2046 _lextok = *lptr; /* if every match fails, return the character */
2047 lptr += 1; /* increase the "lptr" pointer */
2054 * Pushes a token back, so the next call to lex() will return the token
2055 * last examined, instead of a new token.
2057 * Only one token can be pushed back.
2059 * In fact, lex() already stores the information it finds into global
2060 * variables, so all that is to be done is set a flag that informs lex()
2061 * to read and return the information from these variables, rather than
2062 * to read in a new token from the input file.
2067 assert(_pushed == FALSE);
2073 * Sets the variable "_pushed" to 0 to make sure lex() will read in a new
2074 * symbol (a not continue with some old one). This is required upon return
2075 * from Assembler mode.
2083 lptr = strchr(pline, '\0');
2084 assert(lptr != NULL);
2090 * This routine is useful if only a simple check is needed. If the token
2091 * differs from the one expected, it is pushed back.
2094 matchtoken(int token)
2100 tok = lex(&val, &str);
2101 if (tok == token || (token == tTERM && (tok == ';' || tok == tENDEXPR)))
2105 else if (!sc_needsemicolon && token == tTERM && (_lexnewline || !freading))
2107 lexpush(); /* push "tok" back, we use the "hidden" newline token */
2119 * Returns additional information of a token after using "matchtoken()"
2120 * or needtoken(). It does no harm using this routine after a call to
2121 * "lex()", but lex() already returns the same information.
2123 * The token itself is the return value. Normally, this one is already known.
2126 tokeninfo(cell * val, char **str)
2128 /* if the token was pushed back, tokeninfo() returns the token and
2129 * parameters of the *next* token, not of the *current* token.
2139 * This routine checks for a required token and gives an error message if
2140 * it isn't there (and returns FALSE in that case).
2142 * Global references: _lextok;
2145 needtoken(int token)
2147 char s1[20], s2[20];
2149 if (matchtoken(token))
2155 /* token already pushed back */
2158 sprintf(s1, "%c", (char)token); /* single character token */
2160 strcpy(s1, sc_tokens[token - tFIRST]); /* multi-character symbol */
2162 strcpy(s2, "-end of file-");
2163 else if (_lextok < 256)
2164 sprintf(s2, "%c", (char)_lextok);
2166 strcpy(s2, sc_tokens[_lextok - tFIRST]);
2167 error(1, s1, s2); /* expected ..., but found ... */
2174 * Compares a series of characters from the input file with the characters
2175 * in "st" (that contains a token). If the token on the input file matches
2176 * "st", the input file pointer "lptr" is adjusted to point to the next
2177 * token, otherwise "lptr" remains unaltered.
2179 * If the parameter "end: is true, match() requires that the first character
2180 * behind the recognized token is non-alphanumeric.
2182 * Global references: lptr (altered)
2185 match(char *st, int end)
2200 { /* symbol must terminate with non-alphanumeric char */
2204 lptr = ptr; /* match found, skip symbol */
2210 * Stores a value into the literal queue. The literal queue is used for
2211 * literal strings used in functions and for initializing array variables.
2213 * Global references: litidx (altered)
2219 if (litidx >= litmax)
2223 litmax += sDEF_LITMAX;
2224 p = (cell *) realloc(litq, litmax * sizeof(cell));
2226 error(102, "literal table"); /* literal table overflow (fatal error) */
2229 assert(litidx < litmax);
2230 litq[litidx++] = value;
2235 * Return current literal character and increase the pointer to point
2236 * just behind this literal character.
2238 * Note: standard "escape sequences" are suported, but the backslash may be
2239 * replaced by another character; the syntax '\ddd' is supported,
2240 * but ddd must be decimal!
2243 litchar(char **lptr, int rawmode)
2246 unsigned char *cptr;
2248 cptr = (unsigned char *)*lptr;
2249 if (rawmode || *cptr != sc_ctrlchar)
2250 { /* no escape character */
2257 if (*cptr == sc_ctrlchar)
2259 c = *cptr; /* \\ == \ (the escape character itself) */
2266 case 'a': /* \a == audible alarm */
2270 case 'b': /* \b == backspace */
2274 case 'e': /* \e == escape */
2278 case 'f': /* \f == form feed */
2282 case 'n': /* \n == NewLine character */
2286 case 'r': /* \r == carriage return */
2290 case 't': /* \t == horizontal TAB */
2294 case 'v': /* \v == vertical TAB */
2298 case '\'': /* \' == ' (single quote) */
2299 case '"': /* \" == " (single quote) */
2300 case '%': /* \% == % (percent) */
2308 while (*cptr >= '0' && *cptr <= '9') /* decimal! */
2309 c = c * 10 + *cptr++ - '0';
2311 cptr++; /* swallow a trailing ';' */
2315 error(27); /* invalid character constant */
2320 *lptr = (char *)cptr;
2321 assert(c >= 0 && c < 256);
2327 * Test if character "c" is alphabetic ("a".."z"), an underscore ("_")
2328 * or an "at" sign ("@"). The "@" is an extension to standard C.
2333 return (isalpha(c) || c == '_' || c == PUBLIC_CHAR);
2338 * Test if character "c" is alphanumeric ("a".."z", "0".."9", "_" or "@")
2343 return (alpha(c) || isdigit(c));
2346 /* The local variable table must be searched backwards, so that the deepest
2347 * nesting of local variables is searched first. The simplest way to do
2348 * this is to insert all new items at the head of the list.
2349 * In the global list, the symbols are kept in sorted order, so that the
2350 * public functions are written in sorted order.
2353 add_symbol(symbol * root, symbol * entry, int sort)
2358 while (root->next && strcmp(entry->name, root->next->name) > 0)
2361 if (!(newsym = (symbol *)malloc(sizeof(symbol))))
2366 memcpy(newsym, entry, sizeof(symbol));
2367 newsym->next = root->next;
2368 root->next = newsym;
2373 free_symbol(symbol * sym)
2377 /* free all sub-symbol allocated memory blocks, depending on the
2378 * kind of the symbol
2380 assert(sym != NULL);
2381 if (sym->ident == iFUNCTN)
2383 /* run through the argument list; "default array" arguments
2384 * must be freed explicitly; the tag list must also be freed */
2385 assert(sym->dim.arglist != NULL);
2386 for (arg = sym->dim.arglist; arg->ident != 0; arg++)
2388 if (arg->ident == iREFARRAY && arg->hasdefault)
2389 free(arg->defvalue.array.data);
2390 else if (arg->ident == iVARIABLE
2391 && ((arg->hasdefault & uSIZEOF) != 0
2392 || (arg->hasdefault & uTAGOF) != 0))
2393 free(arg->defvalue.size.symname);
2394 assert(arg->tags != NULL);
2397 free(sym->dim.arglist);
2399 assert(sym->refer != NULL);
2405 delete_symbol(symbol * root, symbol * sym)
2407 /* find the symbol and its predecessor
2408 * (this function assumes that you will never delete a symbol that is not
2409 * in the table pointed at by "root")
2411 assert(root != sym);
2412 while (root->next != sym)
2415 assert(root != NULL);
2418 /* unlink it, then free it */
2419 root->next = sym->next;
2424 delete_symbols(symbol * root, int level, int delete_labels,
2425 int delete_functions)
2429 /* erase only the symbols with a deeper nesting level than the
2430 * specified nesting level */
2434 if (sym->compound < level)
2436 if ((delete_labels || sym->ident != iLABEL)
2437 && (delete_functions || sym->ident != iFUNCTN
2438 || (sym->usage & uNATIVE) != 0) && (delete_functions
2439 || sym->ident != iCONSTEXPR
2440 || (sym->usage & uPREDEF) ==
2441 0) && (delete_functions
2447 root->next = sym->next;
2452 /* if the function was prototyped, but not implemented in this source,
2453 * mark it as such, so that its use can be flagged
2455 if (sym->ident == iFUNCTN && (sym->usage & uDEFINE) == 0)
2456 sym->usage |= uMISSING;
2457 if (sym->ident == iFUNCTN || sym->ident == iVARIABLE
2458 || sym->ident == iARRAY)
2459 sym->usage &= ~uDEFINE; /* clear "defined" flag */
2460 /* for user defined operators, also remove the "prototyped" flag, as
2461 * user-defined operators *must* be declared before use
2463 if (sym->ident == iFUNCTN && !isalpha(*sym->name)
2464 && *sym->name != '_' && *sym->name != PUBLIC_CHAR)
2465 sym->usage &= ~uPROTOTYPED;
2466 root = sym; /* skip the symbol */
2471 /* The purpose of the hash is to reduce the frequency of a "name"
2472 * comparison (which is costly). There is little interest in avoiding
2473 * clusters in similar names, which is why this function is plain simple.
2476 namehash(char *name)
2478 unsigned char *ptr = (unsigned char *)name;
2479 int len = strlen(name);
2484 return (len << 24Lu) + (ptr[0] << 16Lu) + (ptr[len - 1] << 8Lu) +
2489 find_symbol(symbol * root, char *name, int fnumber)
2491 symbol *ptr = root->next;
2492 unsigned long hash = namehash(name);
2496 if (hash == ptr->hash && strcmp(name, ptr->name) == 0
2497 && !ptr->parent && (ptr->fnumber < 0
2498 || ptr->fnumber == fnumber))
2506 find_symbol_child(symbol * root, symbol * sym)
2508 symbol *ptr = root->next;
2512 if (ptr->parent == sym)
2519 /* Adds "bywhom" to the list of referrers of "entry". Typically,
2520 * bywhom will be the function that uses a variable or that calls
2524 refer_symbol(symbol * entry, symbol * bywhom)
2528 assert(bywhom != NULL); /* it makes no sense to add a "void" referrer */
2529 assert(entry != NULL);
2530 assert(entry->refer != NULL);
2532 /* see if it is already there */
2533 for (count = 0; count < entry->numrefers && entry->refer[count] != bywhom;
2536 if (count < entry->numrefers)
2538 assert(entry->refer[count] == bywhom);
2542 /* see if there is an empty spot in the referrer list */
2543 for (count = 0; count < entry->numrefers && entry->refer[count];
2546 assert(count <= entry->numrefers);
2547 if (count == entry->numrefers)
2550 int newsize = 2 * entry->numrefers;
2552 assert(newsize > 0);
2553 /* grow the referrer list */
2554 refer = (symbol **) realloc(entry->refer, newsize * sizeof(symbol *));
2556 return FALSE; /* insufficient memory */
2557 /* initialize the new entries */
2558 entry->refer = refer;
2559 for (count = entry->numrefers; count < newsize; count++)
2560 entry->refer[count] = NULL;
2561 count = entry->numrefers; /* first empty spot */
2562 entry->numrefers = newsize;
2565 /* add the referrer */
2566 assert(entry->refer[count] == NULL);
2567 entry->refer[count] = bywhom;
2572 markusage(symbol * sym, int usage)
2574 sym->usage |= (char)usage;
2575 /* check if (global) reference must be added to the symbol */
2576 if ((usage & (uREAD | uWRITTEN)) != 0)
2578 /* only do this for global symbols */
2579 if (sym->vclass == sGLOBAL)
2581 /* "curfunc" should always be valid, since statements may not occurs
2582 * outside functions; in the case of syntax errors, however, the
2583 * compiler may arrive through this function
2586 refer_symbol(sym, curfunc);
2593 * Returns a pointer to the global symbol (if found) or NULL (if not found)
2598 return find_symbol(&glbtab, name, fcurrent);
2603 * Returns a pointer to the local symbol (if found) or NULL (if not found).
2604 * See add_symbol() how the deepest nesting level is searched first.
2609 return find_symbol(&loctab, name, -1);
2613 findconst(char *name)
2617 sym = find_symbol(&loctab, name, -1); /* try local symbols first */
2618 if (!sym || sym->ident != iCONSTEXPR) /* not found, or not a constant */
2619 sym = find_symbol(&glbtab, name, fcurrent);
2620 if (!sym || sym->ident != iCONSTEXPR)
2622 assert(sym->parent == NULL); /* constants have no hierarchy */
2627 finddepend(symbol * parent)
2631 sym = find_symbol_child(&loctab, parent); /* try local symbols first */
2632 if (!sym) /* not found */
2633 sym = find_symbol_child(&glbtab, parent);
2639 * Adds a symbol to the symbol table (either global or local variables,
2640 * or global and local constants).
2643 addsym(char *name, cell addr, int ident, int vclass, int tag, int usage)
2645 symbol entry, **refer;
2647 /* global variables/constants/functions may only be defined once */
2648 assert(!(ident == iFUNCTN || ident == iCONSTEXPR) || vclass != sGLOBAL
2649 || findglb(name) == NULL);
2650 /* labels may only be defined once */
2651 assert(ident != iLABEL || findloc(name) == NULL);
2653 /* create an empty referrer list */
2654 if (!(refer = (symbol **)malloc(sizeof(symbol *))))
2656 error(103); /* insufficient memory */
2661 /* first fill in the entry */
2662 strcpy(entry.name, name);
2663 entry.hash = namehash(name);
2665 entry.vclass = (char)vclass;
2666 entry.ident = (char)ident;
2668 entry.usage = (char)usage;
2669 entry.compound = 0; /* may be overridden later */
2670 entry.fnumber = -1; /* assume global visibility (ignored for local symbols) */
2671 entry.numrefers = 1;
2672 entry.refer = refer;
2673 entry.parent = NULL;
2675 /* then insert it in the list */
2676 if (vclass == sGLOBAL)
2677 return add_symbol(&glbtab, &entry, TRUE);
2679 return add_symbol(&loctab, &entry, FALSE);
2683 addvariable(char *name, cell addr, int ident, int vclass, int tag,
2684 int dim[], int numdim, int idxtag[])
2686 symbol *sym, *parent, *top;
2689 /* global variables may only be defined once */
2690 assert(vclass != sGLOBAL || (sym = findglb(name)) == NULL
2691 || (sym->usage & uDEFINE) == 0);
2693 if (ident == iARRAY || ident == iREFARRAY)
2696 sym = NULL; /* to avoid a compiler warning */
2697 for (level = 0; level < numdim; level++)
2699 top = addsym(name, addr, ident, vclass, tag, uDEFINE);
2700 top->dim.array.length = dim[level];
2701 top->dim.array.level = (short)(numdim - level - 1);
2702 top->x.idxtag = idxtag[level];
2703 top->parent = parent;
2711 sym = addsym(name, addr, ident, vclass, tag, uDEFINE);
2718 * Return next available internal label number.
2728 * Converts a number to a hexadecimal string and returns a pointer to that
2734 static char itohstr[15]; /* hex number is 10 characters long at most */
2736 int i, nibble[8]; /* a 32-bit hexadecimal cell has 8 nibbles */
2745 for (i = 0; i < max; i += 1)
2747 nibble[i] = (int)(val & 0x0f); /* nibble 0 is lowest nibble */
2751 while (nibble[i] == 0 && i > 0) /* search for highest non-zero nibble */
2755 if (nibble[i] >= 10)
2756 *ptr++ = (char)('a' + (nibble[i] - 10));
2758 *ptr++ = (char)('0' + nibble[i]);
2761 *ptr = '\0'; /* and a zero-terminator */