1 /* Small compiler - File input, preprocessing and lexical analysis functions
3 * Copyright (c) ITB CompuPhase, 1997-2003
5 * This software is provided "as-is", without any express or implied warranty.
6 * In no event will the authors be held liable for any damages arising from
7 * the use of this software.
9 * Permission is granted to anyone to use this software for any purpose,
10 * including commercial applications, and to alter it and redistribute it
11 * freely, subject to the following restrictions:
13 * 1. The origin of this software must not be misrepresented; you must not
14 * claim that you wrote the original software. If you use this software in
15 * a product, an acknowledgment in the product documentation would be
16 * appreciated but is not required.
17 * 2. Altered source versions must be plainly marked as such, and must not be
18 * misrepresented as being the original software.
19 * 3. This notice may not be removed or altered from any source distribution.
35 #include "embryo_cc_sc.h"
38 static int match(char *st, int end);
39 static cell litchar(char **lptr, int rawmode);
40 static int alpha(char c);
42 static int icomment; /* currently in multiline comment? */
43 static int iflevel; /* nesting level if #if/#else/#endif */
44 static int skiplevel; /* level at which we started skipping */
45 static int elsedone; /* level at which we have seen an #else */
46 static char term_expr[] = "";
47 static int listline = -1; /* "current line" for the list file */
51 * Uses a LIFO stack to store information. The stack is used by doinclude(),
52 * doswitch() (to hold the state of "swactive") and some other routines.
54 * Porting note: I made the bold assumption that an integer will not be
55 * larger than a pointer (it may be smaller). That is, the stack element
56 * is typedef'ed as a pointer type, but I also store integers on it. See
59 * Global references: stack,stkidx (private to pushstk() and popstk())
61 static stkitem stack[sSTKMAX];
66 if (stkidx >= sSTKMAX)
67 error(102, "parser stack"); /* stack overflow (recursive include?) */
76 return (stkitem) - 1; /* stack is empty */
82 plungequalifiedfile(char *name)
84 static char *extensions[] = { ".inc", ".sma", ".small" };
92 fp = (FILE *) sc_opensrc(name);
93 ext = strchr(name, '\0'); /* save position */
96 /* try to append an extension */
97 strcpy(ext, extensions[ext_idx]);
98 fp = (FILE *) sc_opensrc(name);
100 *ext = '\0'; /* on failure, restore filename */
104 while (!fp && ext_idx < (sizeof extensions / sizeof extensions[0]));
107 *ext = '\0'; /* restore filename */
110 pushstk((stkitem) inpf);
111 pushstk((stkitem) inpfname); /* pointer to current file name */
112 pushstk((stkitem) curlibrary);
113 pushstk((stkitem) iflevel);
114 assert(skiplevel == 0);
115 pushstk((stkitem) icomment);
116 pushstk((stkitem) fcurrent);
117 pushstk((stkitem) fline);
118 inpfname = strdup(name); /* set name of include file */
120 error(103); /* insufficient memory */
121 inpf = fp; /* set input file pointer to include file */
123 fline = 0; /* set current line number to 0 */
126 setfile(inpfname, fcurrent);
127 listline = -1; /* force a #line directive when changing the file */
128 setactivefile(fcurrent);
133 plungefile(char *name, int try_currentpath, int try_includepaths)
140 result = plungequalifiedfile(name);
142 if (try_includepaths && name[0] != DIRSEP_CHAR)
144 for (i = 0; !result && (ptr = get_path(i)); i++)
148 strncpy(path, ptr, sizeof path);
149 path[sizeof path - 1] = '\0'; /* force '\0' termination */
150 strncat(path, name, sizeof(path) - strlen(path));
151 path[sizeof path - 1] = '\0';
152 result = plungequalifiedfile(path);
159 check_empty(char *lptr)
161 /* verifies that the string contains only whitespace */
162 while (*lptr <= ' ' && *lptr != '\0')
165 error(38); /* extra characters on line */
170 * Gets the name of an include file, pushes the old file on the stack and
171 * sets some options. This routine doesn't use lex(), since lex() doesn't
172 * recognize file names (and directories).
174 * Global references: inpf (altered)
182 char name[PATH_MAX], c;
185 while (*lptr <= ' ' && *lptr != 0) /* skip leading whitespace */
187 if (*lptr == '<' || *lptr == '\"')
189 c = (char)((*lptr == '\"') ? '\"' : '>'); /* termination character */
191 while (*lptr <= ' ' && *lptr != 0) /* skip whitespace after quote */
200 while (*lptr != c && *lptr != '\0' && i < sizeof name - 1) /* find the end of the string */
202 while (i > 0 && name[i - 1] <= ' ')
203 i--; /* strip trailing whitespace */
204 assert(i >= 0 && i < sizeof name);
205 name[i] = '\0'; /* zero-terminate the string */
208 { /* verify correct string termination */
209 error(37); /* invalid string */
213 check_empty(lptr + 1); /* verify that the rest of the line is whitespace */
215 /* Include files between "..." or without quotes are read from the current
216 * directory, or from a list of "include directories". Include files
217 * between <...> are only read from the list of include directories.
219 result = plungefile(name, (c != '>'), TRUE);
221 error(100, name); /* cannot read from ... (fatal error) */
226 * Reads in a new line from the input file pointed to by "inpf". readline()
227 * concatenates lines that end with a \ with the next line. If no more data
228 * can be read from the file, readline() attempts to pop off the previous file
229 * from the stack. If that fails too, it sets "freading" to 0.
231 * Global references: inpf,fline,inpfname,freading,icomment (altered)
239 if (lptr == term_expr)
245 if (!inpf || sc_eofsrc(inpf))
248 error(49); /* invalid line continuation */
249 if (inpf && inpf != inpf_org)
251 i = (int)(long)popstk();
253 { /* All's done; popstk() returns "stack is empty" */
256 /* when there is nothing more to read, the #if/#else stack should
257 * be empty and we should not be in a comment
259 assert(iflevel >= 0);
261 error(1, "#endif", "-end of file-");
263 error(1, "*/", "-end of file-");
267 fcurrent = (int)(long)popstk();
268 icomment = (int)(long)popstk();
269 assert(skiplevel == 0); /* skiplevel was not stored on stack, because it should always be zero at this point */
270 iflevel = (int)(long)popstk();
271 curlibrary = (constvalue *) popstk();
272 free(inpfname); /* return memory allocated for the include file name */
273 inpfname = (char *)popstk();
274 inpf = (FILE *) popstk();
275 setactivefile(fcurrent);
276 listline = -1; /* force a #line directive when changing the file */
280 if (!sc_readsrc(inpf, line, num))
282 *line = '\0'; /* delete line */
287 /* check whether to erase leading spaces */
292 while (*ptr == ' ' || *ptr == '\t')
295 memmove(line, ptr, strlen(ptr) + 1);
298 /* check whether a full line was read */
299 if (!strchr(line, '\n') && !sc_eofsrc(inpf))
300 error(75); /* line too long */
301 /* check if the next line must be concatenated to this line */
302 if ((ptr = strchr(line, '\n')) && ptr > line)
304 assert(*(ptr + 1) == '\0'); /* '\n' should be last in the string */
306 && (*ptr == '\n' || *ptr == ' ' || *ptr == '\t'))
307 ptr--; /* skip trailing whitespace */
311 /* set '\a' at the position of '\\' to make it possible to check
312 * for a line continuation in a single line comment (error 49)
315 *ptr = '\0'; /* erase '\n' (and any trailing whitespace) */
319 line += strlen(line);
323 while (num >= 0 && cont);
328 * Replaces all comments from the line by space characters. It updates
329 * a global variable ("icomment") for multiline comments.
331 * This routine also supports the C++ extension for single line comments.
332 * These comments are started with "//" and end at the end of the line.
334 * Global references: icomment (private to "stripcom")
345 if (*line == '*' && *(line + 1) == '/')
347 icomment = FALSE; /* comment has ended */
348 *line = ' '; /* replace '*' and '/' characters by spaces */
354 if (*line == '/' && *(line + 1) == '*')
355 error(216); /* nested comment */
356 *line = ' '; /* replace comments by spaces */
362 if (*line == '/' && *(line + 1) == '*')
364 icomment = TRUE; /* start comment */
365 *line = ' '; /* replace '/' and '*' characters by spaces */
369 else if (*line == '/' && *(line + 1) == '/')
370 { /* comment to end of line */
371 if (strchr(line, '\a'))
372 error(49); /* invalid line continuation */
373 *line++ = '\n'; /* put "newline" at first slash */
374 *line = '\0'; /* put "zero-terminator" at second slash */
378 if (*line == '\"' || *line == '\'')
379 { /* leave literals unaltered */
380 c = *line; /* ending quote, single or double */
382 while ((*line != c || *(line - 1) == '\\')
385 line += 1; /* skip final quote */
398 * Attempts to interpret a numeric symbol as a boolean value. On success
399 * it returns the number of characters processed (so the line pointer can be
400 * adjusted) and the value is stored in "val". Otherwise it returns 0 and
403 * A boolean value must start with "0b"
406 btoi(cell * val, char *curptr)
412 if (*ptr == '0' && *(ptr + 1) == 'b')
415 while (*ptr == '0' || *ptr == '1' || *ptr == '_')
418 *val = (*val << 1) | (*ptr - '0');
426 if (alphanum(*ptr)) /* number must be delimited by non-alphanumeric char */
429 return (int)(ptr - curptr);
434 * Attempts to interpret a numeric symbol as a decimal value. On success
435 * it returns the number of characters processed and the value is stored in
436 * "val". Otherwise it returns 0 and "val" is garbage.
439 dtoi(cell * val, char *curptr)
445 if (!isdigit(*ptr)) /* should start with digit */
447 while (isdigit(*ptr) || *ptr == '_')
450 *val = (*val * 10) + (*ptr - '0');
453 if (alphanum(*ptr)) /* number must be delimited by non-alphanumerical */
455 if (*ptr == '.' && isdigit(*(ptr + 1)))
456 return 0; /* but a fractional part must not be present */
457 return (int)(ptr - curptr);
462 * Attempts to interpret a numeric symbol as a hexadecimal value. On
463 * success it returns the number of characters processed and the value is
464 * stored in "val". Otherwise it return 0 and "val" is garbage.
467 htoi(cell * val, char *curptr)
473 if (!isdigit(*ptr)) /* should start with digit */
475 if (*ptr == '0' && *(ptr + 1) == 'x')
476 { /* C style hexadecimal notation */
478 while (isxdigit(*ptr) || *ptr == '_')
482 assert(isxdigit(*ptr));
485 *val += (*ptr - '0');
487 *val += (tolower(*ptr) - 'a' + 10);
499 return (int)(ptr - curptr);
529 * Attempts to interpret a numeric symbol as a rational number, either as
530 * IEEE 754 single precision floating point or as a fixed point integer.
531 * On success it returns the number of characters processed and the value is
532 * stored in "val". Otherwise it returns 0 and "val" is unchanged.
534 * Small has stricter definition for floating point numbers than most:
535 * o the value must start with a digit; ".5" is not a valid number, you
537 * o a period must appear in the value, even if an exponent is given; "2e3"
538 * is not a valid number, you should write "2.0e3"
539 * o at least one digit must follow the period; "6." is not a valid number,
540 * you should write "6.0"
543 ftoi(cell * val, char *curptr)
546 double fnum, ffrac, fmult;
547 unsigned long dnum, dbase;
550 assert(rational_digits >= 0 && rational_digits < 9);
551 for (i = 0, dbase = 1; i < rational_digits; i++)
556 if (!isdigit(*ptr)) /* should start with digit */
558 while (isdigit(*ptr) || *ptr == '_')
562 fnum = (fnum * 10.0) + (*ptr - '0');
563 dnum = (dnum * 10L) + (*ptr - '0') * dbase;
568 return 0; /* there must be a period */
570 if (!isdigit(*ptr)) /* there must be at least one digit after the dot */
575 while (isdigit(*ptr) || *ptr == '_')
579 ffrac = (ffrac * 10.0) + (*ptr - '0');
580 fmult = fmult / 10.0;
582 dnum += (*ptr - '0') * dbase;
583 if (dbase == 0L && sc_rationaltag && rational_digits > 0
586 error(222); /* number of digits exceeds rational number precision */
592 fnum += ffrac * fmult; /* form the number so far */
594 { /* optional fractional part */
607 if (!isdigit(*ptr)) /* 'e' should be followed by a digit */
610 while (isdigit(*ptr))
612 exp = (exp * 10) + (*ptr - '0');
616 fmult = pow10(exp * sign);
618 fmult = pow(10, exp * sign);
621 dnum *= (unsigned long)(fmult + 0.5);
624 /* decide how to store the number */
625 if (sc_rationaltag == 0)
627 error(70); /* rational number support was not enabled */
630 else if (rational_digits == 0)
632 float f = (float) fnum;
634 *val = EMBRYO_FLOAT_TO_CELL(f);
636 /* I assume that the C/C++ compiler stores "float" values in IEEE 754
637 * format (as mandated in the ANSI standard). Test this assumption anyway.
640 float test1 = 0.0, test2 = 50.0;
641 Embryo_Cell c1 = EMBRYO_FLOAT_TO_CELL(test1);
642 Embryo_Cell c2 = EMBRYO_FLOAT_TO_CELL(test2);
644 if (c1 != 0x00000000L)
647 "embryo_cc: WARNING! you compiler does not SEEM to interpret IEEE floating\n"
648 "point math as embryo expects. this could be bad.\n"
650 "(float 0.0 != 0x00000000 bitpattern, 0x%08x instead)\n"
652 "this could be an issue with you compiling embryo with gcc 3.2.x that seems\n"
653 "to trigger this sanity check. we are not sure why yet, but gcc 3.3.x works\n"
656 else if (c2 != 0x42480000L)
659 "embryo_cc: WARNING! you compiler does not SEEM to interpret IEEE floating\n"
660 "point math as embryo expects. This could be bad.\n"
662 "(float 50.0 != 0x42480000 bitpattern, 0x%08x instead)\n"
664 "this could be an issue with you compiling embryo with gcc 3.2.x that seems\n"
665 "to trigger this sanity check. we are not sure why yet, but gcc 3.3.x works\n"
677 return (int)(ptr - curptr);
682 * Reads in a number (binary, decimal or hexadecimal). It returns the number
683 * of characters processed or 0 if the symbol couldn't be interpreted as a
684 * number (in this case the argument "val" remains unchanged). This routine
685 * relies on the 'early dropout' implementation of the logical or (||)
688 * Note: the routine doesn't check for a sign (+ or -). The - is checked
689 * for at "hier2()" (in fact, it is viewed as an operator, not as a
690 * sign) and the + is invalid (as in K&R C, and unlike ANSI C).
693 number(cell * val, char *curptr)
698 if ((i = btoi(&value, curptr)) != 0 /* binary? */
699 || (i = htoi(&value, curptr)) != 0 /* hexadecimal? */
700 || (i = dtoi(&value, curptr)) != 0) /* decimal? */
707 return 0; /* else not a number */
712 chrcat(char *str, char chr)
714 str = strchr(str, '\0');
720 preproc_expr(cell * val, int *tag)
727 /* Disable staging; it should be disabled already because
728 * expressions may not be cut off half-way between conditional
729 * compilations. Reset the staging index, but keep the code
732 if (stgget(&index, &code_index))
734 error(57); /* unfinished expression */
735 stgdel(0, code_index);
738 /* append a special symbol to the string, so the expression
739 * analyzer won't try to read a next line when it encounters
742 assert(strlen(pline) < sLINEMAX);
743 term = strchr(pline, '\0');
745 chrcat(pline, PREPROC_TERM); /* the "DEL" code (see SC.H) */
746 result = constexpr(val, tag); /* get value (or 0 on error) */
747 *term = '\0'; /* erase the token (if still present) */
748 lexclr(FALSE); /* clear any "pushed" tokens */
753 * Returns returns a pointer behind the closing quote or to the other
754 * character that caused the input to be ended.
757 getstring(char *dest, int max, char *line)
759 assert(!!dest && !!line);
761 while (*line <= ' ' && *line != '\0')
762 line++; /* skip whitespace */
765 error(37); /* invalid string */
767 else if (*line == '\0')
772 while (*line != '"' && *line != '\0')
780 lptr++; /* skip closing " */
782 error(37); /* invalid string */
801 * Recognizes the compiler directives. The function returns:
802 * CMD_NONE the line must be processed
803 * CMD_TERM a pending expression must be completed before processing further lines
804 * Other value: the line must be skipped, because:
805 * CMD_CONDFALSE false "#if.." code
806 * CMD_EMPTYLINE line is empty
807 * CMD_INCLUDE the line contains a #include directive
808 * CMD_DEFINE the line contains a #subst directive
809 * CMD_IF the line contains a #if/#else/#endif directive
810 * CMD_DIRECTIVE the line contains some other compiler directive
812 * Global variables: iflevel, skiplevel, elsedone (altered)
824 while (*lptr <= ' ' && *lptr != '\0')
827 return CMD_EMPTYLINE; /* empty line */
829 return skiplevel > 0 ? CMD_CONDFALSE : CMD_NONE; /* it is not a compiler directive */
830 /* compiler directive found */
831 indent_nowarn = TRUE; /* allow loose indentation" */
832 lexclr(FALSE); /* clear any "pushed" tokens */
833 /* on a pending expression, force to return a silent ';' token and force to
836 if (!sc_needsemicolon && stgget(&index, &code_index))
841 tok = lex(&val, &str);
842 ret = skiplevel > 0 ? CMD_CONDFALSE : CMD_DIRECTIVE; /* preset 'ret' to CMD_DIRECTIVE (most common case) */
845 case tpIF: /* conditional compilation */
849 break; /* break out of switch */
850 preproc_expr(&val, NULL); /* get value (or 0 on error) */
857 if (iflevel == 0 && skiplevel == 0)
859 error(26); /* no matching #if */
864 if (elsedone == iflevel)
865 error(60); /* multiple #else directives between #if ... #endif */
867 if (skiplevel == iflevel)
869 else if (skiplevel == 0)
874 #if 0 /* ??? *really* need to use a stack here */
877 if (iflevel == 0 && skiplevel == 0)
879 error(26); /* no matching #if */
882 else if (elsedone == iflevel)
884 error(61); /* #elseif directive may not follow an #else */
889 preproc_expr(&val, NULL); /* get value (or 0 on error) */
891 skiplevel = iflevel; /* we weren't skipping, start skipping now */
893 skiplevel = 0; /* we were skipping, condition is valid -> stop skipping */
894 /* else: we were skipping and condition is invalid -> keep skipping */
901 if (iflevel == 0 && skiplevel == 0)
908 if (skiplevel == iflevel)
910 if (elsedone == iflevel)
911 elsedone = 0; /* ??? actually, should build a stack of #if/#endif and keep
912 * the state whether an #else was seen per nesting level */
917 case tINCLUDE: /* #include directive */
925 char pathname[PATH_MAX];
927 lptr = getstring(pathname, sizeof pathname, lptr);
928 if (pathname[0] != '\0')
931 inpfname = strdup(pathname);
933 error(103); /* insufficient memory */
941 if (lex(&val, &str) != tNUMBER)
942 error(8); /* invalid/non-constant expression */
948 if (skiplevel == 0 && (sc_debug & sCHKBOUNDS) != 0)
950 preproc_expr(&val, NULL); /* get constant expression (or 0 on error) */
952 error(7); /* assertion failed */
959 if (lex(&val, &str) == tSYMBOL)
961 if (strcmp(str, "ctrlchar") == 0)
963 if (lex(&val, &str) != tNUMBER)
964 error(27); /* invalid character constant */
965 sc_ctrlchar = (char)val;
967 else if (strcmp(str, "compress") == 0)
971 preproc_expr(&val, NULL);
972 sc_compress = (int)val; /* switch code packing on/off */
974 else if (strcmp(str, "dynamic") == 0)
976 preproc_expr(&sc_stksize, NULL);
978 else if (strcmp(str, "library") == 0)
980 char name[sNAMEMAX + 1];
982 while (*lptr <= ' ' && *lptr != '\0')
986 lptr = getstring(name, sizeof name, lptr);
992 for (i = 0; i < sizeof name && alphanum(*lptr);
1003 if (strlen(name) > sEXPMAX)
1004 error(220, name, sEXPMAX); /* exported symbol is truncated */
1005 /* add the name if it does not yet exist in the table */
1006 if (!find_constval(&libname_tab, name, 0))
1008 append_constval(&libname_tab, name, 0, 0);
1011 else if (strcmp(str, "pack") == 0)
1015 preproc_expr(&val, NULL); /* default = packed/unpacked */
1016 sc_packstr = (int)val;
1018 else if (strcmp(str, "rational") == 0)
1020 char name[sNAMEMAX + 1];
1024 /* first gather all information, start with the tag name */
1025 while (*lptr <= ' ' && *lptr != '\0')
1027 for (i = 0; i < sizeof name && alphanum(*lptr);
1031 /* then the precision (for fixed point arithmetic) */
1032 while (*lptr <= ' ' && *lptr != '\0')
1036 preproc_expr(&digits, NULL);
1037 if (digits <= 0 || digits > 9)
1039 error(68); /* invalid rational number precision */
1045 /* add the tag (make it public) and check the values */
1046 i = sc_addtag(name);
1048 if (sc_rationaltag == 0
1049 || (sc_rationaltag == i
1050 && rational_digits == (int)digits))
1053 rational_digits = (int)digits;
1057 error(69); /* rational number format already set, can only be set once */
1060 else if (strcmp(str, "semicolon") == 0)
1064 preproc_expr(&val, NULL);
1065 sc_needsemicolon = (int)val;
1067 else if (strcmp(str, "tabsize") == 0)
1071 preproc_expr(&val, NULL);
1072 sc_tabsize = (int)val;
1074 else if (strcmp(str, "align") == 0)
1076 sc_alignnext = TRUE;
1078 else if (strcmp(str, "unused") == 0)
1080 char name[sNAMEMAX + 1];
1087 while (*lptr <= ' ' && *lptr != '\0')
1089 for (i = 0; i < sizeof name && isalpha(*lptr);
1093 /* get the symbol */
1094 sym = findloc(name);
1096 sym = findglb(name);
1099 sym->usage |= uREAD;
1100 if (sym->ident == iVARIABLE
1101 || sym->ident == iREFERENCE
1102 || sym->ident == iARRAY
1103 || sym->ident == iREFARRAY)
1104 sym->usage |= uWRITTEN;
1108 error(17, name); /* undefined symbol */
1110 /* see if a comma follows the name */
1111 while (*lptr <= ' ' && *lptr != '\0')
1113 comma = (*lptr == ',');
1121 error(207); /* unknown #pragma */
1126 error(207); /* unknown #pragma */
1137 if (inpf != inpf_org)
1145 /* write opcode to output file */
1149 while (*lptr <= ' ' && *lptr != '\0')
1151 for (i = 0; i < 40 && (isalpha(*lptr) || *lptr == '.'); i++, lptr++)
1152 name[i] = (char)tolower(*lptr);
1157 code_idx += opcodes(1);
1158 /* write parameter (if any) */
1159 while (*lptr <= ' ' && *lptr != '\0')
1165 tok = lex(&val, &str);
1171 code_idx += opargs(1);
1177 if (!sym || (sym->ident != iFUNCTN
1178 && sym->ident != iREFFUNC
1179 && (sym->usage & uDEFINE) == 0))
1181 error(17, str); /* undefined symbol */
1185 outval(sym->addr, FALSE);
1186 /* mark symbol as "used", unknown whether for read or write */
1187 markusage(sym, uREAD | uWRITTEN);
1188 code_idx += opargs(1);
1194 extern char *sc_tokens[]; /* forward declaration */
1197 sprintf(s2, "%c", (char)tok);
1199 strcpy(s2, sc_tokens[tok - tFIRST]);
1200 error(1, sc_tokens[tSYMBOL - tFIRST], s2);
1210 #if !defined NO_DEFINE
1216 char *pattern, *substitution;
1218 int count, prefixlen;
1221 /* find the pattern to match */
1222 while (*lptr <= ' ' && *lptr != '\0')
1224 start = lptr; /* save starting point of the match pattern */
1226 while (*lptr > ' ' && *lptr != '\0')
1228 litchar(&lptr, FALSE); /* litchar() advances "lptr" and handles escape characters */
1232 /* check pattern to match */
1233 if (!isalpha(*start) && *start != '_')
1235 error(74); /* pattern must start with an alphabetic character */
1238 /* store matched pattern */
1239 pattern = malloc(count + 1);
1241 error(103); /* insufficient memory */
1247 assert(*lptr != '\0');
1248 pattern[count++] = (char)litchar(&lptr, FALSE);
1250 pattern[count] = '\0';
1251 /* special case, erase trailing variable, because it could match anything */
1252 if (count >= 2 && isdigit(pattern[count - 1])
1253 && pattern[count - 2] == '%')
1254 pattern[count - 2] = '\0';
1255 /* find substitution string */
1256 while (*lptr <= ' ' && *lptr != '\0')
1258 start = lptr; /* save starting point of the match pattern */
1261 while (*lptr != '\0')
1263 /* keep position of the start of trailing whitespace */
1278 /* store matched substitution */
1279 substitution = malloc(count + 1); /* +1 for '\0' */
1281 error(103); /* insufficient memory */
1287 assert(*lptr != '\0');
1288 substitution[count++] = *lptr++;
1290 substitution[count] = '\0';
1291 /* check whether the definition already exists */
1292 for (prefixlen = 0, start = pattern;
1293 isalpha(*start) || isdigit(*start) || *start == '_';
1294 prefixlen++, start++)
1296 assert(prefixlen > 0);
1297 if ((def = find_subst(pattern, prefixlen)))
1299 if (strcmp(def->first, pattern) != 0
1300 || strcmp(def->second, substitution) != 0)
1301 error(201, pattern); /* redefinition of macro (non-identical) */
1302 delete_subst(pattern, prefixlen);
1304 /* add the pattern/substitution pair to the list */
1305 assert(pattern[0] != '\0');
1306 insert_subst(pattern, substitution, prefixlen);
1315 if (lex(&val, &str) == tSYMBOL)
1317 if (!delete_subst(str, strlen(str)))
1318 error(17, str); /* undefined symbol */
1322 error(20, str); /* invalid symbol name */
1329 error(31); /* unknown compiler directive */
1330 ret = skiplevel > 0 ? CMD_DIRECTIVE : CMD_NONE; /* line must be processed (if skiplevel==0) */
1335 #if !defined NO_DEFINE
1337 is_startstring(char *string)
1339 if (*string == '\"' || *string == '\'')
1340 return TRUE; /* "..." */
1345 if (*string == '\"' || *string == '\'')
1346 return TRUE; /* !"..." */
1347 if (*string == sc_ctrlchar)
1350 if (*string == '\"' || *string == '\'')
1351 return TRUE; /* !\"..." */
1354 else if (*string == sc_ctrlchar)
1357 if (*string == '\"' || *string == '\'')
1358 return TRUE; /* \"..." */
1362 if (*string == '\"' || *string == '\'')
1363 return TRUE; /* \!"..." */
1371 skipstring(char *string)
1374 int rawstring = FALSE;
1376 while (*string == '!' || *string == sc_ctrlchar)
1378 rawstring = (*string == sc_ctrlchar);
1383 assert(endquote == '\"' || endquote == '\'');
1384 string++; /* skip open quote */
1385 while (*string != endquote && *string != '\0')
1386 litchar(&string, rawstring);
1391 skippgroup(char *string)
1394 char open = *string;
1413 close = '\0'; /* only to avoid a compiler warning */
1417 while (*string != close || nest > 0)
1419 if (*string == open)
1421 else if (*string == close)
1423 else if (is_startstring(string))
1424 string = skipstring(string);
1425 if (*string == '\0')
1433 strdel(char *str, size_t len)
1435 size_t length = strlen(str);
1439 memmove(str, str + len, length - len + 1); /* include EOS byte */
1444 strins(char *dest, char *src, size_t srclen)
1446 size_t destlen = strlen(dest);
1448 assert(srclen <= strlen(src));
1449 memmove(dest + srclen, dest, destlen + 1); /* include EOS byte */
1450 memcpy(dest, src, srclen);
1455 substpattern(char *line, size_t buffersize, char *pattern, char *substitution)
1458 char *p, *s, *e, *args[10];
1459 int match, arg, len;
1461 memset(args, 0, sizeof args);
1463 /* check the length of the prefix */
1464 for (prefixlen = 0, s = pattern; isalpha(*s) || isdigit(*s) || *s == '_';
1467 assert(prefixlen > 0);
1468 assert(strncmp(line, pattern, prefixlen) == 0);
1470 /* pattern prefix matches; match the rest of the pattern, gather
1473 s = line + prefixlen;
1474 p = pattern + prefixlen;
1475 match = TRUE; /* so far, pattern matches */
1476 while (match && *s != '\0' && *p != '\0')
1484 assert(arg >= 0 && arg <= 9);
1485 p++; /* skip parameter id */
1487 /* match the source string up to the character after the digit
1488 * (skipping strings in the process
1491 while (*e != *p && *e != '\0' && *e != '\n')
1493 if (is_startstring(e)) /* skip strings */
1495 else if (strchr("({[", *e)) /* skip parenthized groups */
1498 e++; /* skip non-alphapetic character (or closing quote of
1499 * a string, or the closing paranthese of a group) */
1501 /* store the parameter (overrule any earlier) */
1505 args[arg] = malloc(len + 1);
1507 error(103); /* insufficient memory */
1508 strncpy(args[arg], s, len);
1509 args[arg][len] = '\0';
1510 /* character behind the pattern was matched too */
1515 else if (*e == '\n' && *p == ';' && *(p + 1) == '\0'
1516 && !sc_needsemicolon)
1518 s = e; /* allow a trailing ; in the pattern match to end of line */
1522 assert(*e == '\0' || *e == '\n');
1533 else if (*p == ';' && *(p + 1) == '\0' && !sc_needsemicolon)
1535 /* source may be ';' or end of the line */
1536 while (*s <= ' ' && *s != '\0')
1537 s++; /* skip white space */
1538 if (*s != ';' && *s != '\0')
1540 p++; /* skip the semicolon in the pattern */
1546 /* skip whitespace between two non-alphanumeric characters, except
1547 * for two identical symbols
1549 assert(p > pattern);
1550 if (!alphanum(*p) && *(p - 1) != *p)
1551 while (*s <= ' ' && *s != '\0')
1552 s++; /* skip white space */
1553 ch = litchar(&p, FALSE); /* this increments "p" */
1557 s++; /* this character matches */
1561 if (match && *p == '\0')
1563 /* if the last character to match is an alphanumeric character, the
1564 * current character in the source may not be alphanumeric
1566 assert(p > pattern);
1567 if (alphanum(*(p - 1)) && alphanum(*s))
1573 /* calculate the length of the substituted string */
1574 for (e = substitution, len = 0; *e != '\0'; e++)
1576 if (*e == '%' && isdigit(*(e + 1)))
1578 arg = *(e + 1) - '0';
1579 assert(arg >= 0 && arg <= 9);
1581 len += strlen(args[arg]);
1582 e++; /* skip %, digit is skipped later */
1589 /* check length of the string after substitution */
1590 if (strlen(line) + len - (int)(s - line) > buffersize)
1592 error(75); /* line too long */
1596 /* substitute pattern */
1597 strdel(line, (int)(s - line));
1598 for (e = substitution, s = line; *e != '\0'; e++)
1600 if (*e == '%' && isdigit(*(e + 1)))
1602 arg = *(e + 1) - '0';
1603 assert(arg >= 0 && arg <= 9);
1606 strins(s, args[arg], strlen(args[arg]));
1607 s += strlen(args[arg]);
1609 e++; /* skip %, digit is skipped later */
1620 for (arg = 0; arg < 10; arg++)
1628 substallpatterns(char *line, int buffersize)
1635 while (*start != '\0')
1637 /* find the start of a prefix (skip all non-alphabetic characters),
1640 while (!isalpha(*start) && *start != '_' && *start != '\0')
1643 if (is_startstring(start))
1645 start = skipstring(start);
1647 break; /* abort loop on error */
1649 start++; /* skip non-alphapetic character (or closing quote of a string) */
1652 break; /* abort loop on error */
1653 /* get the prefix (length), look for a matching definition */
1656 while (isalpha(*end) || isdigit(*end) || *end == '_')
1661 assert(prefixlen > 0);
1662 subst = find_subst(start, prefixlen);
1665 /* properly match the pattern and substitute */
1667 (start, buffersize - (start - line), subst->first,
1669 start = end; /* match failed, skip this prefix */
1670 /* match succeeded: do not update "start", because the substitution text
1671 * may be matched by other macros
1676 start = end; /* no macro with this prefix, skip this prefix */
1684 * Reads a line by readline() into "pline" and performs basic preprocessing:
1685 * deleting comments, skipping lines with false "#if.." code and recognizing
1686 * other compiler directives. There is an indirect recursion: lex() calls
1687 * preprocess() if a new line must be read, preprocess() calls command(),
1688 * which at his turn calls lex() to identify the token.
1690 * Global references: lptr (altered)
1692 * freading (referred to only)
1704 stripcom(pline); /* ??? no need for this when reading back from list file (in the second pass) */
1705 lptr = pline; /* set "line pointer" to start of the parsing buffer */
1706 iscommand = command();
1707 if (iscommand != CMD_NONE)
1708 errorset(sRESET); /* reset error flag ("panic mode") on empty line or directive */
1709 #if !defined NO_DEFINE
1710 if (iscommand == CMD_NONE)
1712 assert(lptr != term_expr);
1713 substallpatterns(pline, sLINEMAX);
1714 lptr = pline; /* reset "line pointer" to start of the parsing buffer */
1718 while (iscommand != CMD_NONE && iscommand != CMD_TERM && freading); /* enddo */
1722 unpackedstring(char *lptr, int rawstring)
1724 while (*lptr != '\0')
1726 /* check for doublequotes indicating the end of the string */
1729 /* check whether there's another pair of quotes following.
1730 * If so, paste the two strings together, thus
1731 * "pants""off" becomes "pantsoff"
1733 if (*(lptr + 1) == '\"')
1740 { /* ignore '\a' (which was inserted at a line concatenation) */
1744 stowlit(litchar(&lptr, rawstring)); /* litchar() alters "lptr" */
1746 stowlit(0); /* terminate string */
1751 packedstring(char *lptr, int rawstring)
1756 i = sizeof(ucell) - (charbits / 8); /* start at most significant byte */
1758 while (*lptr != '\0')
1760 /* check for doublequotes indicating the end of the string */
1763 /* check whether there's another pair of quotes following.
1764 * If so, paste the two strings together, thus
1765 * "pants""off" becomes "pantsoff"
1767 if (*(lptr + 1) == '\"')
1774 { /* ignore '\a' (which was inserted at a line concatenation) */
1778 c = litchar(&lptr, rawstring); /* litchar() alters "lptr" */
1779 if (c >= (ucell) (1 << charbits))
1780 error(43); /* character constant exceeds range */
1781 val |= (c << 8 * i);
1787 i = (i + sizeof(ucell) - (charbits / 8)) % sizeof(ucell);
1789 /* save last code; make sure there is at least one terminating zero character */
1790 if (i != (int)(sizeof(ucell) - (charbits / 8)))
1791 stowlit(val); /* at least one zero character in "val" */
1793 stowlit(0); /* add full cell of zeros */
1797 /* lex(lexvalue,lexsym) Lexical Analysis
1799 * lex() first deletes leading white space, then checks for multi-character
1800 * operators, keywords (including most compiler directives), numbers,
1801 * labels, symbols and literals (literal characters are converted to a number
1802 * and are returned as such). If every check fails, the line must contain
1803 * a single-character operator. So, lex() returns this character. In the other
1804 * case (something did match), lex() returns the number of the token. All
1805 * these tokens have been assigned numbers above 255.
1807 * Some tokens have "attributes":
1808 * tNUMBER the value of the number is return in "lexvalue".
1809 * tRATIONAL the value is in IEEE 754 encoding or in fixed point
1810 * encoding in "lexvalue".
1811 * tSYMBOL the first sNAMEMAX characters of the symbol are
1812 * stored in a buffer, a pointer to this buffer is
1813 * returned in "lexsym".
1814 * tLABEL the first sNAMEMAX characters of the label are
1815 * stored in a buffer, a pointer to this buffer is
1816 * returned in "lexsym".
1817 * tSTRING the string is stored in the literal pool, the index
1818 * in the literal pool to this string is stored in
1821 * lex() stores all information (the token found and possibly its attribute)
1822 * in global variables. This allows a token to be examined twice. If "_pushed"
1823 * is true, this information is returned.
1825 * Global references: lptr (altered)
1826 * fline (referred to only)
1827 * litidx (referred to only)
1828 * _lextok, _lexval, _lexstr
1834 static cell _lexval;
1835 static char _lexstr[sLINEMAX + 1];
1836 static int _lexnewline;
1841 stkidx = 0; /* index for pushstk() and popstk() */
1842 iflevel = 0; /* preprocessor: nesting of "#if" */
1843 skiplevel = 0; /* preprocessor: skipping lines or compiling lines */
1844 icomment = FALSE; /* currently not in a multiline comment */
1845 _pushed = FALSE; /* no token pushed back into lex */
1846 _lexnewline = FALSE;
1849 char *sc_tokens[] = {
1850 "*=", "/=", "%=", "+=", "-=", "<<=", ">>>=", ">>=", "&=", "^=", "|=",
1851 "||", "&&", "==", "!=", "<=", ">=", "<<", ">>>", ">>", "++", "--",
1853 "assert", "break", "case", "char", "const", "continue", "default",
1854 "defined", "do", "else", "enum", "exit", "for", "forward", "goto",
1855 "if", "native", "new", "operator", "public", "return", "sizeof",
1856 "sleep", "static", "stock", "switch", "tagof", "while",
1857 "#assert", "#define", "#else", "#emit", "#endif", "#endinput",
1858 "#endscript", "#file", "#if", "#include", "#line", "#pragma", "#undef",
1859 ";", ";", "-integer value-", "-rational value-", "-identifier-",
1860 "-label-", "-string-"
1864 lex(cell * lexvalue, char **lexsym)
1866 int i, toolong, newline, rawstring;
1871 _pushed = FALSE; /* reset "_pushed" flag */
1872 *lexvalue = _lexval;
1877 _lextok = 0; /* preset all values */
1880 *lexvalue = _lexval;
1882 _lexnewline = FALSE;
1886 newline = (lptr == pline); /* does lptr point to start of line buffer */
1887 while (*lptr <= ' ')
1888 { /* delete leading white space */
1891 preprocess(); /* preprocess resets "lptr" */
1894 if (lptr == term_expr) /* special sequence to terminate a pending expression */
1895 return (_lextok = tENDEXPR);
1896 _lexnewline = TRUE; /* set this after preprocess(), because
1897 * preprocess() calls lex() recursively */
1908 for (i = 0; i < (int)(lptr - pline); i++)
1909 if (pline[i] == '\t' && sc_tabsize > 0)
1911 (int)(sc_tabsize - (stmtindent + sc_tabsize) % sc_tabsize);
1918 while (i <= tMIDDLE)
1919 { /* match multi-character operators */
1920 if (match(*tokptr, FALSE))
1929 { /* match reserved words and compiler directives */
1930 if (match(*tokptr, TRUE))
1933 errorset(sRESET); /* reset error flag (clear the "panic mode") */
1940 if ((i = number(&_lexval, lptr)) != 0)
1943 *lexvalue = _lexval;
1946 else if ((i = ftoi(&_lexval, lptr)) != 0)
1948 _lextok = tRATIONAL;
1949 *lexvalue = _lexval;
1952 else if (alpha(*lptr))
1953 { /* symbol or label */
1954 /* Note: only sNAMEMAX characters are significant. The compiler
1955 * generates a warning if a symbol exceeds this length.
1960 while (alphanum(*lptr))
1971 error(200, _lexstr, sNAMEMAX); /* symbol too long, truncated to sNAMEMAX chars */
1972 if (_lexstr[0] == PUBLIC_CHAR && _lexstr[1] == '\0')
1974 _lextok = PUBLIC_CHAR; /* '@' all alone is not a symbol, it is an operator */
1976 else if (_lexstr[0] == '_' && _lexstr[1] == '\0')
1978 _lextok = '_'; /* '_' by itself is not a symbol, it is a placeholder */
1980 if (*lptr == ':' && sc_allowtags && _lextok != PUBLIC_CHAR)
1982 _lextok = tLABEL; /* it wasn't a normal symbol, it was a label/tagname */
1983 lptr += 1; /* skip colon */
1986 else if (*lptr == '\"' || (*lptr == sc_ctrlchar && *(lptr + 1) == '\"'))
1987 { /* unpacked string literal */
1989 rawstring = (*lptr == sc_ctrlchar);
1990 *lexvalue = _lexval = litidx;
1991 lptr += 1; /* skip double quote */
1993 lptr += 1; /* skip "escape" character too */
1995 sc_packstr ? packedstring(lptr, rawstring) : unpackedstring(lptr,
1998 lptr += 1; /* skip final quote */
2000 error(37); /* invalid (non-terminated) string */
2002 else if ((*lptr == '!' && *(lptr + 1) == '\"')
2003 || (*lptr == '!' && *(lptr + 1) == sc_ctrlchar && *(lptr + 2) == '\"')
2004 || (*lptr == sc_ctrlchar && *(lptr + 1) == '!'
2005 && *(lptr + 2) == '\"'))
2006 { /* packed string literal */
2008 rawstring = (*lptr == sc_ctrlchar || *(lptr + 1) == sc_ctrlchar);
2009 *lexvalue = _lexval = litidx;
2010 lptr += 2; /* skip exclamation point and double quote */
2012 lptr += 1; /* skip "escape" character too */
2014 sc_packstr ? unpackedstring(lptr, rawstring) : packedstring(lptr,
2017 lptr += 1; /* skip final quote */
2019 error(37); /* invalid (non-terminated) string */
2021 else if (*lptr == '\'')
2022 { /* character literal */
2023 lptr += 1; /* skip quote */
2025 *lexvalue = _lexval = litchar(&lptr, FALSE);
2027 lptr += 1; /* skip final quote */
2029 error(27); /* invalid character constant (must be one character) */
2031 else if (*lptr == ';')
2032 { /* semicolumn resets "error" flag */
2035 errorset(sRESET); /* reset error flag (clear the "panic mode") */
2039 _lextok = *lptr; /* if every match fails, return the character */
2040 lptr += 1; /* increase the "lptr" pointer */
2047 * Pushes a token back, so the next call to lex() will return the token
2048 * last examined, instead of a new token.
2050 * Only one token can be pushed back.
2052 * In fact, lex() already stores the information it finds into global
2053 * variables, so all that is to be done is set a flag that informs lex()
2054 * to read and return the information from these variables, rather than
2055 * to read in a new token from the input file.
2060 assert(_pushed == FALSE);
2066 * Sets the variable "_pushed" to 0 to make sure lex() will read in a new
2067 * symbol (a not continue with some old one). This is required upon return
2068 * from Assembler mode.
2076 lptr = strchr(pline, '\0');
2083 * This routine is useful if only a simple check is needed. If the token
2084 * differs from the one expected, it is pushed back.
2087 matchtoken(int token)
2093 tok = lex(&val, &str);
2094 if (tok == token || (token == tTERM && (tok == ';' || tok == tENDEXPR)))
2098 else if (!sc_needsemicolon && token == tTERM && (_lexnewline || !freading))
2100 lexpush(); /* push "tok" back, we use the "hidden" newline token */
2112 * Returns additional information of a token after using "matchtoken()"
2113 * or needtoken(). It does no harm using this routine after a call to
2114 * "lex()", but lex() already returns the same information.
2116 * The token itself is the return value. Normally, this one is already known.
2119 tokeninfo(cell * val, char **str)
2121 /* if the token was pushed back, tokeninfo() returns the token and
2122 * parameters of the *next* token, not of the *current* token.
2132 * This routine checks for a required token and gives an error message if
2133 * it isn't there (and returns FALSE in that case).
2135 * Global references: _lextok;
2138 needtoken(int token)
2140 char s1[20], s2[20];
2142 if (matchtoken(token))
2148 /* token already pushed back */
2151 sprintf(s1, "%c", (char)token); /* single character token */
2153 strcpy(s1, sc_tokens[token - tFIRST]); /* multi-character symbol */
2155 strcpy(s2, "-end of file-");
2156 else if (_lextok < 256)
2157 sprintf(s2, "%c", (char)_lextok);
2159 strcpy(s2, sc_tokens[_lextok - tFIRST]);
2160 error(1, s1, s2); /* expected ..., but found ... */
2167 * Compares a series of characters from the input file with the characters
2168 * in "st" (that contains a token). If the token on the input file matches
2169 * "st", the input file pointer "lptr" is adjusted to point to the next
2170 * token, otherwise "lptr" remains unaltered.
2172 * If the parameter "end: is true, match() requires that the first character
2173 * behind the recognized token is non-alphanumeric.
2175 * Global references: lptr (altered)
2178 match(char *st, int end)
2193 { /* symbol must terminate with non-alphanumeric char */
2197 lptr = ptr; /* match found, skip symbol */
2203 * Stores a value into the literal queue. The literal queue is used for
2204 * literal strings used in functions and for initializing array variables.
2206 * Global references: litidx (altered)
2212 if (litidx >= litmax)
2216 litmax += sDEF_LITMAX;
2217 p = (cell *) realloc(litq, litmax * sizeof(cell));
2219 error(102, "literal table"); /* literal table overflow (fatal error) */
2222 assert(litidx < litmax);
2223 litq[litidx++] = value;
2228 * Return current literal character and increase the pointer to point
2229 * just behind this literal character.
2231 * Note: standard "escape sequences" are suported, but the backslash may be
2232 * replaced by another character; the syntax '\ddd' is supported,
2233 * but ddd must be decimal!
2236 litchar(char **lptr, int rawmode)
2239 unsigned char *cptr;
2241 cptr = (unsigned char *)*lptr;
2242 if (rawmode || *cptr != sc_ctrlchar)
2243 { /* no escape character */
2250 if (*cptr == sc_ctrlchar)
2252 c = *cptr; /* \\ == \ (the escape character itself) */
2259 case 'a': /* \a == audible alarm */
2263 case 'b': /* \b == backspace */
2267 case 'e': /* \e == escape */
2271 case 'f': /* \f == form feed */
2275 case 'n': /* \n == NewLine character */
2279 case 'r': /* \r == carriage return */
2283 case 't': /* \t == horizontal TAB */
2287 case 'v': /* \v == vertical TAB */
2291 case '\'': /* \' == ' (single quote) */
2292 case '"': /* \" == " (single quote) */
2293 case '%': /* \% == % (percent) */
2301 while (*cptr >= '0' && *cptr <= '9') /* decimal! */
2302 c = c * 10 + *cptr++ - '0';
2304 cptr++; /* swallow a trailing ';' */
2308 error(27); /* invalid character constant */
2313 *lptr = (char *)cptr;
2314 assert(c >= 0 && c < 256);
2320 * Test if character "c" is alphabetic ("a".."z"), an underscore ("_")
2321 * or an "at" sign ("@"). The "@" is an extension to standard C.
2326 return (isalpha(c) || c == '_' || c == PUBLIC_CHAR);
2331 * Test if character "c" is alphanumeric ("a".."z", "0".."9", "_" or "@")
2336 return (alpha(c) || isdigit(c));
2339 /* The local variable table must be searched backwards, so that the deepest
2340 * nesting of local variables is searched first. The simplest way to do
2341 * this is to insert all new items at the head of the list.
2342 * In the global list, the symbols are kept in sorted order, so that the
2343 * public functions are written in sorted order.
2346 add_symbol(symbol * root, symbol * entry, int sort)
2351 while (root->next && strcmp(entry->name, root->next->name) > 0)
2354 if (!(newsym = (symbol *)malloc(sizeof(symbol))))
2359 memcpy(newsym, entry, sizeof(symbol));
2360 newsym->next = root->next;
2361 root->next = newsym;
2366 free_symbol(symbol * sym)
2370 /* free all sub-symbol allocated memory blocks, depending on the
2371 * kind of the symbol
2374 if (sym->ident == iFUNCTN)
2376 /* run through the argument list; "default array" arguments
2377 * must be freed explicitly; the tag list must also be freed */
2378 assert(!!sym->dim.arglist);
2379 for (arg = sym->dim.arglist; arg->ident != 0; arg++)
2381 if (arg->ident == iREFARRAY && arg->hasdefault)
2382 free(arg->defvalue.array.data);
2383 else if (arg->ident == iVARIABLE
2384 && ((arg->hasdefault & uSIZEOF) != 0
2385 || (arg->hasdefault & uTAGOF) != 0))
2386 free(arg->defvalue.size.symname);
2387 assert(!!arg->tags);
2390 free(sym->dim.arglist);
2392 assert(!!sym->refer);
2398 delete_symbol(symbol * root, symbol * sym)
2400 /* find the symbol and its predecessor
2401 * (this function assumes that you will never delete a symbol that is not
2402 * in the table pointed at by "root")
2404 assert(root != sym);
2405 while (root->next != sym)
2411 /* unlink it, then free it */
2412 root->next = sym->next;
2417 delete_symbols(symbol * root, int level, int delete_labels,
2418 int delete_functions)
2422 /* erase only the symbols with a deeper nesting level than the
2423 * specified nesting level */
2427 if (sym->compound < level)
2429 if ((delete_labels || sym->ident != iLABEL)
2430 && (delete_functions || sym->ident != iFUNCTN
2431 || (sym->usage & uNATIVE) != 0) && (delete_functions
2432 || sym->ident != iCONSTEXPR
2433 || (sym->usage & uPREDEF) ==
2434 0) && (delete_functions
2440 root->next = sym->next;
2445 /* if the function was prototyped, but not implemented in this source,
2446 * mark it as such, so that its use can be flagged
2448 if (sym->ident == iFUNCTN && (sym->usage & uDEFINE) == 0)
2449 sym->usage |= uMISSING;
2450 if (sym->ident == iFUNCTN || sym->ident == iVARIABLE
2451 || sym->ident == iARRAY)
2452 sym->usage &= ~uDEFINE; /* clear "defined" flag */
2453 /* for user defined operators, also remove the "prototyped" flag, as
2454 * user-defined operators *must* be declared before use
2456 if (sym->ident == iFUNCTN && !isalpha(*sym->name)
2457 && *sym->name != '_' && *sym->name != PUBLIC_CHAR)
2458 sym->usage &= ~uPROTOTYPED;
2459 root = sym; /* skip the symbol */
2464 /* The purpose of the hash is to reduce the frequency of a "name"
2465 * comparison (which is costly). There is little interest in avoiding
2466 * clusters in similar names, which is why this function is plain simple.
2469 namehash(char *name)
2471 unsigned char *ptr = (unsigned char *)name;
2472 int len = strlen(name);
2477 return (len << 24Lu) + (ptr[0] << 16Lu) + (ptr[len - 1] << 8Lu) +
2482 find_symbol(symbol * root, char *name, int fnumber)
2484 symbol *ptr = root->next;
2485 unsigned long hash = namehash(name);
2489 if (hash == ptr->hash && strcmp(name, ptr->name) == 0
2490 && !ptr->parent && (ptr->fnumber < 0
2491 || ptr->fnumber == fnumber))
2499 find_symbol_child(symbol * root, symbol * sym)
2501 symbol *ptr = root->next;
2505 if (ptr->parent == sym)
2512 /* Adds "bywhom" to the list of referrers of "entry". Typically,
2513 * bywhom will be the function that uses a variable or that calls
2517 refer_symbol(symbol * entry, symbol * bywhom)
2521 assert(!!bywhom); /* it makes no sense to add a "void" referrer */
2523 assert(!!entry->refer);
2525 /* see if it is already there */
2526 for (count = 0; count < entry->numrefers && entry->refer[count] != bywhom;
2529 if (count < entry->numrefers)
2531 assert(entry->refer[count] == bywhom);
2535 /* see if there is an empty spot in the referrer list */
2536 for (count = 0; count < entry->numrefers && entry->refer[count];
2539 assert(count <= entry->numrefers);
2540 if (count == entry->numrefers)
2543 int newsize = 2 * entry->numrefers;
2545 assert(newsize > 0);
2546 /* grow the referrer list */
2547 refer = (symbol **) realloc(entry->refer, newsize * sizeof(symbol *));
2549 return FALSE; /* insufficient memory */
2550 /* initialize the new entries */
2551 entry->refer = refer;
2552 for (count = entry->numrefers; count < newsize; count++)
2553 entry->refer[count] = NULL;
2554 count = entry->numrefers; /* first empty spot */
2555 entry->numrefers = newsize;
2558 /* add the referrer */
2559 assert(!entry->refer[count]);
2560 entry->refer[count] = bywhom;
2565 markusage(symbol * sym, int usage)
2567 sym->usage |= (char)usage;
2568 /* check if (global) reference must be added to the symbol */
2569 if ((usage & (uREAD | uWRITTEN)) != 0)
2571 /* only do this for global symbols */
2572 if (sym->vclass == sGLOBAL)
2574 /* "curfunc" should always be valid, since statements may not occurs
2575 * outside functions; in the case of syntax errors, however, the
2576 * compiler may arrive through this function
2579 refer_symbol(sym, curfunc);
2586 * Returns a pointer to the global symbol (if found) or NULL (if not found)
2591 return find_symbol(&glbtab, name, fcurrent);
2596 * Returns a pointer to the local symbol (if found) or NULL (if not found).
2597 * See add_symbol() how the deepest nesting level is searched first.
2602 return find_symbol(&loctab, name, -1);
2606 findconst(char *name)
2610 sym = find_symbol(&loctab, name, -1); /* try local symbols first */
2611 if (!sym || sym->ident != iCONSTEXPR) /* not found, or not a constant */
2612 sym = find_symbol(&glbtab, name, fcurrent);
2613 if (!sym || sym->ident != iCONSTEXPR)
2615 assert(!sym->parent); /* constants have no hierarchy */
2620 finddepend(symbol * parent)
2624 sym = find_symbol_child(&loctab, parent); /* try local symbols first */
2625 if (!sym) /* not found */
2626 sym = find_symbol_child(&glbtab, parent);
2632 * Adds a symbol to the symbol table (either global or local variables,
2633 * or global and local constants).
2636 addsym(char *name, cell addr, int ident, int vclass, int tag, int usage)
2638 symbol entry, **refer;
2640 /* global variables/constants/functions may only be defined once */
2641 assert(!(ident == iFUNCTN || ident == iCONSTEXPR) || vclass != sGLOBAL
2643 /* labels may only be defined once */
2644 assert(ident != iLABEL || !findloc(name));
2646 /* create an empty referrer list */
2647 if (!(refer = (symbol **)malloc(sizeof(symbol *))))
2649 error(103); /* insufficient memory */
2654 /* first fill in the entry */
2655 strcpy(entry.name, name);
2656 entry.hash = namehash(name);
2658 entry.vclass = (char)vclass;
2659 entry.ident = (char)ident;
2661 entry.usage = (char)usage;
2662 entry.compound = 0; /* may be overridden later */
2663 entry.fnumber = -1; /* assume global visibility (ignored for local symbols) */
2664 entry.numrefers = 1;
2665 entry.refer = refer;
2666 entry.parent = NULL;
2668 /* then insert it in the list */
2669 if (vclass == sGLOBAL)
2670 return add_symbol(&glbtab, &entry, TRUE);
2672 return add_symbol(&loctab, &entry, FALSE);
2676 addvariable(char *name, cell addr, int ident, int vclass, int tag,
2677 int dim[], int numdim, int idxtag[])
2679 symbol *sym, *parent, *top;
2682 /* global variables may only be defined once */
2683 assert(vclass != sGLOBAL || !(sym = findglb(name))
2684 || (sym->usage & uDEFINE) == 0);
2686 if (ident == iARRAY || ident == iREFARRAY)
2689 sym = NULL; /* to avoid a compiler warning */
2690 for (level = 0; level < numdim; level++)
2692 top = addsym(name, addr, ident, vclass, tag, uDEFINE);
2693 top->dim.array.length = dim[level];
2694 top->dim.array.level = (short)(numdim - level - 1);
2695 top->x.idxtag = idxtag[level];
2696 top->parent = parent;
2704 sym = addsym(name, addr, ident, vclass, tag, uDEFINE);
2711 * Return next available internal label number.
2721 * Converts a number to a hexadecimal string and returns a pointer to that
2727 static char itohstr[15]; /* hex number is 10 characters long at most */
2729 int i, nibble[8]; /* a 32-bit hexadecimal cell has 8 nibbles */
2738 for (i = 0; i < max; i += 1)
2740 nibble[i] = (int)(val & 0x0f); /* nibble 0 is lowest nibble */
2744 while (nibble[i] == 0 && i > 0) /* search for highest non-zero nibble */
2748 if (nibble[i] >= 10)
2749 *ptr++ = (char)('a' + (nibble[i] - 10));
2751 *ptr++ = (char)('0' + nibble[i]);
2754 *ptr = '\0'; /* and a zero-terminator */