1 /* Small compiler - File input, preprocessing and lexical analysis functions
3 * Copyright (c) ITB CompuPhase, 1997-2003
5 * This software is provided "as-is", without any express or implied warranty.
6 * In no event will the authors be held liable for any damages arising from
7 * the use of this software.
9 * Permission is granted to anyone to use this software for any purpose,
10 * including commercial applications, and to alter it and redistribute it
11 * freely, subject to the following restrictions:
13 * 1. The origin of this software must not be misrepresented; you must not
14 * claim that you wrote the original software. If you use this software in
15 * a product, an acknowledgment in the product documentation would be
16 * appreciated but is not required.
17 * 2. Altered source versions must be plainly marked as such, and must not be
18 * misrepresented as being the original software.
19 * 3. This notice may not be removed or altered from any source distribution.
35 #include "embryo_cc_sc.h"
38 static int match(char *st, int end);
39 static cell litchar(char **lptr, int rawmode);
40 static int alpha(char c);
42 static int icomment; /* currently in multiline comment? */
43 static int iflevel; /* nesting level if #if/#else/#endif */
44 static int skiplevel; /* level at which we started skipping */
45 static int elsedone; /* level at which we have seen an #else */
46 static char term_expr[] = "";
47 static int listline = -1; /* "current line" for the list file */
51 * Uses a LIFO stack to store information. The stack is used by doinclude(),
52 * doswitch() (to hold the state of "swactive") and some other routines.
54 * Porting note: I made the bold assumption that an integer will not be
55 * larger than a pointer (it may be smaller). That is, the stack element
56 * is typedef'ed as a pointer type, but I also store integers on it. See
59 * Global references: stack,stkidx (private to pushstk() and popstk())
61 static stkitem stack[sSTKMAX];
66 if (stkidx >= sSTKMAX)
67 error(102, "parser stack"); /* stack overflow (recursive include?) */
76 return (stkitem) - 1; /* stack is empty */
82 plungequalifiedfile(char *name)
84 static char *extensions[] = { ".inc", ".sma", ".small" };
92 fp = (FILE *) sc_opensrc(name);
93 ext = strchr(name, '\0'); /* save position */
96 /* try to append an extension */
97 strcpy(ext, extensions[ext_idx]);
98 fp = (FILE *) sc_opensrc(name);
100 *ext = '\0'; /* on failure, restore filename */
105 (ext_idx < (int)(sizeof extensions / sizeof extensions[0])));
108 *ext = '\0'; /* restore filename */
111 pushstk((stkitem) inpf);
112 pushstk((stkitem) inpfname); /* pointer to current file name */
113 pushstk((stkitem) curlibrary);
114 pushstk((stkitem) iflevel);
115 assert(skiplevel == 0);
116 pushstk((stkitem) icomment);
117 pushstk((stkitem) fcurrent);
118 pushstk((stkitem) fline);
119 inpfname = strdup(name); /* set name of include file */
121 error(103); /* insufficient memory */
122 inpf = fp; /* set input file pointer to include file */
124 fline = 0; /* set current line number to 0 */
127 setfile(inpfname, fcurrent);
128 listline = -1; /* force a #line directive when changing the file */
129 setactivefile(fcurrent);
134 plungefile(char *name, int try_currentpath, int try_includepaths)
141 result = plungequalifiedfile(name);
143 if (try_includepaths && name[0] != DIRSEP_CHAR)
145 for (i = 0; !result && (ptr = get_path(i)); i++)
149 strncpy(path, ptr, sizeof path);
150 path[sizeof path - 1] = '\0'; /* force '\0' termination */
151 strncat(path, name, sizeof(path) - strlen(path));
152 path[sizeof path - 1] = '\0';
153 result = plungequalifiedfile(path);
160 check_empty(char *lptr)
162 /* verifies that the string contains only whitespace */
163 while (*lptr <= ' ' && *lptr != '\0')
166 error(38); /* extra characters on line */
171 * Gets the name of an include file, pushes the old file on the stack and
172 * sets some options. This routine doesn't use lex(), since lex() doesn't
173 * recognize file names (and directories).
175 * Global references: inpf (altered)
183 char name[PATH_MAX], c;
186 while (*lptr <= ' ' && *lptr != 0) /* skip leading whitespace */
188 if (*lptr == '<' || *lptr == '\"')
190 c = (char)((*lptr == '\"') ? '\"' : '>'); /* termination character */
192 while (*lptr <= ' ' && *lptr != 0) /* skip whitespace after quote */
201 while ((*lptr != c) && (*lptr != '\0') && (i < (int)(sizeof(name) - 1))) /* find the end of the string */
203 while (i > 0 && name[i - 1] <= ' ')
204 i--; /* strip trailing whitespace */
205 assert((i >= 0) && (i < (int)(sizeof(name))));
206 name[i] = '\0'; /* zero-terminate the string */
209 { /* verify correct string termination */
210 error(37); /* invalid string */
214 check_empty(lptr + 1); /* verify that the rest of the line is whitespace */
216 /* Include files between "..." or without quotes are read from the current
217 * directory, or from a list of "include directories". Include files
218 * between <...> are only read from the list of include directories.
220 result = plungefile(name, (c != '>'), TRUE);
222 error(100, name); /* cannot read from ... (fatal error) */
227 * Reads in a new line from the input file pointed to by "inpf". readline()
228 * concatenates lines that end with a \ with the next line. If no more data
229 * can be read from the file, readline() attempts to pop off the previous file
230 * from the stack. If that fails too, it sets "freading" to 0.
232 * Global references: inpf,fline,inpfname,freading,icomment (altered)
240 if (lptr == term_expr)
246 if (!inpf || sc_eofsrc(inpf))
249 error(49); /* invalid line continuation */
250 if (inpf && inpf != inpf_org)
252 i = (int)(long)popstk();
254 { /* All's done; popstk() returns "stack is empty" */
257 /* when there is nothing more to read, the #if/#else stack should
258 * be empty and we should not be in a comment
260 assert(iflevel >= 0);
262 error(1, "#endif", "-end of file-");
264 error(1, "*/", "-end of file-");
268 fcurrent = (int)(long)popstk();
269 icomment = (int)(long)popstk();
270 assert(skiplevel == 0); /* skiplevel was not stored on stack, because it should always be zero at this point */
271 iflevel = (int)(long)popstk();
272 curlibrary = (constvalue *) popstk();
273 free(inpfname); /* return memory allocated for the include file name */
274 inpfname = (char *)popstk();
275 inpf = (FILE *) popstk();
276 setactivefile(fcurrent);
277 listline = -1; /* force a #line directive when changing the file */
281 if (!sc_readsrc(inpf, line, num))
283 *line = '\0'; /* delete line */
288 /* check whether to erase leading spaces */
293 while (*ptr == ' ' || *ptr == '\t')
296 memmove(line, ptr, strlen(ptr) + 1);
299 /* check whether a full line was read */
300 if (!strchr(line, '\n') && !sc_eofsrc(inpf))
301 error(75); /* line too long */
302 /* check if the next line must be concatenated to this line */
303 if ((ptr = strchr(line, '\n')) && ptr > line)
305 assert(*(ptr + 1) == '\0'); /* '\n' should be last in the string */
307 && (*ptr == '\n' || *ptr == ' ' || *ptr == '\t'))
308 ptr--; /* skip trailing whitespace */
312 /* set '\a' at the position of '\\' to make it possible to check
313 * for a line continuation in a single line comment (error 49)
316 *ptr = '\0'; /* erase '\n' (and any trailing whitespace) */
320 line += strlen(line);
324 while (num >= 0 && cont);
329 * Replaces all comments from the line by space characters. It updates
330 * a global variable ("icomment") for multiline comments.
332 * This routine also supports the C++ extension for single line comments.
333 * These comments are started with "//" and end at the end of the line.
335 * Global references: icomment (private to "stripcom")
346 if (*line == '*' && *(line + 1) == '/')
348 icomment = FALSE; /* comment has ended */
349 *line = ' '; /* replace '*' and '/' characters by spaces */
355 if (*line == '/' && *(line + 1) == '*')
356 error(216); /* nested comment */
357 *line = ' '; /* replace comments by spaces */
363 if (*line == '/' && *(line + 1) == '*')
365 icomment = TRUE; /* start comment */
366 *line = ' '; /* replace '/' and '*' characters by spaces */
370 else if (*line == '/' && *(line + 1) == '/')
371 { /* comment to end of line */
372 if (strchr(line, '\a'))
373 error(49); /* invalid line continuation */
374 *line++ = '\n'; /* put "newline" at first slash */
375 *line = '\0'; /* put "zero-terminator" at second slash */
379 if (*line == '\"' || *line == '\'')
380 { /* leave literals unaltered */
381 c = *line; /* ending quote, single or double */
383 while ((*line != c || *(line - 1) == '\\')
386 line += 1; /* skip final quote */
399 * Attempts to interpret a numeric symbol as a boolean value. On success
400 * it returns the number of characters processed (so the line pointer can be
401 * adjusted) and the value is stored in "val". Otherwise it returns 0 and
404 * A boolean value must start with "0b"
407 btoi(cell * val, char *curptr)
413 if (*ptr == '0' && *(ptr + 1) == 'b')
416 while (*ptr == '0' || *ptr == '1' || *ptr == '_')
419 *val = (*val << 1) | (*ptr - '0');
427 if (alphanum(*ptr)) /* number must be delimited by non-alphanumeric char */
430 return (int)(ptr - curptr);
435 * Attempts to interpret a numeric symbol as a decimal value. On success
436 * it returns the number of characters processed and the value is stored in
437 * "val". Otherwise it returns 0 and "val" is garbage.
440 dtoi(cell * val, char *curptr)
446 if (!isdigit(*ptr)) /* should start with digit */
448 while (isdigit(*ptr) || *ptr == '_')
451 *val = (*val * 10) + (*ptr - '0');
454 if (alphanum(*ptr)) /* number must be delimited by non-alphanumerical */
456 if (*ptr == '.' && isdigit(*(ptr + 1)))
457 return 0; /* but a fractional part must not be present */
458 return (int)(ptr - curptr);
463 * Attempts to interpret a numeric symbol as a hexadecimal value. On
464 * success it returns the number of characters processed and the value is
465 * stored in "val". Otherwise it return 0 and "val" is garbage.
468 htoi(cell * val, char *curptr)
474 if (!isdigit(*ptr)) /* should start with digit */
476 if (*ptr == '0' && *(ptr + 1) == 'x')
477 { /* C style hexadecimal notation */
479 while (isxdigit(*ptr) || *ptr == '_')
483 assert(isxdigit(*ptr));
486 *val += (*ptr - '0');
488 *val += (tolower(*ptr) - 'a' + 10);
500 return (int)(ptr - curptr);
530 * Attempts to interpret a numeric symbol as a rational number, either as
531 * IEEE 754 single precision floating point or as a fixed point integer.
532 * On success it returns the number of characters processed and the value is
533 * stored in "val". Otherwise it returns 0 and "val" is unchanged.
535 * Small has stricter definition for floating point numbers than most:
536 * o the value must start with a digit; ".5" is not a valid number, you
538 * o a period must appear in the value, even if an exponent is given; "2e3"
539 * is not a valid number, you should write "2.0e3"
540 * o at least one digit must follow the period; "6." is not a valid number,
541 * you should write "6.0"
544 ftoi(cell * val, char *curptr)
547 double fnum, ffrac, fmult;
548 unsigned long dnum, dbase;
551 assert(rational_digits >= 0 && rational_digits < 9);
552 for (i = 0, dbase = 1; i < rational_digits; i++)
557 if (!isdigit(*ptr)) /* should start with digit */
559 while (isdigit(*ptr) || *ptr == '_')
563 fnum = (fnum * 10.0) + (*ptr - '0');
564 dnum = (dnum * 10L) + (*ptr - '0') * dbase;
569 return 0; /* there must be a period */
571 if (!isdigit(*ptr)) /* there must be at least one digit after the dot */
576 while (isdigit(*ptr) || *ptr == '_')
580 ffrac = (ffrac * 10.0) + (*ptr - '0');
581 fmult = fmult / 10.0;
583 dnum += (*ptr - '0') * dbase;
584 if (dbase == 0L && sc_rationaltag && rational_digits > 0
587 error(222); /* number of digits exceeds rational number precision */
593 fnum += ffrac * fmult; /* form the number so far */
595 { /* optional fractional part */
608 if (!isdigit(*ptr)) /* 'e' should be followed by a digit */
611 while (isdigit(*ptr))
613 exp = (exp * 10) + (*ptr - '0');
617 fmult = pow10(exp * sign);
619 fmult = pow(10, exp * sign);
622 dnum *= (unsigned long)(fmult + 0.5);
625 /* decide how to store the number */
626 if (sc_rationaltag == 0)
628 error(70); /* rational number support was not enabled */
631 else if (rational_digits == 0)
633 float f = (float) fnum;
635 *val = EMBRYO_FLOAT_TO_CELL(f);
637 /* I assume that the C/C++ compiler stores "float" values in IEEE 754
638 * format (as mandated in the ANSI standard). Test this assumption anyway.
641 float test1 = 0.0, test2 = 50.0;
642 Embryo_Cell c1 = EMBRYO_FLOAT_TO_CELL(test1);
643 Embryo_Cell c2 = EMBRYO_FLOAT_TO_CELL(test2);
645 if (c1 != 0x00000000L)
648 "embryo_cc: WARNING! you compiler does not SEEM to interpret IEEE floating\n"
649 "point math as embryo expects. this could be bad.\n"
651 "(float 0.0 != 0x00000000 bitpattern, 0x%08x instead)\n"
653 "this could be an issue with you compiling embryo with gcc 3.2.x that seems\n"
654 "to trigger this sanity check. we are not sure why yet, but gcc 3.3.x works\n"
657 else if (c2 != 0x42480000L)
660 "embryo_cc: WARNING! you compiler does not SEEM to interpret IEEE floating\n"
661 "point math as embryo expects. This could be bad.\n"
663 "(float 50.0 != 0x42480000 bitpattern, 0x%08x instead)\n"
665 "this could be an issue with you compiling embryo with gcc 3.2.x that seems\n"
666 "to trigger this sanity check. we are not sure why yet, but gcc 3.3.x works\n"
678 return (int)(ptr - curptr);
683 * Reads in a number (binary, decimal or hexadecimal). It returns the number
684 * of characters processed or 0 if the symbol couldn't be interpreted as a
685 * number (in this case the argument "val" remains unchanged). This routine
686 * relies on the 'early dropout' implementation of the logical or (||)
689 * Note: the routine doesn't check for a sign (+ or -). The - is checked
690 * for at "hier2()" (in fact, it is viewed as an operator, not as a
691 * sign) and the + is invalid (as in K&R C, and unlike ANSI C).
694 number(cell * val, char *curptr)
699 if ((i = btoi(&value, curptr)) != 0 /* binary? */
700 || (i = htoi(&value, curptr)) != 0 /* hexadecimal? */
701 || (i = dtoi(&value, curptr)) != 0) /* decimal? */
708 return 0; /* else not a number */
713 chrcat(char *str, char chr)
715 str = strchr(str, '\0');
721 preproc_expr(cell * val, int *tag)
728 /* Disable staging; it should be disabled already because
729 * expressions may not be cut off half-way between conditional
730 * compilations. Reset the staging index, but keep the code
733 if (stgget(&idx, &code_index))
735 error(57); /* unfinished expression */
736 stgdel(0, code_index);
739 /* append a special symbol to the string, so the expression
740 * analyzer won't try to read a next line when it encounters
743 assert(strlen(pline) < sLINEMAX);
744 term = strchr(pline, '\0');
745 assert(term != NULL);
746 chrcat(pline, PREPROC_TERM); /* the "DEL" code (see SC.H) */
747 result = constexpr(val, tag); /* get value (or 0 on error) */
748 *term = '\0'; /* erase the token (if still present) */
749 lexclr(FALSE); /* clear any "pushed" tokens */
754 * Returns returns a pointer behind the closing quote or to the other
755 * character that caused the input to be ended.
758 getstring(char *dest, int max)
760 assert(dest != NULL);
762 while (*lptr <= ' ' && *lptr != '\0')
763 lptr++; /* skip whitespace */
766 error(37); /* invalid string */
773 while (*lptr != '"' && *lptr != '\0')
781 lptr++; /* skip closing " */
783 error(37); /* invalid string */
802 * Recognizes the compiler directives. The function returns:
803 * CMD_NONE the line must be processed
804 * CMD_TERM a pending expression must be completed before processing further lines
805 * Other value: the line must be skipped, because:
806 * CMD_CONDFALSE false "#if.." code
807 * CMD_EMPTYLINE line is empty
808 * CMD_INCLUDE the line contains a #include directive
809 * CMD_DEFINE the line contains a #subst directive
810 * CMD_IF the line contains a #if/#else/#endif directive
811 * CMD_DIRECTIVE the line contains some other compiler directive
813 * Global variables: iflevel, skiplevel, elsedone (altered)
825 while (*lptr <= ' ' && *lptr != '\0')
828 return CMD_EMPTYLINE; /* empty line */
830 return skiplevel > 0 ? CMD_CONDFALSE : CMD_NONE; /* it is not a compiler directive */
831 /* compiler directive found */
832 indent_nowarn = TRUE; /* allow loose indentation" */
833 lexclr(FALSE); /* clear any "pushed" tokens */
834 /* on a pending expression, force to return a silent ';' token and force to
837 if (!sc_needsemicolon && stgget(&idx, &code_index))
842 tok = lex(&val, &str);
843 ret = skiplevel > 0 ? CMD_CONDFALSE : CMD_DIRECTIVE; /* preset 'ret' to CMD_DIRECTIVE (most common case) */
846 case tpIF: /* conditional compilation */
850 break; /* break out of switch */
851 preproc_expr(&val, NULL); /* get value (or 0 on error) */
858 if (iflevel == 0 && skiplevel == 0)
860 error(26); /* no matching #if */
865 if (elsedone == iflevel)
866 error(60); /* multiple #else directives between #if ... #endif */
868 if (skiplevel == iflevel)
870 else if (skiplevel == 0)
875 #if 0 /* ??? *really* need to use a stack here */
878 if (iflevel == 0 && skiplevel == 0)
880 error(26); /* no matching #if */
883 else if (elsedone == iflevel)
885 error(61); /* #elseif directive may not follow an #else */
890 preproc_expr(&val, NULL); /* get value (or 0 on error) */
892 skiplevel = iflevel; /* we weren't skipping, start skipping now */
894 skiplevel = 0; /* we were skipping, condition is valid -> stop skipping */
895 /* else: we were skipping and condition is invalid -> keep skipping */
902 if (iflevel == 0 && skiplevel == 0)
909 if (skiplevel == iflevel)
911 if (elsedone == iflevel)
912 elsedone = 0; /* ??? actually, should build a stack of #if/#endif and keep
913 * the state whether an #else was seen per nesting level */
918 case tINCLUDE: /* #include directive */
926 char pathname[PATH_MAX];
928 lptr = getstring(pathname, sizeof pathname);
929 if (pathname[0] != '\0')
932 inpfname = strdup(pathname);
934 error(103); /* insufficient memory */
942 if (lex(&val, &str) != tNUMBER)
943 error(8); /* invalid/non-constant expression */
946 while (*lptr == ' ' && *lptr != '\0')
947 lptr++; /* skip whitespace */
950 char pathname[PATH_MAX];
952 lptr = getstring(pathname, sizeof pathname);
953 if (pathname[0] != '\0')
956 inpfname = strdup(pathname);
958 error(103); /* insufficient memory */
965 if (skiplevel == 0 && (sc_debug & sCHKBOUNDS) != 0)
967 preproc_expr(&val, NULL); /* get constant expression (or 0 on error) */
969 error(7); /* assertion failed */
976 if (lex(&val, &str) == tSYMBOL)
978 if (strcmp(str, "ctrlchar") == 0)
980 if (lex(&val, &str) != tNUMBER)
981 error(27); /* invalid character constant */
982 sc_ctrlchar = (char)val;
984 else if (strcmp(str, "compress") == 0)
988 preproc_expr(&val, NULL);
989 sc_compress = (int)val; /* switch code packing on/off */
991 else if (strcmp(str, "dynamic") == 0)
993 preproc_expr(&sc_stksize, NULL);
995 else if (strcmp(str, "library") == 0)
997 char name[sNAMEMAX + 1];
999 while (*lptr <= ' ' && *lptr != '\0')
1003 lptr = getstring(name, sizeof name);
1010 (i < (int)(sizeof(name))) &&
1016 if (name[0] == '\0')
1022 if (strlen(name) > sEXPMAX)
1023 error(220, name, sEXPMAX); /* exported symbol is truncated */
1024 /* add the name if it does not yet exist in the table */
1025 if (!find_constval(&libname_tab, name, 0))
1027 append_constval(&libname_tab, name, 0, 0);
1030 else if (strcmp(str, "pack") == 0)
1034 preproc_expr(&val, NULL); /* default = packed/unpacked */
1035 sc_packstr = (int)val;
1037 else if (strcmp(str, "rational") == 0)
1039 char name[sNAMEMAX + 1];
1043 /* first gather all information, start with the tag name */
1044 while ((*lptr <= ' ') && (*lptr != '\0'))
1047 (i < (int)(sizeof(name))) &&
1052 /* then the precision (for fixed point arithmetic) */
1053 while (*lptr <= ' ' && *lptr != '\0')
1057 preproc_expr(&digits, NULL);
1058 if (digits <= 0 || digits > 9)
1060 error(68); /* invalid rational number precision */
1066 /* add the tag (make it public) and check the values */
1067 i = sc_addtag(name);
1069 if (sc_rationaltag == 0
1070 || (sc_rationaltag == i
1071 && rational_digits == (int)digits))
1074 rational_digits = (int)digits;
1078 error(69); /* rational number format already set, can only be set once */
1081 else if (strcmp(str, "semicolon") == 0)
1085 preproc_expr(&val, NULL);
1086 sc_needsemicolon = (int)val;
1088 else if (strcmp(str, "tabsize") == 0)
1092 preproc_expr(&val, NULL);
1093 sc_tabsize = (int)val;
1095 else if (strcmp(str, "align") == 0)
1097 sc_alignnext = TRUE;
1099 else if (strcmp(str, "unused") == 0)
1101 char name[sNAMEMAX + 1];
1108 while ((*lptr <= ' ') && (*lptr != '\0'))
1111 (i < (int)(sizeof(name))) &&
1116 /* get the symbol */
1117 sym = findloc(name);
1119 sym = findglb(name);
1122 sym->usage |= uREAD;
1123 if (sym->ident == iVARIABLE
1124 || sym->ident == iREFERENCE
1125 || sym->ident == iARRAY
1126 || sym->ident == iREFARRAY)
1127 sym->usage |= uWRITTEN;
1131 error(17, name); /* undefined symbol */
1133 /* see if a comma follows the name */
1134 while (*lptr <= ' ' && *lptr != '\0')
1136 comma = (*lptr == ',');
1144 error(207); /* unknown #pragma */
1149 error(207); /* unknown #pragma */
1159 assert(inpf != NULL);
1160 if (inpf != inpf_org)
1168 /* write opcode to output file */
1172 while (*lptr <= ' ' && *lptr != '\0')
1174 for (i = 0; i < 40 && (isalpha(*lptr) || *lptr == '.'); i++, lptr++)
1175 name[i] = (char)tolower(*lptr);
1180 code_idx += opcodes(1);
1181 /* write parameter (if any) */
1182 while (*lptr <= ' ' && *lptr != '\0')
1188 tok = lex(&val, &str);
1194 code_idx += opargs(1);
1200 if (!sym || (sym->ident != iFUNCTN
1201 && sym->ident != iREFFUNC
1202 && (sym->usage & uDEFINE) == 0))
1204 error(17, str); /* undefined symbol */
1208 outval(sym->addr, FALSE);
1209 /* mark symbol as "used", unknown whether for read or write */
1210 markusage(sym, uREAD | uWRITTEN);
1211 code_idx += opargs(1);
1217 extern char *sc_tokens[]; /* forward declaration */
1220 sprintf(s2, "%c", (char)tok);
1222 strcpy(s2, sc_tokens[tok - tFIRST]);
1223 error(1, sc_tokens[tSYMBOL - tFIRST], s2);
1233 #if !defined NO_DEFINE
1239 char *pattern, *substitution;
1241 int count, prefixlen;
1244 /* find the pattern to match */
1245 while (*lptr <= ' ' && *lptr != '\0')
1247 start = lptr; /* save starting point of the match pattern */
1249 while (*lptr > ' ' && *lptr != '\0')
1251 litchar(&lptr, FALSE); /* litchar() advances "lptr" and handles escape characters */
1255 /* check pattern to match */
1256 if (!isalpha(*start) && *start != '_')
1258 error(74); /* pattern must start with an alphabetic character */
1261 /* store matched pattern */
1262 pattern = malloc(count + 1);
1264 error(103); /* insufficient memory */
1270 assert(*lptr != '\0');
1271 pattern[count++] = (char)litchar(&lptr, FALSE);
1273 pattern[count] = '\0';
1274 /* special case, erase trailing variable, because it could match anything */
1275 if (count >= 2 && isdigit(pattern[count - 1])
1276 && pattern[count - 2] == '%')
1277 pattern[count - 2] = '\0';
1278 /* find substitution string */
1279 while (*lptr <= ' ' && *lptr != '\0')
1281 start = lptr; /* save starting point of the match pattern */
1284 while (*lptr != '\0')
1286 /* keep position of the start of trailing whitespace */
1301 /* store matched substitution */
1302 substitution = malloc(count + 1); /* +1 for '\0' */
1304 error(103); /* insufficient memory */
1310 assert(*lptr != '\0');
1311 substitution[count++] = *lptr++;
1313 substitution[count] = '\0';
1314 /* check whether the definition already exists */
1315 for (prefixlen = 0, start = pattern;
1316 isalpha(*start) || isdigit(*start) || *start == '_';
1317 prefixlen++, start++)
1319 assert(prefixlen > 0);
1320 if ((def = find_subst(pattern, prefixlen)))
1322 if (strcmp(def->first, pattern) != 0
1323 || strcmp(def->second, substitution) != 0)
1324 error(201, pattern); /* redefinition of macro (non-identical) */
1325 delete_subst(pattern, prefixlen);
1327 /* add the pattern/substitution pair to the list */
1328 assert(pattern[0] != '\0');
1329 insert_subst(pattern, substitution, prefixlen);
1338 if (lex(&val, &str) == tSYMBOL)
1340 if (!delete_subst(str, strlen(str)))
1341 error(17, str); /* undefined symbol */
1345 error(20, str); /* invalid symbol name */
1352 error(31); /* unknown compiler directive */
1353 ret = skiplevel > 0 ? CMD_DIRECTIVE : CMD_NONE; /* line must be processed (if skiplevel==0) */
1358 #if !defined NO_DEFINE
1360 is_startstring(char *string)
1362 if (*string == '\"' || *string == '\'')
1363 return TRUE; /* "..." */
1368 if (*string == '\"' || *string == '\'')
1369 return TRUE; /* !"..." */
1370 if (*string == sc_ctrlchar)
1373 if (*string == '\"' || *string == '\'')
1374 return TRUE; /* !\"..." */
1377 else if (*string == sc_ctrlchar)
1380 if (*string == '\"' || *string == '\'')
1381 return TRUE; /* \"..." */
1385 if (*string == '\"' || *string == '\'')
1386 return TRUE; /* \!"..." */
1394 skipstring(char *string)
1397 int rawstring = FALSE;
1399 while (*string == '!' || *string == sc_ctrlchar)
1401 rawstring = (*string == sc_ctrlchar);
1406 assert(endquote == '\"' || endquote == '\'');
1407 string++; /* skip open quote */
1408 while (*string != endquote && *string != '\0')
1409 litchar(&string, rawstring);
1414 skippgroup(char *string)
1417 char open = *string;
1436 close = '\0'; /* only to avoid a compiler warning */
1440 while (*string != close || nest > 0)
1442 if (*string == open)
1444 else if (*string == close)
1446 else if (is_startstring(string))
1447 string = skipstring(string);
1448 if (*string == '\0')
1456 strdel(char *str, size_t len)
1458 size_t length = strlen(str);
1462 memmove(str, str + len, length - len + 1); /* include EOS byte */
1467 strins(char *dest, char *src, size_t srclen)
1469 size_t destlen = strlen(dest);
1471 assert(srclen <= strlen(src));
1472 memmove(dest + srclen, dest, destlen + 1); /* include EOS byte */
1473 memcpy(dest, src, srclen);
1478 substpattern(char *line, size_t buffersize, char *pattern, char *substitution)
1481 char *p, *s, *e, *args[10];
1482 int match, arg, len;
1484 memset(args, 0, sizeof args);
1486 /* check the length of the prefix */
1487 for (prefixlen = 0, s = pattern; isalpha(*s) || isdigit(*s) || *s == '_';
1490 assert(prefixlen > 0);
1491 assert(strncmp(line, pattern, prefixlen) == 0);
1493 /* pattern prefix matches; match the rest of the pattern, gather
1496 s = line + prefixlen;
1497 p = pattern + prefixlen;
1498 match = TRUE; /* so far, pattern matches */
1499 while (match && *s != '\0' && *p != '\0')
1507 assert(arg >= 0 && arg <= 9);
1508 p++; /* skip parameter id */
1510 /* match the source string up to the character after the digit
1511 * (skipping strings in the process
1514 while (*e != *p && *e != '\0' && *e != '\n')
1516 if (is_startstring(e)) /* skip strings */
1518 else if (strchr("({[", *e)) /* skip parenthized groups */
1521 e++; /* skip non-alphapetic character (or closing quote of
1522 * a string, or the closing paranthese of a group) */
1524 /* store the parameter (overrule any earlier) */
1528 args[arg] = malloc(len + 1);
1530 error(103); /* insufficient memory */
1531 strncpy(args[arg], s, len);
1532 args[arg][len] = '\0';
1533 /* character behind the pattern was matched too */
1538 else if (*e == '\n' && *p == ';' && *(p + 1) == '\0'
1539 && !sc_needsemicolon)
1541 s = e; /* allow a trailing ; in the pattern match to end of line */
1545 assert(*e == '\0' || *e == '\n');
1556 else if (*p == ';' && *(p + 1) == '\0' && !sc_needsemicolon)
1558 /* source may be ';' or end of the line */
1559 while (*s <= ' ' && *s != '\0')
1560 s++; /* skip white space */
1561 if (*s != ';' && *s != '\0')
1563 p++; /* skip the semicolon in the pattern */
1569 /* skip whitespace between two non-alphanumeric characters, except
1570 * for two identical symbols
1572 assert(p > pattern);
1573 if (!alphanum(*p) && *(p - 1) != *p)
1574 while (*s <= ' ' && *s != '\0')
1575 s++; /* skip white space */
1576 ch = litchar(&p, FALSE); /* this increments "p" */
1580 s++; /* this character matches */
1584 if (match && *p == '\0')
1586 /* if the last character to match is an alphanumeric character, the
1587 * current character in the source may not be alphanumeric
1589 assert(p > pattern);
1590 if (alphanum(*(p - 1)) && alphanum(*s))
1596 /* calculate the length of the substituted string */
1597 for (e = substitution, len = 0; *e != '\0'; e++)
1599 if (*e == '%' && isdigit(*(e + 1)))
1601 arg = *(e + 1) - '0';
1602 assert(arg >= 0 && arg <= 9);
1604 len += strlen(args[arg]);
1605 e++; /* skip %, digit is skipped later */
1612 /* check length of the string after substitution */
1613 if (strlen(line) + len - (int)(s - line) > buffersize)
1615 error(75); /* line too long */
1619 /* substitute pattern */
1620 strdel(line, (int)(s - line));
1621 for (e = substitution, s = line; *e != '\0'; e++)
1623 if (*e == '%' && isdigit(*(e + 1)))
1625 arg = *(e + 1) - '0';
1626 assert(arg >= 0 && arg <= 9);
1629 strins(s, args[arg], strlen(args[arg]));
1630 s += strlen(args[arg]);
1632 e++; /* skip %, digit is skipped later */
1643 for (arg = 0; arg < 10; arg++)
1651 substallpatterns(char *line, int buffersize)
1658 while (*start != '\0')
1660 /* find the start of a prefix (skip all non-alphabetic characters),
1663 while (!isalpha(*start) && *start != '_' && *start != '\0')
1666 if (is_startstring(start))
1668 start = skipstring(start);
1670 break; /* abort loop on error */
1672 start++; /* skip non-alphapetic character (or closing quote of a string) */
1675 break; /* abort loop on error */
1676 /* get the prefix (length), look for a matching definition */
1679 while (isalpha(*end) || isdigit(*end) || *end == '_')
1684 assert(prefixlen > 0);
1685 subst = find_subst(start, prefixlen);
1688 /* properly match the pattern and substitute */
1690 (start, buffersize - (start - line), subst->first,
1692 start = end; /* match failed, skip this prefix */
1693 /* match succeeded: do not update "start", because the substitution text
1694 * may be matched by other macros
1699 start = end; /* no macro with this prefix, skip this prefix */
1707 * Reads a line by readline() into "pline" and performs basic preprocessing:
1708 * deleting comments, skipping lines with false "#if.." code and recognizing
1709 * other compiler directives. There is an indirect recursion: lex() calls
1710 * preprocess() if a new line must be read, preprocess() calls command(),
1711 * which at his turn calls lex() to identify the token.
1713 * Global references: lptr (altered)
1715 * freading (referred to only)
1727 stripcom(pline); /* ??? no need for this when reading back from list file (in the second pass) */
1728 lptr = pline; /* set "line pointer" to start of the parsing buffer */
1729 iscommand = command();
1730 if (iscommand != CMD_NONE)
1731 errorset(sRESET); /* reset error flag ("panic mode") on empty line or directive */
1732 #if !defined NO_DEFINE
1733 if (iscommand == CMD_NONE)
1735 assert(lptr != term_expr);
1736 substallpatterns(pline, sLINEMAX);
1737 lptr = pline; /* reset "line pointer" to start of the parsing buffer */
1741 while (iscommand != CMD_NONE && iscommand != CMD_TERM && freading); /* enddo */
1745 unpackedstring(char *lptr, int rawstring)
1747 while (*lptr != '\0')
1749 /* check for doublequotes indicating the end of the string */
1752 /* check whether there's another pair of quotes following.
1753 * If so, paste the two strings together, thus
1754 * "pants""off" becomes "pantsoff"
1756 if (*(lptr + 1) == '\"')
1763 { /* ignore '\a' (which was inserted at a line concatenation) */
1767 stowlit(litchar(&lptr, rawstring)); /* litchar() alters "lptr" */
1769 stowlit(0); /* terminate string */
1774 packedstring(char *lptr, int rawstring)
1779 i = sizeof(ucell) - (charbits / 8); /* start at most significant byte */
1781 while (*lptr != '\0')
1783 /* check for doublequotes indicating the end of the string */
1786 /* check whether there's another pair of quotes following.
1787 * If so, paste the two strings together, thus
1788 * "pants""off" becomes "pantsoff"
1790 if (*(lptr + 1) == '\"')
1797 { /* ignore '\a' (which was inserted at a line concatenation) */
1801 c = litchar(&lptr, rawstring); /* litchar() alters "lptr" */
1802 if (c >= (ucell) (1 << charbits))
1803 error(43); /* character constant exceeds range */
1804 val |= (c << 8 * i);
1810 i = (i + sizeof(ucell) - (charbits / 8)) % sizeof(ucell);
1812 /* save last code; make sure there is at least one terminating zero character */
1813 if (i != (int)(sizeof(ucell) - (charbits / 8)))
1814 stowlit(val); /* at least one zero character in "val" */
1816 stowlit(0); /* add full cell of zeros */
1820 /* lex(lexvalue,lexsym) Lexical Analysis
1822 * lex() first deletes leading white space, then checks for multi-character
1823 * operators, keywords (including most compiler directives), numbers,
1824 * labels, symbols and literals (literal characters are converted to a number
1825 * and are returned as such). If every check fails, the line must contain
1826 * a single-character operator. So, lex() returns this character. In the other
1827 * case (something did match), lex() returns the number of the token. All
1828 * these tokens have been assigned numbers above 255.
1830 * Some tokens have "attributes":
1831 * tNUMBER the value of the number is return in "lexvalue".
1832 * tRATIONAL the value is in IEEE 754 encoding or in fixed point
1833 * encoding in "lexvalue".
1834 * tSYMBOL the first sNAMEMAX characters of the symbol are
1835 * stored in a buffer, a pointer to this buffer is
1836 * returned in "lexsym".
1837 * tLABEL the first sNAMEMAX characters of the label are
1838 * stored in a buffer, a pointer to this buffer is
1839 * returned in "lexsym".
1840 * tSTRING the string is stored in the literal pool, the index
1841 * in the literal pool to this string is stored in
1844 * lex() stores all information (the token found and possibly its attribute)
1845 * in global variables. This allows a token to be examined twice. If "_pushed"
1846 * is true, this information is returned.
1848 * Global references: lptr (altered)
1849 * fline (referred to only)
1850 * litidx (referred to only)
1851 * _lextok, _lexval, _lexstr
1857 static cell _lexval;
1858 static char _lexstr[sLINEMAX + 1];
1859 static int _lexnewline;
1864 stkidx = 0; /* index for pushstk() and popstk() */
1865 iflevel = 0; /* preprocessor: nesting of "#if" */
1866 skiplevel = 0; /* preprocessor: skipping lines or compiling lines */
1867 icomment = FALSE; /* currently not in a multiline comment */
1868 _pushed = FALSE; /* no token pushed back into lex */
1869 _lexnewline = FALSE;
1872 char *sc_tokens[] = {
1873 "*=", "/=", "%=", "+=", "-=", "<<=", ">>>=", ">>=", "&=", "^=", "|=",
1874 "||", "&&", "==", "!=", "<=", ">=", "<<", ">>>", ">>", "++", "--",
1876 "assert", "break", "case", "char", "const", "continue", "default",
1877 "defined", "do", "else", "enum", "exit", "for", "forward", "goto",
1878 "if", "native", "new", "operator", "public", "return", "sizeof",
1879 "sleep", "static", "stock", "switch", "tagof", "while",
1880 "#assert", "#define", "#else", "#emit", "#endif", "#endinput",
1881 "#endscript", "#file", "#if", "#include", "#line", "#pragma", "#undef",
1882 ";", ";", "-integer value-", "-rational value-", "-identifier-",
1883 "-label-", "-string-"
1887 lex(cell * lexvalue, char **lexsym)
1889 int i, toolong, newline, rawstring;
1894 _pushed = FALSE; /* reset "_pushed" flag */
1895 *lexvalue = _lexval;
1900 _lextok = 0; /* preset all values */
1903 *lexvalue = _lexval;
1905 _lexnewline = FALSE;
1909 newline = (lptr == pline); /* does lptr point to start of line buffer */
1910 while (*lptr <= ' ')
1911 { /* delete leading white space */
1914 preprocess(); /* preprocess resets "lptr" */
1917 if (lptr == term_expr) /* special sequence to terminate a pending expression */
1918 return (_lextok = tENDEXPR);
1919 _lexnewline = TRUE; /* set this after preprocess(), because
1920 * preprocess() calls lex() recursively */
1931 for (i = 0; i < (int)(lptr - pline); i++)
1932 if (pline[i] == '\t' && sc_tabsize > 0)
1934 (int)(sc_tabsize - (stmtindent + sc_tabsize) % sc_tabsize);
1941 while (i <= tMIDDLE)
1942 { /* match multi-character operators */
1943 if (match(*tokptr, FALSE))
1952 { /* match reserved words and compiler directives */
1953 if (match(*tokptr, TRUE))
1956 errorset(sRESET); /* reset error flag (clear the "panic mode") */
1963 if ((i = number(&_lexval, lptr)) != 0)
1966 *lexvalue = _lexval;
1969 else if ((i = ftoi(&_lexval, lptr)) != 0)
1971 _lextok = tRATIONAL;
1972 *lexvalue = _lexval;
1975 else if (alpha(*lptr))
1976 { /* symbol or label */
1977 /* Note: only sNAMEMAX characters are significant. The compiler
1978 * generates a warning if a symbol exceeds this length.
1983 while (alphanum(*lptr))
1994 error(200, _lexstr, sNAMEMAX); /* symbol too long, truncated to sNAMEMAX chars */
1995 if (_lexstr[0] == PUBLIC_CHAR && _lexstr[1] == '\0')
1997 _lextok = PUBLIC_CHAR; /* '@' all alone is not a symbol, it is an operator */
1999 else if (_lexstr[0] == '_' && _lexstr[1] == '\0')
2001 _lextok = '_'; /* '_' by itself is not a symbol, it is a placeholder */
2003 if (*lptr == ':' && sc_allowtags && _lextok != PUBLIC_CHAR)
2005 _lextok = tLABEL; /* it wasn't a normal symbol, it was a label/tagname */
2006 lptr += 1; /* skip colon */
2009 else if (*lptr == '\"' || (*lptr == sc_ctrlchar && *(lptr + 1) == '\"'))
2010 { /* unpacked string literal */
2012 rawstring = (*lptr == sc_ctrlchar);
2013 *lexvalue = _lexval = litidx;
2014 lptr += 1; /* skip double quote */
2016 lptr += 1; /* skip "escape" character too */
2018 sc_packstr ? packedstring(lptr, rawstring) : unpackedstring(lptr,
2021 lptr += 1; /* skip final quote */
2023 error(37); /* invalid (non-terminated) string */
2025 else if ((*lptr == '!' && *(lptr + 1) == '\"')
2026 || (*lptr == '!' && *(lptr + 1) == sc_ctrlchar && *(lptr + 2) == '\"')
2027 || (*lptr == sc_ctrlchar && *(lptr + 1) == '!'
2028 && *(lptr + 2) == '\"'))
2029 { /* packed string literal */
2031 rawstring = (*lptr == sc_ctrlchar || *(lptr + 1) == sc_ctrlchar);
2032 *lexvalue = _lexval = litidx;
2033 lptr += 2; /* skip exclamation point and double quote */
2035 lptr += 1; /* skip "escape" character too */
2037 sc_packstr ? unpackedstring(lptr, rawstring) : packedstring(lptr,
2040 lptr += 1; /* skip final quote */
2042 error(37); /* invalid (non-terminated) string */
2044 else if (*lptr == '\'')
2045 { /* character literal */
2046 lptr += 1; /* skip quote */
2048 *lexvalue = _lexval = litchar(&lptr, FALSE);
2050 lptr += 1; /* skip final quote */
2052 error(27); /* invalid character constant (must be one character) */
2054 else if (*lptr == ';')
2055 { /* semicolumn resets "error" flag */
2058 errorset(sRESET); /* reset error flag (clear the "panic mode") */
2062 _lextok = *lptr; /* if every match fails, return the character */
2063 lptr += 1; /* increase the "lptr" pointer */
2070 * Pushes a token back, so the next call to lex() will return the token
2071 * last examined, instead of a new token.
2073 * Only one token can be pushed back.
2075 * In fact, lex() already stores the information it finds into global
2076 * variables, so all that is to be done is set a flag that informs lex()
2077 * to read and return the information from these variables, rather than
2078 * to read in a new token from the input file.
2083 assert(_pushed == FALSE);
2089 * Sets the variable "_pushed" to 0 to make sure lex() will read in a new
2090 * symbol (a not continue with some old one). This is required upon return
2091 * from Assembler mode.
2099 lptr = strchr(pline, '\0');
2100 assert(lptr != NULL);
2106 * This routine is useful if only a simple check is needed. If the token
2107 * differs from the one expected, it is pushed back.
2110 matchtoken(int token)
2116 tok = lex(&val, &str);
2117 if (tok == token || (token == tTERM && (tok == ';' || tok == tENDEXPR)))
2121 else if (!sc_needsemicolon && token == tTERM && (_lexnewline || !freading))
2123 lexpush(); /* push "tok" back, we use the "hidden" newline token */
2135 * Returns additional information of a token after using "matchtoken()"
2136 * or needtoken(). It does no harm using this routine after a call to
2137 * "lex()", but lex() already returns the same information.
2139 * The token itself is the return value. Normally, this one is already known.
2142 tokeninfo(cell * val, char **str)
2144 /* if the token was pushed back, tokeninfo() returns the token and
2145 * parameters of the *next* token, not of the *current* token.
2155 * This routine checks for a required token and gives an error message if
2156 * it isn't there (and returns FALSE in that case).
2158 * Global references: _lextok;
2161 needtoken(int token)
2163 char s1[20], s2[20];
2165 if (matchtoken(token))
2171 /* token already pushed back */
2174 sprintf(s1, "%c", (char)token); /* single character token */
2176 strcpy(s1, sc_tokens[token - tFIRST]); /* multi-character symbol */
2178 strcpy(s2, "-end of file-");
2179 else if (_lextok < 256)
2180 sprintf(s2, "%c", (char)_lextok);
2182 strcpy(s2, sc_tokens[_lextok - tFIRST]);
2183 error(1, s1, s2); /* expected ..., but found ... */
2190 * Compares a series of characters from the input file with the characters
2191 * in "st" (that contains a token). If the token on the input file matches
2192 * "st", the input file pointer "lptr" is adjusted to point to the next
2193 * token, otherwise "lptr" remains unaltered.
2195 * If the parameter "end: is true, match() requires that the first character
2196 * behind the recognized token is non-alphanumeric.
2198 * Global references: lptr (altered)
2201 match(char *st, int end)
2216 { /* symbol must terminate with non-alphanumeric char */
2220 lptr = ptr; /* match found, skip symbol */
2226 * Stores a value into the literal queue. The literal queue is used for
2227 * literal strings used in functions and for initializing array variables.
2229 * Global references: litidx (altered)
2235 if (litidx >= litmax)
2239 litmax += sDEF_LITMAX;
2240 p = (cell *) realloc(litq, litmax * sizeof(cell));
2242 error(102, "literal table"); /* literal table overflow (fatal error) */
2245 assert(litidx < litmax);
2246 litq[litidx++] = value;
2251 * Return current literal character and increase the pointer to point
2252 * just behind this literal character.
2254 * Note: standard "escape sequences" are suported, but the backslash may be
2255 * replaced by another character; the syntax '\ddd' is supported,
2256 * but ddd must be decimal!
2259 litchar(char **lptr, int rawmode)
2262 unsigned char *cptr;
2264 cptr = (unsigned char *)*lptr;
2265 if (rawmode || *cptr != sc_ctrlchar)
2266 { /* no escape character */
2273 if (*cptr == sc_ctrlchar)
2275 c = *cptr; /* \\ == \ (the escape character itself) */
2282 case 'a': /* \a == audible alarm */
2286 case 'b': /* \b == backspace */
2290 case 'e': /* \e == escape */
2294 case 'f': /* \f == form feed */
2298 case 'n': /* \n == NewLine character */
2302 case 'r': /* \r == carriage return */
2306 case 't': /* \t == horizontal TAB */
2310 case 'v': /* \v == vertical TAB */
2314 case '\'': /* \' == ' (single quote) */
2315 case '"': /* \" == " (single quote) */
2316 case '%': /* \% == % (percent) */
2324 while (*cptr >= '0' && *cptr <= '9') /* decimal! */
2325 c = c * 10 + *cptr++ - '0';
2327 cptr++; /* swallow a trailing ';' */
2331 error(27); /* invalid character constant */
2336 *lptr = (char *)cptr;
2337 assert(c >= 0 && c < 256);
2343 * Test if character "c" is alphabetic ("a".."z"), an underscore ("_")
2344 * or an "at" sign ("@"). The "@" is an extension to standard C.
2349 return (isalpha(c) || c == '_' || c == PUBLIC_CHAR);
2354 * Test if character "c" is alphanumeric ("a".."z", "0".."9", "_" or "@")
2359 return (alpha(c) || isdigit(c));
2362 /* The local variable table must be searched backwards, so that the deepest
2363 * nesting of local variables is searched first. The simplest way to do
2364 * this is to insert all new items at the head of the list.
2365 * In the global list, the symbols are kept in sorted order, so that the
2366 * public functions are written in sorted order.
2369 add_symbol(symbol * root, symbol * entry, int sort)
2374 while (root->next && strcmp(entry->name, root->next->name) > 0)
2377 if (!(newsym = (symbol *)malloc(sizeof(symbol))))
2382 memcpy(newsym, entry, sizeof(symbol));
2383 newsym->next = root->next;
2384 root->next = newsym;
2389 free_symbol(symbol * sym)
2393 /* free all sub-symbol allocated memory blocks, depending on the
2394 * kind of the symbol
2396 assert(sym != NULL);
2397 if (sym->ident == iFUNCTN)
2399 /* run through the argument list; "default array" arguments
2400 * must be freed explicitly; the tag list must also be freed */
2401 assert(sym->dim.arglist != NULL);
2402 for (arg = sym->dim.arglist; arg->ident != 0; arg++)
2404 if (arg->ident == iREFARRAY && arg->hasdefault)
2405 free(arg->defvalue.array.data);
2406 else if (arg->ident == iVARIABLE
2407 && ((arg->hasdefault & uSIZEOF) != 0
2408 || (arg->hasdefault & uTAGOF) != 0))
2409 free(arg->defvalue.size.symname);
2410 assert(arg->tags != NULL);
2413 free(sym->dim.arglist);
2415 assert(sym->refer != NULL);
2421 delete_symbol(symbol * root, symbol * sym)
2423 /* find the symbol and its predecessor
2424 * (this function assumes that you will never delete a symbol that is not
2425 * in the table pointed at by "root")
2427 assert(root != sym);
2428 while (root->next != sym)
2431 assert(root != NULL);
2434 /* unlink it, then free it */
2435 root->next = sym->next;
2440 delete_symbols(symbol * root, int level, int delete_labels,
2441 int delete_functions)
2445 /* erase only the symbols with a deeper nesting level than the
2446 * specified nesting level */
2450 if (sym->compound < level)
2452 if ((delete_labels || sym->ident != iLABEL)
2453 && (delete_functions || sym->ident != iFUNCTN
2454 || (sym->usage & uNATIVE) != 0) && (delete_functions
2455 || sym->ident != iCONSTEXPR
2456 || (sym->usage & uPREDEF) ==
2457 0) && (delete_functions
2463 root->next = sym->next;
2468 /* if the function was prototyped, but not implemented in this source,
2469 * mark it as such, so that its use can be flagged
2471 if (sym->ident == iFUNCTN && (sym->usage & uDEFINE) == 0)
2472 sym->usage |= uMISSING;
2473 if (sym->ident == iFUNCTN || sym->ident == iVARIABLE
2474 || sym->ident == iARRAY)
2475 sym->usage &= ~uDEFINE; /* clear "defined" flag */
2476 /* for user defined operators, also remove the "prototyped" flag, as
2477 * user-defined operators *must* be declared before use
2479 if (sym->ident == iFUNCTN && !isalpha(*sym->name)
2480 && *sym->name != '_' && *sym->name != PUBLIC_CHAR)
2481 sym->usage &= ~uPROTOTYPED;
2482 root = sym; /* skip the symbol */
2487 /* The purpose of the hash is to reduce the frequency of a "name"
2488 * comparison (which is costly). There is little interest in avoiding
2489 * clusters in similar names, which is why this function is plain simple.
2492 namehash(char *name)
2494 unsigned char *ptr = (unsigned char *)name;
2495 int len = strlen(name);
2500 return (len << 24Lu) + (ptr[0] << 16Lu) + (ptr[len - 1] << 8Lu) +
2505 find_symbol(symbol * root, char *name, int fnumber)
2507 symbol *ptr = root->next;
2508 unsigned long hash = namehash(name);
2512 if (hash == ptr->hash && strcmp(name, ptr->name) == 0
2513 && !ptr->parent && (ptr->fnumber < 0
2514 || ptr->fnumber == fnumber))
2522 find_symbol_child(symbol * root, symbol * sym)
2524 symbol *ptr = root->next;
2528 if (ptr->parent == sym)
2535 /* Adds "bywhom" to the list of referrers of "entry". Typically,
2536 * bywhom will be the function that uses a variable or that calls
2540 refer_symbol(symbol * entry, symbol * bywhom)
2544 assert(bywhom != NULL); /* it makes no sense to add a "void" referrer */
2545 assert(entry != NULL);
2546 assert(entry->refer != NULL);
2548 /* see if it is already there */
2549 for (count = 0; count < entry->numrefers && entry->refer[count] != bywhom;
2552 if (count < entry->numrefers)
2554 assert(entry->refer[count] == bywhom);
2558 /* see if there is an empty spot in the referrer list */
2559 for (count = 0; count < entry->numrefers && entry->refer[count];
2562 assert(count <= entry->numrefers);
2563 if (count == entry->numrefers)
2566 int newsize = 2 * entry->numrefers;
2568 assert(newsize > 0);
2569 /* grow the referrer list */
2570 refer = (symbol **) realloc(entry->refer, newsize * sizeof(symbol *));
2572 return FALSE; /* insufficient memory */
2573 /* initialize the new entries */
2574 entry->refer = refer;
2575 for (count = entry->numrefers; count < newsize; count++)
2576 entry->refer[count] = NULL;
2577 count = entry->numrefers; /* first empty spot */
2578 entry->numrefers = newsize;
2581 /* add the referrer */
2582 assert(entry->refer[count] == NULL);
2583 entry->refer[count] = bywhom;
2588 markusage(symbol * sym, int usage)
2590 sym->usage |= (char)usage;
2591 /* check if (global) reference must be added to the symbol */
2592 if ((usage & (uREAD | uWRITTEN)) != 0)
2594 /* only do this for global symbols */
2595 if (sym->vclass == sGLOBAL)
2597 /* "curfunc" should always be valid, since statements may not occurs
2598 * outside functions; in the case of syntax errors, however, the
2599 * compiler may arrive through this function
2602 refer_symbol(sym, curfunc);
2609 * Returns a pointer to the global symbol (if found) or NULL (if not found)
2614 return find_symbol(&glbtab, name, fcurrent);
2619 * Returns a pointer to the local symbol (if found) or NULL (if not found).
2620 * See add_symbol() how the deepest nesting level is searched first.
2625 return find_symbol(&loctab, name, -1);
2629 findconst(char *name)
2633 sym = find_symbol(&loctab, name, -1); /* try local symbols first */
2634 if (!sym || sym->ident != iCONSTEXPR) /* not found, or not a constant */
2635 sym = find_symbol(&glbtab, name, fcurrent);
2636 if (!sym || sym->ident != iCONSTEXPR)
2638 assert(sym->parent == NULL); /* constants have no hierarchy */
2643 finddepend(symbol * parent)
2647 sym = find_symbol_child(&loctab, parent); /* try local symbols first */
2648 if (!sym) /* not found */
2649 sym = find_symbol_child(&glbtab, parent);
2655 * Adds a symbol to the symbol table (either global or local variables,
2656 * or global and local constants).
2659 addsym(char *name, cell addr, int ident, int vclass, int tag, int usage)
2661 symbol entry, **refer;
2663 /* global variables/constants/functions may only be defined once */
2664 assert(!(ident == iFUNCTN || ident == iCONSTEXPR) || vclass != sGLOBAL
2665 || findglb(name) == NULL);
2666 /* labels may only be defined once */
2667 assert(ident != iLABEL || findloc(name) == NULL);
2669 /* create an empty referrer list */
2670 if (!(refer = (symbol **)malloc(sizeof(symbol *))))
2672 error(103); /* insufficient memory */
2677 /* first fill in the entry */
2678 strcpy(entry.name, name);
2679 entry.hash = namehash(name);
2681 entry.vclass = (char)vclass;
2682 entry.ident = (char)ident;
2684 entry.usage = (char)usage;
2685 entry.compound = 0; /* may be overridden later */
2686 entry.fnumber = -1; /* assume global visibility (ignored for local symbols) */
2687 entry.numrefers = 1;
2688 entry.refer = refer;
2689 entry.parent = NULL;
2691 /* then insert it in the list */
2692 if (vclass == sGLOBAL)
2693 return add_symbol(&glbtab, &entry, TRUE);
2695 return add_symbol(&loctab, &entry, FALSE);
2699 addvariable(char *name, cell addr, int ident, int vclass, int tag,
2700 int dim[], int numdim, int idxtag[])
2702 symbol *sym, *parent, *top;
2705 /* global variables may only be defined once */
2706 assert(vclass != sGLOBAL || (sym = findglb(name)) == NULL
2707 || (sym->usage & uDEFINE) == 0);
2709 if (ident == iARRAY || ident == iREFARRAY)
2712 sym = NULL; /* to avoid a compiler warning */
2713 for (level = 0; level < numdim; level++)
2715 top = addsym(name, addr, ident, vclass, tag, uDEFINE);
2716 top->dim.array.length = dim[level];
2717 top->dim.array.level = (short)(numdim - level - 1);
2718 top->x.idxtag = idxtag[level];
2719 top->parent = parent;
2727 sym = addsym(name, addr, ident, vclass, tag, uDEFINE);
2734 * Return next available internal label number.
2744 * Converts a number to a hexadecimal string and returns a pointer to that
2750 static char itohstr[15]; /* hex number is 10 characters long at most */
2752 int i, nibble[8]; /* a 32-bit hexadecimal cell has 8 nibbles */
2761 for (i = 0; i < max; i += 1)
2763 nibble[i] = (int)(val & 0x0f); /* nibble 0 is lowest nibble */
2767 while (nibble[i] == 0 && i > 0) /* search for highest non-zero nibble */
2771 if (nibble[i] >= 10)
2772 *ptr++ = (char)('a' + (nibble[i] - 10));
2774 *ptr++ = (char)('0' + nibble[i]);
2777 *ptr = '\0'; /* and a zero-terminator */