1 /* Small compiler - File input, preprocessing and lexical analysis functions
3 * Copyright (c) ITB CompuPhase, 1997-2003
5 * This software is provided "as-is", without any express or implied warranty.
6 * In no event will the authors be held liable for any damages arising from
7 * the use of this software.
9 * Permission is granted to anyone to use this software for any purpose,
10 * including commercial applications, and to alter it and redistribute it
11 * freely, subject to the following restrictions:
13 * 1. The origin of this software must not be misrepresented; you must not
14 * claim that you wrote the original software. If you use this software in
15 * a product, an acknowledgment in the product documentation would be
16 * appreciated but is not required.
17 * 2. Altered source versions must be plainly marked as such, and must not be
18 * misrepresented as being the original software.
19 * 3. This notice may not be removed or altered from any source distribution.
35 #include "embryo_cc_sc.h"
38 static int match(char *st, int end);
39 static cell litchar(char **lptr, int rawmode);
40 static int alpha(char c);
42 static int icomment; /* currently in multiline comment? */
43 static int iflevel; /* nesting level if #if/#else/#endif */
44 static int skiplevel; /* level at which we started skipping */
45 static int elsedone; /* level at which we have seen an #else */
46 static char term_expr[] = "";
47 static int listline = -1; /* "current line" for the list file */
51 * Uses a LIFO stack to store information. The stack is used by doinclude(),
52 * doswitch() (to hold the state of "swactive") and some other routines.
54 * Porting note: I made the bold assumption that an integer will not be
55 * larger than a pointer (it may be smaller). That is, the stack element
56 * is typedef'ed as a pointer type, but I also store integers on it. See
59 * Global references: stack,stkidx (private to pushstk() and popstk())
61 static stkitem stack[sSTKMAX];
66 if (stkidx >= sSTKMAX)
67 error(102, "parser stack"); /* stack overflow (recursive include?) */
76 return (stkitem) - 1; /* stack is empty */
82 plungequalifiedfile(char *name)
84 static char *extensions[] = { ".inc", ".sma", ".small" };
92 fp = (FILE *) sc_opensrc(name);
93 ext = strchr(name, '\0'); /* save position */
96 /* try to append an extension */
97 strcpy(ext, extensions[ext_idx]);
98 fp = (FILE *) sc_opensrc(name);
100 *ext = '\0'; /* on failure, restore filename */
105 (ext_idx < (int)(sizeof extensions / sizeof extensions[0])));
108 *ext = '\0'; /* restore filename */
111 pushstk((stkitem) inpf);
112 pushstk((stkitem) inpfname); /* pointer to current file name */
113 pushstk((stkitem) curlibrary);
114 pushstk((stkitem) iflevel);
115 assert(skiplevel == 0);
116 pushstk((stkitem) icomment);
117 pushstk((stkitem) fcurrent);
118 pushstk((stkitem) fline);
119 inpfname = strdup(name); /* set name of include file */
121 error(103); /* insufficient memory */
122 inpf = fp; /* set input file pointer to include file */
124 fline = 0; /* set current line number to 0 */
127 setfile(inpfname, fcurrent);
128 listline = -1; /* force a #line directive when changing the file */
129 setactivefile(fcurrent);
134 plungefile(char *name, int try_currentpath, int try_includepaths)
141 result = plungequalifiedfile(name);
143 if (try_includepaths && name[0] != DIRSEP_CHAR)
145 for (i = 0; !result && (ptr = get_path(i)); i++)
149 strncpy(path, ptr, sizeof path);
150 path[sizeof path - 1] = '\0'; /* force '\0' termination */
151 strncat(path, name, sizeof(path) - strlen(path));
152 path[sizeof path - 1] = '\0';
153 result = plungequalifiedfile(path);
160 check_empty(char *lptr)
162 /* verifies that the string contains only whitespace */
163 while (*lptr <= ' ' && *lptr != '\0')
166 error(38); /* extra characters on line */
171 * Gets the name of an include file, pushes the old file on the stack and
172 * sets some options. This routine doesn't use lex(), since lex() doesn't
173 * recognize file names (and directories).
175 * Global references: inpf (altered)
183 char name[PATH_MAX], c;
186 while (*lptr <= ' ' && *lptr != 0) /* skip leading whitespace */
188 if (*lptr == '<' || *lptr == '\"')
190 c = (char)((*lptr == '\"') ? '\"' : '>'); /* termination character */
192 while (*lptr <= ' ' && *lptr != 0) /* skip whitespace after quote */
201 while ((*lptr != c) && (*lptr != '\0') && (i < (int)(sizeof(name) - 1))) /* find the end of the string */
203 while (i > 0 && name[i - 1] <= ' ')
204 i--; /* strip trailing whitespace */
205 assert((i >= 0) && (i < (int)(sizeof(name))));
206 name[i] = '\0'; /* zero-terminate the string */
209 { /* verify correct string termination */
210 error(37); /* invalid string */
214 check_empty(lptr + 1); /* verify that the rest of the line is whitespace */
216 /* Include files between "..." or without quotes are read from the current
217 * directory, or from a list of "include directories". Include files
218 * between <...> are only read from the list of include directories.
220 result = plungefile(name, (c != '>'), TRUE);
222 error(100, name); /* cannot read from ... (fatal error) */
227 * Reads in a new line from the input file pointed to by "inpf". readline()
228 * concatenates lines that end with a \ with the next line. If no more data
229 * can be read from the file, readline() attempts to pop off the previous file
230 * from the stack. If that fails too, it sets "freading" to 0.
232 * Global references: inpf,fline,inpfname,freading,icomment (altered)
240 if (lptr == term_expr)
246 if (!inpf || sc_eofsrc(inpf))
249 error(49); /* invalid line continuation */
250 if (inpf && inpf != inpf_org)
252 i = (int)(long)popstk();
254 { /* All's done; popstk() returns "stack is empty" */
257 /* when there is nothing more to read, the #if/#else stack should
258 * be empty and we should not be in a comment
260 assert(iflevel >= 0);
262 error(1, "#endif", "-end of file-");
264 error(1, "*/", "-end of file-");
268 fcurrent = (int)(long)popstk();
269 icomment = (int)(long)popstk();
270 assert(skiplevel == 0); /* skiplevel was not stored on stack, because it should always be zero at this point */
271 iflevel = (int)(long)popstk();
272 curlibrary = (constvalue *) popstk();
273 free(inpfname); /* return memory allocated for the include file name */
274 inpfname = (char *)popstk();
275 inpf = (FILE *) popstk();
276 setactivefile(fcurrent);
277 listline = -1; /* force a #line directive when changing the file */
281 if (!sc_readsrc(inpf, line, num))
283 *line = '\0'; /* delete line */
288 /* check whether to erase leading spaces */
293 while (*ptr == ' ' || *ptr == '\t')
296 memmove(line, ptr, strlen(ptr) + 1);
299 /* check whether a full line was read */
300 if (!strchr(line, '\n') && !sc_eofsrc(inpf))
301 error(75); /* line too long */
302 /* check if the next line must be concatenated to this line */
303 if ((ptr = strchr(line, '\n')) && ptr > line)
305 assert(*(ptr + 1) == '\0'); /* '\n' should be last in the string */
307 && (*ptr == '\n' || *ptr == ' ' || *ptr == '\t'))
308 ptr--; /* skip trailing whitespace */
312 /* set '\a' at the position of '\\' to make it possible to check
313 * for a line continuation in a single line comment (error 49)
316 *ptr = '\0'; /* erase '\n' (and any trailing whitespace) */
320 line += strlen(line);
324 while (num >= 0 && cont);
329 * Replaces all comments from the line by space characters. It updates
330 * a global variable ("icomment") for multiline comments.
332 * This routine also supports the C++ extension for single line comments.
333 * These comments are started with "//" and end at the end of the line.
335 * Global references: icomment (private to "stripcom")
346 if (*line == '*' && *(line + 1) == '/')
348 icomment = FALSE; /* comment has ended */
349 *line = ' '; /* replace '*' and '/' characters by spaces */
355 if (*line == '/' && *(line + 1) == '*')
356 error(216); /* nested comment */
357 *line = ' '; /* replace comments by spaces */
363 if (*line == '/' && *(line + 1) == '*')
365 icomment = TRUE; /* start comment */
366 *line = ' '; /* replace '/' and '*' characters by spaces */
370 else if (*line == '/' && *(line + 1) == '/')
371 { /* comment to end of line */
372 if (strchr(line, '\a'))
373 error(49); /* invalid line continuation */
374 *line++ = '\n'; /* put "newline" at first slash */
375 *line = '\0'; /* put "zero-terminator" at second slash */
379 if (*line == '\"' || *line == '\'')
380 { /* leave literals unaltered */
381 c = *line; /* ending quote, single or double */
383 while ((*line != c || *(line - 1) == '\\')
386 line += 1; /* skip final quote */
399 * Attempts to interpret a numeric symbol as a boolean value. On success
400 * it returns the number of characters processed (so the line pointer can be
401 * adjusted) and the value is stored in "val". Otherwise it returns 0 and
404 * A boolean value must start with "0b"
407 btoi(cell * val, char *curptr)
413 if (*ptr == '0' && *(ptr + 1) == 'b')
416 while (*ptr == '0' || *ptr == '1' || *ptr == '_')
419 *val = (*val << 1) | (*ptr - '0');
427 if (alphanum(*ptr)) /* number must be delimited by non-alphanumeric char */
430 return (int)(ptr - curptr);
435 * Attempts to interpret a numeric symbol as a decimal value. On success
436 * it returns the number of characters processed and the value is stored in
437 * "val". Otherwise it returns 0 and "val" is garbage.
440 dtoi(cell * val, char *curptr)
446 if (!sc_isdigit(*ptr)) /* should start with digit */
448 while (sc_isdigit(*ptr) || *ptr == '_')
451 *val = (*val * 10) + (*ptr - '0');
454 if (alphanum(*ptr)) /* number must be delimited by non-alphanumerical */
456 if (*ptr == '.' && sc_isdigit(*(ptr + 1)))
457 return 0; /* but a fractional part must not be present */
458 return (int)(ptr - curptr);
463 * Attempts to interpret a numeric symbol as a hexadecimal value. On
464 * success it returns the number of characters processed and the value is
465 * stored in "val". Otherwise it return 0 and "val" is garbage.
468 htoi(cell * val, char *curptr)
474 if (!sc_isdigit(*ptr)) /* should start with digit */
476 if (*ptr == '0' && *(ptr + 1) == 'x')
477 { /* C style hexadecimal notation */
479 while (sc_isxdigit(*ptr) || *ptr == '_')
483 assert(sc_isxdigit(*ptr));
485 if (sc_isdigit(*ptr))
486 *val += (*ptr - '0');
488 *val += (tolower(*ptr) - 'a' + 10);
500 return (int)(ptr - curptr);
530 * Attempts to interpret a numeric symbol as a rational number, either as
531 * IEEE 754 single precision floating point or as a fixed point integer.
532 * On success it returns the number of characters processed and the value is
533 * stored in "val". Otherwise it returns 0 and "val" is unchanged.
535 * Small has stricter definition for floating point numbers than most:
536 * o the value must start with a digit; ".5" is not a valid number, you
538 * o a period must appear in the value, even if an exponent is given; "2e3"
539 * is not a valid number, you should write "2.0e3"
540 * o at least one digit must follow the period; "6." is not a valid number,
541 * you should write "6.0"
544 ftoi(cell * val, char *curptr)
547 double fnum, ffrac, fmult;
548 unsigned long dnum, dbase;
551 assert(rational_digits >= 0 && rational_digits < 9);
552 for (i = 0, dbase = 1; i < rational_digits; i++)
557 if (!sc_isdigit(*ptr)) /* should start with digit */
559 while (sc_isdigit(*ptr) || *ptr == '_')
563 fnum = (fnum * 10.0) + (*ptr - '0');
564 dnum = (dnum * 10L) + (*ptr - '0') * dbase;
569 return 0; /* there must be a period */
571 if (!sc_isdigit(*ptr)) /* there must be at least one digit after the dot */
576 while (sc_isdigit(*ptr) || *ptr == '_')
580 ffrac = (ffrac * 10.0) + (*ptr - '0');
581 fmult = fmult / 10.0;
583 dnum += (*ptr - '0') * dbase;
584 if (dbase == 0L && sc_rationaltag && rational_digits > 0
587 error(222); /* number of digits exceeds rational number precision */
593 fnum += ffrac * fmult; /* form the number so far */
595 { /* optional fractional part */
608 if (!sc_isdigit(*ptr)) /* 'e' should be followed by a digit */
611 while (sc_isdigit(*ptr))
613 exp = (exp * 10) + (*ptr - '0');
617 fmult = pow10(exp * sign);
619 fmult = pow(10, exp * sign);
622 dnum *= (unsigned long)(fmult + 0.5);
625 /* decide how to store the number */
626 if (sc_rationaltag == 0)
628 error(70); /* rational number support was not enabled */
631 else if (rational_digits == 0)
633 float f = (float) fnum;
635 *val = EMBRYO_FLOAT_TO_CELL(f);
637 /* I assume that the C/C++ compiler stores "float" values in IEEE 754
638 * format (as mandated in the ANSI standard). Test this assumption anyway.
641 float test1 = 0.0, test2 = 50.0;
642 Embryo_Cell c1 = EMBRYO_FLOAT_TO_CELL(test1);
643 Embryo_Cell c2 = EMBRYO_FLOAT_TO_CELL(test2);
645 if (c1 != 0x00000000L)
648 "embryo_cc: WARNING! you compiler does not SEEM to interpret IEEE floating\n"
649 "point math as embryo expects. this could be bad.\n"
651 "(float 0.0 != 0x00000000 bitpattern, 0x%08x instead)\n"
653 "this could be an issue with you compiling embryo with gcc 3.2.x that seems\n"
654 "to trigger this sanity check. we are not sure why yet, but gcc 3.3.x works\n"
657 else if (c2 != 0x42480000L)
660 "embryo_cc: WARNING! you compiler does not SEEM to interpret IEEE floating\n"
661 "point math as embryo expects. This could be bad.\n"
663 "(float 50.0 != 0x42480000 bitpattern, 0x%08x instead)\n"
665 "this could be an issue with you compiling embryo with gcc 3.2.x that seems\n"
666 "to trigger this sanity check. we are not sure why yet, but gcc 3.3.x works\n"
678 return (int)(ptr - curptr);
683 * Reads in a number (binary, decimal or hexadecimal). It returns the number
684 * of characters processed or 0 if the symbol couldn't be interpreted as a
685 * number (in this case the argument "val" remains unchanged). This routine
686 * relies on the 'early dropout' implementation of the logical or (||)
689 * Note: the routine doesn't check for a sign (+ or -). The - is checked
690 * for at "hier2()" (in fact, it is viewed as an operator, not as a
691 * sign) and the + is invalid (as in K&R C, and unlike ANSI C).
694 number(cell * val, char *curptr)
699 if ((i = btoi(&value, curptr)) != 0 /* binary? */
700 || (i = htoi(&value, curptr)) != 0 /* hexadecimal? */
701 || (i = dtoi(&value, curptr)) != 0) /* decimal? */
708 return 0; /* else not a number */
713 chrcat(char *str, char chr)
715 str = strchr(str, '\0');
721 preproc_expr(cell * val, int *tag)
728 /* Disable staging; it should be disabled already because
729 * expressions may not be cut off half-way between conditional
730 * compilations. Reset the staging index, but keep the code
733 if (stgget(&idx, &code_index))
735 error(57); /* unfinished expression */
736 stgdel(0, code_index);
739 /* append a special symbol to the string, so the expression
740 * analyzer won't try to read a next line when it encounters
743 assert(strlen(pline) < sLINEMAX);
744 term = strchr(pline, '\0');
745 assert(term != NULL);
746 chrcat(pline, PREPROC_TERM); /* the "DEL" code (see SC.H) */
747 result = constexpr(val, tag); /* get value (or 0 on error) */
748 *term = '\0'; /* erase the token (if still present) */
749 lexclr(FALSE); /* clear any "pushed" tokens */
754 * Returns returns a pointer behind the closing quote or to the other
755 * character that caused the input to be ended.
758 getstring(char *dest, int max)
760 assert(dest != NULL);
762 while (*lptr <= ' ' && *lptr != '\0')
763 lptr++; /* skip whitespace */
766 error(37); /* invalid string */
773 while (*lptr != '"' && *lptr != '\0')
781 lptr++; /* skip closing " */
783 error(37); /* invalid string */
802 * Recognizes the compiler directives. The function returns:
803 * CMD_NONE the line must be processed
804 * CMD_TERM a pending expression must be completed before processing further lines
805 * Other value: the line must be skipped, because:
806 * CMD_CONDFALSE false "#if.." code
807 * CMD_EMPTYLINE line is empty
808 * CMD_INCLUDE the line contains a #include directive
809 * CMD_DEFINE the line contains a #subst directive
810 * CMD_IF the line contains a #if/#else/#endif directive
811 * CMD_DIRECTIVE the line contains some other compiler directive
813 * Global variables: iflevel, skiplevel, elsedone (altered)
825 while (*lptr <= ' ' && *lptr != '\0')
828 return CMD_EMPTYLINE; /* empty line */
830 return skiplevel > 0 ? CMD_CONDFALSE : CMD_NONE; /* it is not a compiler directive */
831 /* compiler directive found */
832 indent_nowarn = TRUE; /* allow loose indentation" */
833 lexclr(FALSE); /* clear any "pushed" tokens */
834 /* on a pending expression, force to return a silent ';' token and force to
837 if (!sc_needsemicolon && stgget(&idx, &code_index))
842 tok = lex(&val, &str);
843 ret = skiplevel > 0 ? CMD_CONDFALSE : CMD_DIRECTIVE; /* preset 'ret' to CMD_DIRECTIVE (most common case) */
846 case tpIF: /* conditional compilation */
850 break; /* break out of switch */
851 preproc_expr(&val, NULL); /* get value (or 0 on error) */
858 if (iflevel == 0 && skiplevel == 0)
860 error(26); /* no matching #if */
865 if (elsedone == iflevel)
866 error(60); /* multiple #else directives between #if ... #endif */
868 if (skiplevel == iflevel)
870 else if (skiplevel == 0)
875 #if 0 /* ??? *really* need to use a stack here */
878 if (iflevel == 0 && skiplevel == 0)
880 error(26); /* no matching #if */
883 else if (elsedone == iflevel)
885 error(61); /* #elseif directive may not follow an #else */
890 preproc_expr(&val, NULL); /* get value (or 0 on error) */
892 skiplevel = iflevel; /* we weren't skipping, start skipping now */
894 skiplevel = 0; /* we were skipping, condition is valid -> stop skipping */
895 /* else: we were skipping and condition is invalid -> keep skipping */
902 if (iflevel == 0 && skiplevel == 0)
909 if (skiplevel == iflevel)
911 if (elsedone == iflevel)
912 elsedone = 0; /* ??? actually, should build a stack of #if/#endif and keep
913 * the state whether an #else was seen per nesting level */
918 case tINCLUDE: /* #include directive */
926 char pathname[PATH_MAX];
928 lptr = getstring(pathname, sizeof pathname);
929 if (pathname[0] != '\0')
932 inpfname = strdup(pathname);
934 error(103); /* insufficient memory */
942 if (lex(&val, &str) != tNUMBER)
943 error(8); /* invalid/non-constant expression */
946 while (*lptr == ' ' && *lptr != '\0')
947 lptr++; /* skip whitespace */
950 char pathname[PATH_MAX];
952 lptr = getstring(pathname, sizeof pathname);
953 if (pathname[0] != '\0')
956 inpfname = strdup(pathname);
958 error(103); /* insufficient memory */
965 if (skiplevel == 0 && (sc_debug & sCHKBOUNDS) != 0)
967 preproc_expr(&val, NULL); /* get constant expression (or 0 on error) */
969 error(7); /* assertion failed */
976 if (lex(&val, &str) == tSYMBOL)
978 if (strcmp(str, "ctrlchar") == 0)
980 if (lex(&val, &str) != tNUMBER)
981 error(27); /* invalid character constant */
982 sc_ctrlchar = (char)val;
984 else if (strcmp(str, "compress") == 0)
988 preproc_expr(&val, NULL);
989 sc_compress = (int)val; /* switch code packing on/off */
991 else if (strcmp(str, "dynamic") == 0)
993 preproc_expr(&sc_stksize, NULL);
995 else if (strcmp(str, "library") == 0)
997 char name[sNAMEMAX + 1];
999 while (*lptr <= ' ' && *lptr != '\0')
1003 lptr = getstring(name, sizeof name);
1010 (i < (int)(sizeof(name))) &&
1016 if (name[0] == '\0')
1022 if (strlen(name) > sEXPMAX)
1023 error(220, name, sEXPMAX); /* exported symbol is truncated */
1024 /* add the name if it does not yet exist in the table */
1025 if (!find_constval(&libname_tab, name, 0))
1027 append_constval(&libname_tab, name, 0, 0);
1030 else if (strcmp(str, "pack") == 0)
1034 preproc_expr(&val, NULL); /* default = packed/unpacked */
1035 sc_packstr = (int)val;
1037 else if (strcmp(str, "rational") == 0)
1039 char name[sNAMEMAX + 1];
1043 /* first gather all information, start with the tag name */
1044 while ((*lptr <= ' ') && (*lptr != '\0'))
1047 (i < (int)(sizeof(name)) - 1) &&
1052 /* then the precision (for fixed point arithmetic) */
1053 while (*lptr <= ' ' && *lptr != '\0')
1057 preproc_expr(&digits, NULL);
1058 if (digits <= 0 || digits > 9)
1060 error(68); /* invalid rational number precision */
1066 /* add the tag (make it public) and check the values */
1067 i = sc_addtag(name);
1069 if (sc_rationaltag == 0
1070 || (sc_rationaltag == i
1071 && rational_digits == (int)digits))
1074 rational_digits = (int)digits;
1078 error(69); /* rational number format already set, can only be set once */
1081 else if (strcmp(str, "semicolon") == 0)
1085 preproc_expr(&val, NULL);
1086 sc_needsemicolon = (int)val;
1088 else if (strcmp(str, "tabsize") == 0)
1092 preproc_expr(&val, NULL);
1093 sc_tabsize = (int)val;
1095 else if (strcmp(str, "align") == 0)
1097 sc_alignnext = TRUE;
1099 else if (strcmp(str, "unused") == 0)
1101 char name[sNAMEMAX + 1];
1108 while ((*lptr <= ' ') && (*lptr != '\0'))
1111 (i < (int)(sizeof(name)) - 1) &&
1112 (sc_isalpha(*lptr));
1116 /* get the symbol */
1117 sym = findloc(name);
1119 sym = findglb(name);
1122 sym->usage |= uREAD;
1123 if (sym->ident == iVARIABLE
1124 || sym->ident == iREFERENCE
1125 || sym->ident == iARRAY
1126 || sym->ident == iREFARRAY)
1127 sym->usage |= uWRITTEN;
1131 error(17, name); /* undefined symbol */
1133 /* see if a comma follows the name */
1134 while (*lptr <= ' ' && *lptr != '\0')
1136 comma = (*lptr == ',');
1144 error(207); /* unknown #pragma */
1149 error(207); /* unknown #pragma */
1159 assert(inpf != NULL);
1160 if (inpf != inpf_org)
1168 /* write opcode to output file */
1172 while (*lptr <= ' ' && *lptr != '\0')
1174 for (i = 0; i < 40 && (sc_isalpha(*lptr) || *lptr == '.'); i++, lptr++)
1175 name[i] = (char)tolower(*lptr);
1180 code_idx += opcodes(1);
1181 /* write parameter (if any) */
1182 while (*lptr <= ' ' && *lptr != '\0')
1188 tok = lex(&val, &str);
1194 code_idx += opargs(1);
1200 if (!sym || (sym->ident != iFUNCTN
1201 && sym->ident != iREFFUNC
1202 && (sym->usage & uDEFINE) == 0))
1204 error(17, str); /* undefined symbol */
1208 outval(sym->addr, FALSE);
1209 /* mark symbol as "used", unknown whether for read or write */
1210 markusage(sym, uREAD | uWRITTEN);
1211 code_idx += opargs(1);
1217 extern char *sc_tokens[]; /* forward declaration */
1220 sprintf(s2, "%c", (char)tok);
1223 strncpy(s2, sc_tokens[tok - tFIRST], 19);
1226 error(1, sc_tokens[tSYMBOL - tFIRST], s2);
1236 #if !defined NO_DEFINE
1242 char *pattern, *substitution;
1244 int count, prefixlen;
1247 /* find the pattern to match */
1248 while (*lptr <= ' ' && *lptr != '\0')
1250 start = lptr; /* save starting point of the match pattern */
1252 while (*lptr > ' ' && *lptr != '\0')
1254 litchar(&lptr, FALSE); /* litchar() advances "lptr" and handles escape characters */
1258 /* check pattern to match */
1259 if (!sc_isalpha(*start) && *start != '_')
1261 error(74); /* pattern must start with an alphabetic character */
1264 /* store matched pattern */
1265 pattern = malloc(count + 1);
1267 error(103); /* insufficient memory */
1273 assert(*lptr != '\0');
1274 pattern[count++] = (char)litchar(&lptr, FALSE);
1276 pattern[count] = '\0';
1277 /* special case, erase trailing variable, because it could match anything */
1278 if (count >= 2 && sc_isdigit(pattern[count - 1])
1279 && pattern[count - 2] == '%')
1280 pattern[count - 2] = '\0';
1281 /* find substitution string */
1282 while (*lptr <= ' ' && *lptr != '\0')
1284 start = lptr; /* save starting point of the match pattern */
1287 while (*lptr != '\0')
1289 /* keep position of the start of trailing whitespace */
1304 /* store matched substitution */
1305 substitution = malloc(count + 1); /* +1 for '\0' */
1307 error(103); /* insufficient memory */
1313 assert(*lptr != '\0');
1314 substitution[count++] = *lptr++;
1316 substitution[count] = '\0';
1317 /* check whether the definition already exists */
1318 for (prefixlen = 0, start = pattern;
1319 sc_isalpha(*start) || sc_isdigit(*start) || *start == '_';
1320 prefixlen++, start++)
1322 assert(prefixlen > 0);
1323 if ((def = find_subst(pattern, prefixlen)))
1325 if (strcmp(def->first, pattern) != 0
1326 || strcmp(def->second, substitution) != 0)
1327 error(201, pattern); /* redefinition of macro (non-identical) */
1328 delete_subst(pattern, prefixlen);
1330 /* add the pattern/substitution pair to the list */
1331 assert(pattern[0] != '\0');
1332 insert_subst(pattern, substitution, prefixlen);
1341 if (lex(&val, &str) == tSYMBOL)
1343 if (!delete_subst(str, strlen(str)))
1344 error(17, str); /* undefined symbol */
1348 error(20, str); /* invalid symbol name */
1355 error(31); /* unknown compiler directive */
1356 ret = skiplevel > 0 ? CMD_DIRECTIVE : CMD_NONE; /* line must be processed (if skiplevel==0) */
1361 #if !defined NO_DEFINE
1363 is_startstring(char *string)
1365 if (*string == '\"' || *string == '\'')
1366 return TRUE; /* "..." */
1371 if (*string == '\"' || *string == '\'')
1372 return TRUE; /* !"..." */
1373 if (*string == sc_ctrlchar)
1376 if (*string == '\"' || *string == '\'')
1377 return TRUE; /* !\"..." */
1380 else if (*string == sc_ctrlchar)
1383 if (*string == '\"' || *string == '\'')
1384 return TRUE; /* \"..." */
1388 if (*string == '\"' || *string == '\'')
1389 return TRUE; /* \!"..." */
1397 skipstring(char *string)
1400 int rawstring = FALSE;
1402 while (*string == '!' || *string == sc_ctrlchar)
1404 rawstring = (*string == sc_ctrlchar);
1409 assert(endquote == '\"' || endquote == '\'');
1410 string++; /* skip open quote */
1411 while (*string != endquote && *string != '\0')
1412 litchar(&string, rawstring);
1417 skippgroup(char *string)
1420 char open = *string;
1439 close = '\0'; /* only to avoid a compiler warning */
1443 while (*string != close || nest > 0)
1445 if (*string == open)
1447 else if (*string == close)
1449 else if (is_startstring(string))
1450 string = skipstring(string);
1451 if (*string == '\0')
1459 strdel(char *str, size_t len)
1461 size_t length = strlen(str);
1465 memmove(str, str + len, length - len + 1); /* include EOS byte */
1470 strins(char *dest, char *src, size_t srclen)
1472 size_t destlen = strlen(dest);
1474 assert(srclen <= strlen(src));
1475 memmove(dest + srclen, dest, destlen + 1); /* include EOS byte */
1476 memcpy(dest, src, srclen);
1481 substpattern(char *line, size_t buffersize, char *pattern, char *substitution)
1484 char *p, *s, *e, *args[10];
1485 int match, arg, len;
1487 memset(args, 0, sizeof args);
1489 /* check the length of the prefix */
1490 for (prefixlen = 0, s = pattern; sc_isalpha(*s) || sc_isdigit(*s) || *s == '_';
1493 assert(prefixlen > 0);
1494 assert(strncmp(line, pattern, prefixlen) == 0);
1496 /* pattern prefix matches; match the rest of the pattern, gather
1499 s = line + prefixlen;
1500 p = pattern + prefixlen;
1501 match = TRUE; /* so far, pattern matches */
1502 while (match && *s != '\0' && *p != '\0')
1510 assert(arg >= 0 && arg <= 9);
1511 p++; /* skip parameter id */
1513 /* match the source string up to the character after the digit
1514 * (skipping strings in the process
1517 while (*e != *p && *e != '\0' && *e != '\n')
1519 if (is_startstring(e)) /* skip strings */
1521 else if (strchr("({[", *e)) /* skip parenthized groups */
1524 e++; /* skip non-alphapetic character (or closing quote of
1525 * a string, or the closing paranthese of a group) */
1527 /* store the parameter (overrule any earlier) */
1531 args[arg] = malloc(len + 1);
1533 error(103); /* insufficient memory */
1534 strncpy(args[arg], s, len);
1535 args[arg][len] = '\0';
1536 /* character behind the pattern was matched too */
1541 else if (*e == '\n' && *p == ';' && *(p + 1) == '\0'
1542 && !sc_needsemicolon)
1544 s = e; /* allow a trailing ; in the pattern match to end of line */
1548 assert(*e == '\0' || *e == '\n');
1559 else if (*p == ';' && *(p + 1) == '\0' && !sc_needsemicolon)
1561 /* source may be ';' or end of the line */
1562 while (*s <= ' ' && *s != '\0')
1563 s++; /* skip white space */
1564 if (*s != ';' && *s != '\0')
1566 p++; /* skip the semicolon in the pattern */
1572 /* skip whitespace between two non-alphanumeric characters, except
1573 * for two identical symbols
1575 assert(p > pattern);
1576 if (!alphanum(*p) && *(p - 1) != *p)
1577 while (*s <= ' ' && *s != '\0')
1578 s++; /* skip white space */
1579 ch = litchar(&p, FALSE); /* this increments "p" */
1583 s++; /* this character matches */
1587 if (match && *p == '\0')
1589 /* if the last character to match is an alphanumeric character, the
1590 * current character in the source may not be alphanumeric
1592 assert(p > pattern);
1593 if (alphanum(*(p - 1)) && alphanum(*s))
1599 /* calculate the length of the substituted string */
1600 for (e = substitution, len = 0; *e != '\0'; e++)
1602 if (*e == '%' && sc_isdigit(*(e + 1)))
1604 arg = *(e + 1) - '0';
1605 assert(arg >= 0 && arg <= 9);
1607 len += strlen(args[arg]);
1608 e++; /* skip %, digit is skipped later */
1615 /* check length of the string after substitution */
1616 if (strlen(line) + len - (int)(s - line) > buffersize)
1618 error(75); /* line too long */
1622 /* substitute pattern */
1623 strdel(line, (int)(s - line));
1624 for (e = substitution, s = line; *e != '\0'; e++)
1626 if (*e == '%' && sc_isdigit(*(e + 1)))
1628 arg = *(e + 1) - '0';
1629 assert(arg >= 0 && arg <= 9);
1632 strins(s, args[arg], strlen(args[arg]));
1633 s += strlen(args[arg]);
1635 e++; /* skip %, digit is skipped later */
1646 for (arg = 0; arg < 10; arg++)
1654 substallpatterns(char *line, int buffersize)
1661 while (*start != '\0')
1663 /* find the start of a prefix (skip all non-alphabetic characters),
1666 while (!sc_isalpha(*start) && *start != '_' && *start != '\0')
1669 if (is_startstring(start))
1671 start = skipstring(start);
1673 break; /* abort loop on error */
1675 start++; /* skip non-alphapetic character (or closing quote of a string) */
1678 break; /* abort loop on error */
1679 /* get the prefix (length), look for a matching definition */
1682 while (sc_isalpha(*end) || sc_isdigit(*end) || *end == '_')
1687 assert(prefixlen > 0);
1688 subst = find_subst(start, prefixlen);
1691 /* properly match the pattern and substitute */
1693 (start, buffersize - (start - line), subst->first,
1695 start = end; /* match failed, skip this prefix */
1696 /* match succeeded: do not update "start", because the substitution text
1697 * may be matched by other macros
1702 start = end; /* no macro with this prefix, skip this prefix */
1710 * Reads a line by readline() into "pline" and performs basic preprocessing:
1711 * deleting comments, skipping lines with false "#if.." code and recognizing
1712 * other compiler directives. There is an indirect recursion: lex() calls
1713 * preprocess() if a new line must be read, preprocess() calls command(),
1714 * which at his turn calls lex() to identify the token.
1716 * Global references: lptr (altered)
1718 * freading (referred to only)
1730 stripcom(pline); /* ??? no need for this when reading back from list file (in the second pass) */
1731 lptr = pline; /* set "line pointer" to start of the parsing buffer */
1732 iscommand = command();
1733 if (iscommand != CMD_NONE)
1734 errorset(sRESET); /* reset error flag ("panic mode") on empty line or directive */
1735 #if !defined NO_DEFINE
1736 if (iscommand == CMD_NONE)
1738 assert(lptr != term_expr);
1739 substallpatterns(pline, sLINEMAX);
1740 lptr = pline; /* reset "line pointer" to start of the parsing buffer */
1744 while (iscommand != CMD_NONE && iscommand != CMD_TERM && freading); /* enddo */
1748 unpackedstring(char *lptr, int rawstring)
1750 while (*lptr != '\0')
1752 /* check for doublequotes indicating the end of the string */
1755 /* check whether there's another pair of quotes following.
1756 * If so, paste the two strings together, thus
1757 * "pants""off" becomes "pantsoff"
1759 if (*(lptr + 1) == '\"')
1766 { /* ignore '\a' (which was inserted at a line concatenation) */
1770 stowlit(litchar(&lptr, rawstring)); /* litchar() alters "lptr" */
1772 stowlit(0); /* terminate string */
1777 packedstring(char *lptr, int rawstring)
1782 i = sizeof(ucell) - (charbits / 8); /* start at most significant byte */
1784 while (*lptr != '\0')
1786 /* check for doublequotes indicating the end of the string */
1789 /* check whether there's another pair of quotes following.
1790 * If so, paste the two strings together, thus
1791 * "pants""off" becomes "pantsoff"
1793 if (*(lptr + 1) == '\"')
1800 { /* ignore '\a' (which was inserted at a line concatenation) */
1804 c = litchar(&lptr, rawstring); /* litchar() alters "lptr" */
1805 if (c >= (ucell) (1 << charbits))
1806 error(43); /* character constant exceeds range */
1807 val |= (c << 8 * i);
1813 i = (i + sizeof(ucell) - (charbits / 8)) % sizeof(ucell);
1815 /* save last code; make sure there is at least one terminating zero character */
1816 if (i != (int)(sizeof(ucell) - (charbits / 8)))
1817 stowlit(val); /* at least one zero character in "val" */
1819 stowlit(0); /* add full cell of zeros */
1823 /* lex(lexvalue,lexsym) Lexical Analysis
1825 * lex() first deletes leading white space, then checks for multi-character
1826 * operators, keywords (including most compiler directives), numbers,
1827 * labels, symbols and literals (literal characters are converted to a number
1828 * and are returned as such). If every check fails, the line must contain
1829 * a single-character operator. So, lex() returns this character. In the other
1830 * case (something did match), lex() returns the number of the token. All
1831 * these tokens have been assigned numbers above 255.
1833 * Some tokens have "attributes":
1834 * tNUMBER the value of the number is return in "lexvalue".
1835 * tRATIONAL the value is in IEEE 754 encoding or in fixed point
1836 * encoding in "lexvalue".
1837 * tSYMBOL the first sNAMEMAX characters of the symbol are
1838 * stored in a buffer, a pointer to this buffer is
1839 * returned in "lexsym".
1840 * tLABEL the first sNAMEMAX characters of the label are
1841 * stored in a buffer, a pointer to this buffer is
1842 * returned in "lexsym".
1843 * tSTRING the string is stored in the literal pool, the index
1844 * in the literal pool to this string is stored in
1847 * lex() stores all information (the token found and possibly its attribute)
1848 * in global variables. This allows a token to be examined twice. If "_pushed"
1849 * is true, this information is returned.
1851 * Global references: lptr (altered)
1852 * fline (referred to only)
1853 * litidx (referred to only)
1854 * _lextok, _lexval, _lexstr
1860 static cell _lexval;
1861 static char _lexstr[sLINEMAX + 1];
1862 static int _lexnewline;
1867 stkidx = 0; /* index for pushstk() and popstk() */
1868 iflevel = 0; /* preprocessor: nesting of "#if" */
1869 skiplevel = 0; /* preprocessor: skipping lines or compiling lines */
1870 icomment = FALSE; /* currently not in a multiline comment */
1871 _pushed = FALSE; /* no token pushed back into lex */
1872 _lexnewline = FALSE;
1875 char *sc_tokens[] = {
1876 "*=", "/=", "%=", "+=", "-=", "<<=", ">>>=", ">>=", "&=", "^=", "|=",
1877 "||", "&&", "==", "!=", "<=", ">=", "<<", ">>>", ">>", "++", "--",
1879 "assert", "break", "case", "char", "const", "continue", "default",
1880 "defined", "do", "else", "enum", "exit", "for", "forward", "goto",
1881 "if", "native", "new", "operator", "public", "return", "sizeof",
1882 "sleep", "static", "stock", "switch", "tagof", "while",
1883 "#assert", "#define", "#else", "#emit", "#endif", "#endinput",
1884 "#endscript", "#file", "#if", "#include", "#line", "#pragma", "#undef",
1885 ";", ";", "-integer value-", "-rational value-", "-identifier-",
1886 "-label-", "-string-"
1890 lex(cell * lexvalue, char **lexsym)
1892 int i, toolong, newline, rawstring;
1897 _pushed = FALSE; /* reset "_pushed" flag */
1898 *lexvalue = _lexval;
1903 _lextok = 0; /* preset all values */
1906 *lexvalue = _lexval;
1908 _lexnewline = FALSE;
1912 newline = (lptr == pline); /* does lptr point to start of line buffer */
1913 while (*lptr <= ' ')
1914 { /* delete leading white space */
1917 preprocess(); /* preprocess resets "lptr" */
1920 if (lptr == term_expr) /* special sequence to terminate a pending expression */
1921 return (_lextok = tENDEXPR);
1922 _lexnewline = TRUE; /* set this after preprocess(), because
1923 * preprocess() calls lex() recursively */
1934 for (i = 0; i < (int)(lptr - pline); i++)
1935 if (pline[i] == '\t' && sc_tabsize > 0)
1937 (int)(sc_tabsize - (stmtindent + sc_tabsize) % sc_tabsize);
1944 while (i <= tMIDDLE)
1945 { /* match multi-character operators */
1946 if (match(*tokptr, FALSE))
1955 { /* match reserved words and compiler directives */
1956 if (match(*tokptr, TRUE))
1959 errorset(sRESET); /* reset error flag (clear the "panic mode") */
1966 if ((i = number(&_lexval, lptr)) != 0)
1969 *lexvalue = _lexval;
1972 else if ((i = ftoi(&_lexval, lptr)) != 0)
1974 _lextok = tRATIONAL;
1975 *lexvalue = _lexval;
1978 else if (alpha(*lptr))
1979 { /* symbol or label */
1980 /* Note: only sNAMEMAX characters are significant. The compiler
1981 * generates a warning if a symbol exceeds this length.
1986 while (alphanum(*lptr))
1997 error(200, _lexstr, sNAMEMAX); /* symbol too long, truncated to sNAMEMAX chars */
1998 if (_lexstr[0] == PUBLIC_CHAR && _lexstr[1] == '\0')
2000 _lextok = PUBLIC_CHAR; /* '@' all alone is not a symbol, it is an operator */
2002 else if (_lexstr[0] == '_' && _lexstr[1] == '\0')
2004 _lextok = '_'; /* '_' by itself is not a symbol, it is a placeholder */
2006 if (*lptr == ':' && sc_allowtags && _lextok != PUBLIC_CHAR)
2008 _lextok = tLABEL; /* it wasn't a normal symbol, it was a label/tagname */
2009 lptr += 1; /* skip colon */
2012 else if (*lptr == '\"' || (*lptr == sc_ctrlchar && *(lptr + 1) == '\"'))
2013 { /* unpacked string literal */
2015 rawstring = (*lptr == sc_ctrlchar);
2016 *lexvalue = _lexval = litidx;
2017 lptr += 1; /* skip double quote */
2019 lptr += 1; /* skip "escape" character too */
2021 sc_packstr ? packedstring(lptr, rawstring) : unpackedstring(lptr,
2024 lptr += 1; /* skip final quote */
2026 error(37); /* invalid (non-terminated) string */
2028 else if ((*lptr == '!' && *(lptr + 1) == '\"')
2029 || (*lptr == '!' && *(lptr + 1) == sc_ctrlchar && *(lptr + 2) == '\"')
2030 || (*lptr == sc_ctrlchar && *(lptr + 1) == '!'
2031 && *(lptr + 2) == '\"'))
2032 { /* packed string literal */
2034 rawstring = (*lptr == sc_ctrlchar || *(lptr + 1) == sc_ctrlchar);
2035 *lexvalue = _lexval = litidx;
2036 lptr += 2; /* skip exclamation point and double quote */
2038 lptr += 1; /* skip "escape" character too */
2040 sc_packstr ? unpackedstring(lptr, rawstring) : packedstring(lptr,
2043 lptr += 1; /* skip final quote */
2045 error(37); /* invalid (non-terminated) string */
2047 else if (*lptr == '\'')
2048 { /* character literal */
2049 lptr += 1; /* skip quote */
2051 *lexvalue = _lexval = litchar(&lptr, FALSE);
2053 lptr += 1; /* skip final quote */
2055 error(27); /* invalid character constant (must be one character) */
2057 else if (*lptr == ';')
2058 { /* semicolumn resets "error" flag */
2061 errorset(sRESET); /* reset error flag (clear the "panic mode") */
2065 _lextok = *lptr; /* if every match fails, return the character */
2066 lptr += 1; /* increase the "lptr" pointer */
2073 * Pushes a token back, so the next call to lex() will return the token
2074 * last examined, instead of a new token.
2076 * Only one token can be pushed back.
2078 * In fact, lex() already stores the information it finds into global
2079 * variables, so all that is to be done is set a flag that informs lex()
2080 * to read and return the information from these variables, rather than
2081 * to read in a new token from the input file.
2086 assert(_pushed == FALSE);
2092 * Sets the variable "_pushed" to 0 to make sure lex() will read in a new
2093 * symbol (a not continue with some old one). This is required upon return
2094 * from Assembler mode.
2102 lptr = strchr(pline, '\0');
2103 assert(lptr != NULL);
2109 * This routine is useful if only a simple check is needed. If the token
2110 * differs from the one expected, it is pushed back.
2113 matchtoken(int token)
2119 tok = lex(&val, &str);
2120 if (tok == token || (token == tTERM && (tok == ';' || tok == tENDEXPR)))
2124 else if (!sc_needsemicolon && token == tTERM && (_lexnewline || !freading))
2126 lexpush(); /* push "tok" back, we use the "hidden" newline token */
2138 * Returns additional information of a token after using "matchtoken()"
2139 * or needtoken(). It does no harm using this routine after a call to
2140 * "lex()", but lex() already returns the same information.
2142 * The token itself is the return value. Normally, this one is already known.
2145 tokeninfo(cell * val, char **str)
2147 /* if the token was pushed back, tokeninfo() returns the token and
2148 * parameters of the *next* token, not of the *current* token.
2158 * This routine checks for a required token and gives an error message if
2159 * it isn't there (and returns FALSE in that case).
2161 * Global references: _lextok;
2164 needtoken(int token)
2166 char s1[20], s2[20];
2168 if (matchtoken(token))
2174 /* token already pushed back */
2177 sprintf(s1, "%c", (char)token); /* single character token */
2180 strncpy(s1, sc_tokens[token - tFIRST], 19); /* multi-character symbol */
2184 strcpy(s2, "-end of file-");
2185 else if (_lextok < 256)
2186 sprintf(s2, "%c", (char)_lextok);
2189 strncpy(s2, sc_tokens[_lextok - tFIRST], 19);
2192 error(1, s1, s2); /* expected ..., but found ... */
2199 * Compares a series of characters from the input file with the characters
2200 * in "st" (that contains a token). If the token on the input file matches
2201 * "st", the input file pointer "lptr" is adjusted to point to the next
2202 * token, otherwise "lptr" remains unaltered.
2204 * If the parameter "end: is true, match() requires that the first character
2205 * behind the recognized token is non-alphanumeric.
2207 * Global references: lptr (altered)
2210 match(char *st, int end)
2225 { /* symbol must terminate with non-alphanumeric char */
2229 lptr = ptr; /* match found, skip symbol */
2235 * Stores a value into the literal queue. The literal queue is used for
2236 * literal strings used in functions and for initializing array variables.
2238 * Global references: litidx (altered)
2244 if (litidx >= litmax)
2248 litmax += sDEF_LITMAX;
2249 p = (cell *) realloc(litq, litmax * sizeof(cell));
2251 error(102, "literal table"); /* literal table overflow (fatal error) */
2254 assert(litidx < litmax);
2255 litq[litidx++] = value;
2260 * Return current literal character and increase the pointer to point
2261 * just behind this literal character.
2263 * Note: standard "escape sequences" are suported, but the backslash may be
2264 * replaced by another character; the syntax '\ddd' is supported,
2265 * but ddd must be decimal!
2268 litchar(char **lptr, int rawmode)
2271 unsigned char *cptr;
2273 cptr = (unsigned char *)*lptr;
2274 if (rawmode || *cptr != sc_ctrlchar)
2275 { /* no escape character */
2282 if (*cptr == sc_ctrlchar)
2284 c = *cptr; /* \\ == \ (the escape character itself) */
2291 case 'a': /* \a == audible alarm */
2295 case 'b': /* \b == backspace */
2299 case 'e': /* \e == escape */
2303 case 'f': /* \f == form feed */
2307 case 'n': /* \n == NewLine character */
2311 case 'r': /* \r == carriage return */
2315 case 't': /* \t == horizontal TAB */
2319 case 'v': /* \v == vertical TAB */
2323 case '\'': /* \' == ' (single quote) */
2324 case '"': /* \" == " (single quote) */
2325 case '%': /* \% == % (percent) */
2330 if (sc_isdigit(*cptr))
2333 while (*cptr >= '0' && *cptr <= '9') /* decimal! */
2334 c = c * 10 + *cptr++ - '0';
2336 cptr++; /* swallow a trailing ';' */
2340 error(27); /* invalid character constant */
2345 *lptr = (char *)cptr;
2346 assert(c >= 0 && c < 256);
2352 * Test if character "c" is alphabetic ("a".."z"), an underscore ("_")
2353 * or an "at" sign ("@"). The "@" is an extension to standard C.
2358 return (sc_isalpha(c) || c == '_' || c == PUBLIC_CHAR);
2363 * Test if character "c" is alphanumeric ("a".."z", "0".."9", "_" or "@")
2368 return (alpha(c) || sc_isdigit(c));
2371 /* The local variable table must be searched backwards, so that the deepest
2372 * nesting of local variables is searched first. The simplest way to do
2373 * this is to insert all new items at the head of the list.
2374 * In the global list, the symbols are kept in sorted order, so that the
2375 * public functions are written in sorted order.
2378 add_symbol(symbol * root, symbol * entry, int sort)
2383 while (root->next && strcmp(entry->name, root->next->name) > 0)
2386 if (!(newsym = (symbol *)malloc(sizeof(symbol))))
2391 memcpy(newsym, entry, sizeof(symbol));
2392 newsym->next = root->next;
2393 root->next = newsym;
2398 free_symbol(symbol * sym)
2402 /* free all sub-symbol allocated memory blocks, depending on the
2403 * kind of the symbol
2405 assert(sym != NULL);
2406 if (sym->ident == iFUNCTN)
2408 /* run through the argument list; "default array" arguments
2409 * must be freed explicitly; the tag list must also be freed */
2410 assert(sym->dim.arglist != NULL);
2411 for (arg = sym->dim.arglist; arg->ident != 0; arg++)
2413 if (arg->ident == iREFARRAY && arg->hasdefault)
2414 free(arg->defvalue.array.data);
2415 else if (arg->ident == iVARIABLE
2416 && ((arg->hasdefault & uSIZEOF) != 0
2417 || (arg->hasdefault & uTAGOF) != 0))
2418 free(arg->defvalue.size.symname);
2419 assert(arg->tags != NULL);
2422 free(sym->dim.arglist);
2424 assert(sym->refer != NULL);
2430 delete_symbol(symbol * root, symbol * sym)
2432 /* find the symbol and its predecessor
2433 * (this function assumes that you will never delete a symbol that is not
2434 * in the table pointed at by "root")
2436 assert(root != sym);
2437 while (root->next != sym)
2440 assert(root != NULL);
2443 /* unlink it, then free it */
2444 root->next = sym->next;
2449 delete_symbols(symbol * root, int level, int delete_labels,
2450 int delete_functions)
2454 /* erase only the symbols with a deeper nesting level than the
2455 * specified nesting level */
2459 if (sym->compound < level)
2461 if ((delete_labels || sym->ident != iLABEL)
2462 && (delete_functions || sym->ident != iFUNCTN
2463 || (sym->usage & uNATIVE) != 0) && (delete_functions
2464 || sym->ident != iCONSTEXPR
2465 || (sym->usage & uPREDEF) ==
2466 0) && (delete_functions
2472 root->next = sym->next;
2477 /* if the function was prototyped, but not implemented in this source,
2478 * mark it as such, so that its use can be flagged
2480 if (sym->ident == iFUNCTN && (sym->usage & uDEFINE) == 0)
2481 sym->usage |= uMISSING;
2482 if (sym->ident == iFUNCTN || sym->ident == iVARIABLE
2483 || sym->ident == iARRAY)
2484 sym->usage &= ~uDEFINE; /* clear "defined" flag */
2485 /* for user defined operators, also remove the "prototyped" flag, as
2486 * user-defined operators *must* be declared before use
2488 if (sym->ident == iFUNCTN && !sc_isalpha(*sym->name)
2489 && *sym->name != '_' && *sym->name != PUBLIC_CHAR)
2490 sym->usage &= ~uPROTOTYPED;
2491 root = sym; /* skip the symbol */
2496 /* The purpose of the hash is to reduce the frequency of a "name"
2497 * comparison (which is costly). There is little interest in avoiding
2498 * clusters in similar names, which is why this function is plain simple.
2501 namehash(char *name)
2503 unsigned char *ptr = (unsigned char *)name;
2504 int len = strlen(name);
2509 return (len << 24Lu) + (ptr[0] << 16Lu) + (ptr[len - 1] << 8Lu) +
2514 find_symbol(symbol * root, char *name, int fnumber)
2516 symbol *ptr = root->next;
2517 unsigned long hash = namehash(name);
2521 if (hash == ptr->hash && strcmp(name, ptr->name) == 0
2522 && !ptr->parent && (ptr->fnumber < 0
2523 || ptr->fnumber == fnumber))
2531 find_symbol_child(symbol * root, symbol * sym)
2533 symbol *ptr = root->next;
2537 if (ptr->parent == sym)
2544 /* Adds "bywhom" to the list of referrers of "entry". Typically,
2545 * bywhom will be the function that uses a variable or that calls
2549 refer_symbol(symbol * entry, symbol * bywhom)
2553 assert(bywhom != NULL); /* it makes no sense to add a "void" referrer */
2554 assert(entry != NULL);
2555 assert(entry->refer != NULL);
2557 /* see if it is already there */
2558 for (count = 0; count < entry->numrefers && entry->refer[count] != bywhom;
2561 if (count < entry->numrefers)
2563 assert(entry->refer[count] == bywhom);
2567 /* see if there is an empty spot in the referrer list */
2568 for (count = 0; count < entry->numrefers && entry->refer[count];
2571 assert(count <= entry->numrefers);
2572 if (count == entry->numrefers)
2575 int newsize = 2 * entry->numrefers;
2577 assert(newsize > 0);
2578 /* grow the referrer list */
2579 refer = (symbol **) realloc(entry->refer, newsize * sizeof(symbol *));
2581 return FALSE; /* insufficient memory */
2582 /* initialize the new entries */
2583 entry->refer = refer;
2584 for (count = entry->numrefers; count < newsize; count++)
2585 entry->refer[count] = NULL;
2586 count = entry->numrefers; /* first empty spot */
2587 entry->numrefers = newsize;
2590 /* add the referrer */
2591 assert(entry->refer[count] == NULL);
2592 entry->refer[count] = bywhom;
2597 markusage(symbol * sym, int usage)
2599 sym->usage |= (char)usage;
2600 /* check if (global) reference must be added to the symbol */
2601 if ((usage & (uREAD | uWRITTEN)) != 0)
2603 /* only do this for global symbols */
2604 if (sym->vclass == sGLOBAL)
2606 /* "curfunc" should always be valid, since statements may not occurs
2607 * outside functions; in the case of syntax errors, however, the
2608 * compiler may arrive through this function
2611 refer_symbol(sym, curfunc);
2618 * Returns a pointer to the global symbol (if found) or NULL (if not found)
2623 return find_symbol(&glbtab, name, fcurrent);
2628 * Returns a pointer to the local symbol (if found) or NULL (if not found).
2629 * See add_symbol() how the deepest nesting level is searched first.
2634 return find_symbol(&loctab, name, -1);
2638 findconst(char *name)
2642 sym = find_symbol(&loctab, name, -1); /* try local symbols first */
2643 if (!sym || sym->ident != iCONSTEXPR) /* not found, or not a constant */
2644 sym = find_symbol(&glbtab, name, fcurrent);
2645 if (!sym || sym->ident != iCONSTEXPR)
2647 assert(sym->parent == NULL); /* constants have no hierarchy */
2652 finddepend(symbol * parent)
2656 sym = find_symbol_child(&loctab, parent); /* try local symbols first */
2657 if (!sym) /* not found */
2658 sym = find_symbol_child(&glbtab, parent);
2664 * Adds a symbol to the symbol table (either global or local variables,
2665 * or global and local constants).
2668 addsym(char *name, cell addr, int ident, int vclass, int tag, int usage)
2670 symbol entry, **refer;
2672 /* global variables/constants/functions may only be defined once */
2673 assert(!(ident == iFUNCTN || ident == iCONSTEXPR) || vclass != sGLOBAL
2674 || findglb(name) == NULL);
2675 /* labels may only be defined once */
2676 assert(ident != iLABEL || findloc(name) == NULL);
2678 /* create an empty referrer list */
2679 if (!(refer = (symbol **)malloc(sizeof(symbol *))))
2681 error(103); /* insufficient memory */
2686 /* first fill in the entry */
2687 strncpy(entry.name, name, sizeof(entry.name) - 1);
2688 entry.name[sizeof(entry.name) - 1] = 0;
2689 entry.hash = namehash(name);
2691 entry.vclass = (char)vclass;
2692 entry.ident = (char)ident;
2694 entry.usage = (char)usage;
2695 entry.compound = 0; /* may be overridden later */
2696 entry.fnumber = -1; /* assume global visibility (ignored for local symbols) */
2697 entry.numrefers = 1;
2698 entry.refer = refer;
2699 entry.parent = NULL;
2701 /* then insert it in the list */
2702 if (vclass == sGLOBAL)
2703 return add_symbol(&glbtab, &entry, TRUE);
2705 return add_symbol(&loctab, &entry, FALSE);
2709 addvariable(char *name, cell addr, int ident, int vclass, int tag,
2710 int dim[], int numdim, int idxtag[])
2712 symbol *sym, *parent, *top;
2715 /* global variables may only be defined once */
2716 assert(vclass != sGLOBAL || (sym = findglb(name)) == NULL
2717 || (sym->usage & uDEFINE) == 0);
2719 if (ident == iARRAY || ident == iREFARRAY)
2722 sym = NULL; /* to avoid a compiler warning */
2723 for (level = 0; level < numdim; level++)
2725 top = addsym(name, addr, ident, vclass, tag, uDEFINE);
2726 top->dim.array.length = dim[level];
2727 top->dim.array.level = (short)(numdim - level - 1);
2728 top->x.idxtag = idxtag[level];
2729 top->parent = parent;
2737 sym = addsym(name, addr, ident, vclass, tag, uDEFINE);
2744 * Return next available internal label number.
2754 * Converts a number to a hexadecimal string and returns a pointer to that
2760 static char itohstr[15]; /* hex number is 10 characters long at most */
2762 int i, nibble[8]; /* a 32-bit hexadecimal cell has 8 nibbles */
2771 for (i = 0; i < max; i += 1)
2773 nibble[i] = (int)(val & 0x0f); /* nibble 0 is lowest nibble */
2777 while (nibble[i] == 0 && i > 0) /* search for highest non-zero nibble */
2781 if (nibble[i] >= 10)
2782 *ptr++ = (char)('a' + (nibble[i] - 10));
2784 *ptr++ = (char)('0' + nibble[i]);
2787 *ptr = '\0'; /* and a zero-terminator */