2 * vim:ts=8:sw=3:sts=8:noexpandtab:cino=>5n-3f0^-2{2
4 * Small compiler - File input, preprocessing and lexical analysis functions
6 * Copyright (c) ITB CompuPhase, 1997-2003
8 * This software is provided "as-is", without any express or implied warranty.
9 * In no event will the authors be held liable for any damages arising from
10 * the use of this software.
12 * Permission is granted to anyone to use this software for any purpose,
13 * including commercial applications, and to alter it and redistribute it
14 * freely, subject to the following restrictions:
16 * 1. The origin of this software must not be misrepresented; you must not
17 * claim that you wrote the original software. If you use this software in
18 * a product, an acknowledgment in the product documentation would be
19 * appreciated but is not required.
20 * 2. Altered source versions must be plainly marked as such, and must not be
21 * misrepresented as being the original software.
22 * 3. This notice may not be removed or altered from any source distribution.
28 * vim:ts=8:sw=3:sts=8:noexpandtab:cino=>5n-3f0^-2{2
41 #include "embryo_cc_osdefs.h"
42 #include "embryo_cc_sc.h"
45 static int match(char *st, int end);
46 static cell litchar(char **lptr, int rawmode);
47 static int alpha(char c);
49 static int icomment; /* currently in multiline comment? */
50 static int iflevel; /* nesting level if #if/#else/#endif */
51 static int skiplevel; /* level at which we started skipping */
52 static int elsedone; /* level at which we have seen an #else */
53 static char term_expr[] = "";
54 static int listline = -1; /* "current line" for the list file */
58 * Uses a LIFO stack to store information. The stack is used by doinclude(),
59 * doswitch() (to hold the state of "swactive") and some other routines.
61 * Porting note: I made the bold assumption that an integer will not be
62 * larger than a pointer (it may be smaller). That is, the stack element
63 * is typedef'ed as a pointer type, but I also store integers on it. See
66 * Global references: stack,stkidx (private to pushstk() and popstk())
68 static stkitem stack[sSTKMAX];
73 if (stkidx >= sSTKMAX)
74 error(102, "parser stack"); /* stack overflow (recursive include?) */
83 return (stkitem) - 1; /* stack is empty */
89 plungequalifiedfile(char *name)
91 static char *extensions[] = { ".inc", ".sma", ".small" };
99 fp = (FILE *) sc_opensrc(name);
100 ext = strchr(name, '\0'); /* save position */
103 /* try to append an extension */
104 strcpy(ext, extensions[ext_idx]);
105 fp = (FILE *) sc_opensrc(name);
107 *ext = '\0'; /* on failure, restore filename */
111 while (fp == NULL && ext_idx < (sizeof extensions / sizeof extensions[0]));
114 *ext = '\0'; /* restore filename */
117 pushstk((stkitem) inpf);
118 pushstk((stkitem) inpfname); /* pointer to current file name */
119 pushstk((stkitem) curlibrary);
120 /* FIXME: 64bit unsafe */
121 pushstk((stkitem) iflevel);
122 assert(skiplevel == 0);
123 /* FIXME: 64bit unsafe */
124 pushstk((stkitem) icomment);
125 /* FIXME: 64bit unsafe */
126 pushstk((stkitem) fcurrent);
127 /* FIXME: 64bit unsafe */
128 pushstk((stkitem) fline);
129 inpfname = strdup(name); /* set name of include file */
130 if (inpfname == NULL)
131 error(103); /* insufficient memory */
132 inpf = fp; /* set input file pointer to include file */
134 fline = 0; /* set current line number to 0 */
137 setfile(inpfname, fcurrent);
138 listline = -1; /* force a #line directive when changing the file */
139 setactivefile(fcurrent);
144 plungefile(char *name, int try_currentpath, int try_includepaths)
151 result = plungequalifiedfile(name);
153 if (try_includepaths && name[0] != DIRSEP_CHAR)
155 for (i = 0; !result && (ptr = get_path(i)) != NULL; i++)
157 char path[_MAX_PATH];
159 strncpy(path, ptr, sizeof path);
160 path[sizeof path - 1] = '\0'; /* force '\0' termination */
161 strncat(path, name, sizeof(path) - strlen(path));
162 path[sizeof path - 1] = '\0';
163 result = plungequalifiedfile(path);
170 check_empty(char *lptr)
172 /* verifies that the string contains only whitespace */
173 while (*lptr <= ' ' && *lptr != '\0')
176 error(38); /* extra characters on line */
181 * Gets the name of an include file, pushes the old file on the stack and
182 * sets some options. This routine doesn't use lex(), since lex() doesn't
183 * recognize file names (and directories).
185 * Global references: inpf (altered)
193 char name[_MAX_PATH], c;
196 while (*lptr <= ' ' && *lptr != 0) /* skip leading whitespace */
198 if (*lptr == '<' || *lptr == '\"')
200 c = (char)((*lptr == '\"') ? '\"' : '>'); /* termination character */
202 while (*lptr <= ' ' && *lptr != 0) /* skip whitespace after quote */
211 while (*lptr != c && *lptr != '\0' && i < sizeof name - 1) /* find the end of the string */
213 while (i > 0 && name[i - 1] <= ' ')
214 i--; /* strip trailing whitespace */
215 assert(i >= 0 && i < sizeof name);
216 name[i] = '\0'; /* zero-terminate the string */
219 { /* verify correct string termination */
220 error(37); /* invalid string */
224 check_empty(lptr + 1); /* verify that the rest of the line is whitespace */
226 /* Include files between "..." or without quotes are read from the current
227 * directory, or from a list of "include directories". Include files
228 * between <...> are only read from the list of include directories.
230 result = plungefile(name, (c != '>'), TRUE);
232 error(100, name); /* cannot read from ... (fatal error) */
237 * Reads in a new line from the input file pointed to by "inpf". readline()
238 * concatenates lines that end with a \ with the next line. If no more data
239 * can be read from the file, readline() attempts to pop off the previous file
240 * from the stack. If that fails too, it sets "freading" to 0.
242 * Global references: inpf,fline,inpfname,freading,icomment (altered)
250 if (lptr == term_expr)
256 if (inpf == NULL || sc_eofsrc(inpf))
259 error(49); /* invalid line continuation */
260 if (inpf != NULL && inpf != inpf_org)
262 i = (int)(long)popstk();
264 { /* All's done; popstk() returns "stack is empty" */
267 /* when there is nothing more to read, the #if/#else stack should
268 * be empty and we should not be in a comment
270 assert(iflevel >= 0);
272 error(1, "#endif", "-end of file-");
274 error(1, "*/", "-end of file-");
278 fcurrent = (int)(long)popstk();
279 icomment = (int)(long)popstk();
280 assert(skiplevel == 0); /* skiplevel was not stored on stack, because it should always be zero at this point */
281 iflevel = (int)(long)popstk();
282 curlibrary = (constvalue *) popstk();
283 free(inpfname); /* return memory allocated for the include file name */
284 inpfname = (char *)popstk();
285 inpf = (FILE *) popstk();
286 setactivefile(fcurrent);
287 listline = -1; /* force a #line directive when changing the file */
291 if (sc_readsrc(inpf, line, num) == NULL)
293 *line = '\0'; /* delete line */
298 /* check whether to erase leading spaces */
303 while (*ptr == ' ' || *ptr == '\t')
306 memmove(line, ptr, strlen(ptr) + 1);
309 /* check whether a full line was read */
310 if (strchr(line, '\n') == NULL && !sc_eofsrc(inpf))
311 error(75); /* line too long */
312 /* check if the next line must be concatenated to this line */
313 if ((ptr = strchr(line, '\n')) != NULL && ptr > line)
315 assert(*(ptr + 1) == '\0'); /* '\n' should be last in the string */
317 && (*ptr == '\n' || *ptr == ' ' || *ptr == '\t'))
318 ptr--; /* skip trailing whitespace */
322 /* set '\a' at the position of '\\' to make it possible to check
323 * for a line continuation in a single line comment (error 49)
326 *ptr = '\0'; /* erase '\n' (and any trailing whitespace) */
330 line += strlen(line);
334 while (num >= 0 && cont);
339 * Replaces all comments from the line by space characters. It updates
340 * a global variable ("icomment") for multiline comments.
342 * This routine also supports the C++ extension for single line comments.
343 * These comments are started with "//" and end at the end of the line.
345 * Global references: icomment (private to "stripcom")
356 if (*line == '*' && *(line + 1) == '/')
358 icomment = FALSE; /* comment has ended */
359 *line = ' '; /* replace '*' and '/' characters by spaces */
365 if (*line == '/' && *(line + 1) == '*')
366 error(216); /* nested comment */
367 *line = ' '; /* replace comments by spaces */
373 if (*line == '/' && *(line + 1) == '*')
375 icomment = TRUE; /* start comment */
376 *line = ' '; /* replace '/' and '*' characters by spaces */
380 else if (*line == '/' && *(line + 1) == '/')
381 { /* comment to end of line */
382 if (strchr(line, '\a') != NULL)
383 error(49); /* invalid line continuation */
384 *line++ = '\n'; /* put "newline" at first slash */
385 *line = '\0'; /* put "zero-terminator" at second slash */
389 if (*line == '\"' || *line == '\'')
390 { /* leave literals unaltered */
391 c = *line; /* ending quote, single or double */
393 while ((*line != c || *(line - 1) == '\\')
396 line += 1; /* skip final quote */
409 * Attempts to interpret a numeric symbol as a boolean value. On success
410 * it returns the number of characters processed (so the line pointer can be
411 * adjusted) and the value is stored in "val". Otherwise it returns 0 and
414 * A boolean value must start with "0b"
417 btoi(cell * val, char *curptr)
423 if (*ptr == '0' && *(ptr + 1) == 'b')
426 while (*ptr == '0' || *ptr == '1' || *ptr == '_')
429 *val = (*val << 1) | (*ptr - '0');
437 if (alphanum(*ptr)) /* number must be delimited by non-alphanumeric char */
440 return (int)(ptr - curptr);
445 * Attempts to interpret a numeric symbol as a decimal value. On success
446 * it returns the number of characters processed and the value is stored in
447 * "val". Otherwise it returns 0 and "val" is garbage.
450 dtoi(cell * val, char *curptr)
456 if (!isdigit(*ptr)) /* should start with digit */
458 while (isdigit(*ptr) || *ptr == '_')
461 *val = (*val * 10) + (*ptr - '0');
464 if (alphanum(*ptr)) /* number must be delimited by non-alphanumerical */
466 if (*ptr == '.' && isdigit(*(ptr + 1)))
467 return 0; /* but a fractional part must not be present */
468 return (int)(ptr - curptr);
473 * Attempts to interpret a numeric symbol as a hexadecimal value. On
474 * success it returns the number of characters processed and the value is
475 * stored in "val". Otherwise it return 0 and "val" is garbage.
478 htoi(cell * val, char *curptr)
484 if (!isdigit(*ptr)) /* should start with digit */
486 if (*ptr == '0' && *(ptr + 1) == 'x')
487 { /* C style hexadecimal notation */
489 while (isxdigit(*ptr) || *ptr == '_')
493 assert(isxdigit(*ptr));
496 *val += (*ptr - '0');
498 *val += (tolower(*ptr) - 'a' + 10);
510 return (int)(ptr - curptr);
540 * Attempts to interpret a numeric symbol as a rational number, either as
541 * IEEE 754 single precision floating point or as a fixed point integer.
542 * On success it returns the number of characters processed and the value is
543 * stored in "val". Otherwise it returns 0 and "val" is unchanged.
545 * Small has stricter definition for floating point numbers than most:
546 * o the value must start with a digit; ".5" is not a valid number, you
548 * o a period must appear in the value, even if an exponent is given; "2e3"
549 * is not a valid number, you should write "2.0e3"
550 * o at least one digit must follow the period; "6." is not a valid number,
551 * you should write "6.0"
554 ftoi(cell * val, char *curptr)
557 double fnum, ffrac, fmult;
558 unsigned long dnum, dbase;
561 assert(rational_digits >= 0 && rational_digits < 9);
562 for (i = 0, dbase = 1; i < rational_digits; i++)
567 if (!isdigit(*ptr)) /* should start with digit */
569 while (isdigit(*ptr) || *ptr == '_')
573 fnum = (fnum * 10.0) + (*ptr - '0');
574 dnum = (dnum * 10L) + (*ptr - '0') * dbase;
579 return 0; /* there must be a period */
581 if (!isdigit(*ptr)) /* there must be at least one digit after the dot */
586 while (isdigit(*ptr) || *ptr == '_')
590 ffrac = (ffrac * 10.0) + (*ptr - '0');
591 fmult = fmult / 10.0;
593 dnum += (*ptr - '0') * dbase;
594 if (dbase == 0L && sc_rationaltag && rational_digits > 0
597 error(222); /* number of digits exceeds rational number precision */
603 fnum += ffrac * fmult; /* form the number so far */
605 { /* optional fractional part */
618 if (!isdigit(*ptr)) /* 'e' should be followed by a digit */
621 while (isdigit(*ptr))
623 exp = (exp * 10) + (*ptr - '0');
627 fmult = pow10(exp * sign);
629 fmult = pow(10, exp * sign);
632 dnum *= (unsigned long)(fmult + 0.5);
635 /* decide how to store the number */
636 if (sc_rationaltag == 0)
638 error(70); /* rational number support was not enabled */
641 else if (rational_digits == 0)
644 *val = EMBRYO_FLOAT_TO_CELL((float) fnum);
646 /* I assume that the C/C++ compiler stores "float" values in IEEE 754
647 * format (as mandated in the ANSI standard). Test this assumption anyway.
650 float test1 = 0.0, test2 = 50.0;
651 Embryo_Cell c1 = EMBRYO_FLOAT_TO_CELL(test1);
652 Embryo_Cell c2 = EMBRYO_FLOAT_TO_CELL(test2);
654 if (c1 != 0x00000000L)
657 "embryo_cc: WARNING! you compiler does not SEEM to interpret IEEE floating\n"
658 "point math as embryo expects. this could be bad.\n"
660 "(float 0.0 != 0x00000000 bitpattern, 0x%08x instead)\n"
662 "this could be an issue with you compiling embryo with gcc 3.2.x that seems\n"
663 "to trigger this sanity check. we are not sure why yet, but gcc 3.3.x works\n"
666 else if (c2 != 0x42480000L)
669 "embryo_cc: WARNING! you compiler does not SEEM to interpret IEEE floating\n"
670 "point math as embryo expects. This could be bad.\n"
672 "(float 50.0 != 0x42480000 bitpattern, 0x%08x instead)\n"
674 "this could be an issue with you compiling embryo with gcc 3.2.x that seems\n"
675 "to trigger this sanity check. we are not sure why yet, but gcc 3.3.x works\n"
687 return (int)(ptr - curptr);
692 * Reads in a number (binary, decimal or hexadecimal). It returns the number
693 * of characters processed or 0 if the symbol couldn't be interpreted as a
694 * number (in this case the argument "val" remains unchanged). This routine
695 * relies on the 'early dropout' implementation of the logical or (||)
698 * Note: the routine doesn't check for a sign (+ or -). The - is checked
699 * for at "hier2()" (in fact, it is viewed as an operator, not as a
700 * sign) and the + is invalid (as in K&R C, and unlike ANSI C).
703 number(cell * val, char *curptr)
708 if ((i = btoi(&value, curptr)) != 0 /* binary? */
709 || (i = htoi(&value, curptr)) != 0 /* hexadecimal? */
710 || (i = dtoi(&value, curptr)) != 0) /* decimal? */
717 return 0; /* else not a number */
722 chrcat(char *str, char chr)
724 str = strchr(str, '\0');
730 preproc_expr(cell * val, int *tag)
737 /* Disable staging; it should be disabled already because
738 * expressions may not be cut off half-way between conditional
739 * compilations. Reset the staging index, but keep the code
742 if (stgget(&index, &code_index))
744 error(57); /* unfinished expression */
745 stgdel(0, code_index);
748 /* append a special symbol to the string, so the expression
749 * analyzer won't try to read a next line when it encounters
752 assert(strlen(pline) < sLINEMAX);
753 term = strchr(pline, '\0');
754 assert(term != NULL);
755 chrcat(pline, PREPROC_TERM); /* the "DEL" code (see SC.H) */
756 result = constexpr(val, tag); /* get value (or 0 on error) */
757 *term = '\0'; /* erase the token (if still present) */
758 lexclr(FALSE); /* clear any "pushed" tokens */
763 * Returns returns a pointer behind the closing quote or to the other
764 * character that caused the input to be ended.
767 getstring(char *dest, int max, char *line)
769 assert(dest != NULL && line != NULL);
771 while (*line <= ' ' && *line != '\0')
772 line++; /* skip whitespace */
775 error(37); /* invalid string */
777 else if (*line == '\0')
782 while (*line != '"' && *line != '\0')
790 lptr++; /* skip closing " */
792 error(37); /* invalid string */
811 * Recognizes the compiler directives. The function returns:
812 * CMD_NONE the line must be processed
813 * CMD_TERM a pending expression must be completed before processing further lines
814 * Other value: the line must be skipped, because:
815 * CMD_CONDFALSE false "#if.." code
816 * CMD_EMPTYLINE line is empty
817 * CMD_INCLUDE the line contains a #include directive
818 * CMD_DEFINE the line contains a #subst directive
819 * CMD_IF the line contains a #if/#else/#endif directive
820 * CMD_DIRECTIVE the line contains some other compiler directive
822 * Global variables: iflevel, skiplevel, elsedone (altered)
834 while (*lptr <= ' ' && *lptr != '\0')
837 return CMD_EMPTYLINE; /* empty line */
839 return skiplevel > 0 ? CMD_CONDFALSE : CMD_NONE; /* it is not a compiler directive */
840 /* compiler directive found */
841 indent_nowarn = TRUE; /* allow loose indentation" */
842 lexclr(FALSE); /* clear any "pushed" tokens */
843 /* on a pending expression, force to return a silent ';' token and force to
846 if (!sc_needsemicolon && stgget(&index, &code_index))
851 tok = lex(&val, &str);
852 ret = skiplevel > 0 ? CMD_CONDFALSE : CMD_DIRECTIVE; /* preset 'ret' to CMD_DIRECTIVE (most common case) */
855 case tpIF: /* conditional compilation */
859 break; /* break out of switch */
860 preproc_expr(&val, NULL); /* get value (or 0 on error) */
867 if (iflevel == 0 && skiplevel == 0)
869 error(26); /* no matching #if */
874 if (elsedone == iflevel)
875 error(60); /* multiple #else directives between #if ... #endif */
877 if (skiplevel == iflevel)
879 else if (skiplevel == 0)
884 #if 0 /* ??? *really* need to use a stack here */
887 if (iflevel == 0 && skiplevel == 0)
889 error(26); /* no matching #if */
892 else if (elsedone == iflevel)
894 error(61); /* #elseif directive may not follow an #else */
899 preproc_expr(&val, NULL); /* get value (or 0 on error) */
901 skiplevel = iflevel; /* we weren't skipping, start skipping now */
903 skiplevel = 0; /* we were skipping, condition is valid -> stop skipping */
904 /* else: we were skipping and condition is invalid -> keep skipping */
911 if (iflevel == 0 && skiplevel == 0)
918 if (skiplevel == iflevel)
920 if (elsedone == iflevel)
921 elsedone = 0; /* ??? actually, should build a stack of #if/#endif and keep
922 * the state whether an #else was seen per nesting level */
927 case tINCLUDE: /* #include directive */
935 char pathname[_MAX_PATH];
937 lptr = getstring(pathname, sizeof pathname, lptr);
938 if (pathname[0] != '\0')
941 inpfname = strdup(pathname);
942 if (inpfname == NULL)
943 error(103); /* insufficient memory */
951 if (lex(&val, &str) != tNUMBER)
952 error(8); /* invalid/non-constant expression */
958 if (skiplevel == 0 && (sc_debug & sCHKBOUNDS) != 0)
960 preproc_expr(&val, NULL); /* get constant expression (or 0 on error) */
962 error(7); /* assertion failed */
969 if (lex(&val, &str) == tSYMBOL)
971 if (strcmp(str, "ctrlchar") == 0)
973 if (lex(&val, &str) != tNUMBER)
974 error(27); /* invalid character constant */
975 sc_ctrlchar = (char)val;
977 else if (strcmp(str, "compress") == 0)
981 preproc_expr(&val, NULL);
982 sc_compress = (int)val; /* switch code packing on/off */
984 else if (strcmp(str, "dynamic") == 0)
986 preproc_expr(&sc_stksize, NULL);
988 else if (strcmp(str, "library") == 0)
990 char name[sNAMEMAX + 1];
992 while (*lptr <= ' ' && *lptr != '\0')
996 lptr = getstring(name, sizeof name, lptr);
1002 for (i = 0; i < sizeof name && alphanum(*lptr);
1007 if (name[0] == '\0')
1013 if (strlen(name) > sEXPMAX)
1014 error(220, name, sEXPMAX); /* exported symbol is truncated */
1015 /* add the name if it does not yet exist in the table */
1016 if (find_constval(&libname_tab, name, 0) == NULL)
1018 append_constval(&libname_tab, name, 0, 0);
1021 else if (strcmp(str, "pack") == 0)
1025 preproc_expr(&val, NULL); /* default = packed/unpacked */
1026 sc_packstr = (int)val;
1028 else if (strcmp(str, "rational") == 0)
1030 char name[sNAMEMAX + 1];
1034 /* first gather all information, start with the tag name */
1035 while (*lptr <= ' ' && *lptr != '\0')
1037 for (i = 0; i < sizeof name && alphanum(*lptr);
1041 /* then the precision (for fixed point arithmetic) */
1042 while (*lptr <= ' ' && *lptr != '\0')
1046 preproc_expr(&digits, NULL);
1047 if (digits <= 0 || digits > 9)
1049 error(68); /* invalid rational number precision */
1055 /* add the tag (make it public) and check the values */
1056 i = sc_addtag(name);
1058 if (sc_rationaltag == 0
1059 || (sc_rationaltag == i
1060 && rational_digits == (int)digits))
1063 rational_digits = (int)digits;
1067 error(69); /* rational number format already set, can only be set once */
1070 else if (strcmp(str, "semicolon") == 0)
1074 preproc_expr(&val, NULL);
1075 sc_needsemicolon = (int)val;
1077 else if (strcmp(str, "tabsize") == 0)
1081 preproc_expr(&val, NULL);
1082 sc_tabsize = (int)val;
1084 else if (strcmp(str, "align") == 0)
1086 sc_alignnext = TRUE;
1088 else if (strcmp(str, "unused") == 0)
1090 char name[sNAMEMAX + 1];
1097 while (*lptr <= ' ' && *lptr != '\0')
1099 for (i = 0; i < sizeof name && isalpha(*lptr);
1103 /* get the symbol */
1104 sym = findloc(name);
1106 sym = findglb(name);
1109 sym->usage |= uREAD;
1110 if (sym->ident == iVARIABLE
1111 || sym->ident == iREFERENCE
1112 || sym->ident == iARRAY
1113 || sym->ident == iREFARRAY)
1114 sym->usage |= uWRITTEN;
1118 error(17, name); /* undefined symbol */
1120 /* see if a comma follows the name */
1121 while (*lptr <= ' ' && *lptr != '\0')
1123 comma = (*lptr == ',');
1131 error(207); /* unknown #pragma */
1136 error(207); /* unknown #pragma */
1146 assert(inpf != NULL);
1147 if (inpf != inpf_org)
1155 /* write opcode to output file */
1159 while (*lptr <= ' ' && *lptr != '\0')
1161 for (i = 0; i < 40 && (isalpha(*lptr) || *lptr == '.'); i++, lptr++)
1162 name[i] = (char)tolower(*lptr);
1167 code_idx += opcodes(1);
1168 /* write parameter (if any) */
1169 while (*lptr <= ' ' && *lptr != '\0')
1175 tok = lex(&val, &str);
1181 code_idx += opargs(1);
1187 if (sym == NULL || (sym->ident != iFUNCTN
1188 && sym->ident != iREFFUNC
1189 && (sym->usage & uDEFINE) == 0))
1191 error(17, str); /* undefined symbol */
1195 outval(sym->addr, FALSE);
1196 /* mark symbol as "used", unknown whether for read or write */
1197 markusage(sym, uREAD | uWRITTEN);
1198 code_idx += opargs(1);
1204 extern char *sc_tokens[]; /* forward declaration */
1207 sprintf(s2, "%c", (char)tok);
1209 strcpy(s2, sc_tokens[tok - tFIRST]);
1210 error(1, sc_tokens[tSYMBOL - tFIRST], s2);
1220 #if !defined NO_DEFINE
1226 char *pattern, *substitution;
1228 int count, prefixlen;
1231 /* find the pattern to match */
1232 while (*lptr <= ' ' && *lptr != '\0')
1234 start = lptr; /* save starting point of the match pattern */
1236 while (*lptr > ' ' && *lptr != '\0')
1238 litchar(&lptr, FALSE); /* litchar() advances "lptr" and handles escape characters */
1242 /* check pattern to match */
1243 if (!isalpha(*start) && *start != '_')
1245 error(74); /* pattern must start with an alphabetic character */
1248 /* store matched pattern */
1249 pattern = malloc(count + 1);
1250 if (pattern == NULL)
1251 error(103); /* insufficient memory */
1257 assert(*lptr != '\0');
1258 pattern[count++] = (char)litchar(&lptr, FALSE);
1260 pattern[count] = '\0';
1261 /* special case, erase trailing variable, because it could match anything */
1262 if (count >= 2 && isdigit(pattern[count - 1])
1263 && pattern[count - 2] == '%')
1264 pattern[count - 2] = '\0';
1265 /* find substitution string */
1266 while (*lptr <= ' ' && *lptr != '\0')
1268 start = lptr; /* save starting point of the match pattern */
1271 while (*lptr != '\0')
1273 /* keep position of the start of trailing whitespace */
1288 /* store matched substitution */
1289 substitution = malloc(count + 1); /* +1 for '\0' */
1290 if (substitution == NULL)
1291 error(103); /* insufficient memory */
1297 assert(*lptr != '\0');
1298 substitution[count++] = *lptr++;
1300 substitution[count] = '\0';
1301 /* check whether the definition already exists */
1302 for (prefixlen = 0, start = pattern;
1303 isalpha(*start) || isdigit(*start) || *start == '_';
1304 prefixlen++, start++)
1306 assert(prefixlen > 0);
1307 if ((def = find_subst(pattern, prefixlen)) != NULL)
1309 if (strcmp(def->first, pattern) != 0
1310 || strcmp(def->second, substitution) != 0)
1311 error(201, pattern); /* redefinition of macro (non-identical) */
1312 delete_subst(pattern, prefixlen);
1314 /* add the pattern/substitution pair to the list */
1315 assert(pattern[0] != '\0');
1316 insert_subst(pattern, substitution, prefixlen);
1325 if (lex(&val, &str) == tSYMBOL)
1327 if (!delete_subst(str, strlen(str)))
1328 error(17, str); /* undefined symbol */
1332 error(20, str); /* invalid symbol name */
1339 error(31); /* unknown compiler directive */
1340 ret = skiplevel > 0 ? CMD_DIRECTIVE : CMD_NONE; /* line must be processed (if skiplevel==0) */
1345 #if !defined NO_DEFINE
1347 is_startstring(char *string)
1349 if (*string == '\"' || *string == '\'')
1350 return TRUE; /* "..." */
1355 if (*string == '\"' || *string == '\'')
1356 return TRUE; /* !"..." */
1357 if (*string == sc_ctrlchar)
1360 if (*string == '\"' || *string == '\'')
1361 return TRUE; /* !\"..." */
1364 else if (*string == sc_ctrlchar)
1367 if (*string == '\"' || *string == '\'')
1368 return TRUE; /* \"..." */
1372 if (*string == '\"' || *string == '\'')
1373 return TRUE; /* \!"..." */
1381 skipstring(char *string)
1384 int rawstring = FALSE;
1386 while (*string == '!' || *string == sc_ctrlchar)
1388 rawstring = (*string == sc_ctrlchar);
1393 assert(endquote == '\"' || endquote == '\'');
1394 string++; /* skip open quote */
1395 while (*string != endquote && *string != '\0')
1396 litchar(&string, rawstring);
1401 skippgroup(char *string)
1404 char open = *string;
1423 close = '\0'; /* only to avoid a compiler warning */
1427 while (*string != close || nest > 0)
1429 if (*string == open)
1431 else if (*string == close)
1433 else if (is_startstring(string))
1434 string = skipstring(string);
1435 if (*string == '\0')
1443 strdel(char *str, size_t len)
1445 size_t length = strlen(str);
1449 memmove(str, str + len, length - len + 1); /* include EOS byte */
1454 strins(char *dest, char *src, size_t srclen)
1456 size_t destlen = strlen(dest);
1458 assert(srclen <= strlen(src));
1459 memmove(dest + srclen, dest, destlen + 1); /* include EOS byte */
1460 memcpy(dest, src, srclen);
1465 substpattern(char *line, size_t buffersize, char *pattern, char *substitution)
1468 char *p, *s, *e, *args[10];
1469 int match, arg, len;
1471 memset(args, 0, sizeof args);
1473 /* check the length of the prefix */
1474 for (prefixlen = 0, s = pattern; isalpha(*s) || isdigit(*s) || *s == '_';
1477 assert(prefixlen > 0);
1478 assert(strncmp(line, pattern, prefixlen) == 0);
1480 /* pattern prefix matches; match the rest of the pattern, gather
1483 s = line + prefixlen;
1484 p = pattern + prefixlen;
1485 match = TRUE; /* so far, pattern matches */
1486 while (match && *s != '\0' && *p != '\0')
1494 assert(arg >= 0 && arg <= 9);
1495 p++; /* skip parameter id */
1497 /* match the source string up to the character after the digit
1498 * (skipping strings in the process
1501 while (*e != *p && *e != '\0' && *e != '\n')
1503 if (is_startstring(e)) /* skip strings */
1505 else if (strchr("({[", *e) != NULL) /* skip parenthized groups */
1508 e++; /* skip non-alphapetic character (or closing quote of
1509 * a string, or the closing paranthese of a group) */
1511 /* store the parameter (overrule any earlier) */
1512 if (args[arg] != NULL)
1515 args[arg] = malloc(len + 1);
1516 if (args[arg] == NULL)
1517 error(103); /* insufficient memory */
1518 strncpy(args[arg], s, len);
1519 args[arg][len] = '\0';
1520 /* character behind the pattern was matched too */
1525 else if (*e == '\n' && *p == ';' && *(p + 1) == '\0'
1526 && !sc_needsemicolon)
1528 s = e; /* allow a trailing ; in the pattern match to end of line */
1532 assert(*e == '\0' || *e == '\n');
1543 else if (*p == ';' && *(p + 1) == '\0' && !sc_needsemicolon)
1545 /* source may be ';' or end of the line */
1546 while (*s <= ' ' && *s != '\0')
1547 s++; /* skip white space */
1548 if (*s != ';' && *s != '\0')
1550 p++; /* skip the semicolon in the pattern */
1556 /* skip whitespace between two non-alphanumeric characters, except
1557 * for two identical symbols
1559 assert(p > pattern);
1560 if (!alphanum(*p) && *(p - 1) != *p)
1561 while (*s <= ' ' && *s != '\0')
1562 s++; /* skip white space */
1563 ch = litchar(&p, FALSE); /* this increments "p" */
1567 s++; /* this character matches */
1571 if (match && *p == '\0')
1573 /* if the last character to match is an alphanumeric character, the
1574 * current character in the source may not be alphanumeric
1576 assert(p > pattern);
1577 if (alphanum(*(p - 1)) && alphanum(*s))
1583 /* calculate the length of the substituted string */
1584 for (e = substitution, len = 0; *e != '\0'; e++)
1586 if (*e == '%' && isdigit(*(e + 1)))
1588 arg = *(e + 1) - '0';
1589 assert(arg >= 0 && arg <= 9);
1590 if (args[arg] != NULL)
1591 len += strlen(args[arg]);
1592 e++; /* skip %, digit is skipped later */
1599 /* check length of the string after substitution */
1600 if (strlen(line) + len - (int)(s - line) > buffersize)
1602 error(75); /* line too long */
1606 /* substitute pattern */
1607 strdel(line, (int)(s - line));
1608 for (e = substitution, s = line; *e != '\0'; e++)
1610 if (*e == '%' && isdigit(*(e + 1)))
1612 arg = *(e + 1) - '0';
1613 assert(arg >= 0 && arg <= 9);
1614 if (args[arg] != NULL)
1616 strins(s, args[arg], strlen(args[arg]));
1617 s += strlen(args[arg]);
1619 e++; /* skip %, digit is skipped later */
1630 for (arg = 0; arg < 10; arg++)
1631 if (args[arg] != NULL)
1638 substallpatterns(char *line, int buffersize)
1645 while (*start != '\0')
1647 /* find the start of a prefix (skip all non-alphabetic characters),
1650 while (!isalpha(*start) && *start != '_' && *start != '\0')
1653 if (is_startstring(start))
1655 start = skipstring(start);
1657 break; /* abort loop on error */
1659 start++; /* skip non-alphapetic character (or closing quote of a string) */
1662 break; /* abort loop on error */
1663 /* get the prefix (length), look for a matching definition */
1666 while (isalpha(*end) || isdigit(*end) || *end == '_')
1671 assert(prefixlen > 0);
1672 subst = find_subst(start, prefixlen);
1675 /* properly match the pattern and substitute */
1677 (start, buffersize - (start - line), subst->first,
1679 start = end; /* match failed, skip this prefix */
1680 /* match succeeded: do not update "start", because the substitution text
1681 * may be matched by other macros
1686 start = end; /* no macro with this prefix, skip this prefix */
1694 * Reads a line by readline() into "pline" and performs basic preprocessing:
1695 * deleting comments, skipping lines with false "#if.." code and recognizing
1696 * other compiler directives. There is an indirect recursion: lex() calls
1697 * preprocess() if a new line must be read, preprocess() calls command(),
1698 * which at his turn calls lex() to identify the token.
1700 * Global references: lptr (altered)
1702 * freading (referred to only)
1714 stripcom(pline); /* ??? no need for this when reading back from list file (in the second pass) */
1715 lptr = pline; /* set "line pointer" to start of the parsing buffer */
1716 iscommand = command();
1717 if (iscommand != CMD_NONE)
1718 errorset(sRESET); /* reset error flag ("panic mode") on empty line or directive */
1719 #if !defined NO_DEFINE
1720 if (iscommand == CMD_NONE)
1722 assert(lptr != term_expr);
1723 substallpatterns(pline, sLINEMAX);
1724 lptr = pline; /* reset "line pointer" to start of the parsing buffer */
1728 while (iscommand != CMD_NONE && iscommand != CMD_TERM && freading); /* enddo */
1732 unpackedstring(char *lptr, int rawstring)
1734 while (*lptr != '\0')
1736 /* check for doublequotes indicating the end of the string */
1739 /* check whether there's another pair of quotes following.
1740 * If so, paste the two strings together, thus
1741 * "pants""off" becomes "pantsoff"
1743 if (*(lptr + 1) == '\"')
1750 { /* ignore '\a' (which was inserted at a line concatenation) */
1754 stowlit(litchar(&lptr, rawstring)); /* litchar() alters "lptr" */
1756 stowlit(0); /* terminate string */
1761 packedstring(char *lptr, int rawstring)
1766 i = sizeof(ucell) - (charbits / 8); /* start at most significant byte */
1768 while (*lptr != '\0')
1770 /* check for doublequotes indicating the end of the string */
1773 /* check whether there's another pair of quotes following.
1774 * If so, paste the two strings together, thus
1775 * "pants""off" becomes "pantsoff"
1777 if (*(lptr + 1) == '\"')
1784 { /* ignore '\a' (which was inserted at a line concatenation) */
1788 c = litchar(&lptr, rawstring); /* litchar() alters "lptr" */
1789 if (c >= (ucell) (1 << charbits))
1790 error(43); /* character constant exceeds range */
1791 val |= (c << 8 * i);
1797 i = (i + sizeof(ucell) - (charbits / 8)) % sizeof(ucell);
1799 /* save last code; make sure there is at least one terminating zero character */
1800 if (i != (int)(sizeof(ucell) - (charbits / 8)))
1801 stowlit(val); /* at least one zero character in "val" */
1803 stowlit(0); /* add full cell of zeros */
1807 /* lex(lexvalue,lexsym) Lexical Analysis
1809 * lex() first deletes leading white space, then checks for multi-character
1810 * operators, keywords (including most compiler directives), numbers,
1811 * labels, symbols and literals (literal characters are converted to a number
1812 * and are returned as such). If every check fails, the line must contain
1813 * a single-character operator. So, lex() returns this character. In the other
1814 * case (something did match), lex() returns the number of the token. All
1815 * these tokens have been assigned numbers above 255.
1817 * Some tokens have "attributes":
1818 * tNUMBER the value of the number is return in "lexvalue".
1819 * tRATIONAL the value is in IEEE 754 encoding or in fixed point
1820 * encoding in "lexvalue".
1821 * tSYMBOL the first sNAMEMAX characters of the symbol are
1822 * stored in a buffer, a pointer to this buffer is
1823 * returned in "lexsym".
1824 * tLABEL the first sNAMEMAX characters of the label are
1825 * stored in a buffer, a pointer to this buffer is
1826 * returned in "lexsym".
1827 * tSTRING the string is stored in the literal pool, the index
1828 * in the literal pool to this string is stored in
1831 * lex() stores all information (the token found and possibly its attribute)
1832 * in global variables. This allows a token to be examined twice. If "_pushed"
1833 * is true, this information is returned.
1835 * Global references: lptr (altered)
1836 * fline (referred to only)
1837 * litidx (referred to only)
1838 * _lextok, _lexval, _lexstr
1844 static cell _lexval;
1845 static char _lexstr[sLINEMAX + 1];
1846 static int _lexnewline;
1851 stkidx = 0; /* index for pushstk() and popstk() */
1852 iflevel = 0; /* preprocessor: nesting of "#if" */
1853 skiplevel = 0; /* preprocessor: skipping lines or compiling lines */
1854 icomment = FALSE; /* currently not in a multiline comment */
1855 _pushed = FALSE; /* no token pushed back into lex */
1856 _lexnewline = FALSE;
1859 char *sc_tokens[] = {
1860 "*=", "/=", "%=", "+=", "-=", "<<=", ">>>=", ">>=", "&=", "^=", "|=",
1861 "||", "&&", "==", "!=", "<=", ">=", "<<", ">>>", ">>", "++", "--",
1863 "assert", "break", "case", "char", "const", "continue", "default",
1864 "defined", "do", "else", "enum", "exit", "for", "forward", "goto",
1865 "if", "native", "new", "operator", "public", "return", "sizeof",
1866 "sleep", "static", "stock", "switch", "tagof", "while",
1867 "#assert", "#define", "#else", "#emit", "#endif", "#endinput",
1868 "#endscript", "#file", "#if", "#include", "#line", "#pragma", "#undef",
1869 ";", ";", "-integer value-", "-rational value-", "-identifier-",
1870 "-label-", "-string-"
1874 lex(cell * lexvalue, char **lexsym)
1876 int i, toolong, newline, rawstring;
1881 _pushed = FALSE; /* reset "_pushed" flag */
1882 *lexvalue = _lexval;
1887 _lextok = 0; /* preset all values */
1890 *lexvalue = _lexval;
1892 _lexnewline = FALSE;
1896 newline = (lptr == pline); /* does lptr point to start of line buffer */
1897 while (*lptr <= ' ')
1898 { /* delete leading white space */
1901 preprocess(); /* preprocess resets "lptr" */
1904 if (lptr == term_expr) /* special sequence to terminate a pending expression */
1905 return (_lextok = tENDEXPR);
1906 _lexnewline = TRUE; /* set this after preprocess(), because
1907 * preprocess() calls lex() recursively */
1918 for (i = 0; i < (int)(lptr - pline); i++)
1919 if (pline[i] == '\t' && sc_tabsize > 0)
1921 (int)(sc_tabsize - (stmtindent + sc_tabsize) % sc_tabsize);
1928 while (i <= tMIDDLE)
1929 { /* match multi-character operators */
1930 if (match(*tokptr, FALSE))
1939 { /* match reserved words and compiler directives */
1940 if (match(*tokptr, TRUE))
1943 errorset(sRESET); /* reset error flag (clear the "panic mode") */
1950 if ((i = number(&_lexval, lptr)) != 0)
1953 *lexvalue = _lexval;
1956 else if ((i = ftoi(&_lexval, lptr)) != 0)
1958 _lextok = tRATIONAL;
1959 *lexvalue = _lexval;
1962 else if (alpha(*lptr))
1963 { /* symbol or label */
1964 /* Note: only sNAMEMAX characters are significant. The compiler
1965 * generates a warning if a symbol exceeds this length.
1970 while (alphanum(*lptr))
1981 error(200, _lexstr, sNAMEMAX); /* symbol too long, truncated to sNAMEMAX chars */
1982 if (_lexstr[0] == PUBLIC_CHAR && _lexstr[1] == '\0')
1984 _lextok = PUBLIC_CHAR; /* '@' all alone is not a symbol, it is an operator */
1986 else if (_lexstr[0] == '_' && _lexstr[1] == '\0')
1988 _lextok = '_'; /* '_' by itself is not a symbol, it is a placeholder */
1990 if (*lptr == ':' && sc_allowtags && _lextok != PUBLIC_CHAR)
1992 _lextok = tLABEL; /* it wasn't a normal symbol, it was a label/tagname */
1993 lptr += 1; /* skip colon */
1996 else if (*lptr == '\"' || (*lptr == sc_ctrlchar && *(lptr + 1) == '\"'))
1997 { /* unpacked string literal */
1999 rawstring = (*lptr == sc_ctrlchar);
2000 *lexvalue = _lexval = litidx;
2001 lptr += 1; /* skip double quote */
2003 lptr += 1; /* skip "escape" character too */
2005 sc_packstr ? packedstring(lptr, rawstring) : unpackedstring(lptr,
2008 lptr += 1; /* skip final quote */
2010 error(37); /* invalid (non-terminated) string */
2012 else if ((*lptr == '!' && *(lptr + 1) == '\"')
2013 || (*lptr == '!' && *(lptr + 1) == sc_ctrlchar && *(lptr + 2) == '\"')
2014 || (*lptr == sc_ctrlchar && *(lptr + 1) == '!'
2015 && *(lptr + 2) == '\"'))
2016 { /* packed string literal */
2018 rawstring = (*lptr == sc_ctrlchar || *(lptr + 1) == sc_ctrlchar);
2019 *lexvalue = _lexval = litidx;
2020 lptr += 2; /* skip exclamation point and double quote */
2022 lptr += 1; /* skip "escape" character too */
2024 sc_packstr ? unpackedstring(lptr, rawstring) : packedstring(lptr,
2027 lptr += 1; /* skip final quote */
2029 error(37); /* invalid (non-terminated) string */
2031 else if (*lptr == '\'')
2032 { /* character literal */
2033 lptr += 1; /* skip quote */
2035 *lexvalue = _lexval = litchar(&lptr, FALSE);
2037 lptr += 1; /* skip final quote */
2039 error(27); /* invalid character constant (must be one character) */
2041 else if (*lptr == ';')
2042 { /* semicolumn resets "error" flag */
2045 errorset(sRESET); /* reset error flag (clear the "panic mode") */
2049 _lextok = *lptr; /* if every match fails, return the character */
2050 lptr += 1; /* increase the "lptr" pointer */
2057 * Pushes a token back, so the next call to lex() will return the token
2058 * last examined, instead of a new token.
2060 * Only one token can be pushed back.
2062 * In fact, lex() already stores the information it finds into global
2063 * variables, so all that is to be done is set a flag that informs lex()
2064 * to read and return the information from these variables, rather than
2065 * to read in a new token from the input file.
2070 assert(_pushed == FALSE);
2076 * Sets the variable "_pushed" to 0 to make sure lex() will read in a new
2077 * symbol (a not continue with some old one). This is required upon return
2078 * from Assembler mode.
2086 lptr = strchr(pline, '\0');
2087 assert(lptr != NULL);
2093 * This routine is useful if only a simple check is needed. If the token
2094 * differs from the one expected, it is pushed back.
2097 matchtoken(int token)
2103 tok = lex(&val, &str);
2104 if (tok == token || (token == tTERM && (tok == ';' || tok == tENDEXPR)))
2108 else if (!sc_needsemicolon && token == tTERM && (_lexnewline || !freading))
2110 lexpush(); /* push "tok" back, we use the "hidden" newline token */
2122 * Returns additional information of a token after using "matchtoken()"
2123 * or needtoken(). It does no harm using this routine after a call to
2124 * "lex()", but lex() already returns the same information.
2126 * The token itself is the return value. Normally, this one is already known.
2129 tokeninfo(cell * val, char **str)
2131 /* if the token was pushed back, tokeninfo() returns the token and
2132 * parameters of the *next* token, not of the *current* token.
2142 * This routine checks for a required token and gives an error message if
2143 * it isn't there (and returns FALSE in that case).
2145 * Global references: _lextok;
2148 needtoken(int token)
2150 char s1[20], s2[20];
2152 if (matchtoken(token))
2158 /* token already pushed back */
2161 sprintf(s1, "%c", (char)token); /* single character token */
2163 strcpy(s1, sc_tokens[token - tFIRST]); /* multi-character symbol */
2165 strcpy(s2, "-end of file-");
2166 else if (_lextok < 256)
2167 sprintf(s2, "%c", (char)_lextok);
2169 strcpy(s2, sc_tokens[_lextok - tFIRST]);
2170 error(1, s1, s2); /* expected ..., but found ... */
2177 * Compares a series of characters from the input file with the characters
2178 * in "st" (that contains a token). If the token on the input file matches
2179 * "st", the input file pointer "lptr" is adjusted to point to the next
2180 * token, otherwise "lptr" remains unaltered.
2182 * If the parameter "end: is true, match() requires that the first character
2183 * behind the recognized token is non-alphanumeric.
2185 * Global references: lptr (altered)
2188 match(char *st, int end)
2203 { /* symbol must terminate with non-alphanumeric char */
2207 lptr = ptr; /* match found, skip symbol */
2213 * Stores a value into the literal queue. The literal queue is used for
2214 * literal strings used in functions and for initializing array variables.
2216 * Global references: litidx (altered)
2222 if (litidx >= litmax)
2226 litmax += sDEF_LITMAX;
2227 p = (cell *) realloc(litq, litmax * sizeof(cell));
2229 error(102, "literal table"); /* literal table overflow (fatal error) */
2232 assert(litidx < litmax);
2233 litq[litidx++] = value;
2238 * Return current literal character and increase the pointer to point
2239 * just behind this literal character.
2241 * Note: standard "escape sequences" are suported, but the backslash may be
2242 * replaced by another character; the syntax '\ddd' is supported,
2243 * but ddd must be decimal!
2246 litchar(char **lptr, int rawmode)
2249 unsigned char *cptr;
2251 cptr = (unsigned char *)*lptr;
2252 if (rawmode || *cptr != sc_ctrlchar)
2253 { /* no escape character */
2260 if (*cptr == sc_ctrlchar)
2262 c = *cptr; /* \\ == \ (the escape character itself) */
2269 case 'a': /* \a == audible alarm */
2273 case 'b': /* \b == backspace */
2277 case 'e': /* \e == escape */
2281 case 'f': /* \f == form feed */
2285 case 'n': /* \n == NewLine character */
2289 case 'r': /* \r == carriage return */
2293 case 't': /* \t == horizontal TAB */
2297 case 'v': /* \v == vertical TAB */
2301 case '\'': /* \' == ' (single quote) */
2302 case '"': /* \" == " (single quote) */
2303 case '%': /* \% == % (percent) */
2311 while (*cptr >= '0' && *cptr <= '9') /* decimal! */
2312 c = c * 10 + *cptr++ - '0';
2314 cptr++; /* swallow a trailing ';' */
2318 error(27); /* invalid character constant */
2323 *lptr = (char *)cptr;
2324 assert(c >= 0 && c < 256);
2330 * Test if character "c" is alphabetic ("a".."z"), an underscore ("_")
2331 * or an "at" sign ("@"). The "@" is an extension to standard C.
2336 return (isalpha(c) || c == '_' || c == PUBLIC_CHAR);
2341 * Test if character "c" is alphanumeric ("a".."z", "0".."9", "_" or "@")
2346 return (alpha(c) || isdigit(c));
2349 /* The local variable table must be searched backwards, so that the deepest
2350 * nesting of local variables is searched first. The simplest way to do
2351 * this is to insert all new items at the head of the list.
2352 * In the global list, the symbols are kept in sorted order, so that the
2353 * public functions are written in sorted order.
2356 add_symbol(symbol * root, symbol * entry, int sort)
2361 while (root->next != NULL && strcmp(entry->name, root->next->name) > 0)
2364 if ((newsym = (symbol *) malloc(sizeof(symbol))) == NULL)
2369 memcpy(newsym, entry, sizeof(symbol));
2370 newsym->next = root->next;
2371 root->next = newsym;
2376 free_symbol(symbol * sym)
2380 /* free all sub-symbol allocated memory blocks, depending on the
2381 * kind of the symbol
2383 assert(sym != NULL);
2384 if (sym->ident == iFUNCTN)
2386 /* run through the argument list; "default array" arguments
2387 * must be freed explicitly; the tag list must also be freed */
2388 assert(sym->dim.arglist != NULL);
2389 for (arg = sym->dim.arglist; arg->ident != 0; arg++)
2391 if (arg->ident == iREFARRAY && arg->hasdefault)
2392 free(arg->defvalue.array.data);
2393 else if (arg->ident == iVARIABLE
2394 && ((arg->hasdefault & uSIZEOF) != 0
2395 || (arg->hasdefault & uTAGOF) != 0))
2396 free(arg->defvalue.size.symname);
2397 assert(arg->tags != NULL);
2400 free(sym->dim.arglist);
2402 assert(sym->refer != NULL);
2408 delete_symbol(symbol * root, symbol * sym)
2410 /* find the symbol and its predecessor
2411 * (this function assumes that you will never delete a symbol that is not
2412 * in the table pointed at by "root")
2414 assert(root != sym);
2415 while (root->next != sym)
2418 assert(root != NULL);
2421 /* unlink it, then free it */
2422 root->next = sym->next;
2427 delete_symbols(symbol * root, int level, int delete_labels,
2428 int delete_functions)
2432 /* erase only the symbols with a deeper nesting level than the
2433 * specified nesting level */
2434 while (root->next != NULL)
2437 if (sym->compound < level)
2439 if ((delete_labels || sym->ident != iLABEL)
2440 && (delete_functions || sym->ident != iFUNCTN
2441 || (sym->usage & uNATIVE) != 0) && (delete_functions
2442 || sym->ident != iCONSTEXPR
2443 || (sym->usage & uPREDEF) ==
2444 0) && (delete_functions
2450 root->next = sym->next;
2455 /* if the function was prototyped, but not implemented in this source,
2456 * mark it as such, so that its use can be flagged
2458 if (sym->ident == iFUNCTN && (sym->usage & uDEFINE) == 0)
2459 sym->usage |= uMISSING;
2460 if (sym->ident == iFUNCTN || sym->ident == iVARIABLE
2461 || sym->ident == iARRAY)
2462 sym->usage &= ~uDEFINE; /* clear "defined" flag */
2463 /* for user defined operators, also remove the "prototyped" flag, as
2464 * user-defined operators *must* be declared before use
2466 if (sym->ident == iFUNCTN && !isalpha(*sym->name)
2467 && *sym->name != '_' && *sym->name != PUBLIC_CHAR)
2468 sym->usage &= ~uPROTOTYPED;
2469 root = sym; /* skip the symbol */
2474 /* The purpose of the hash is to reduce the frequency of a "name"
2475 * comparison (which is costly). There is little interest in avoiding
2476 * clusters in similar names, which is why this function is plain simple.
2479 namehash(char *name)
2481 unsigned char *ptr = (unsigned char *)name;
2482 int len = strlen(name);
2487 return (len << 24Lu) + (ptr[0] << 16Lu) + (ptr[len - 1] << 8Lu) +
2492 find_symbol(symbol * root, char *name, int fnumber)
2494 symbol *ptr = root->next;
2495 unsigned long hash = namehash(name);
2499 if (hash == ptr->hash && strcmp(name, ptr->name) == 0
2500 && ptr->parent == NULL && (ptr->fnumber < 0
2501 || ptr->fnumber == fnumber))
2509 find_symbol_child(symbol * root, symbol * sym)
2511 symbol *ptr = root->next;
2515 if (ptr->parent == sym)
2522 /* Adds "bywhom" to the list of referrers of "entry". Typically,
2523 * bywhom will be the function that uses a variable or that calls
2527 refer_symbol(symbol * entry, symbol * bywhom)
2531 assert(bywhom != NULL); /* it makes no sense to add a "void" referrer */
2532 assert(entry != NULL);
2533 assert(entry->refer != NULL);
2535 /* see if it is already there */
2536 for (count = 0; count < entry->numrefers && entry->refer[count] != bywhom;
2539 if (count < entry->numrefers)
2541 assert(entry->refer[count] == bywhom);
2545 /* see if there is an empty spot in the referrer list */
2546 for (count = 0; count < entry->numrefers && entry->refer[count] != NULL;
2549 assert(count <= entry->numrefers);
2550 if (count == entry->numrefers)
2553 int newsize = 2 * entry->numrefers;
2555 assert(newsize > 0);
2556 /* grow the referrer list */
2557 refer = (symbol **) realloc(entry->refer, newsize * sizeof(symbol *));
2559 return FALSE; /* insufficient memory */
2560 /* initialize the new entries */
2561 entry->refer = refer;
2562 for (count = entry->numrefers; count < newsize; count++)
2563 entry->refer[count] = NULL;
2564 count = entry->numrefers; /* first empty spot */
2565 entry->numrefers = newsize;
2568 /* add the referrer */
2569 assert(entry->refer[count] == NULL);
2570 entry->refer[count] = bywhom;
2575 markusage(symbol * sym, int usage)
2577 sym->usage |= (char)usage;
2578 /* check if (global) reference must be added to the symbol */
2579 if ((usage & (uREAD | uWRITTEN)) != 0)
2581 /* only do this for global symbols */
2582 if (sym->vclass == sGLOBAL)
2584 /* "curfunc" should always be valid, since statements may not occurs
2585 * outside functions; in the case of syntax errors, however, the
2586 * compiler may arrive through this function
2588 if (curfunc != NULL)
2589 refer_symbol(sym, curfunc);
2596 * Returns a pointer to the global symbol (if found) or NULL (if not found)
2601 return find_symbol(&glbtab, name, fcurrent);
2606 * Returns a pointer to the local symbol (if found) or NULL (if not found).
2607 * See add_symbol() how the deepest nesting level is searched first.
2612 return find_symbol(&loctab, name, -1);
2616 findconst(char *name)
2620 sym = find_symbol(&loctab, name, -1); /* try local symbols first */
2621 if (sym == NULL || sym->ident != iCONSTEXPR) /* not found, or not a constant */
2622 sym = find_symbol(&glbtab, name, fcurrent);
2623 if (sym == NULL || sym->ident != iCONSTEXPR)
2625 assert(sym->parent == NULL); /* constants have no hierarchy */
2630 finddepend(symbol * parent)
2634 sym = find_symbol_child(&loctab, parent); /* try local symbols first */
2635 if (sym == NULL) /* not found */
2636 sym = find_symbol_child(&glbtab, parent);
2642 * Adds a symbol to the symbol table (either global or local variables,
2643 * or global and local constants).
2646 addsym(char *name, cell addr, int ident, int vclass, int tag, int usage)
2648 symbol entry, **refer;
2650 /* global variables/constants/functions may only be defined once */
2651 assert(!(ident == iFUNCTN || ident == iCONSTEXPR) || vclass != sGLOBAL
2652 || findglb(name) == NULL);
2653 /* labels may only be defined once */
2654 assert(ident != iLABEL || findloc(name) == NULL);
2656 /* create an empty referrer list */
2657 if ((refer = (symbol **) malloc(sizeof(symbol *))) == NULL)
2659 error(103); /* insufficient memory */
2664 /* first fill in the entry */
2665 strcpy(entry.name, name);
2666 entry.hash = namehash(name);
2668 entry.vclass = (char)vclass;
2669 entry.ident = (char)ident;
2671 entry.usage = (char)usage;
2672 entry.compound = 0; /* may be overridden later */
2673 entry.fnumber = -1; /* assume global visibility (ignored for local symbols) */
2674 entry.numrefers = 1;
2675 entry.refer = refer;
2676 entry.parent = NULL;
2678 /* then insert it in the list */
2679 if (vclass == sGLOBAL)
2680 return add_symbol(&glbtab, &entry, TRUE);
2682 return add_symbol(&loctab, &entry, FALSE);
2686 addvariable(char *name, cell addr, int ident, int vclass, int tag,
2687 int dim[], int numdim, int idxtag[])
2689 symbol *sym, *parent, *top;
2692 /* global variables may only be defined once */
2693 assert(vclass != sGLOBAL || (sym = findglb(name)) == NULL
2694 || (sym->usage & uDEFINE) == 0);
2696 if (ident == iARRAY || ident == iREFARRAY)
2699 sym = NULL; /* to avoid a compiler warning */
2700 for (level = 0; level < numdim; level++)
2702 top = addsym(name, addr, ident, vclass, tag, uDEFINE);
2703 top->dim.array.length = dim[level];
2704 top->dim.array.level = (short)(numdim - level - 1);
2705 top->x.idxtag = idxtag[level];
2706 top->parent = parent;
2714 sym = addsym(name, addr, ident, vclass, tag, uDEFINE);
2721 * Return next available internal label number.
2731 * Converts a number to a hexadecimal string and returns a pointer to that
2737 static char itohstr[15]; /* hex number is 10 characters long at most */
2739 int i, nibble[8]; /* a 32-bit hexadecimal cell has 8 nibbles */
2748 for (i = 0; i < max; i += 1)
2750 nibble[i] = (int)(val & 0x0f); /* nibble 0 is lowest nibble */
2754 while (nibble[i] == 0 && i > 0) /* search for highest non-zero nibble */
2758 if (nibble[i] >= 10)
2759 *ptr++ = (char)('a' + (nibble[i] - 10));
2761 *ptr++ = (char)('0' + nibble[i]);
2764 *ptr = '\0'; /* and a zero-terminator */