2 * vim:ts=8:sw=3:sts=8:noexpandtab:cino=>5n-3f0^-2{2
4 * Small compiler - File input, preprocessing and lexical analysis functions
6 * Copyright (c) ITB CompuPhase, 1997-2003
8 * This software is provided "as-is", without any express or implied warranty.
9 * In no event will the authors be held liable for any damages arising from
10 * the use of this software.
12 * Permission is granted to anyone to use this software for any purpose,
13 * including commercial applications, and to alter it and redistribute it
14 * freely, subject to the following restrictions:
16 * 1. The origin of this software must not be misrepresented; you must not
17 * claim that you wrote the original software. If you use this software in
18 * a product, an acknowledgment in the product documentation would be
19 * appreciated but is not required.
20 * 2. Altered source versions must be plainly marked as such, and must not be
21 * misrepresented as being the original software.
22 * 3. This notice may not be removed or altered from any source distribution.
24 * Version: $Id: embryo_cc_sc2.c 45428 2010-01-22 06:37:19Z caro $
28 * vim:ts=8:sw=3:sts=8:noexpandtab:cino=>5n-3f0^-2{2
41 #include "embryo_cc_sc.h"
44 static int match(char *st, int end);
45 static cell litchar(char **lptr, int rawmode);
46 static int alpha(char c);
48 static int icomment; /* currently in multiline comment? */
49 static int iflevel; /* nesting level if #if/#else/#endif */
50 static int skiplevel; /* level at which we started skipping */
51 static int elsedone; /* level at which we have seen an #else */
52 static char term_expr[] = "";
53 static int listline = -1; /* "current line" for the list file */
57 * Uses a LIFO stack to store information. The stack is used by doinclude(),
58 * doswitch() (to hold the state of "swactive") and some other routines.
60 * Porting note: I made the bold assumption that an integer will not be
61 * larger than a pointer (it may be smaller). That is, the stack element
62 * is typedef'ed as a pointer type, but I also store integers on it. See
65 * Global references: stack,stkidx (private to pushstk() and popstk())
67 static stkitem stack[sSTKMAX];
72 if (stkidx >= sSTKMAX)
73 error(102, "parser stack"); /* stack overflow (recursive include?) */
82 return (stkitem) - 1; /* stack is empty */
88 plungequalifiedfile(char *name)
90 static char *extensions[] = { ".inc", ".sma", ".small" };
98 fp = (FILE *) sc_opensrc(name);
99 ext = strchr(name, '\0'); /* save position */
102 /* try to append an extension */
103 strcpy(ext, extensions[ext_idx]);
104 fp = (FILE *) sc_opensrc(name);
106 *ext = '\0'; /* on failure, restore filename */
110 while (fp == NULL && ext_idx < (sizeof extensions / sizeof extensions[0]));
113 *ext = '\0'; /* restore filename */
116 pushstk((stkitem) inpf);
117 pushstk((stkitem) inpfname); /* pointer to current file name */
118 pushstk((stkitem) curlibrary);
119 pushstk((stkitem) iflevel);
120 assert(skiplevel == 0);
121 pushstk((stkitem) icomment);
122 pushstk((stkitem) fcurrent);
123 pushstk((stkitem) fline);
124 inpfname = strdup(name); /* set name of include file */
125 if (inpfname == NULL)
126 error(103); /* insufficient memory */
127 inpf = fp; /* set input file pointer to include file */
129 fline = 0; /* set current line number to 0 */
132 setfile(inpfname, fcurrent);
133 listline = -1; /* force a #line directive when changing the file */
134 setactivefile(fcurrent);
139 plungefile(char *name, int try_currentpath, int try_includepaths)
146 result = plungequalifiedfile(name);
148 if (try_includepaths && name[0] != DIRSEP_CHAR)
150 for (i = 0; !result && (ptr = get_path(i)) != NULL; i++)
154 strncpy(path, ptr, sizeof path);
155 path[sizeof path - 1] = '\0'; /* force '\0' termination */
156 strncat(path, name, sizeof(path) - strlen(path));
157 path[sizeof path - 1] = '\0';
158 result = plungequalifiedfile(path);
165 check_empty(char *lptr)
167 /* verifies that the string contains only whitespace */
168 while (*lptr <= ' ' && *lptr != '\0')
171 error(38); /* extra characters on line */
176 * Gets the name of an include file, pushes the old file on the stack and
177 * sets some options. This routine doesn't use lex(), since lex() doesn't
178 * recognize file names (and directories).
180 * Global references: inpf (altered)
188 char name[PATH_MAX], c;
191 while (*lptr <= ' ' && *lptr != 0) /* skip leading whitespace */
193 if (*lptr == '<' || *lptr == '\"')
195 c = (char)((*lptr == '\"') ? '\"' : '>'); /* termination character */
197 while (*lptr <= ' ' && *lptr != 0) /* skip whitespace after quote */
206 while (*lptr != c && *lptr != '\0' && i < sizeof name - 1) /* find the end of the string */
208 while (i > 0 && name[i - 1] <= ' ')
209 i--; /* strip trailing whitespace */
210 assert(i >= 0 && i < sizeof name);
211 name[i] = '\0'; /* zero-terminate the string */
214 { /* verify correct string termination */
215 error(37); /* invalid string */
219 check_empty(lptr + 1); /* verify that the rest of the line is whitespace */
221 /* Include files between "..." or without quotes are read from the current
222 * directory, or from a list of "include directories". Include files
223 * between <...> are only read from the list of include directories.
225 result = plungefile(name, (c != '>'), TRUE);
227 error(100, name); /* cannot read from ... (fatal error) */
232 * Reads in a new line from the input file pointed to by "inpf". readline()
233 * concatenates lines that end with a \ with the next line. If no more data
234 * can be read from the file, readline() attempts to pop off the previous file
235 * from the stack. If that fails too, it sets "freading" to 0.
237 * Global references: inpf,fline,inpfname,freading,icomment (altered)
245 if (lptr == term_expr)
251 if (inpf == NULL || sc_eofsrc(inpf))
254 error(49); /* invalid line continuation */
255 if (inpf != NULL && inpf != inpf_org)
257 i = (int)(long)popstk();
259 { /* All's done; popstk() returns "stack is empty" */
262 /* when there is nothing more to read, the #if/#else stack should
263 * be empty and we should not be in a comment
265 assert(iflevel >= 0);
267 error(1, "#endif", "-end of file-");
269 error(1, "*/", "-end of file-");
273 fcurrent = (int)(long)popstk();
274 icomment = (int)(long)popstk();
275 assert(skiplevel == 0); /* skiplevel was not stored on stack, because it should always be zero at this point */
276 iflevel = (int)(long)popstk();
277 curlibrary = (constvalue *) popstk();
278 free(inpfname); /* return memory allocated for the include file name */
279 inpfname = (char *)popstk();
280 inpf = (FILE *) popstk();
281 setactivefile(fcurrent);
282 listline = -1; /* force a #line directive when changing the file */
286 if (sc_readsrc(inpf, line, num) == NULL)
288 *line = '\0'; /* delete line */
293 /* check whether to erase leading spaces */
298 while (*ptr == ' ' || *ptr == '\t')
301 memmove(line, ptr, strlen(ptr) + 1);
304 /* check whether a full line was read */
305 if (strchr(line, '\n') == NULL && !sc_eofsrc(inpf))
306 error(75); /* line too long */
307 /* check if the next line must be concatenated to this line */
308 if ((ptr = strchr(line, '\n')) != NULL && ptr > line)
310 assert(*(ptr + 1) == '\0'); /* '\n' should be last in the string */
312 && (*ptr == '\n' || *ptr == ' ' || *ptr == '\t'))
313 ptr--; /* skip trailing whitespace */
317 /* set '\a' at the position of '\\' to make it possible to check
318 * for a line continuation in a single line comment (error 49)
321 *ptr = '\0'; /* erase '\n' (and any trailing whitespace) */
325 line += strlen(line);
329 while (num >= 0 && cont);
334 * Replaces all comments from the line by space characters. It updates
335 * a global variable ("icomment") for multiline comments.
337 * This routine also supports the C++ extension for single line comments.
338 * These comments are started with "//" and end at the end of the line.
340 * Global references: icomment (private to "stripcom")
351 if (*line == '*' && *(line + 1) == '/')
353 icomment = FALSE; /* comment has ended */
354 *line = ' '; /* replace '*' and '/' characters by spaces */
360 if (*line == '/' && *(line + 1) == '*')
361 error(216); /* nested comment */
362 *line = ' '; /* replace comments by spaces */
368 if (*line == '/' && *(line + 1) == '*')
370 icomment = TRUE; /* start comment */
371 *line = ' '; /* replace '/' and '*' characters by spaces */
375 else if (*line == '/' && *(line + 1) == '/')
376 { /* comment to end of line */
377 if (strchr(line, '\a') != NULL)
378 error(49); /* invalid line continuation */
379 *line++ = '\n'; /* put "newline" at first slash */
380 *line = '\0'; /* put "zero-terminator" at second slash */
384 if (*line == '\"' || *line == '\'')
385 { /* leave literals unaltered */
386 c = *line; /* ending quote, single or double */
388 while ((*line != c || *(line - 1) == '\\')
391 line += 1; /* skip final quote */
404 * Attempts to interpret a numeric symbol as a boolean value. On success
405 * it returns the number of characters processed (so the line pointer can be
406 * adjusted) and the value is stored in "val". Otherwise it returns 0 and
409 * A boolean value must start with "0b"
412 btoi(cell * val, char *curptr)
418 if (*ptr == '0' && *(ptr + 1) == 'b')
421 while (*ptr == '0' || *ptr == '1' || *ptr == '_')
424 *val = (*val << 1) | (*ptr - '0');
432 if (alphanum(*ptr)) /* number must be delimited by non-alphanumeric char */
435 return (int)(ptr - curptr);
440 * Attempts to interpret a numeric symbol as a decimal value. On success
441 * it returns the number of characters processed and the value is stored in
442 * "val". Otherwise it returns 0 and "val" is garbage.
445 dtoi(cell * val, char *curptr)
451 if (!isdigit(*ptr)) /* should start with digit */
453 while (isdigit(*ptr) || *ptr == '_')
456 *val = (*val * 10) + (*ptr - '0');
459 if (alphanum(*ptr)) /* number must be delimited by non-alphanumerical */
461 if (*ptr == '.' && isdigit(*(ptr + 1)))
462 return 0; /* but a fractional part must not be present */
463 return (int)(ptr - curptr);
468 * Attempts to interpret a numeric symbol as a hexadecimal value. On
469 * success it returns the number of characters processed and the value is
470 * stored in "val". Otherwise it return 0 and "val" is garbage.
473 htoi(cell * val, char *curptr)
479 if (!isdigit(*ptr)) /* should start with digit */
481 if (*ptr == '0' && *(ptr + 1) == 'x')
482 { /* C style hexadecimal notation */
484 while (isxdigit(*ptr) || *ptr == '_')
488 assert(isxdigit(*ptr));
491 *val += (*ptr - '0');
493 *val += (tolower(*ptr) - 'a' + 10);
505 return (int)(ptr - curptr);
535 * Attempts to interpret a numeric symbol as a rational number, either as
536 * IEEE 754 single precision floating point or as a fixed point integer.
537 * On success it returns the number of characters processed and the value is
538 * stored in "val". Otherwise it returns 0 and "val" is unchanged.
540 * Small has stricter definition for floating point numbers than most:
541 * o the value must start with a digit; ".5" is not a valid number, you
543 * o a period must appear in the value, even if an exponent is given; "2e3"
544 * is not a valid number, you should write "2.0e3"
545 * o at least one digit must follow the period; "6." is not a valid number,
546 * you should write "6.0"
549 ftoi(cell * val, char *curptr)
552 double fnum, ffrac, fmult;
553 unsigned long dnum, dbase;
556 assert(rational_digits >= 0 && rational_digits < 9);
557 for (i = 0, dbase = 1; i < rational_digits; i++)
562 if (!isdigit(*ptr)) /* should start with digit */
564 while (isdigit(*ptr) || *ptr == '_')
568 fnum = (fnum * 10.0) + (*ptr - '0');
569 dnum = (dnum * 10L) + (*ptr - '0') * dbase;
574 return 0; /* there must be a period */
576 if (!isdigit(*ptr)) /* there must be at least one digit after the dot */
581 while (isdigit(*ptr) || *ptr == '_')
585 ffrac = (ffrac * 10.0) + (*ptr - '0');
586 fmult = fmult / 10.0;
588 dnum += (*ptr - '0') * dbase;
589 if (dbase == 0L && sc_rationaltag && rational_digits > 0
592 error(222); /* number of digits exceeds rational number precision */
598 fnum += ffrac * fmult; /* form the number so far */
600 { /* optional fractional part */
613 if (!isdigit(*ptr)) /* 'e' should be followed by a digit */
616 while (isdigit(*ptr))
618 exp = (exp * 10) + (*ptr - '0');
622 fmult = pow10(exp * sign);
624 fmult = pow(10, exp * sign);
627 dnum *= (unsigned long)(fmult + 0.5);
630 /* decide how to store the number */
631 if (sc_rationaltag == 0)
633 error(70); /* rational number support was not enabled */
636 else if (rational_digits == 0)
638 float f = (float) fnum;
640 *val = EMBRYO_FLOAT_TO_CELL(f);
642 /* I assume that the C/C++ compiler stores "float" values in IEEE 754
643 * format (as mandated in the ANSI standard). Test this assumption anyway.
646 float test1 = 0.0, test2 = 50.0;
647 Embryo_Cell c1 = EMBRYO_FLOAT_TO_CELL(test1);
648 Embryo_Cell c2 = EMBRYO_FLOAT_TO_CELL(test2);
650 if (c1 != 0x00000000L)
653 "embryo_cc: WARNING! you compiler does not SEEM to interpret IEEE floating\n"
654 "point math as embryo expects. this could be bad.\n"
656 "(float 0.0 != 0x00000000 bitpattern, 0x%08x instead)\n"
658 "this could be an issue with you compiling embryo with gcc 3.2.x that seems\n"
659 "to trigger this sanity check. we are not sure why yet, but gcc 3.3.x works\n"
662 else if (c2 != 0x42480000L)
665 "embryo_cc: WARNING! you compiler does not SEEM to interpret IEEE floating\n"
666 "point math as embryo expects. This could be bad.\n"
668 "(float 50.0 != 0x42480000 bitpattern, 0x%08x instead)\n"
670 "this could be an issue with you compiling embryo with gcc 3.2.x that seems\n"
671 "to trigger this sanity check. we are not sure why yet, but gcc 3.3.x works\n"
683 return (int)(ptr - curptr);
688 * Reads in a number (binary, decimal or hexadecimal). It returns the number
689 * of characters processed or 0 if the symbol couldn't be interpreted as a
690 * number (in this case the argument "val" remains unchanged). This routine
691 * relies on the 'early dropout' implementation of the logical or (||)
694 * Note: the routine doesn't check for a sign (+ or -). The - is checked
695 * for at "hier2()" (in fact, it is viewed as an operator, not as a
696 * sign) and the + is invalid (as in K&R C, and unlike ANSI C).
699 number(cell * val, char *curptr)
704 if ((i = btoi(&value, curptr)) != 0 /* binary? */
705 || (i = htoi(&value, curptr)) != 0 /* hexadecimal? */
706 || (i = dtoi(&value, curptr)) != 0) /* decimal? */
713 return 0; /* else not a number */
718 chrcat(char *str, char chr)
720 str = strchr(str, '\0');
726 preproc_expr(cell * val, int *tag)
733 /* Disable staging; it should be disabled already because
734 * expressions may not be cut off half-way between conditional
735 * compilations. Reset the staging index, but keep the code
738 if (stgget(&index, &code_index))
740 error(57); /* unfinished expression */
741 stgdel(0, code_index);
744 /* append a special symbol to the string, so the expression
745 * analyzer won't try to read a next line when it encounters
748 assert(strlen(pline) < sLINEMAX);
749 term = strchr(pline, '\0');
750 assert(term != NULL);
751 chrcat(pline, PREPROC_TERM); /* the "DEL" code (see SC.H) */
752 result = constexpr(val, tag); /* get value (or 0 on error) */
753 *term = '\0'; /* erase the token (if still present) */
754 lexclr(FALSE); /* clear any "pushed" tokens */
759 * Returns returns a pointer behind the closing quote or to the other
760 * character that caused the input to be ended.
763 getstring(char *dest, int max, char *line)
765 assert(dest != NULL && line != NULL);
767 while (*line <= ' ' && *line != '\0')
768 line++; /* skip whitespace */
771 error(37); /* invalid string */
773 else if (*line == '\0')
778 while (*line != '"' && *line != '\0')
786 lptr++; /* skip closing " */
788 error(37); /* invalid string */
807 * Recognizes the compiler directives. The function returns:
808 * CMD_NONE the line must be processed
809 * CMD_TERM a pending expression must be completed before processing further lines
810 * Other value: the line must be skipped, because:
811 * CMD_CONDFALSE false "#if.." code
812 * CMD_EMPTYLINE line is empty
813 * CMD_INCLUDE the line contains a #include directive
814 * CMD_DEFINE the line contains a #subst directive
815 * CMD_IF the line contains a #if/#else/#endif directive
816 * CMD_DIRECTIVE the line contains some other compiler directive
818 * Global variables: iflevel, skiplevel, elsedone (altered)
830 while (*lptr <= ' ' && *lptr != '\0')
833 return CMD_EMPTYLINE; /* empty line */
835 return skiplevel > 0 ? CMD_CONDFALSE : CMD_NONE; /* it is not a compiler directive */
836 /* compiler directive found */
837 indent_nowarn = TRUE; /* allow loose indentation" */
838 lexclr(FALSE); /* clear any "pushed" tokens */
839 /* on a pending expression, force to return a silent ';' token and force to
842 if (!sc_needsemicolon && stgget(&index, &code_index))
847 tok = lex(&val, &str);
848 ret = skiplevel > 0 ? CMD_CONDFALSE : CMD_DIRECTIVE; /* preset 'ret' to CMD_DIRECTIVE (most common case) */
851 case tpIF: /* conditional compilation */
855 break; /* break out of switch */
856 preproc_expr(&val, NULL); /* get value (or 0 on error) */
863 if (iflevel == 0 && skiplevel == 0)
865 error(26); /* no matching #if */
870 if (elsedone == iflevel)
871 error(60); /* multiple #else directives between #if ... #endif */
873 if (skiplevel == iflevel)
875 else if (skiplevel == 0)
880 #if 0 /* ??? *really* need to use a stack here */
883 if (iflevel == 0 && skiplevel == 0)
885 error(26); /* no matching #if */
888 else if (elsedone == iflevel)
890 error(61); /* #elseif directive may not follow an #else */
895 preproc_expr(&val, NULL); /* get value (or 0 on error) */
897 skiplevel = iflevel; /* we weren't skipping, start skipping now */
899 skiplevel = 0; /* we were skipping, condition is valid -> stop skipping */
900 /* else: we were skipping and condition is invalid -> keep skipping */
907 if (iflevel == 0 && skiplevel == 0)
914 if (skiplevel == iflevel)
916 if (elsedone == iflevel)
917 elsedone = 0; /* ??? actually, should build a stack of #if/#endif and keep
918 * the state whether an #else was seen per nesting level */
923 case tINCLUDE: /* #include directive */
931 char pathname[PATH_MAX];
933 lptr = getstring(pathname, sizeof pathname, lptr);
934 if (pathname[0] != '\0')
937 inpfname = strdup(pathname);
938 if (inpfname == NULL)
939 error(103); /* insufficient memory */
947 if (lex(&val, &str) != tNUMBER)
948 error(8); /* invalid/non-constant expression */
954 if (skiplevel == 0 && (sc_debug & sCHKBOUNDS) != 0)
956 preproc_expr(&val, NULL); /* get constant expression (or 0 on error) */
958 error(7); /* assertion failed */
965 if (lex(&val, &str) == tSYMBOL)
967 if (strcmp(str, "ctrlchar") == 0)
969 if (lex(&val, &str) != tNUMBER)
970 error(27); /* invalid character constant */
971 sc_ctrlchar = (char)val;
973 else if (strcmp(str, "compress") == 0)
977 preproc_expr(&val, NULL);
978 sc_compress = (int)val; /* switch code packing on/off */
980 else if (strcmp(str, "dynamic") == 0)
982 preproc_expr(&sc_stksize, NULL);
984 else if (strcmp(str, "library") == 0)
986 char name[sNAMEMAX + 1];
988 while (*lptr <= ' ' && *lptr != '\0')
992 lptr = getstring(name, sizeof name, lptr);
998 for (i = 0; i < sizeof name && alphanum(*lptr);
1003 if (name[0] == '\0')
1009 if (strlen(name) > sEXPMAX)
1010 error(220, name, sEXPMAX); /* exported symbol is truncated */
1011 /* add the name if it does not yet exist in the table */
1012 if (find_constval(&libname_tab, name, 0) == NULL)
1014 append_constval(&libname_tab, name, 0, 0);
1017 else if (strcmp(str, "pack") == 0)
1021 preproc_expr(&val, NULL); /* default = packed/unpacked */
1022 sc_packstr = (int)val;
1024 else if (strcmp(str, "rational") == 0)
1026 char name[sNAMEMAX + 1];
1030 /* first gather all information, start with the tag name */
1031 while (*lptr <= ' ' && *lptr != '\0')
1033 for (i = 0; i < sizeof name && alphanum(*lptr);
1037 /* then the precision (for fixed point arithmetic) */
1038 while (*lptr <= ' ' && *lptr != '\0')
1042 preproc_expr(&digits, NULL);
1043 if (digits <= 0 || digits > 9)
1045 error(68); /* invalid rational number precision */
1051 /* add the tag (make it public) and check the values */
1052 i = sc_addtag(name);
1054 if (sc_rationaltag == 0
1055 || (sc_rationaltag == i
1056 && rational_digits == (int)digits))
1059 rational_digits = (int)digits;
1063 error(69); /* rational number format already set, can only be set once */
1066 else if (strcmp(str, "semicolon") == 0)
1070 preproc_expr(&val, NULL);
1071 sc_needsemicolon = (int)val;
1073 else if (strcmp(str, "tabsize") == 0)
1077 preproc_expr(&val, NULL);
1078 sc_tabsize = (int)val;
1080 else if (strcmp(str, "align") == 0)
1082 sc_alignnext = TRUE;
1084 else if (strcmp(str, "unused") == 0)
1086 char name[sNAMEMAX + 1];
1093 while (*lptr <= ' ' && *lptr != '\0')
1095 for (i = 0; i < sizeof name && isalpha(*lptr);
1099 /* get the symbol */
1100 sym = findloc(name);
1102 sym = findglb(name);
1105 sym->usage |= uREAD;
1106 if (sym->ident == iVARIABLE
1107 || sym->ident == iREFERENCE
1108 || sym->ident == iARRAY
1109 || sym->ident == iREFARRAY)
1110 sym->usage |= uWRITTEN;
1114 error(17, name); /* undefined symbol */
1116 /* see if a comma follows the name */
1117 while (*lptr <= ' ' && *lptr != '\0')
1119 comma = (*lptr == ',');
1127 error(207); /* unknown #pragma */
1132 error(207); /* unknown #pragma */
1142 assert(inpf != NULL);
1143 if (inpf != inpf_org)
1151 /* write opcode to output file */
1155 while (*lptr <= ' ' && *lptr != '\0')
1157 for (i = 0; i < 40 && (isalpha(*lptr) || *lptr == '.'); i++, lptr++)
1158 name[i] = (char)tolower(*lptr);
1163 code_idx += opcodes(1);
1164 /* write parameter (if any) */
1165 while (*lptr <= ' ' && *lptr != '\0')
1171 tok = lex(&val, &str);
1177 code_idx += opargs(1);
1183 if (sym == NULL || (sym->ident != iFUNCTN
1184 && sym->ident != iREFFUNC
1185 && (sym->usage & uDEFINE) == 0))
1187 error(17, str); /* undefined symbol */
1191 outval(sym->addr, FALSE);
1192 /* mark symbol as "used", unknown whether for read or write */
1193 markusage(sym, uREAD | uWRITTEN);
1194 code_idx += opargs(1);
1200 extern char *sc_tokens[]; /* forward declaration */
1203 sprintf(s2, "%c", (char)tok);
1205 strcpy(s2, sc_tokens[tok - tFIRST]);
1206 error(1, sc_tokens[tSYMBOL - tFIRST], s2);
1216 #if !defined NO_DEFINE
1222 char *pattern, *substitution;
1224 int count, prefixlen;
1227 /* find the pattern to match */
1228 while (*lptr <= ' ' && *lptr != '\0')
1230 start = lptr; /* save starting point of the match pattern */
1232 while (*lptr > ' ' && *lptr != '\0')
1234 litchar(&lptr, FALSE); /* litchar() advances "lptr" and handles escape characters */
1238 /* check pattern to match */
1239 if (!isalpha(*start) && *start != '_')
1241 error(74); /* pattern must start with an alphabetic character */
1244 /* store matched pattern */
1245 pattern = malloc(count + 1);
1246 if (pattern == NULL)
1247 error(103); /* insufficient memory */
1253 assert(*lptr != '\0');
1254 pattern[count++] = (char)litchar(&lptr, FALSE);
1256 pattern[count] = '\0';
1257 /* special case, erase trailing variable, because it could match anything */
1258 if (count >= 2 && isdigit(pattern[count - 1])
1259 && pattern[count - 2] == '%')
1260 pattern[count - 2] = '\0';
1261 /* find substitution string */
1262 while (*lptr <= ' ' && *lptr != '\0')
1264 start = lptr; /* save starting point of the match pattern */
1267 while (*lptr != '\0')
1269 /* keep position of the start of trailing whitespace */
1284 /* store matched substitution */
1285 substitution = malloc(count + 1); /* +1 for '\0' */
1286 if (substitution == NULL)
1287 error(103); /* insufficient memory */
1293 assert(*lptr != '\0');
1294 substitution[count++] = *lptr++;
1296 substitution[count] = '\0';
1297 /* check whether the definition already exists */
1298 for (prefixlen = 0, start = pattern;
1299 isalpha(*start) || isdigit(*start) || *start == '_';
1300 prefixlen++, start++)
1302 assert(prefixlen > 0);
1303 if ((def = find_subst(pattern, prefixlen)) != NULL)
1305 if (strcmp(def->first, pattern) != 0
1306 || strcmp(def->second, substitution) != 0)
1307 error(201, pattern); /* redefinition of macro (non-identical) */
1308 delete_subst(pattern, prefixlen);
1310 /* add the pattern/substitution pair to the list */
1311 assert(pattern[0] != '\0');
1312 insert_subst(pattern, substitution, prefixlen);
1321 if (lex(&val, &str) == tSYMBOL)
1323 if (!delete_subst(str, strlen(str)))
1324 error(17, str); /* undefined symbol */
1328 error(20, str); /* invalid symbol name */
1335 error(31); /* unknown compiler directive */
1336 ret = skiplevel > 0 ? CMD_DIRECTIVE : CMD_NONE; /* line must be processed (if skiplevel==0) */
1341 #if !defined NO_DEFINE
1343 is_startstring(char *string)
1345 if (*string == '\"' || *string == '\'')
1346 return TRUE; /* "..." */
1351 if (*string == '\"' || *string == '\'')
1352 return TRUE; /* !"..." */
1353 if (*string == sc_ctrlchar)
1356 if (*string == '\"' || *string == '\'')
1357 return TRUE; /* !\"..." */
1360 else if (*string == sc_ctrlchar)
1363 if (*string == '\"' || *string == '\'')
1364 return TRUE; /* \"..." */
1368 if (*string == '\"' || *string == '\'')
1369 return TRUE; /* \!"..." */
1377 skipstring(char *string)
1380 int rawstring = FALSE;
1382 while (*string == '!' || *string == sc_ctrlchar)
1384 rawstring = (*string == sc_ctrlchar);
1389 assert(endquote == '\"' || endquote == '\'');
1390 string++; /* skip open quote */
1391 while (*string != endquote && *string != '\0')
1392 litchar(&string, rawstring);
1397 skippgroup(char *string)
1400 char open = *string;
1419 close = '\0'; /* only to avoid a compiler warning */
1423 while (*string != close || nest > 0)
1425 if (*string == open)
1427 else if (*string == close)
1429 else if (is_startstring(string))
1430 string = skipstring(string);
1431 if (*string == '\0')
1439 strdel(char *str, size_t len)
1441 size_t length = strlen(str);
1445 memmove(str, str + len, length - len + 1); /* include EOS byte */
1450 strins(char *dest, char *src, size_t srclen)
1452 size_t destlen = strlen(dest);
1454 assert(srclen <= strlen(src));
1455 memmove(dest + srclen, dest, destlen + 1); /* include EOS byte */
1456 memcpy(dest, src, srclen);
1461 substpattern(char *line, size_t buffersize, char *pattern, char *substitution)
1464 char *p, *s, *e, *args[10];
1465 int match, arg, len;
1467 memset(args, 0, sizeof args);
1469 /* check the length of the prefix */
1470 for (prefixlen = 0, s = pattern; isalpha(*s) || isdigit(*s) || *s == '_';
1473 assert(prefixlen > 0);
1474 assert(strncmp(line, pattern, prefixlen) == 0);
1476 /* pattern prefix matches; match the rest of the pattern, gather
1479 s = line + prefixlen;
1480 p = pattern + prefixlen;
1481 match = TRUE; /* so far, pattern matches */
1482 while (match && *s != '\0' && *p != '\0')
1490 assert(arg >= 0 && arg <= 9);
1491 p++; /* skip parameter id */
1493 /* match the source string up to the character after the digit
1494 * (skipping strings in the process
1497 while (*e != *p && *e != '\0' && *e != '\n')
1499 if (is_startstring(e)) /* skip strings */
1501 else if (strchr("({[", *e) != NULL) /* skip parenthized groups */
1504 e++; /* skip non-alphapetic character (or closing quote of
1505 * a string, or the closing paranthese of a group) */
1507 /* store the parameter (overrule any earlier) */
1508 if (args[arg] != NULL)
1511 args[arg] = malloc(len + 1);
1512 if (args[arg] == NULL)
1513 error(103); /* insufficient memory */
1514 strncpy(args[arg], s, len);
1515 args[arg][len] = '\0';
1516 /* character behind the pattern was matched too */
1521 else if (*e == '\n' && *p == ';' && *(p + 1) == '\0'
1522 && !sc_needsemicolon)
1524 s = e; /* allow a trailing ; in the pattern match to end of line */
1528 assert(*e == '\0' || *e == '\n');
1539 else if (*p == ';' && *(p + 1) == '\0' && !sc_needsemicolon)
1541 /* source may be ';' or end of the line */
1542 while (*s <= ' ' && *s != '\0')
1543 s++; /* skip white space */
1544 if (*s != ';' && *s != '\0')
1546 p++; /* skip the semicolon in the pattern */
1552 /* skip whitespace between two non-alphanumeric characters, except
1553 * for two identical symbols
1555 assert(p > pattern);
1556 if (!alphanum(*p) && *(p - 1) != *p)
1557 while (*s <= ' ' && *s != '\0')
1558 s++; /* skip white space */
1559 ch = litchar(&p, FALSE); /* this increments "p" */
1563 s++; /* this character matches */
1567 if (match && *p == '\0')
1569 /* if the last character to match is an alphanumeric character, the
1570 * current character in the source may not be alphanumeric
1572 assert(p > pattern);
1573 if (alphanum(*(p - 1)) && alphanum(*s))
1579 /* calculate the length of the substituted string */
1580 for (e = substitution, len = 0; *e != '\0'; e++)
1582 if (*e == '%' && isdigit(*(e + 1)))
1584 arg = *(e + 1) - '0';
1585 assert(arg >= 0 && arg <= 9);
1586 if (args[arg] != NULL)
1587 len += strlen(args[arg]);
1588 e++; /* skip %, digit is skipped later */
1595 /* check length of the string after substitution */
1596 if (strlen(line) + len - (int)(s - line) > buffersize)
1598 error(75); /* line too long */
1602 /* substitute pattern */
1603 strdel(line, (int)(s - line));
1604 for (e = substitution, s = line; *e != '\0'; e++)
1606 if (*e == '%' && isdigit(*(e + 1)))
1608 arg = *(e + 1) - '0';
1609 assert(arg >= 0 && arg <= 9);
1610 if (args[arg] != NULL)
1612 strins(s, args[arg], strlen(args[arg]));
1613 s += strlen(args[arg]);
1615 e++; /* skip %, digit is skipped later */
1626 for (arg = 0; arg < 10; arg++)
1627 if (args[arg] != NULL)
1634 substallpatterns(char *line, int buffersize)
1641 while (*start != '\0')
1643 /* find the start of a prefix (skip all non-alphabetic characters),
1646 while (!isalpha(*start) && *start != '_' && *start != '\0')
1649 if (is_startstring(start))
1651 start = skipstring(start);
1653 break; /* abort loop on error */
1655 start++; /* skip non-alphapetic character (or closing quote of a string) */
1658 break; /* abort loop on error */
1659 /* get the prefix (length), look for a matching definition */
1662 while (isalpha(*end) || isdigit(*end) || *end == '_')
1667 assert(prefixlen > 0);
1668 subst = find_subst(start, prefixlen);
1671 /* properly match the pattern and substitute */
1673 (start, buffersize - (start - line), subst->first,
1675 start = end; /* match failed, skip this prefix */
1676 /* match succeeded: do not update "start", because the substitution text
1677 * may be matched by other macros
1682 start = end; /* no macro with this prefix, skip this prefix */
1690 * Reads a line by readline() into "pline" and performs basic preprocessing:
1691 * deleting comments, skipping lines with false "#if.." code and recognizing
1692 * other compiler directives. There is an indirect recursion: lex() calls
1693 * preprocess() if a new line must be read, preprocess() calls command(),
1694 * which at his turn calls lex() to identify the token.
1696 * Global references: lptr (altered)
1698 * freading (referred to only)
1710 stripcom(pline); /* ??? no need for this when reading back from list file (in the second pass) */
1711 lptr = pline; /* set "line pointer" to start of the parsing buffer */
1712 iscommand = command();
1713 if (iscommand != CMD_NONE)
1714 errorset(sRESET); /* reset error flag ("panic mode") on empty line or directive */
1715 #if !defined NO_DEFINE
1716 if (iscommand == CMD_NONE)
1718 assert(lptr != term_expr);
1719 substallpatterns(pline, sLINEMAX);
1720 lptr = pline; /* reset "line pointer" to start of the parsing buffer */
1724 while (iscommand != CMD_NONE && iscommand != CMD_TERM && freading); /* enddo */
1728 unpackedstring(char *lptr, int rawstring)
1730 while (*lptr != '\0')
1732 /* check for doublequotes indicating the end of the string */
1735 /* check whether there's another pair of quotes following.
1736 * If so, paste the two strings together, thus
1737 * "pants""off" becomes "pantsoff"
1739 if (*(lptr + 1) == '\"')
1746 { /* ignore '\a' (which was inserted at a line concatenation) */
1750 stowlit(litchar(&lptr, rawstring)); /* litchar() alters "lptr" */
1752 stowlit(0); /* terminate string */
1757 packedstring(char *lptr, int rawstring)
1762 i = sizeof(ucell) - (charbits / 8); /* start at most significant byte */
1764 while (*lptr != '\0')
1766 /* check for doublequotes indicating the end of the string */
1769 /* check whether there's another pair of quotes following.
1770 * If so, paste the two strings together, thus
1771 * "pants""off" becomes "pantsoff"
1773 if (*(lptr + 1) == '\"')
1780 { /* ignore '\a' (which was inserted at a line concatenation) */
1784 c = litchar(&lptr, rawstring); /* litchar() alters "lptr" */
1785 if (c >= (ucell) (1 << charbits))
1786 error(43); /* character constant exceeds range */
1787 val |= (c << 8 * i);
1793 i = (i + sizeof(ucell) - (charbits / 8)) % sizeof(ucell);
1795 /* save last code; make sure there is at least one terminating zero character */
1796 if (i != (int)(sizeof(ucell) - (charbits / 8)))
1797 stowlit(val); /* at least one zero character in "val" */
1799 stowlit(0); /* add full cell of zeros */
1803 /* lex(lexvalue,lexsym) Lexical Analysis
1805 * lex() first deletes leading white space, then checks for multi-character
1806 * operators, keywords (including most compiler directives), numbers,
1807 * labels, symbols and literals (literal characters are converted to a number
1808 * and are returned as such). If every check fails, the line must contain
1809 * a single-character operator. So, lex() returns this character. In the other
1810 * case (something did match), lex() returns the number of the token. All
1811 * these tokens have been assigned numbers above 255.
1813 * Some tokens have "attributes":
1814 * tNUMBER the value of the number is return in "lexvalue".
1815 * tRATIONAL the value is in IEEE 754 encoding or in fixed point
1816 * encoding in "lexvalue".
1817 * tSYMBOL the first sNAMEMAX characters of the symbol are
1818 * stored in a buffer, a pointer to this buffer is
1819 * returned in "lexsym".
1820 * tLABEL the first sNAMEMAX characters of the label are
1821 * stored in a buffer, a pointer to this buffer is
1822 * returned in "lexsym".
1823 * tSTRING the string is stored in the literal pool, the index
1824 * in the literal pool to this string is stored in
1827 * lex() stores all information (the token found and possibly its attribute)
1828 * in global variables. This allows a token to be examined twice. If "_pushed"
1829 * is true, this information is returned.
1831 * Global references: lptr (altered)
1832 * fline (referred to only)
1833 * litidx (referred to only)
1834 * _lextok, _lexval, _lexstr
1840 static cell _lexval;
1841 static char _lexstr[sLINEMAX + 1];
1842 static int _lexnewline;
1847 stkidx = 0; /* index for pushstk() and popstk() */
1848 iflevel = 0; /* preprocessor: nesting of "#if" */
1849 skiplevel = 0; /* preprocessor: skipping lines or compiling lines */
1850 icomment = FALSE; /* currently not in a multiline comment */
1851 _pushed = FALSE; /* no token pushed back into lex */
1852 _lexnewline = FALSE;
1855 char *sc_tokens[] = {
1856 "*=", "/=", "%=", "+=", "-=", "<<=", ">>>=", ">>=", "&=", "^=", "|=",
1857 "||", "&&", "==", "!=", "<=", ">=", "<<", ">>>", ">>", "++", "--",
1859 "assert", "break", "case", "char", "const", "continue", "default",
1860 "defined", "do", "else", "enum", "exit", "for", "forward", "goto",
1861 "if", "native", "new", "operator", "public", "return", "sizeof",
1862 "sleep", "static", "stock", "switch", "tagof", "while",
1863 "#assert", "#define", "#else", "#emit", "#endif", "#endinput",
1864 "#endscript", "#file", "#if", "#include", "#line", "#pragma", "#undef",
1865 ";", ";", "-integer value-", "-rational value-", "-identifier-",
1866 "-label-", "-string-"
1870 lex(cell * lexvalue, char **lexsym)
1872 int i, toolong, newline, rawstring;
1877 _pushed = FALSE; /* reset "_pushed" flag */
1878 *lexvalue = _lexval;
1883 _lextok = 0; /* preset all values */
1886 *lexvalue = _lexval;
1888 _lexnewline = FALSE;
1892 newline = (lptr == pline); /* does lptr point to start of line buffer */
1893 while (*lptr <= ' ')
1894 { /* delete leading white space */
1897 preprocess(); /* preprocess resets "lptr" */
1900 if (lptr == term_expr) /* special sequence to terminate a pending expression */
1901 return (_lextok = tENDEXPR);
1902 _lexnewline = TRUE; /* set this after preprocess(), because
1903 * preprocess() calls lex() recursively */
1914 for (i = 0; i < (int)(lptr - pline); i++)
1915 if (pline[i] == '\t' && sc_tabsize > 0)
1917 (int)(sc_tabsize - (stmtindent + sc_tabsize) % sc_tabsize);
1924 while (i <= tMIDDLE)
1925 { /* match multi-character operators */
1926 if (match(*tokptr, FALSE))
1935 { /* match reserved words and compiler directives */
1936 if (match(*tokptr, TRUE))
1939 errorset(sRESET); /* reset error flag (clear the "panic mode") */
1946 if ((i = number(&_lexval, lptr)) != 0)
1949 *lexvalue = _lexval;
1952 else if ((i = ftoi(&_lexval, lptr)) != 0)
1954 _lextok = tRATIONAL;
1955 *lexvalue = _lexval;
1958 else if (alpha(*lptr))
1959 { /* symbol or label */
1960 /* Note: only sNAMEMAX characters are significant. The compiler
1961 * generates a warning if a symbol exceeds this length.
1966 while (alphanum(*lptr))
1977 error(200, _lexstr, sNAMEMAX); /* symbol too long, truncated to sNAMEMAX chars */
1978 if (_lexstr[0] == PUBLIC_CHAR && _lexstr[1] == '\0')
1980 _lextok = PUBLIC_CHAR; /* '@' all alone is not a symbol, it is an operator */
1982 else if (_lexstr[0] == '_' && _lexstr[1] == '\0')
1984 _lextok = '_'; /* '_' by itself is not a symbol, it is a placeholder */
1986 if (*lptr == ':' && sc_allowtags && _lextok != PUBLIC_CHAR)
1988 _lextok = tLABEL; /* it wasn't a normal symbol, it was a label/tagname */
1989 lptr += 1; /* skip colon */
1992 else if (*lptr == '\"' || (*lptr == sc_ctrlchar && *(lptr + 1) == '\"'))
1993 { /* unpacked string literal */
1995 rawstring = (*lptr == sc_ctrlchar);
1996 *lexvalue = _lexval = litidx;
1997 lptr += 1; /* skip double quote */
1999 lptr += 1; /* skip "escape" character too */
2001 sc_packstr ? packedstring(lptr, rawstring) : unpackedstring(lptr,
2004 lptr += 1; /* skip final quote */
2006 error(37); /* invalid (non-terminated) string */
2008 else if ((*lptr == '!' && *(lptr + 1) == '\"')
2009 || (*lptr == '!' && *(lptr + 1) == sc_ctrlchar && *(lptr + 2) == '\"')
2010 || (*lptr == sc_ctrlchar && *(lptr + 1) == '!'
2011 && *(lptr + 2) == '\"'))
2012 { /* packed string literal */
2014 rawstring = (*lptr == sc_ctrlchar || *(lptr + 1) == sc_ctrlchar);
2015 *lexvalue = _lexval = litidx;
2016 lptr += 2; /* skip exclamation point and double quote */
2018 lptr += 1; /* skip "escape" character too */
2020 sc_packstr ? unpackedstring(lptr, rawstring) : packedstring(lptr,
2023 lptr += 1; /* skip final quote */
2025 error(37); /* invalid (non-terminated) string */
2027 else if (*lptr == '\'')
2028 { /* character literal */
2029 lptr += 1; /* skip quote */
2031 *lexvalue = _lexval = litchar(&lptr, FALSE);
2033 lptr += 1; /* skip final quote */
2035 error(27); /* invalid character constant (must be one character) */
2037 else if (*lptr == ';')
2038 { /* semicolumn resets "error" flag */
2041 errorset(sRESET); /* reset error flag (clear the "panic mode") */
2045 _lextok = *lptr; /* if every match fails, return the character */
2046 lptr += 1; /* increase the "lptr" pointer */
2053 * Pushes a token back, so the next call to lex() will return the token
2054 * last examined, instead of a new token.
2056 * Only one token can be pushed back.
2058 * In fact, lex() already stores the information it finds into global
2059 * variables, so all that is to be done is set a flag that informs lex()
2060 * to read and return the information from these variables, rather than
2061 * to read in a new token from the input file.
2066 assert(_pushed == FALSE);
2072 * Sets the variable "_pushed" to 0 to make sure lex() will read in a new
2073 * symbol (a not continue with some old one). This is required upon return
2074 * from Assembler mode.
2082 lptr = strchr(pline, '\0');
2083 assert(lptr != NULL);
2089 * This routine is useful if only a simple check is needed. If the token
2090 * differs from the one expected, it is pushed back.
2093 matchtoken(int token)
2099 tok = lex(&val, &str);
2100 if (tok == token || (token == tTERM && (tok == ';' || tok == tENDEXPR)))
2104 else if (!sc_needsemicolon && token == tTERM && (_lexnewline || !freading))
2106 lexpush(); /* push "tok" back, we use the "hidden" newline token */
2118 * Returns additional information of a token after using "matchtoken()"
2119 * or needtoken(). It does no harm using this routine after a call to
2120 * "lex()", but lex() already returns the same information.
2122 * The token itself is the return value. Normally, this one is already known.
2125 tokeninfo(cell * val, char **str)
2127 /* if the token was pushed back, tokeninfo() returns the token and
2128 * parameters of the *next* token, not of the *current* token.
2138 * This routine checks for a required token and gives an error message if
2139 * it isn't there (and returns FALSE in that case).
2141 * Global references: _lextok;
2144 needtoken(int token)
2146 char s1[20], s2[20];
2148 if (matchtoken(token))
2154 /* token already pushed back */
2157 sprintf(s1, "%c", (char)token); /* single character token */
2159 strcpy(s1, sc_tokens[token - tFIRST]); /* multi-character symbol */
2161 strcpy(s2, "-end of file-");
2162 else if (_lextok < 256)
2163 sprintf(s2, "%c", (char)_lextok);
2165 strcpy(s2, sc_tokens[_lextok - tFIRST]);
2166 error(1, s1, s2); /* expected ..., but found ... */
2173 * Compares a series of characters from the input file with the characters
2174 * in "st" (that contains a token). If the token on the input file matches
2175 * "st", the input file pointer "lptr" is adjusted to point to the next
2176 * token, otherwise "lptr" remains unaltered.
2178 * If the parameter "end: is true, match() requires that the first character
2179 * behind the recognized token is non-alphanumeric.
2181 * Global references: lptr (altered)
2184 match(char *st, int end)
2199 { /* symbol must terminate with non-alphanumeric char */
2203 lptr = ptr; /* match found, skip symbol */
2209 * Stores a value into the literal queue. The literal queue is used for
2210 * literal strings used in functions and for initializing array variables.
2212 * Global references: litidx (altered)
2218 if (litidx >= litmax)
2222 litmax += sDEF_LITMAX;
2223 p = (cell *) realloc(litq, litmax * sizeof(cell));
2225 error(102, "literal table"); /* literal table overflow (fatal error) */
2228 assert(litidx < litmax);
2229 litq[litidx++] = value;
2234 * Return current literal character and increase the pointer to point
2235 * just behind this literal character.
2237 * Note: standard "escape sequences" are suported, but the backslash may be
2238 * replaced by another character; the syntax '\ddd' is supported,
2239 * but ddd must be decimal!
2242 litchar(char **lptr, int rawmode)
2245 unsigned char *cptr;
2247 cptr = (unsigned char *)*lptr;
2248 if (rawmode || *cptr != sc_ctrlchar)
2249 { /* no escape character */
2256 if (*cptr == sc_ctrlchar)
2258 c = *cptr; /* \\ == \ (the escape character itself) */
2265 case 'a': /* \a == audible alarm */
2269 case 'b': /* \b == backspace */
2273 case 'e': /* \e == escape */
2277 case 'f': /* \f == form feed */
2281 case 'n': /* \n == NewLine character */
2285 case 'r': /* \r == carriage return */
2289 case 't': /* \t == horizontal TAB */
2293 case 'v': /* \v == vertical TAB */
2297 case '\'': /* \' == ' (single quote) */
2298 case '"': /* \" == " (single quote) */
2299 case '%': /* \% == % (percent) */
2307 while (*cptr >= '0' && *cptr <= '9') /* decimal! */
2308 c = c * 10 + *cptr++ - '0';
2310 cptr++; /* swallow a trailing ';' */
2314 error(27); /* invalid character constant */
2319 *lptr = (char *)cptr;
2320 assert(c >= 0 && c < 256);
2326 * Test if character "c" is alphabetic ("a".."z"), an underscore ("_")
2327 * or an "at" sign ("@"). The "@" is an extension to standard C.
2332 return (isalpha(c) || c == '_' || c == PUBLIC_CHAR);
2337 * Test if character "c" is alphanumeric ("a".."z", "0".."9", "_" or "@")
2342 return (alpha(c) || isdigit(c));
2345 /* The local variable table must be searched backwards, so that the deepest
2346 * nesting of local variables is searched first. The simplest way to do
2347 * this is to insert all new items at the head of the list.
2348 * In the global list, the symbols are kept in sorted order, so that the
2349 * public functions are written in sorted order.
2352 add_symbol(symbol * root, symbol * entry, int sort)
2357 while (root->next != NULL && strcmp(entry->name, root->next->name) > 0)
2360 if ((newsym = (symbol *) malloc(sizeof(symbol))) == NULL)
2365 memcpy(newsym, entry, sizeof(symbol));
2366 newsym->next = root->next;
2367 root->next = newsym;
2372 free_symbol(symbol * sym)
2376 /* free all sub-symbol allocated memory blocks, depending on the
2377 * kind of the symbol
2379 assert(sym != NULL);
2380 if (sym->ident == iFUNCTN)
2382 /* run through the argument list; "default array" arguments
2383 * must be freed explicitly; the tag list must also be freed */
2384 assert(sym->dim.arglist != NULL);
2385 for (arg = sym->dim.arglist; arg->ident != 0; arg++)
2387 if (arg->ident == iREFARRAY && arg->hasdefault)
2388 free(arg->defvalue.array.data);
2389 else if (arg->ident == iVARIABLE
2390 && ((arg->hasdefault & uSIZEOF) != 0
2391 || (arg->hasdefault & uTAGOF) != 0))
2392 free(arg->defvalue.size.symname);
2393 assert(arg->tags != NULL);
2396 free(sym->dim.arglist);
2398 assert(sym->refer != NULL);
2404 delete_symbol(symbol * root, symbol * sym)
2406 /* find the symbol and its predecessor
2407 * (this function assumes that you will never delete a symbol that is not
2408 * in the table pointed at by "root")
2410 assert(root != sym);
2411 while (root->next != sym)
2414 assert(root != NULL);
2417 /* unlink it, then free it */
2418 root->next = sym->next;
2423 delete_symbols(symbol * root, int level, int delete_labels,
2424 int delete_functions)
2428 /* erase only the symbols with a deeper nesting level than the
2429 * specified nesting level */
2430 while (root->next != NULL)
2433 if (sym->compound < level)
2435 if ((delete_labels || sym->ident != iLABEL)
2436 && (delete_functions || sym->ident != iFUNCTN
2437 || (sym->usage & uNATIVE) != 0) && (delete_functions
2438 || sym->ident != iCONSTEXPR
2439 || (sym->usage & uPREDEF) ==
2440 0) && (delete_functions
2446 root->next = sym->next;
2451 /* if the function was prototyped, but not implemented in this source,
2452 * mark it as such, so that its use can be flagged
2454 if (sym->ident == iFUNCTN && (sym->usage & uDEFINE) == 0)
2455 sym->usage |= uMISSING;
2456 if (sym->ident == iFUNCTN || sym->ident == iVARIABLE
2457 || sym->ident == iARRAY)
2458 sym->usage &= ~uDEFINE; /* clear "defined" flag */
2459 /* for user defined operators, also remove the "prototyped" flag, as
2460 * user-defined operators *must* be declared before use
2462 if (sym->ident == iFUNCTN && !isalpha(*sym->name)
2463 && *sym->name != '_' && *sym->name != PUBLIC_CHAR)
2464 sym->usage &= ~uPROTOTYPED;
2465 root = sym; /* skip the symbol */
2470 /* The purpose of the hash is to reduce the frequency of a "name"
2471 * comparison (which is costly). There is little interest in avoiding
2472 * clusters in similar names, which is why this function is plain simple.
2475 namehash(char *name)
2477 unsigned char *ptr = (unsigned char *)name;
2478 int len = strlen(name);
2483 return (len << 24Lu) + (ptr[0] << 16Lu) + (ptr[len - 1] << 8Lu) +
2488 find_symbol(symbol * root, char *name, int fnumber)
2490 symbol *ptr = root->next;
2491 unsigned long hash = namehash(name);
2495 if (hash == ptr->hash && strcmp(name, ptr->name) == 0
2496 && ptr->parent == NULL && (ptr->fnumber < 0
2497 || ptr->fnumber == fnumber))
2505 find_symbol_child(symbol * root, symbol * sym)
2507 symbol *ptr = root->next;
2511 if (ptr->parent == sym)
2518 /* Adds "bywhom" to the list of referrers of "entry". Typically,
2519 * bywhom will be the function that uses a variable or that calls
2523 refer_symbol(symbol * entry, symbol * bywhom)
2527 assert(bywhom != NULL); /* it makes no sense to add a "void" referrer */
2528 assert(entry != NULL);
2529 assert(entry->refer != NULL);
2531 /* see if it is already there */
2532 for (count = 0; count < entry->numrefers && entry->refer[count] != bywhom;
2535 if (count < entry->numrefers)
2537 assert(entry->refer[count] == bywhom);
2541 /* see if there is an empty spot in the referrer list */
2542 for (count = 0; count < entry->numrefers && entry->refer[count] != NULL;
2545 assert(count <= entry->numrefers);
2546 if (count == entry->numrefers)
2549 int newsize = 2 * entry->numrefers;
2551 assert(newsize > 0);
2552 /* grow the referrer list */
2553 refer = (symbol **) realloc(entry->refer, newsize * sizeof(symbol *));
2555 return FALSE; /* insufficient memory */
2556 /* initialize the new entries */
2557 entry->refer = refer;
2558 for (count = entry->numrefers; count < newsize; count++)
2559 entry->refer[count] = NULL;
2560 count = entry->numrefers; /* first empty spot */
2561 entry->numrefers = newsize;
2564 /* add the referrer */
2565 assert(entry->refer[count] == NULL);
2566 entry->refer[count] = bywhom;
2571 markusage(symbol * sym, int usage)
2573 sym->usage |= (char)usage;
2574 /* check if (global) reference must be added to the symbol */
2575 if ((usage & (uREAD | uWRITTEN)) != 0)
2577 /* only do this for global symbols */
2578 if (sym->vclass == sGLOBAL)
2580 /* "curfunc" should always be valid, since statements may not occurs
2581 * outside functions; in the case of syntax errors, however, the
2582 * compiler may arrive through this function
2584 if (curfunc != NULL)
2585 refer_symbol(sym, curfunc);
2592 * Returns a pointer to the global symbol (if found) or NULL (if not found)
2597 return find_symbol(&glbtab, name, fcurrent);
2602 * Returns a pointer to the local symbol (if found) or NULL (if not found).
2603 * See add_symbol() how the deepest nesting level is searched first.
2608 return find_symbol(&loctab, name, -1);
2612 findconst(char *name)
2616 sym = find_symbol(&loctab, name, -1); /* try local symbols first */
2617 if (sym == NULL || sym->ident != iCONSTEXPR) /* not found, or not a constant */
2618 sym = find_symbol(&glbtab, name, fcurrent);
2619 if (sym == NULL || sym->ident != iCONSTEXPR)
2621 assert(sym->parent == NULL); /* constants have no hierarchy */
2626 finddepend(symbol * parent)
2630 sym = find_symbol_child(&loctab, parent); /* try local symbols first */
2631 if (sym == NULL) /* not found */
2632 sym = find_symbol_child(&glbtab, parent);
2638 * Adds a symbol to the symbol table (either global or local variables,
2639 * or global and local constants).
2642 addsym(char *name, cell addr, int ident, int vclass, int tag, int usage)
2644 symbol entry, **refer;
2646 /* global variables/constants/functions may only be defined once */
2647 assert(!(ident == iFUNCTN || ident == iCONSTEXPR) || vclass != sGLOBAL
2648 || findglb(name) == NULL);
2649 /* labels may only be defined once */
2650 assert(ident != iLABEL || findloc(name) == NULL);
2652 /* create an empty referrer list */
2653 if ((refer = (symbol **) malloc(sizeof(symbol *))) == NULL)
2655 error(103); /* insufficient memory */
2660 /* first fill in the entry */
2661 strcpy(entry.name, name);
2662 entry.hash = namehash(name);
2664 entry.vclass = (char)vclass;
2665 entry.ident = (char)ident;
2667 entry.usage = (char)usage;
2668 entry.compound = 0; /* may be overridden later */
2669 entry.fnumber = -1; /* assume global visibility (ignored for local symbols) */
2670 entry.numrefers = 1;
2671 entry.refer = refer;
2672 entry.parent = NULL;
2674 /* then insert it in the list */
2675 if (vclass == sGLOBAL)
2676 return add_symbol(&glbtab, &entry, TRUE);
2678 return add_symbol(&loctab, &entry, FALSE);
2682 addvariable(char *name, cell addr, int ident, int vclass, int tag,
2683 int dim[], int numdim, int idxtag[])
2685 symbol *sym, *parent, *top;
2688 /* global variables may only be defined once */
2689 assert(vclass != sGLOBAL || (sym = findglb(name)) == NULL
2690 || (sym->usage & uDEFINE) == 0);
2692 if (ident == iARRAY || ident == iREFARRAY)
2695 sym = NULL; /* to avoid a compiler warning */
2696 for (level = 0; level < numdim; level++)
2698 top = addsym(name, addr, ident, vclass, tag, uDEFINE);
2699 top->dim.array.length = dim[level];
2700 top->dim.array.level = (short)(numdim - level - 1);
2701 top->x.idxtag = idxtag[level];
2702 top->parent = parent;
2710 sym = addsym(name, addr, ident, vclass, tag, uDEFINE);
2717 * Return next available internal label number.
2727 * Converts a number to a hexadecimal string and returns a pointer to that
2733 static char itohstr[15]; /* hex number is 10 characters long at most */
2735 int i, nibble[8]; /* a 32-bit hexadecimal cell has 8 nibbles */
2744 for (i = 0; i < max; i += 1)
2746 nibble[i] = (int)(val & 0x0f); /* nibble 0 is lowest nibble */
2750 while (nibble[i] == 0 && i > 0) /* search for highest non-zero nibble */
2754 if (nibble[i] >= 10)
2755 *ptr++ = (char)('a' + (nibble[i] - 10));
2757 *ptr++ = (char)('0' + nibble[i]);
2760 *ptr = '\0'; /* and a zero-terminator */