From 40675e7cc90b44d3c82c424946ea55083eb78121 Mon Sep 17 00:00:00 2001 From: David MacKenzie Date: Tue, 20 Apr 1993 05:55:54 +0000 Subject: [PATCH] entered into RCS --- src/LR0.c | 704 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++ src/allocate.c | 64 ++++++ src/files.c | 10 +- src/lex.c | 504 +++++++++++++++++++++++++++++++++++++++++ src/symtab.c | 150 ++++++++++++ 5 files changed, 1427 insertions(+), 5 deletions(-) create mode 100644 src/LR0.c create mode 100644 src/allocate.c create mode 100644 src/lex.c create mode 100644 src/symtab.c diff --git a/src/LR0.c b/src/LR0.c new file mode 100644 index 0000000..77cc025 --- /dev/null +++ b/src/LR0.c @@ -0,0 +1,704 @@ +/* Generate the nondeterministic finite state machine for bison, + Copyright (C) 1984, 1986, 1989 Free Software Foundation, Inc. + +This file is part of Bison, the GNU Compiler Compiler. + +Bison is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 2, or (at your option) +any later version. + +Bison is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with Bison; see the file COPYING. If not, write to +the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA. */ + + +/* See comments in state.h for the data structures that represent it. + The entry point is generate_states. */ + +#include +#include "system.h" +#include "machine.h" +#include "new.h" +#include "gram.h" +#include "state.h" + + +extern char *nullable; +extern short *itemset; +extern short *itemsetend; + + +int nstates; +int final_state; +core *first_state; +shifts *first_shift; +reductions *first_reduction; + +int get_state(); +core *new_state(); + +void new_itemsets(); +void append_states(); +void initialize_states(); +void save_shifts(); +void save_reductions(); +void augment_automaton(); +void insert_start_shift(); +extern void initialize_closure(); +extern void closure(); +extern void finalize_closure(); +extern void toomany(); + +static core *this_state; +static core *last_state; +static shifts *last_shift; +static reductions *last_reduction; + +static int nshifts; +static short *shift_symbol; + +static short *redset; +static short *shiftset; + +static short **kernel_base; +static short **kernel_end; +static short *kernel_items; + +/* hash table for states, to recognize equivalent ones. */ + +#define STATE_TABLE_SIZE 1009 +static core **state_table; + + + +void +allocate_itemsets() +{ + register short *itemp; + register int symbol; + register int i; + register int count; + register short *symbol_count; + + count = 0; + symbol_count = NEW2(nsyms, short); + + itemp = ritem; + symbol = *itemp++; + while (symbol) + { + if (symbol > 0) + { + count++; + symbol_count[symbol]++; + } + symbol = *itemp++; + } + + /* see comments before new_itemsets. All the vectors of items + live inside kernel_items. The number of active items after + some symbol cannot be more than the number of times that symbol + appears as an item, which is symbol_count[symbol]. + We allocate that much space for each symbol. */ + + kernel_base = NEW2(nsyms, short *); + kernel_items = NEW2(count, short); + + count = 0; + for (i = 0; i < nsyms; i++) + { + kernel_base[i] = kernel_items + count; + count += symbol_count[i]; + } + + shift_symbol = symbol_count; + kernel_end = NEW2(nsyms, short *); +} + + +void +allocate_storage() +{ + allocate_itemsets(); + + shiftset = NEW2(nsyms, short); + redset = NEW2(nrules + 1, short); + state_table = NEW2(STATE_TABLE_SIZE, core *); +} + + +void +free_storage() +{ + FREE(shift_symbol); + FREE(redset); + FREE(shiftset); + FREE(kernel_base); + FREE(kernel_end); + FREE(kernel_items); + FREE(state_table); +} + + + +/* compute the nondeterministic finite state machine (see state.h for details) +from the grammar. */ +void +generate_states() +{ + allocate_storage(); + initialize_closure(nitems); + initialize_states(); + + while (this_state) + { + /* Set up ruleset and itemset for the transitions out of this state. + ruleset gets a 1 bit for each rule that could reduce now. + itemset gets a vector of all the items that could be accepted next. */ + closure(this_state->items, this_state->nitems); + /* record the reductions allowed out of this state */ + save_reductions(); + /* find the itemsets of the states that shifts can reach */ + new_itemsets(); + /* find or create the core structures for those states */ + append_states(); + + /* create the shifts structures for the shifts to those states, + now that the state numbers transitioning to are known */ + if (nshifts > 0) + save_shifts(); + + /* states are queued when they are created; process them all */ + this_state = this_state->next; + } + + /* discard various storage */ + finalize_closure(); + free_storage(); + + /* set up initial and final states as parser wants them */ + augment_automaton(); +} + + + +/* Find which symbols can be shifted in the current state, + and for each one record which items would be active after that shift. + Uses the contents of itemset. + shift_symbol is set to a vector of the symbols that can be shifted. + For each symbol in the grammar, kernel_base[symbol] points to + a vector of item numbers activated if that symbol is shifted, + and kernel_end[symbol] points after the end of that vector. */ +void +new_itemsets() +{ + register int i; + register int shiftcount; + register short *isp; + register short *ksp; + register int symbol; + +#ifdef TRACE + fprintf(stderr, "Entering new_itemsets\n"); +#endif + + for (i = 0; i < nsyms; i++) + kernel_end[i] = NULL; + + shiftcount = 0; + + isp = itemset; + + while (isp < itemsetend) + { + i = *isp++; + symbol = ritem[i]; + if (symbol > 0) + { + ksp = kernel_end[symbol]; + + if (!ksp) + { + shift_symbol[shiftcount++] = symbol; + ksp = kernel_base[symbol]; + } + + *ksp++ = i + 1; + kernel_end[symbol] = ksp; + } + } + + nshifts = shiftcount; +} + + + +/* Use the information computed by new_itemsets to find the state numbers + reached by each shift transition from the current state. + + shiftset is set up as a vector of state numbers of those states. */ +void +append_states() +{ + register int i; + register int j; + register int symbol; + +#ifdef TRACE + fprintf(stderr, "Entering append_states\n"); +#endif + + /* first sort shift_symbol into increasing order */ + + for (i = 1; i < nshifts; i++) + { + symbol = shift_symbol[i]; + j = i; + while (j > 0 && shift_symbol[j - 1] > symbol) + { + shift_symbol[j] = shift_symbol[j - 1]; + j--; + } + shift_symbol[j] = symbol; + } + + for (i = 0; i < nshifts; i++) + { + symbol = shift_symbol[i]; + shiftset[i] = get_state(symbol); + } +} + + + +/* find the state number for the state we would get to +(from the current state) by shifting symbol. +Create a new state if no equivalent one exists already. +Used by append_states */ + +int +get_state(symbol) +int symbol; +{ + register int key; + register short *isp1; + register short *isp2; + register short *iend; + register core *sp; + register int found; + + int n; + +#ifdef TRACE + fprintf(stderr, "Entering get_state, symbol = %d\n", symbol); +#endif + + isp1 = kernel_base[symbol]; + iend = kernel_end[symbol]; + n = iend - isp1; + + /* add up the target state's active item numbers to get a hash key */ + key = 0; + while (isp1 < iend) + key += *isp1++; + + key = key % STATE_TABLE_SIZE; + + sp = state_table[key]; + + if (sp) + { + found = 0; + while (!found) + { + if (sp->nitems == n) + { + found = 1; + isp1 = kernel_base[symbol]; + isp2 = sp->items; + + while (found && isp1 < iend) + { + if (*isp1++ != *isp2++) + found = 0; + } + } + + if (!found) + { + if (sp->link) + { + sp = sp->link; + } + else /* bucket exhausted and no match */ + { + sp = sp->link = new_state(symbol); + found = 1; + } + } + } + } + else /* bucket is empty */ + { + state_table[key] = sp = new_state(symbol); + } + + return (sp->number); +} + + + +/* subroutine of get_state. create a new state for those items, if necessary. */ + +core * +new_state(symbol) +int symbol; +{ + register int n; + register core *p; + register short *isp1; + register short *isp2; + register short *iend; + +#ifdef TRACE + fprintf(stderr, "Entering new_state, symbol = %d\n", symbol); +#endif + + if (nstates >= MAXSHORT) + toomany("states"); + + isp1 = kernel_base[symbol]; + iend = kernel_end[symbol]; + n = iend - isp1; + + p = (core *) xmalloc((unsigned) (sizeof(core) + (n - 1) * sizeof(short))); + p->accessing_symbol = symbol; + p->number = nstates; + p->nitems = n; + + isp2 = p->items; + while (isp1 < iend) + *isp2++ = *isp1++; + + last_state->next = p; + last_state = p; + + nstates++; + + return (p); +} + + +void +initialize_states() +{ + register core *p; +/* register unsigned *rp1; JF unused */ +/* register unsigned *rp2; JF unused */ +/* register unsigned *rend; JF unused */ + + p = (core *) xmalloc((unsigned) (sizeof(core) - sizeof(short))); + first_state = last_state = this_state = p; + nstates = 1; +} + + +void +save_shifts() +{ + register shifts *p; + register short *sp1; + register short *sp2; + register short *send; + + p = (shifts *) xmalloc((unsigned) (sizeof(shifts) + + (nshifts - 1) * sizeof(short))); + + p->number = this_state->number; + p->nshifts = nshifts; + + sp1 = shiftset; + sp2 = p->shifts; + send = shiftset + nshifts; + + while (sp1 < send) + *sp2++ = *sp1++; + + if (last_shift) + { + last_shift->next = p; + last_shift = p; + } + else + { + first_shift = p; + last_shift = p; + } +} + + + +/* find which rules can be used for reduction transitions from the current state + and make a reductions structure for the state to record their rule numbers. */ +void +save_reductions() +{ + register short *isp; + register short *rp1; + register short *rp2; + register int item; + register int count; + register reductions *p; + + short *rend; + + /* find and count the active items that represent ends of rules */ + + count = 0; + for (isp = itemset; isp < itemsetend; isp++) + { + item = ritem[*isp]; + if (item < 0) + { + redset[count++] = -item; + } + } + + /* make a reductions structure and copy the data into it. */ + + if (count) + { + p = (reductions *) xmalloc((unsigned) (sizeof(reductions) + + (count - 1) * sizeof(short))); + + p->number = this_state->number; + p->nreds = count; + + rp1 = redset; + rp2 = p->rules; + rend = rp1 + count; + + while (rp1 < rend) + *rp2++ = *rp1++; + + if (last_reduction) + { + last_reduction->next = p; + last_reduction = p; + } + else + { + first_reduction = p; + last_reduction = p; + } + } +} + + + +/* Make sure that the initial state has a shift that accepts the +grammar's start symbol and goes to the next-to-final state, +which has a shift going to the final state, which has a shift +to the termination state. +Create such states and shifts if they don't happen to exist already. */ +void +augment_automaton() +{ + register int i; + register int k; +/* register int found; JF unused */ + register core *statep; + register shifts *sp; + register shifts *sp2; + register shifts *sp1; + + sp = first_shift; + + if (sp) + { + if (sp->number == 0) + { + k = sp->nshifts; + statep = first_state->next; + + /* The states reached by shifts from first_state are numbered 1...K. + Look for one reached by start_symbol. */ + while (statep->accessing_symbol < start_symbol + && statep->number < k) + statep = statep->next; + + if (statep->accessing_symbol == start_symbol) + { + /* We already have a next-to-final state. + Make sure it has a shift to what will be the final state. */ + k = statep->number; + + while (sp && sp->number < k) + { + sp1 = sp; + sp = sp->next; + } + + if (sp && sp->number == k) + { + sp2 = (shifts *) xmalloc((unsigned) (sizeof(shifts) + + sp->nshifts * sizeof(short))); + sp2->number = k; + sp2->nshifts = sp->nshifts + 1; + sp2->shifts[0] = nstates; + for (i = sp->nshifts; i > 0; i--) + sp2->shifts[i] = sp->shifts[i - 1]; + + /* Patch sp2 into the chain of shifts in place of sp, + following sp1. */ + sp2->next = sp->next; + sp1->next = sp2; + if (sp == last_shift) + last_shift = sp2; + FREE(sp); + } + else + { + sp2 = NEW(shifts); + sp2->number = k; + sp2->nshifts = 1; + sp2->shifts[0] = nstates; + + /* Patch sp2 into the chain of shifts between sp1 and sp. */ + sp2->next = sp; + sp1->next = sp2; + if (sp == 0) + last_shift = sp2; + } + } + else + { + /* There is no next-to-final state as yet. */ + /* Add one more shift in first_shift, + going to the next-to-final state (yet to be made). */ + sp = first_shift; + + sp2 = (shifts *) xmalloc(sizeof(shifts) + + sp->nshifts * sizeof(short)); + sp2->nshifts = sp->nshifts + 1; + + /* Stick this shift into the vector at the proper place. */ + statep = first_state->next; + for (k = 0, i = 0; i < sp->nshifts; k++, i++) + { + if (statep->accessing_symbol > start_symbol && i == k) + sp2->shifts[k++] = nstates; + sp2->shifts[k] = sp->shifts[i]; + statep = statep->next; + } + if (i == k) + sp2->shifts[k++] = nstates; + + /* Patch sp2 into the chain of shifts + in place of sp, at the beginning. */ + sp2->next = sp->next; + first_shift = sp2; + if (last_shift == sp) + last_shift = sp2; + + FREE(sp); + + /* Create the next-to-final state, with shift to + what will be the final state. */ + insert_start_shift(); + } + } + else + { + /* The initial state didn't even have any shifts. + Give it one shift, to the next-to-final state. */ + sp = NEW(shifts); + sp->nshifts = 1; + sp->shifts[0] = nstates; + + /* Patch sp into the chain of shifts at the beginning. */ + sp->next = first_shift; + first_shift = sp; + + /* Create the next-to-final state, with shift to + what will be the final state. */ + insert_start_shift(); + } + } + else + { + /* There are no shifts for any state. + Make one shift, from the initial state to the next-to-final state. */ + + sp = NEW(shifts); + sp->nshifts = 1; + sp->shifts[0] = nstates; + + /* Initialize the chain of shifts with sp. */ + first_shift = sp; + last_shift = sp; + + /* Create the next-to-final state, with shift to + what will be the final state. */ + insert_start_shift(); + } + + /* Make the final state--the one that follows a shift from the + next-to-final state. + The symbol for that shift is 0 (end-of-file). */ + statep = (core *) xmalloc((unsigned) (sizeof(core) - sizeof(short))); + statep->number = nstates; + last_state->next = statep; + last_state = statep; + + /* Make the shift from the final state to the termination state. */ + sp = NEW(shifts); + sp->number = nstates++; + sp->nshifts = 1; + sp->shifts[0] = nstates; + last_shift->next = sp; + last_shift = sp; + + /* Note that the variable `final_state' refers to what we sometimes call + the termination state. */ + final_state = nstates; + + /* Make the termination state. */ + statep = (core *) xmalloc((unsigned) (sizeof(core) - sizeof(short))); + statep->number = nstates++; + last_state->next = statep; + last_state = statep; +} + + +/* subroutine of augment_automaton. + Create the next-to-final state, to which a shift has already been made in + the initial state. */ +void +insert_start_shift() +{ + register core *statep; + register shifts *sp; + + statep = (core *) xmalloc((unsigned) (sizeof(core) - sizeof(short))); + statep->number = nstates; + statep->accessing_symbol = start_symbol; + + last_state->next = statep; + last_state = statep; + + /* Make a shift from this state to (what will be) the final state. */ + sp = NEW(shifts); + sp->number = nstates++; + sp->nshifts = 1; + sp->shifts[0] = nstates; + + last_shift->next = sp; + last_shift = sp; +} diff --git a/src/allocate.c b/src/allocate.c new file mode 100644 index 0000000..a74dc18 --- /dev/null +++ b/src/allocate.c @@ -0,0 +1,64 @@ +/* Allocate and clear storage for bison, + Copyright (C) 1984, 1989 Free Software Foundation, Inc. + +This file is part of Bison, the GNU Compiler Compiler. + +Bison is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 2, or (at your option) +any later version. + +Bison is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with Bison; see the file COPYING. If not, write to +the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA. */ + + +#include + +extern char *calloc (); +extern char *realloc (); +extern void done (); + +extern char *program_name; + +char * +xmalloc (n) + register unsigned n; +{ + register char *block; + + /* Avoid uncertainty about what an arg of 0 will do. */ + if (n == 0) + n = 1; + block = calloc (n, 1); + if (block == NULL) + { + fprintf (stderr, "%s: memory exhausted\n", program_name); + done (1); + } + + return (block); +} + +char * +xrealloc (block, n) + register char *block; + register unsigned n; +{ + /* Avoid uncertainty about what an arg of 0 will do. */ + if (n == 0) + n = 1; + block = realloc (block, n); + if (block == NULL) + { + fprintf (stderr, "%s: memory exhausted\n", program_name); + done (1); + } + + return (block); +} diff --git a/src/files.c b/src/files.c index 0034242..e9a28f7 100644 --- a/src/files.c +++ b/src/files.c @@ -150,7 +150,7 @@ openfiles() short_base_length = strlen (spec_file_prefix); /* Count room for `.tab'. */ base_length = short_base_length + 4; - name_base = (char *) mallocate (base_length + 1); + name_base = (char *) xmalloc (base_length + 1); /* Append `.tab'. */ strcpy (name_base, spec_file_prefix); #ifdef VMS @@ -194,9 +194,9 @@ openfiles() #ifdef MSDOS /* File doesn't exist in current directory; try in INIT directory. */ cp = getenv("INIT"); - if (filename == 0 && cp != 0) + if (filename == 0 && cp != NULL) { - filename = malloc(strlen(cp) + strlen(PFILE) + 2); + filename = xmalloc(strlen(cp) + strlen(PFILE) + 2); strcpy(filename, cp); cp = filename + strlen(filename); *cp++ = '/'; @@ -284,9 +284,9 @@ open_extra_files() #ifdef MSDOS /* File doesn't exist in current directory; try in INIT directory. */ cp = getenv("INIT"); - if (filename == 0 && cp != 0) + if (filename == 0 && cp != NULL) { - filename = malloc(strlen(cp) + strlen(PFILE1) + 2); + filename = xmalloc(strlen(cp) + strlen(PFILE1) + 2); strcpy(filename, cp); cp = filename + strlen(filename); *cp++ = '/'; diff --git a/src/lex.c b/src/lex.c new file mode 100644 index 0000000..20d89a1 --- /dev/null +++ b/src/lex.c @@ -0,0 +1,504 @@ +/* Token-reader for Bison's input parser, + Copyright (C) 1984, 1986, 1989 Free Software Foundation, Inc. + +This file is part of Bison, the GNU Compiler Compiler. + +Bison is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 2, or (at your option) +any later version. + +Bison is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with Bison; see the file COPYING. If not, write to +the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA. */ + + +/* + lex() is the entry point. It is called from reader.c. + It returns one of the token-type codes defined in lex.h. + When an identifier is seen, the code IDENTIFIER is returned + and the name is looked up in the symbol table using symtab.c; + symval is set to a pointer to the entry found. */ + +#include +#include +#include "system.h" +#include "files.h" +#include "symtab.h" +#include "lex.h" +#include "new.h" + + +extern int lineno; +extern int translations; + +int parse_percent_token(); + +extern void fatals(); +extern void fatal(); + +/* Buffer for storing the current token. */ +char *token_buffer; + +/* Allocated size of token_buffer, not including space for terminator. */ +static int maxtoken; + +bucket *symval; +int numval; + +static int unlexed; /* these two describe a token to be reread */ +static bucket *unlexed_symval; /* by the next call to lex */ + + +void +init_lex() +{ + maxtoken = 100; + token_buffer = NEW2 (maxtoken + 1, char); + unlexed = -1; +} + + +static char * +grow_token_buffer (p) + char *p; +{ + int offset = p - token_buffer; + maxtoken *= 2; + token_buffer = (char *) xrealloc(token_buffer, maxtoken + 1); + return token_buffer + offset; +} + + +int +skip_white_space() +{ + register int c; + register int inside; + + c = getc(finput); + + for (;;) + { + int cplus_comment; + + switch (c) + { + case '/': + c = getc(finput); + if (c != '*' && c != '/') + fatals("unexpected `/%c' found",c); + cplus_comment = (c == '/'); + + c = getc(finput); + + inside = 1; + while (inside) + { + if (!cplus_comment && c == '*') + { + while (c == '*') + c = getc(finput); + + if (c == '/') + { + inside = 0; + c = getc(finput); + } + } + else if (c == '\n') + { + lineno++; + if (cplus_comment) + inside = 0; + c = getc(finput); + } + else if (c == EOF) + fatal("unterminated comment"); + else + c = getc(finput); + } + + break; + + case '\n': + lineno++; + + case ' ': + case '\t': + case '\f': + c = getc(finput); + break; + + default: + return (c); + } + } +} + + +void +unlex(token) +int token; +{ + unlexed = token; + unlexed_symval = symval; +} + + + +int +lex() +{ + register int c; + register char *p; + + if (unlexed >= 0) + { + symval = unlexed_symval; + c = unlexed; + unlexed = -1; + return (c); + } + + c = skip_white_space(); + + switch (c) + { + case EOF: + return (ENDFILE); + + case 'A': case 'B': case 'C': case 'D': case 'E': + case 'F': case 'G': case 'H': case 'I': case 'J': + case 'K': case 'L': case 'M': case 'N': case 'O': + case 'P': case 'Q': case 'R': case 'S': case 'T': + case 'U': case 'V': case 'W': case 'X': case 'Y': + case 'Z': + case 'a': case 'b': case 'c': case 'd': case 'e': + case 'f': case 'g': case 'h': case 'i': case 'j': + case 'k': case 'l': case 'm': case 'n': case 'o': + case 'p': case 'q': case 'r': case 's': case 't': + case 'u': case 'v': case 'w': case 'x': case 'y': + case 'z': + case '.': case '_': + p = token_buffer; + while (isalnum(c) || c == '_' || c == '.') + { + if (p == token_buffer + maxtoken) + p = grow_token_buffer(p); + + *p++ = c; + c = getc(finput); + } + + *p = 0; + ungetc(c, finput); + symval = getsym(token_buffer); + return (IDENTIFIER); + + case '0': case '1': case '2': case '3': case '4': + case '5': case '6': case '7': case '8': case '9': + { + numval = 0; + + while (isdigit(c)) + { + numval = numval*10 + c - '0'; + c = getc(finput); + } + ungetc(c, finput); + return (NUMBER); + } + + case '\'': + translations = -1; + + /* parse the literal token and compute character code in code */ + + c = getc(finput); + { + register int code = 0; + + if (c == '\\') + { + c = getc(finput); + + if (c <= '7' && c >= '0') + { + while (c <= '7' && c >= '0') + { + code = (code * 8) + (c - '0'); + c = getc(finput); + if (code >= 256 || code < 0) + fatals("malformatted literal token `\\%03o'", code); + } + } + else + { + if (c == 't') + code = '\t'; + else if (c == 'n') + code = '\n'; + else if (c == 'a') + code = '\007'; + else if (c == 'r') + code = '\r'; + else if (c == 'f') + code = '\f'; + else if (c == 'b') + code = '\b'; + else if (c == 'v') + code = 013; + else if (c == 'x') + { + c = getc(finput); + while ((c <= '9' && c >= '0') + || (c >= 'a' && c <= 'z') + || (c >= 'A' && c <= 'Z')) + { + code *= 16; + if (c <= '9' && c >= '0') + code += c - '0'; + else if (c >= 'a' && c <= 'z') + code += c - 'a' + 10; + else if (c >= 'A' && c <= 'Z') + code += c - 'A' + 10; + if (code >= 256 || code<0)/* JF this said if(c>=128) */ + fatals("malformatted literal token `\\x%x'",code); + c = getc(finput); + } + ungetc(c, finput); + } + else if (c == '\\') + code = '\\'; + else if (c == '\'') + code = '\''; + else if (c == '\"') /* JF this is a good idea */ + code = '\"'; + else + { + if (c >= 040 && c <= 0177) + fatals ("unknown escape sequence `\\%c'", c); + else + fatals ("unknown escape sequence: `\\' followed by char code 0x%x", c); + } + + c = getc(finput); + } + } + else + { + code = c; + c = getc(finput); + } + if (c != '\'') + fatal("multicharacter literal tokens not supported"); + + /* now fill token_buffer with the canonical name for this character + as a literal token. Do not use what the user typed, + so that '\012' and '\n' can be interchangeable. */ + + p = token_buffer; + *p++ = '\''; + if (code == '\\') + { + *p++ = '\\'; + *p++ = '\\'; + } + else if (code == '\'') + { + *p++ = '\\'; + *p++ = '\''; + } + else if (code >= 040 && code != 0177) + *p++ = code; + else if (code == '\t') + { + *p++ = '\\'; + *p++ = 't'; + } + else if (code == '\n') + { + *p++ = '\\'; + *p++ = 'n'; + } + else if (code == '\r') + { + *p++ = '\\'; + *p++ = 'r'; + } + else if (code == '\v') + { + *p++ = '\\'; + *p++ = 'v'; + } + else if (code == '\b') + { + *p++ = '\\'; + *p++ = 'b'; + } + else if (code == '\f') + { + *p++ = '\\'; + *p++ = 'f'; + } + else + { + *p++ = code / 0100 + '0'; + *p++ = ((code / 010) & 07) + '0'; + *p++ = (code & 07) + '0'; + } + *p++ = '\''; + *p = 0; + symval = getsym(token_buffer); + symval->class = STOKEN; + if (! symval->user_token_number) + symval->user_token_number = code; + return (IDENTIFIER); + } + + case ',': + return (COMMA); + + case ':': + return (COLON); + + case ';': + return (SEMICOLON); + + case '|': + return (BAR); + + case '{': + return (LEFT_CURLY); + + case '=': + do + { + c = getc(finput); + if (c == '\n') lineno++; + } + while(c==' ' || c=='\n' || c=='\t'); + + if (c == '{') + return(LEFT_CURLY); + else + { + ungetc(c, finput); + return(ILLEGAL); + } + + case '<': + p = token_buffer; + c = getc(finput); + while (c != '>') + { + if (c == '\n' || c == EOF) + fatal("unterminated type name"); + + if (p == token_buffer + maxtoken) + p = grow_token_buffer(p); + + *p++ = c; + c = getc(finput); + } + *p = 0; + return (TYPENAME); + + + case '%': + return (parse_percent_token()); + + default: + return (ILLEGAL); + } +} + + +/* parse a token which starts with %. Assumes the % has already been read and discarded. */ + +int +parse_percent_token () +{ + register int c; + register char *p; + + p = token_buffer; + c = getc(finput); + + switch (c) + { + case '%': + return (TWO_PERCENTS); + + case '{': + return (PERCENT_LEFT_CURLY); + + case '<': + return (LEFT); + + case '>': + return (RIGHT); + + case '2': + return (NONASSOC); + + case '0': + return (TOKEN); + + case '=': + return (PREC); + } + if (!isalpha(c)) + return (ILLEGAL); + + while (isalpha(c) || c == '_') + { + if (p == token_buffer + maxtoken) + p = grow_token_buffer(p); + + *p++ = c; + c = getc(finput); + } + + ungetc(c, finput); + + *p = 0; + + if (strcmp(token_buffer, "token") == 0 + || + strcmp(token_buffer, "term") == 0) + return (TOKEN); + else if (strcmp(token_buffer, "nterm") == 0) + return (NTERM); + else if (strcmp(token_buffer, "type") == 0) + return (TYPE); + else if (strcmp(token_buffer, "guard") == 0) + return (GUARD); + else if (strcmp(token_buffer, "union") == 0) + return (UNION); + else if (strcmp(token_buffer, "expect") == 0) + return (EXPECT); + else if (strcmp(token_buffer, "start") == 0) + return (START); + else if (strcmp(token_buffer, "left") == 0) + return (LEFT); + else if (strcmp(token_buffer, "right") == 0) + return (RIGHT); + else if (strcmp(token_buffer, "nonassoc") == 0 + || + strcmp(token_buffer, "binary") == 0) + return (NONASSOC); + else if (strcmp(token_buffer, "semantic_parser") == 0) + return (SEMANTIC_PARSER); + else if (strcmp(token_buffer, "pure_parser") == 0) + return (PURE_PARSER); + else if (strcmp(token_buffer, "prec") == 0) + return (PREC); + else return (ILLEGAL); +} diff --git a/src/symtab.c b/src/symtab.c new file mode 100644 index 0000000..adfe390 --- /dev/null +++ b/src/symtab.c @@ -0,0 +1,150 @@ +/* Symbol table manager for Bison, + Copyright (C) 1984, 1989 Free Software Foundation, Inc. + +This file is part of Bison, the GNU Compiler Compiler. + +Bison is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 2, or (at your option) +any later version. + +Bison is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with Bison; see the file COPYING. If not, write to +the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA. */ + + +#include +#include "system.h" +#include "new.h" +#include "symtab.h" +#include "gram.h" + + +bucket **symtab; +bucket *firstsymbol; +bucket *lastsymbol; + + + +int +hash(key) +char *key; +{ + register char *cp; + register int k; + + cp = key; + k = 0; + while (*cp) + k = ((k << 1) ^ (*cp++)) & 0x3fff; + + return (k % TABSIZE); +} + + + +char * +copys(s) +char *s; +{ + register int i; + register char *cp; + register char *result; + + i = 1; + for (cp = s; *cp; cp++) + i++; + + result = xmalloc((unsigned int)i); + strcpy(result, s); + return (result); +} + + +void +tabinit() +{ +/* register int i; JF unused */ + + symtab = NEW2(TABSIZE, bucket *); + + firstsymbol = NULL; + lastsymbol = NULL; +} + + +bucket * +getsym(key) +char *key; +{ + register int hashval; + register bucket *bp; + register int found; + + hashval = hash(key); + bp = symtab[hashval]; + + found = 0; + while (bp != NULL && found == 0) + { + if (strcmp(key, bp->tag) == 0) + found = 1; + else + bp = bp->link; + } + + if (found == 0) + { + nsyms++; + + bp = NEW(bucket); + bp->link = symtab[hashval]; + bp->next = NULL; + bp->tag = copys(key); + bp->class = SUNKNOWN; + + if (firstsymbol == NULL) + { + firstsymbol = bp; + lastsymbol = bp; + } + else + { + lastsymbol->next = bp; + lastsymbol = bp; + } + + symtab[hashval] = bp; + } + + return (bp); +} + + +void +free_symtab() +{ + register int i; + register bucket *bp,*bptmp;/* JF don't use ptr after free */ + + for (i = 0; i < TABSIZE; i++) + { + bp = symtab[i]; + while (bp) + { + bptmp = bp->link; +#if 0 /* This causes crashes because one string can appear more than once. */ + if (bp->type_name) + FREE(bp->type_name); +#endif + FREE(bp); + bp = bptmp; + } + } + FREE(symtab); +} -- 2.7.4