Fixes issue #25 (char 255 aliased to -1, and missing tests for end of input).
1) All layers of input scanning now share a single EndOfInput value.
This avoids translation of it across layers of encapsulation.
2) Some places looking for end of line were not stopping on EndOfInput.
3) Use of "char" for the input caused character values > 127 to become negative numbers.
This allowed aliasing of 255 to -1, etc. This is fixed by reading the input as
unsigned char.
--- /dev/null
+#ifþ
+#endif
+#error AÿB
+#if\ 1
+#endif
+int aÿ
--- /dev/null
+badChars.frag\r
+ERROR: 0:1: 'preprocessor evaluation' : bad expression \r
+ERROR: 0:1: '#if' : unexpected tokens following directive \r
+ERROR: 0:3: '#error' : A <bad token> B \r
+ERROR: 0:4: 'preprocessor evaluation' : bad expression \r
+ERROR: 0:4: '#if' : unexpected tokens following directive \r
+ERROR: 0:6: 'ÿ' : unexpected token \r
+ERROR: 0:7: '' : syntax error\r
+ERROR: 7 compilation errors. No code generated.\r
+\r
+\r
+Shader version: 100\r
+ERROR: node is still EOpNull!\r
+0:? Linker Objects\r
+0:? 'a' (global mediump int)\r
+\r
+\r
+Linked fragment stage:\r
+\r
+ERROR: Linking fragment stage: Missing entry point: Each stage requires one "void main()" entry point\r
+\r
+Shader version: 100\r
+ERROR: node is still EOpNull!\r
+0:? Linker Objects\r
+0:? 'a' (global mediump int)\r
+\r
ERROR: 0:81: 'preprocessor evaluation' : bad expression \r
ERROR: 0:81: '#if' : unexpected tokens following directive \r
ERROR: 0:82: '#error' : good macro \r
-ERROR: 0:87: 'macro expansion' : end of line in macro substitution: foobar\r
+ERROR: 0:87: 'macro expansion' : End of line in macro substitution: foobar\r
ERROR: 0:88: 'preprocessor evaluation' : can't evaluate expression \r
ERROR: 0:88: 'preprocessor evaluation' : bad expression \r
ERROR: 0:88: '#if' : unexpected tokens following directive \r
-ERROR: 0:92: 'macro expansion' : end of line in macro substitution: foobar\r
+ERROR: 0:92: 'macro expansion' : End of line in macro substitution: foobar\r
ERROR: 0:93: 'preprocessor evaluation' : can't evaluate expression \r
ERROR: 0:93: 'preprocessor evaluation' : bad expression \r
ERROR: 0:93: '#if' : unexpected tokens following directive \r
-ERROR: 0:99: 'macro expansion' : end of line in macro substitution: foobar\r
+ERROR: 0:99: 'macro expansion' : End of line in macro substitution: foobar\r
ERROR: 0:100: 'preprocessor evaluation' : can't evaluate expression \r
ERROR: 0:100: 'preprocessor evaluation' : bad expression \r
ERROR: 0:100: '#if' : unexpected tokens following directive \r
-ERROR: 0:101: 'macro expansion' : end of line in macro substitution: foobar\r
+ERROR: 0:101: 'macro expansion' : End of line in macro substitution: foobar\r
ERROR: 0:102: 'preprocessor evaluation' : can't evaluate expression \r
ERROR: 0:102: 'preprocessor evaluation' : bad expression \r
ERROR: 0:102: '#if' : unexpected tokens following directive \r
ERROR: 0:152: '#else' : #else after #else \r
ERROR: 0:161: '#elif' : #elif after #else \r
ERROR: 0:169: '#else' : #else after #else \r
-ERROR: 0:177: 'macro expansion' : EOF in macro FUNC\r
+ERROR: 0:177: 'macro expansion' : End of input in macro FUNC\r
ERROR: 0:178: '' : syntax error\r
ERROR: 6 compilation errors. No code generated.\r
\r
ERROR: 0:50: '@' : unexpected token \r
ERROR: 0:55: '#error' : good continuation \r
WARNING: 0:62: 'line continuation' : used at end of comment; the following line is still part of the comment \r
-ERROR: 0:111: 'macro expansion' : end of line in macro substitution: FOOM\r
+ERROR: 0:111: 'macro expansion' : End of line in macro substitution: FOOM\r
ERROR: 0:112: 'preprocessor evaluation' : can't evaluate expression \r
ERROR: 0:112: '#if' : unexpected tokens following directive \r
-ERROR: 0:117: 'macro expansion' : end of line in macro substitution: FOOM\r
+ERROR: 0:117: 'macro expansion' : End of line in macro substitution: FOOM\r
ERROR: 0:118: 'preprocessor evaluation' : can't evaluate expression \r
ERROR: 0:118: '#if' : unexpected tokens following directive \r
ERROR: 0:150: '' : syntax error\r
cppIndent.vert
cppNest.vert
cppComplexExpr.vert
+badChars.frag
pointCoord.frag
array.frag
array100.frag
{
if (afterEOF) {
if (tokensBeforeEOF == 1)
- error(getCurrentLoc(), "", "pre-mature EOF", s, "");
+ error(getCurrentLoc(), "", "premature end of input", s, "");
} else
error(getCurrentLoc(), "", "", s, "");
}
get(); // consume the second '/'
c = get();
do {
- while (c > 0 && c != '\\' && c != '\r' && c != '\n')
+ while (c != EndOfInput && c != '\\' && c != '\r' && c != '\n')
c = get();
- if (c <= 0 || c == '\r' || c == '\n') {
+ if (c == EndOfInput || c == '\r' || c == '\n') {
while (c == '\r' || c == '\n')
c = get();
} while (true);
// put back the last non-comment character
- if (c > 0)
+ if (c != EndOfInput)
unget();
return true;
get(); // consume the '*'
c = get();
do {
- while (c > 0 && c != '*')
+ while (c != EndOfInput && c != '*')
c = get();
if (c == '*') {
c = get();
// if not starting a comment now, then done
int c = peek();
- if (c != '/' || c < 0)
+ if (c != '/' || c == EndOfInput)
return;
// skip potential comment
} else
do {
c = get();
- } while (c > 0 && c != '\n' && c != '\r');
+ } while (c != EndOfInput && c != '\n' && c != '\r');
while (peek() == '\n' || peek() == '\r')
get();
- if (peek() < 0)
+ if (peek() == EndOfInput)
return true;
}
lookingInMiddle = true;
char profileString[maxProfileLength];
int profileLength;
for (profileLength = 0; profileLength < maxProfileLength; ++profileLength) {
- if (c < 0 || c == ' ' || c == '\t' || c == '\n' || c == '\r')
+ if (c == EndOfInput || c == ' ' || c == '\t' || c == '\n' || c == '\r')
break;
profileString[profileLength] = (char)c;
c = get();
}
- if (c > 0 && c != ' ' && c != '\t' && c != '\n' && c != '\r') {
+ if (c != EndOfInput && c != ' ' && c != '\t' && c != '\n' && c != '\r') {
versionNotFirst = true;
continue;
}
case PpAtomConstDouble: parserToken->sType.lex.d = ppToken.dval; return DOUBLECONSTANT;
case PpAtomIdentifier: return tokenizeIdentifier();
- case EOF: return 0;
-
+ case EndOfInput: return 0;
+
default:
char buf[2];
buf[0] = (char)ppToken.token;
namespace glslang {
+// Use a global end-of-input character, so no translation is needed across
+// layers of encapsulation. Characters are all 8 bit, and positive, so there is
+// no aliasing of character 255 onto -1, for example.
+const int EndOfInput = -1;
+
//
// A character scanner that seamlessly, on read-only strings, reads across an
// array of strings without assuming null termination.
class TInputScanner {
public:
TInputScanner(int n, const char* const s[], size_t L[], int b = 0, int f = 0) :
- numSources(n), sources(s), lengths(L), currentSource(0), currentChar(0), stringBias(b), finale(f)
+ numSources(n),
+ sources(reinterpret_cast<const unsigned char* const *>(s)), // up to this point, common usage is "char*", but now we need positive 8-bit characters
+ lengths(L), currentSource(0), currentChar(0), stringBias(b), finale(f)
{
loc = new TSourceLoc[numSources];
loc[currentSource].string = -stringBias;
delete [] loc;
}
- // return of -1 means end of strings,
- // anything else is the next character
-
// retrieve the next character and advance one character
int get()
{
if (currentSource >= numSources)
- return -1;
+ return EndOfInput;
int ret = peek();
++loc[currentSource].column;
int peek()
{
if (currentSource >= numSources)
- return -1;
+ return EndOfInput;
// Make sure we do not read off the end of a string.
// N.B. Sources can have a length of 0.
int sourceToRead = currentSource;
charToRead = 0;
sourceToRead += 1;
if (sourceToRead >= numSources) {
- return -1;
+ return EndOfInput;
}
}
+
+ // Here, we care about making negative valued characters positive
return sources[sourceToRead][charToRead];
}
--currentChar;
--loc[currentSource].column;
if (loc[currentSource].column < 0) {
- // We've moved back past a new line. Find the
- // previous newline (or start of the file) to compute
- // the column count on the now current line.
- size_t ch = currentChar;
- while(ch > 0) {
- if (sources[currentSource][ch] == '\n') {
- break;
+ // We've moved back past a new line. Find the
+ // previous newline (or start of the file) to compute
+ // the column count on the now current line.
+ size_t chIndex = currentChar;
+ while (chIndex > 0) {
+ if (sources[currentSource][chIndex] == '\n') {
+ break;
+ }
+ --chIndex;
}
- --ch;
- }
- loc[currentSource].column = (int)(currentChar - ch);
+ loc[currentSource].column = (int)(currentChar - chIndex);
}
} else {
do {
}
}
- int numSources; // number of strings in source
- const char* const *sources; // array of strings
- const size_t *lengths; // length of each string
+ int numSources; // number of strings in source
+ const unsigned char* const *sources; // array of strings; must be converted to positive values on use, to avoid aliasing with -1 as EndOfInput
+ const size_t *lengths; // length of each string
int currentSource;
size_t currentChar;
outputStream << "#error " << errorMessage;
});
- int lastToken = EOF; // lastToken records the last token processed.
+ int lastToken = EndOfInput; // lastToken records the last token processed.
while (const char* tok = ppContext.tokenize(&token)) {
bool isNewString = lineSync.syncToMostRecentString();
bool isNewLine = lineSync.syncToLine(token.loc.line);
// Output a space in between tokens, but not at the start of a line,
// and also not around special tokens. This helps with readability
// and consistency.
- if (!isNewString && !isNewLine && lastToken != -1 &&
+ if (!isNewString && !isNewLine && lastToken != EndOfInput &&
(unNeededSpaceTokens.find((char)token.token) == std::string::npos) &&
(unNeededSpaceTokens.find((char)lastToken) == std::string::npos) &&
(noSpaceBeforeTokens.find((char)token.token) == std::string::npos)) {
}
token = scanToken(ppToken);
} while (token == ',');
- if (token != ')') {
+ if (token != ')') {
parseContext.ppError(ppToken->loc, "missing parenthesis", "#define", "");
return token;
// record the definition of the macro
TSourceLoc defineLoc = ppToken->loc; // because ppToken is going to go to the next line before we report errors
mac.body = new TokenStream;
- while (token != '\n') {
+ while (token != '\n' && token != EndOfInput) {
RecordToken(mac.body, token, ppToken);
token = scanToken(ppToken);
if (token != '\n' && ppToken->space)
int depth = 0;
int token = scanToken(ppToken);
- while (token != EOF) {
+ while (token != EndOfInput) {
if (token != '#') {
- while (token != '\n' && token != EOF)
+ while (token != '\n' && token != EndOfInput)
token = scanToken(ppToken);
- if (token == EOF)
- return EOF;
+ if (token == EndOfInput)
+ return token;
token = scanToken(ppToken);
continue;
// Call when there should be no more tokens left on a line.
int TPpContext::extraTokenCheck(int atom, TPpToken* ppToken, int token)
{
- if (token != '\n') {
+ if (token != '\n' && token != EndOfInput) {
static const char* message = "unexpected tokens following directive";
const char* label;
else
parseContext.ppError(ppToken->loc, message, label, "");
- while (token != '\n')
+ while (token != '\n' && token != EndOfInput)
token = scanToken(ppToken);
}
token = scanToken(ppToken);
if (token != '\n') {
parseContext.ppError(ppToken->loc, "unexpected tokens following #ifdef directive - expected a newline", "#ifdef", "");
- while (token != '\n')
+ while (token != '\n' && token != EndOfInput)
token = scanToken(ppToken);
}
if (((s && !s->mac.undef) ? 1 : 0) != defined)
std::string message;
TSourceLoc loc = ppToken->loc;
- while (token != '\n') {
+ while (token != '\n' && token != EndOfInput) {
if (token == PpAtomConstInt || token == PpAtomConstUint ||
token == PpAtomConstFloat || token == PpAtomConstDouble) {
message.append(ppToken->name);
TSourceLoc loc = ppToken->loc; // because we go to the next line before processing
int token = scanToken(ppToken);
- while (token != '\n' && token != EOF) {
+ while (token != '\n' && token != EndOfInput) {
switch (token) {
case PpAtomIdentifier:
case PpAtomConstInt:
token = scanToken(ppToken);
}
- if (token == EOF)
+ if (token == EndOfInput)
parseContext.ppError(loc, "directive must end with a newline", "#pragma", "");
else
parseContext.handlePragma(loc, tokens);
parseContext.ppError(ppToken->loc, "#elif after #else", "#elif", "");
// this token is really a dont care, but we still need to eat the tokens
token = scanToken(ppToken);
- while (token != '\n')
+ while (token != '\n' && token != EndOfInput)
token = scanToken(ppToken);
token = CPPelse(0, ppToken);
break;
parseContext.ppError(ppToken->loc, "invalid directive:", "#", ppToken->name);
break;
}
- } else if (token != '\n' && token != EOF)
+ } else if (token != '\n' && token != EndOfInput)
parseContext.ppError(ppToken->loc, "invalid directive", "#", "");
- while (token != '\n' && token != 0 && token != EOF)
+ while (token != '\n' && token != EndOfInput)
token = scanToken(ppToken);
return token;
token = ReadToken(a, ppToken);
if (token == PpAtomIdentifier && LookUpSymbol(ppToken->atom))
break;
- } while (token != tInput::endOfInput);
+ } while (token != EndOfInput);
- if (token == tInput::endOfInput)
+ if (token == EndOfInput)
return a;
n = new TokenStream;
}
}
- if (token == endOfInput)
+ if (token == EndOfInput)
mac->busy = 0;
return token;
}
-// return a zero, for scanning a macro that was never defined
+// return a textual zero, for scanning a macro that was never defined
int TPpContext::tZeroInput::scan(TPpToken* ppToken)
{
if (done)
- return endOfInput;
+ return EndOfInput;
strcpy(ppToken->name, "0");
ppToken->ival = 0;
depth = 0;
while (1) {
token = scanToken(ppToken);
- if (token == EOF) {
- parseContext.ppError(loc, "EOF in macro", "macro expansion", GetAtomString(atom));
+ if (token == EndOfInput) {
+ parseContext.ppError(loc, "End of input in macro", "macro expansion", GetAtomString(atom));
delete in;
return 0;
}
if (token == '\n') {
if (! newLineOkay) {
- parseContext.ppError(loc, "end of line in macro substitution:", "macro expansion", GetAtomString(atom));
+ parseContext.ppError(loc, "End of line in macro substitution:", "macro expansion", GetAtomString(atom));
delete in;
return 0;
}
parseContext.ppError(loc, "Too few args in Macro", "macro expansion", GetAtomString(atom));
else if (token != ')') {
depth=0;
- while (token != EOF && (depth > 0 || token != ')')) {
+ while (token != EndOfInput && (depth > 0 || token != ')')) {
if (token == ')')
depth--;
token = scanToken(ppToken);
depth++;
}
- if (token == EOF) {
- parseContext.ppError(loc, "EOF in macro", "macro expansion", GetAtomString(atom));
+ if (token == EndOfInput) {
+ parseContext.ppError(loc, "End of input in macro", "macro expansion", GetAtomString(atom));
delete in;
return 0;
}
//
const char* TPpContext::GetAtomString(int atom)
{
- if (atom == 0)
- return "<null atom>";
- if (atom < 0)
- return "<EOF>";
- if ((size_t)atom < stringMap.size()) {
- if (stringMap[atom] == 0)
- return "<invalid atom>";
- else
- return stringMap[atom]->c_str();
- }
+ if ((size_t)atom >= stringMap.size())
+ return "<bad token>";
+
+ const TString* atomString = stringMap[atom];
- return "<invalid atom>";
+ return atomString ? atomString->c_str() : "<bad token>";
}
//
virtual int getch() = 0;
virtual void ungetch() = 0;
- static const int endOfInput = -2;
-
protected:
bool done;
TPpContext* pp;
// Get the next token from *stack* of input sources, popping input sources
// that are out of tokens, down until an input sources is found that has a token.
- // Return EOF when there are no more tokens to be found by doing this.
+ // Return EndOfInput when there are no more tokens to be found by doing this.
int scanToken(TPpToken* ppToken)
{
- int token = EOF;
+ int token = EndOfInput;
while (! inputStack.empty()) {
token = inputStack.back()->scan(ppToken);
- if (token != tInput::endOfInput)
+ if (token != EndOfInput)
break;
popInput();
}
- if (token == tInput::endOfInput)
- return EOF;
-
return token;
}
int getChar() { return inputStack.back()->getch(); }
}
virtual int scan(TPpToken*);
- virtual int getch() { assert(0); return endOfInput; }
+ virtual int getch() { assert(0); return EndOfInput; }
virtual void ungetch() { assert(0); }
MacroSymbol *mac;
TVector<TokenStream*> args;
virtual int scan(TPpToken*)
{
if (done)
- return endOfInput;
+ return EndOfInput;
done = true;
return marker;
}
- virtual int getch() { assert(0); return endOfInput; }
+ virtual int getch() { assert(0); return EndOfInput; }
virtual void ungetch() { assert(0); }
static const int marker = -3;
};
public:
tZeroInput(TPpContext* pp) : tInput(pp) { }
virtual int scan(TPpToken*);
- virtual int getch() { assert(0); return endOfInput; }
+ virtual int getch() { assert(0); return EndOfInput; }
virtual void ungetch() { assert(0); }
};
public:
tTokenInput(TPpContext* pp, TokenStream* t) : tInput(pp), tokens(t) { }
virtual int scan(TPpToken *);
- virtual int getch() { assert(0); return endOfInput; }
+ virtual int getch() { assert(0); return EndOfInput; }
virtual void ungetch() { assert(0); }
protected:
TokenStream *tokens;
public:
tUngotTokenInput(TPpContext* pp, int t, TPpToken* p) : tInput(pp), token(t), lval(*p) { }
virtual int scan(TPpToken *);
- virtual int getch() { assert(0); return endOfInput; }
+ virtual int getch() { assert(0); return EndOfInput; }
virtual void ungetch() { assert(0); }
protected:
int token;
len = 0;
switch (ch) {
default:
- // Single character token, including '#' and '\' (escaped newlines are handled at a lower level, so this is just a '\' token)
+ // Single character token, including EndOfInput, '#' and '\' (escaped newlines are handled at a lower level, so this is just a '\' token)
return ch;
- case EOF:
- return endOfInput;
-
case 'A': case 'B': case 'C': case 'D': case 'E':
case 'F': case 'G': case 'H': case 'I': case 'J':
case 'K': case 'L': case 'M': case 'N': case 'O':
pp->inComment = true;
do {
ch = pp->getChar();
- } while (ch != '\n' && ch != EOF);
+ } while (ch != '\n' && ch != EndOfInput);
ppToken->space = true;
pp->inComment = false;
- if (ch == EOF)
- return endOfInput;
-
return ch;
} else if (ch == '*') {
ch = pp->getChar();
do {
while (ch != '*') {
- if (ch == EOF) {
- pp->parseContext.ppError(ppToken->loc, "EOF in comment", "comment", "");
- return endOfInput;
+ if (ch == EndOfInput) {
+ pp->parseContext.ppError(ppToken->loc, "End of input in comment", "comment", "");
+ return ch;
}
ch = pp->getChar();
}
ch = pp->getChar();
- if (ch == EOF) {
- pp->parseContext.ppError(ppToken->loc, "EOF in comment", "comment", "");
- return endOfInput;
+ if (ch == EndOfInput) {
+ pp->parseContext.ppError(ppToken->loc, "End of input in comment", "comment", "");
+ return ch;
}
} while (ch != '/');
ppToken->space = true;
break;
case '"':
ch = pp->getChar();
- while (ch != '"' && ch != '\n' && ch != EOF) {
+ while (ch != '"' && ch != '\n' && ch != EndOfInput) {
if (len < MaxTokenLength) {
tokenText[len] = (char)ch;
len++;
tokenText[len] = '\0';
if (ch != '"') {
pp->ungetChar();
- pp->parseContext.ppError(ppToken->loc, "end of line in string", "string", "");
+ pp->parseContext.ppError(ppToken->loc, "End of line in string", "string", "");
}
return PpAtomConstString;
}
for(;;) {
token = scanToken(ppToken);
ppToken->token = token;
- if (token == EOF) {
+ if (token == EndOfInput) {
missingEndifCheck();
return nullptr;
}
if (token == '#') {
if (previous_token == '\n') {
token = readCPPline(ppToken);
- if (token == EOF) {
+ if (token == EndOfInput) {
missingEndifCheck();
return nullptr;
}
if (pTok->current < pTok->data.size())
return pTok->data[pTok->current++];
else
- return tInput::endOfInput;
+ return EndOfInput;
}
void TPpContext::lUnreadByte(TokenStream *pTok)
int TPpContext::tUngotTokenInput::scan(TPpToken* ppToken)
{
if (done)
- return endOfInput;
+ return EndOfInput;
int ret = token;
*ppToken = lval;