}
Token Lexer::lexToken() {
- // Ignore whitespace.
while (true) {
- switch (*curPtr) {
+ const char *tokStart = curPtr;
+ switch (*curPtr++) {
+ default:
+ // Handle bare identifiers.
+ if (isalpha(curPtr[-1]))
+ return lexBareIdentifierOrKeyword(tokStart);
+
+ // Unknown character, emit an error.
+ return emitError(tokStart, "unexpected character");
+
case ' ':
case '\t':
case '\n':
case '\r':
- ++curPtr;
+ // Handle whitespace.
continue;
- default:
- // Terminate loop on non-whitespace, including either an embedded or
- // final terminating nul character that llvm::MemoryBuffer guarantees
- // will be there.
- break;
- }
- break;
- }
- const char *tokStart = curPtr;
- switch (*curPtr++) {
- default:
- // Handle bare identifiers.
- if (isalpha(curPtr[-1]))
+ case '_':
+ // Handle bare identifiers.
return lexBareIdentifierOrKeyword(tokStart);
- // Unknown character, emit an error.
- return emitError(tokStart, "unexpected character");
-
- case '_':
- // Handle bare identifiers.
- return lexBareIdentifierOrKeyword(tokStart);
-
- case 0:
- // This may either be a nul character in the source file or may be the EOF
- // marker that llvm::MemoryBuffer guarantees will be there.
- if (curPtr - 1 == curBuffer.end())
- return formToken(Token::eof, tokStart);
-
- LLVM_FALLTHROUGH;
- case ':':
- return formToken(Token::colon, tokStart);
- case ',':
- return formToken(Token::comma, tokStart);
- case '.':
- return lexEllipsis(tokStart);
- case '(':
- return formToken(Token::l_paren, tokStart);
- case ')':
- return formToken(Token::r_paren, tokStart);
- case '{':
- return formToken(Token::l_brace, tokStart);
- case '}':
- return formToken(Token::r_brace, tokStart);
- case '[':
- return formToken(Token::l_square, tokStart);
- case ']':
- return formToken(Token::r_square, tokStart);
- case '<':
- return formToken(Token::less, tokStart);
- case '>':
- return formToken(Token::greater, tokStart);
- case '=':
- return formToken(Token::equal, tokStart);
-
- case '+':
- return formToken(Token::plus, tokStart);
- case '*':
- return formToken(Token::star, tokStart);
- case '-':
- if (*curPtr == '>') {
- ++curPtr;
- return formToken(Token::arrow, tokStart);
- }
- return formToken(Token::minus, tokStart);
+ case 0:
+ // This may either be a nul character in the source file or may be the EOF
+ // marker that llvm::MemoryBuffer guarantees will be there.
+ if (curPtr - 1 == curBuffer.end())
+ return formToken(Token::eof, tokStart);
- case '?':
- return formToken(Token::question, tokStart);
+ LLVM_FALLTHROUGH;
+ case ':':
+ return formToken(Token::colon, tokStart);
+ case ',':
+ return formToken(Token::comma, tokStart);
+ case '.':
+ return lexEllipsis(tokStart);
+ case '(':
+ return formToken(Token::l_paren, tokStart);
+ case ')':
+ return formToken(Token::r_paren, tokStart);
+ case '{':
+ return formToken(Token::l_brace, tokStart);
+ case '}':
+ return formToken(Token::r_brace, tokStart);
+ case '[':
+ return formToken(Token::l_square, tokStart);
+ case ']':
+ return formToken(Token::r_square, tokStart);
+ case '<':
+ return formToken(Token::less, tokStart);
+ case '>':
+ return formToken(Token::greater, tokStart);
+ case '=':
+ return formToken(Token::equal, tokStart);
+
+ case '+':
+ return formToken(Token::plus, tokStart);
+ case '*':
+ return formToken(Token::star, tokStart);
+ case '-':
+ if (*curPtr == '>') {
+ ++curPtr;
+ return formToken(Token::arrow, tokStart);
+ }
+ return formToken(Token::minus, tokStart);
- case '/':
- if (*curPtr == '/')
- return lexComment();
- return emitError(tokStart, "unexpected character");
+ case '?':
+ return formToken(Token::question, tokStart);
- case '@':
- return lexAtIdentifier(tokStart);
+ case '/':
+ if (*curPtr == '/') {
+ skipComment();
+ continue;
+ }
+ return emitError(tokStart, "unexpected character");
- case '!':
- LLVM_FALLTHROUGH;
- case '^':
- LLVM_FALLTHROUGH;
- case '#':
- LLVM_FALLTHROUGH;
- case '%':
- return lexPrefixedIdentifier(tokStart);
- case '"':
- return lexString(tokStart);
-
- case '0':
- case '1':
- case '2':
- case '3':
- case '4':
- case '5':
- case '6':
- case '7':
- case '8':
- case '9':
- return lexNumber(tokStart);
+ case '@':
+ return lexAtIdentifier(tokStart);
+
+ case '!':
+ LLVM_FALLTHROUGH;
+ case '^':
+ LLVM_FALLTHROUGH;
+ case '#':
+ LLVM_FALLTHROUGH;
+ case '%':
+ return lexPrefixedIdentifier(tokStart);
+ case '"':
+ return lexString(tokStart);
+
+ case '0':
+ case '1':
+ case '2':
+ case '3':
+ case '4':
+ case '5':
+ case '6':
+ case '7':
+ case '8':
+ case '9':
+ return lexNumber(tokStart);
+ }
}
}
return Token(kind, spelling);
}
-/// Lex a comment line, starting with a semicolon.
+/// Skip a comment line, starting with a '//'.
///
/// TODO: add a regex for comments here and to the spec.
///
-Token Lexer::lexComment() {
+void Lexer::skipComment() {
// Advance over the second '/' in a '//' comment.
assert(*curPtr == '/');
++curPtr;
case '\n':
case '\r':
// Newline is end of comment.
- return lexToken();
+ return;
case 0:
// If this is the end of the buffer, end the comment.
if (curPtr - 1 == curBuffer.end()) {
--curPtr;
- return lexToken();
+ return;
}
LLVM_FALLTHROUGH;
default: