Restructure comment lexing to not recurse.

author River Riddle <riverriddle@google.com>

Tue, 12 Nov 2019 03:14:43 +0000 (19:14 -0800)

committer A. Unique TensorFlower <gardener@tensorflow.org>

Tue, 12 Nov 2019 03:15:13 +0000 (19:15 -0800)
author River Riddle <riverriddle@google.com>
Tue, 12 Nov 2019 03:14:43 +0000 (19:14 -0800)
committer A. Unique TensorFlower <gardener@tensorflow.org>
Tue, 12 Nov 2019 03:15:13 +0000 (19:15 -0800)
diff --git a/mlir/lib/Parser/Lexer.cpp b/mlir/lib/Parser/Lexer.cpp

index 917cf91..e8034d8 100644 (file)
--- a/mlir/lib/Parser/Lexer.cpp
+++ b/mlir/lib/Parser/Lexer.cpp
@@ -63,114 +63,107 @@ Token Lexer::emitError(const char *loc, const Twine &message) {
  }
  
  Token Lexer::lexToken() {
-  // Ignore whitespace.
    while (true) {
-    switch (*curPtr) {
+    const char *tokStart = curPtr;
+    switch (*curPtr++) {
+    default:
+      // Handle bare identifiers.
+      if (isalpha(curPtr[-1]))
+        return lexBareIdentifierOrKeyword(tokStart);
+
+      // Unknown character, emit an error.
+      return emitError(tokStart, "unexpected character");
+
      case ' ':
      case '\t':
      case '\n':
      case '\r':
-      ++curPtr;
+      // Handle whitespace.
        continue;
-    default:
-      // Terminate loop on non-whitespace, including either an embedded or
-      // final terminating nul character that llvm::MemoryBuffer guarantees
-      // will be there.
-      break;
-    }
-    break;
-  }
  
-  const char *tokStart = curPtr;
-  switch (*curPtr++) {
-  default:
-    // Handle bare identifiers.
-    if (isalpha(curPtr[-1]))
+    case '_':
+      // Handle bare identifiers.
        return lexBareIdentifierOrKeyword(tokStart);
  
-    // Unknown character, emit an error.
-    return emitError(tokStart, "unexpected character");
-
-  case '_':
-    // Handle bare identifiers.
-    return lexBareIdentifierOrKeyword(tokStart);
-
-  case 0:
-    // This may either be a nul character in the source file or may be the EOF
-    // marker that llvm::MemoryBuffer guarantees will be there.
-    if (curPtr - 1 == curBuffer.end())
-      return formToken(Token::eof, tokStart);
-
-    LLVM_FALLTHROUGH;
-  case ':':
-    return formToken(Token::colon, tokStart);
-  case ',':
-    return formToken(Token::comma, tokStart);
-  case '.':
-    return lexEllipsis(tokStart);
-  case '(':
-    return formToken(Token::l_paren, tokStart);
-  case ')':
-    return formToken(Token::r_paren, tokStart);
-  case '{':
-    return formToken(Token::l_brace, tokStart);
-  case '}':
-    return formToken(Token::r_brace, tokStart);
-  case '[':
-    return formToken(Token::l_square, tokStart);
-  case ']':
-    return formToken(Token::r_square, tokStart);
-  case '<':
-    return formToken(Token::less, tokStart);
-  case '>':
-    return formToken(Token::greater, tokStart);
-  case '=':
-    return formToken(Token::equal, tokStart);
-
-  case '+':
-    return formToken(Token::plus, tokStart);
-  case '*':
-    return formToken(Token::star, tokStart);
-  case '-':
-    if (*curPtr == '>') {
-      ++curPtr;
-      return formToken(Token::arrow, tokStart);
-    }
-    return formToken(Token::minus, tokStart);
+    case 0:
+      // This may either be a nul character in the source file or may be the EOF
+      // marker that llvm::MemoryBuffer guarantees will be there.
+      if (curPtr - 1 == curBuffer.end())
+        return formToken(Token::eof, tokStart);
  
-  case '?':
-    return formToken(Token::question, tokStart);
+      LLVM_FALLTHROUGH;
+    case ':':
+      return formToken(Token::colon, tokStart);
+    case ',':
+      return formToken(Token::comma, tokStart);
+    case '.':
+      return lexEllipsis(tokStart);
+    case '(':
+      return formToken(Token::l_paren, tokStart);
+    case ')':
+      return formToken(Token::r_paren, tokStart);
+    case '{':
+      return formToken(Token::l_brace, tokStart);
+    case '}':
+      return formToken(Token::r_brace, tokStart);
+    case '[':
+      return formToken(Token::l_square, tokStart);
+    case ']':
+      return formToken(Token::r_square, tokStart);
+    case '<':
+      return formToken(Token::less, tokStart);
+    case '>':
+      return formToken(Token::greater, tokStart);
+    case '=':
+      return formToken(Token::equal, tokStart);
+
+    case '+':
+      return formToken(Token::plus, tokStart);
+    case '*':
+      return formToken(Token::star, tokStart);
+    case '-':
+      if (*curPtr == '>') {
+        ++curPtr;
+        return formToken(Token::arrow, tokStart);
+      }
+      return formToken(Token::minus, tokStart);
  
-  case '/':
-    if (*curPtr == '/')
-      return lexComment();
-    return emitError(tokStart, "unexpected character");
+    case '?':
+      return formToken(Token::question, tokStart);
  
-  case '@':
-    return lexAtIdentifier(tokStart);
+    case '/':
+      if (*curPtr == '/') {
+        skipComment();
+        continue;
+      }
+      return emitError(tokStart, "unexpected character");
  
-  case '!':
-    LLVM_FALLTHROUGH;
-  case '^':
-    LLVM_FALLTHROUGH;
-  case '#':
-    LLVM_FALLTHROUGH;
-  case '%':
-    return lexPrefixedIdentifier(tokStart);
-  case '"':
-    return lexString(tokStart);
-
-  case '0':
-  case '1':
-  case '2':
-  case '3':
-  case '4':
-  case '5':
-  case '6':
-  case '7':
-  case '8':
-  case '9':
-    return lexNumber(tokStart);
+    case '@':
+      return lexAtIdentifier(tokStart);
+
+    case '!':
+      LLVM_FALLTHROUGH;
+    case '^':
+      LLVM_FALLTHROUGH;
+    case '#':
+      LLVM_FALLTHROUGH;
+    case '%':
+      return lexPrefixedIdentifier(tokStart);
+    case '"':
+      return lexString(tokStart);
+
+    case '0':
+    case '1':
+    case '2':
+    case '3':
+    case '4':
+    case '5':
+    case '6':
+    case '7':
+    case '8':
+    case '9':
+      return lexNumber(tokStart);
+    }
    }
  }
  
@@ -231,11 +224,11 @@ Token Lexer::lexBareIdentifierOrKeyword(const char *tokStart) {
    return Token(kind, spelling);
  }
  
-/// Lex a comment line, starting with a semicolon.
+/// Skip a comment line, starting with a '//'.
  ///
  ///   TODO: add a regex for comments here and to the spec.
  ///
-Token Lexer::lexComment() {
+void Lexer::skipComment() {
    // Advance over the second '/' in a '//' comment.
    assert(*curPtr == '/');
    ++curPtr;
@@ -245,12 +238,12 @@ Token Lexer::lexComment() {
      case '\n':
      case '\r':
        // Newline is end of comment.
-      return lexToken();
+      return;
      case 0:
        // If this is the end of the buffer, end the comment.
        if (curPtr - 1 == curBuffer.end()) {
          --curPtr;
-        return lexToken();
+        return;
        }
        LLVM_FALLTHROUGH;
      default:
diff --git a/mlir/lib/Parser/Lexer.h b/mlir/lib/Parser/Lexer.h

index b180771..a7a2ac4 100644 (file)
--- a/mlir/lib/Parser/Lexer.h
+++ b/mlir/lib/Parser/Lexer.h
@@ -59,12 +59,14 @@ private:
    // Lexer implementation methods.
    Token lexAtIdentifier(const char *tokStart);
    Token lexBareIdentifierOrKeyword(const char *tokStart);
-  Token lexComment();
    Token lexEllipsis(const char *tokStart);
    Token lexNumber(const char *tokStart);
    Token lexPrefixedIdentifier(const char *tokStart);
    Token lexString(const char *tokStart);
  
+  /// Skip a comment line, starting with a '//'.
+  void skipComment();
+
    const llvm::SourceMgr &sourceMgr;
    MLIRContext *context;
author	River Riddle <riverriddle@google.com>
	Tue, 12 Nov 2019 03:14:43 +0000 (19:14 -0800)
committer	A. Unique TensorFlower <gardener@tensorflow.org>
	Tue, 12 Nov 2019 03:15:13 +0000 (19:15 -0800)
mlir/lib/Parser/Lexer.cpp		patch | blob | history
mlir/lib/Parser/Lexer.h		patch | blob | history