[clang] Add a new annotation token: annot_repl_input_end

author Jun Zhang <jun@junz.org>

Tue, 16 May 2023 12:10:43 +0000 (20:10 +0800)

committer Jun Zhang <jun@junz.org>

Tue, 16 May 2023 12:10:43 +0000 (20:10 +0800)
author Jun Zhang <jun@junz.org>
Tue, 16 May 2023 12:10:43 +0000 (20:10 +0800)
committer Jun Zhang <jun@junz.org>
Tue, 16 May 2023 12:10:43 +0000 (20:10 +0800)
diff --git a/clang/include/clang/AST/Decl.h b/clang/include/clang/AST/Decl.h

index 650baf5..37512cc 100644 (file)
--- a/clang/include/clang/AST/Decl.h
+++ b/clang/include/clang/AST/Decl.h
@@ -4324,6 +4324,7 @@ class TopLevelStmtDecl : public Decl {
    friend class ASTDeclWriter;
  
    Stmt *Statement = nullptr;
+  bool IsSemiMissing = false;
  
    TopLevelStmtDecl(DeclContext *DC, SourceLocation L, Stmt *S)
        : Decl(TopLevelStmt, DC, L), Statement(S) {}
@@ -4337,6 +4338,12 @@ public:
    SourceRange getSourceRange() const override LLVM_READONLY;
    Stmt *getStmt() { return Statement; }
    const Stmt *getStmt() const { return Statement; }
+  void setStmt(Stmt *S) {
+    assert(IsSemiMissing && "Operation supported for printing values only!");
+    Statement = S;
+  }
+  bool isSemiMissing() const { return IsSemiMissing; }
+  void setSemiMissing(bool Missing = true) { IsSemiMissing = Missing; }
  
    static bool classof(const Decl *D) { return classofKind(D->getKind()); }
    static bool classofKind(Kind K) { return K == TopLevelStmt; }
diff --git a/clang/include/clang/Basic/TokenKinds.def b/clang/include/clang/Basic/TokenKinds.def

index f17a602..ae67209 100644 (file)
--- a/clang/include/clang/Basic/TokenKinds.def
+++ b/clang/include/clang/Basic/TokenKinds.def
@@ -942,6 +942,9 @@ ANNOTATION(module_end)
  // into the name of a header unit.
  ANNOTATION(header_unit)
  
+// Annotation for end of input in clang-repl.
+ANNOTATION(repl_input_end)
+
  #undef PRAGMA_ANNOTATION
  #undef ANNOTATION
  #undef TESTING_KEYWORD
diff --git a/clang/include/clang/Parse/Parser.h b/clang/include/clang/Parse/Parser.h

index fc892d7..17aa11b 100644 (file)
--- a/clang/include/clang/Parse/Parser.h
+++ b/clang/include/clang/Parse/Parser.h
@@ -18,6 +18,7 @@
  #include "clang/Basic/OpenMPKinds.h"
  #include "clang/Basic/OperatorPrecedence.h"
  #include "clang/Basic/Specifiers.h"
+#include "clang/Basic/TokenKinds.h"
  #include "clang/Lex/CodeCompletionHandler.h"
  #include "clang/Lex/Preprocessor.h"
  #include "clang/Sema/DeclSpec.h"
@@ -692,7 +693,8 @@ private:
    bool isEofOrEom() {
      tok::TokenKind Kind = Tok.getKind();
      return Kind == tok::eof || Kind == tok::annot_module_begin ||
-           Kind == tok::annot_module_end || Kind == tok::annot_module_include;
+           Kind == tok::annot_module_end || Kind == tok::annot_module_include ||
+           Kind == tok::annot_repl_input_end;
    }
  
    /// Checks if the \p Level is valid for use in a fold expression.
diff --git a/clang/lib/Frontend/PrintPreprocessedOutput.cpp b/clang/lib/Frontend/PrintPreprocessedOutput.cpp

index ffa85e5..1b262d9 100644 (file)
--- a/clang/lib/Frontend/PrintPreprocessedOutput.cpp
+++ b/clang/lib/Frontend/PrintPreprocessedOutput.cpp
@@ -663,7 +663,8 @@ void PrintPPOutputPPCallbacks::HandleWhitespaceBeforeTok(const Token &Tok,
    // them.
    if (Tok.is(tok::eof) ||
        (Tok.isAnnotation() && !Tok.is(tok::annot_header_unit) &&
-       !Tok.is(tok::annot_module_begin) && !Tok.is(tok::annot_module_end)))
+       !Tok.is(tok::annot_module_begin) && !Tok.is(tok::annot_module_end) &&
+       !Tok.is(tok::annot_repl_input_end)))
      return;
  
    // EmittedDirectiveOnThisLine takes priority over RequireSameLine.
@@ -819,6 +820,9 @@ static void PrintPreprocessedTokens(Preprocessor &PP, Token &Tok,
        // -traditional-cpp the lexer keeps /all/ whitespace, including comments.
        PP.Lex(Tok);
        continue;
+    } else if (Tok.is(tok::annot_repl_input_end)) {
+      PP.Lex(Tok);
+      continue;
      } else if (Tok.is(tok::eod)) {
        // Don't print end of directive tokens, since they are typically newlines
        // that mess up our line tracking. These come from unknown pre-processor
diff --git a/clang/lib/Interpreter/IncrementalParser.cpp b/clang/lib/Interpreter/IncrementalParser.cpp

index 373e284..2b93262 100644 (file)
--- a/clang/lib/Interpreter/IncrementalParser.cpp
+++ b/clang/lib/Interpreter/IncrementalParser.cpp
@@ -158,8 +158,8 @@ IncrementalParser::ParseOrWrapTopLevelDecl() {
    LastPTU.TUPart = C.getTranslationUnitDecl();
  
    // Skip previous eof due to last incremental input.
-  if (P->getCurToken().is(tok::eof)) {
-    P->ConsumeToken();
+  if (P->getCurToken().is(tok::annot_repl_input_end)) {
+    P->ConsumeAnyToken();
      // FIXME: Clang does not call ExitScope on finalizing the regular TU, we
      // might want to do that around HandleEndOfTranslationUnit.
      P->ExitScope();
@@ -259,14 +259,14 @@ IncrementalParser::Parse(llvm::StringRef input) {
      Token Tok;
      do {
        PP.Lex(Tok);
-    } while (Tok.isNot(tok::eof));
+    } while (Tok.isNot(tok::annot_repl_input_end));
+  } else {
+    Token AssertTok;
+    PP.Lex(AssertTok);
+    assert(AssertTok.is(tok::annot_repl_input_end) &&
+           "Lexer must be EOF when starting incremental parse!");
    }
  
-  Token AssertTok;
-  PP.Lex(AssertTok);
-  assert(AssertTok.is(tok::eof) &&
-         "Lexer must be EOF when starting incremental parse!");
-
    if (CodeGenerator *CG = getCodeGen(Act.get())) {
      std::unique_ptr<llvm::Module> M(CG->ReleaseModule());
      CG->StartModule("incr_module_" + std::to_string(PTUs.size()),
diff --git a/clang/lib/Lex/PPLexerChange.cpp b/clang/lib/Lex/PPLexerChange.cpp

index be6128e..e82e473 100644 (file)
--- a/clang/lib/Lex/PPLexerChange.cpp
+++ b/clang/lib/Lex/PPLexerChange.cpp
@@ -535,13 +535,19 @@ bool Preprocessor::HandleEndOfFile(Token &Result, bool isEndOfMacro) {
        return LeavingSubmodule;
      }
    }
-
    // If this is the end of the main file, form an EOF token.
    assert(CurLexer && "Got EOF but no current lexer set!");
    const char *EndPos = getCurLexerEndPos();
    Result.startToken();
    CurLexer->BufferPtr = EndPos;
-  CurLexer->FormTokenWithChars(Result, EndPos, tok::eof);
+
+  if (isIncrementalProcessingEnabled()) {
+    CurLexer->FormTokenWithChars(Result, EndPos, tok::annot_repl_input_end);
+    Result.setAnnotationEndLoc(Result.getLocation());
+    Result.setAnnotationValue(nullptr);
+  } else {
+    CurLexer->FormTokenWithChars(Result, EndPos, tok::eof);
+  }
  
    if (isCodeCompletionEnabled()) {
      // Inserting the code-completion point increases the source buffer by 1,
diff --git a/clang/lib/Parse/ParseCXXInlineMethods.cpp b/clang/lib/Parse/ParseCXXInlineMethods.cpp

index 43a0ea4..4951eb9 100644 (file)
--- a/clang/lib/Parse/ParseCXXInlineMethods.cpp
+++ b/clang/lib/Parse/ParseCXXInlineMethods.cpp
@@ -838,6 +838,7 @@ bool Parser::ConsumeAndStoreUntil(tok::TokenKind T1, tok::TokenKind T2,
      case tok::annot_module_begin:
      case tok::annot_module_end:
      case tok::annot_module_include:
+    case tok::annot_repl_input_end:
        // Ran out of tokens.
        return false;
  
@@ -1244,6 +1245,7 @@ bool Parser::ConsumeAndStoreInitializer(CachedTokens &Toks,
      case tok::annot_module_begin:
      case tok::annot_module_end:
      case tok::annot_module_include:
+    case tok::annot_repl_input_end:
        // Ran out of tokens.
        return false;
  
diff --git a/clang/lib/Parse/ParseDecl.cpp b/clang/lib/Parse/ParseDecl.cpp

index 92fa7d8..dc93b10 100644 (file)
--- a/clang/lib/Parse/ParseDecl.cpp
+++ b/clang/lib/Parse/ParseDecl.cpp
@@ -2070,6 +2070,7 @@ void Parser::SkipMalformedDecl() {
      case tok::annot_module_begin:
      case tok::annot_module_end:
      case tok::annot_module_include:
+    case tok::annot_repl_input_end:
        return;
  
      default:
@@ -5456,6 +5457,13 @@ Parser::DeclGroupPtrTy Parser::ParseTopLevelStmtDecl() {
  
    SmallVector<Decl *, 2> DeclsInGroup;
    DeclsInGroup.push_back(Actions.ActOnTopLevelStmtDecl(R.get()));
+
+  if (Tok.is(tok::annot_repl_input_end) &&
+      Tok.getAnnotationValue() != nullptr) {
+    ConsumeAnnotationToken();
+    cast<TopLevelStmtDecl>(DeclsInGroup.back())->setSemiMissing();
+  }
+
    // Currently happens for things like  -fms-extensions and use `__if_exists`.
    for (Stmt *S : Stmts)
      DeclsInGroup.push_back(Actions.ActOnTopLevelStmtDecl(S));
diff --git a/clang/lib/Parse/ParseStmt.cpp b/clang/lib/Parse/ParseStmt.cpp

index b423c5c..bde9df0 100644 (file)
--- a/clang/lib/Parse/ParseStmt.cpp
+++ b/clang/lib/Parse/ParseStmt.cpp
@@ -544,9 +544,22 @@ StmtResult Parser::ParseExprStatement(ParsedStmtContext StmtCtx) {
      return ParseCaseStatement(StmtCtx, /*MissingCase=*/true, Expr);
    }
  
-  // Otherwise, eat the semicolon.
-  ExpectAndConsumeSemi(diag::err_expected_semi_after_expr);
-  return handleExprStmt(Expr, StmtCtx);
+  Token *CurTok = nullptr;
+  // If the semicolon is missing at the end of REPL input, the expression is
+  // a candidate for value printing. This is only enabled in C++ mode because
+  // part of the implementation requires C++ language features. Do not consume
+  // the token here: ParseTopLevelStmtDecl still needs to inspect it.
+  if (Tok.is(tok::annot_repl_input_end) && Actions.getLangOpts().CPlusPlus)
+    CurTok = &Tok;
+  else
+    // Otherwise, eat the semicolon.
+    ExpectAndConsumeSemi(diag::err_expected_semi_after_expr);
+
+  StmtResult R = handleExprStmt(Expr, StmtCtx);
+  if (CurTok && !R.isInvalid())
+    CurTok->setAnnotationValue(R.get());
+
+  return R;
  }
  
  /// ParseSEHTryBlockCommon
diff --git a/clang/lib/Parse/Parser.cpp b/clang/lib/Parse/Parser.cpp

index c0f4556..bebc3d6 100644 (file)
--- a/clang/lib/Parse/Parser.cpp
+++ b/clang/lib/Parse/Parser.cpp
@@ -320,6 +320,7 @@ bool Parser::SkipUntil(ArrayRef<tok::TokenKind> Toks, SkipUntilFlags Flags) {
      case tok::annot_module_begin:
      case tok::annot_module_end:
      case tok::annot_module_include:
+    case tok::annot_repl_input_end:
        // Stop before we change submodules. They generally indicate a "good"
        // place to pick up parsing again (except in the special case where
        // we're trying to skip to EOF).
@@ -614,11 +615,6 @@ bool Parser::ParseTopLevelDecl(DeclGroupPtrTy &Result,
                                 Sema::ModuleImportState &ImportState) {
    DestroyTemplateIdAnnotationsRAIIObj CleanupRAII(*this);
  
-  // Skip over the EOF token, flagging end of previous input for incremental
-  // processing
-  if (PP.isIncrementalProcessingEnabled() && Tok.is(tok::eof))
-    ConsumeToken();
-
    Result = nullptr;
    switch (Tok.getKind()) {
    case tok::annot_pragma_unused:
@@ -697,6 +693,7 @@ bool Parser::ParseTopLevelDecl(DeclGroupPtrTy &Result,
      return false;
  
    case tok::eof:
+  case tok::annot_repl_input_end:
      // Check whether -fmax-tokens= was reached.
      if (PP.getMaxTokens() != 0 && PP.getTokenCount() > PP.getMaxTokens()) {
        PP.Diag(Tok.getLocation(), diag::warn_max_tokens_total)
author	Jun Zhang <jun@junz.org>
	Tue, 16 May 2023 12:10:43 +0000 (20:10 +0800)
committer	Jun Zhang <jun@junz.org>
	Tue, 16 May 2023 12:10:43 +0000 (20:10 +0800)
clang/include/clang/AST/Decl.h		patch \| blob \| history
clang/include/clang/Basic/TokenKinds.def		patch \| blob \| history
clang/include/clang/Parse/Parser.h		patch \| blob \| history
clang/lib/Frontend/PrintPreprocessedOutput.cpp		patch \| blob \| history
clang/lib/Interpreter/IncrementalParser.cpp		patch \| blob \| history
clang/lib/Lex/PPLexerChange.cpp		patch \| blob \| history
clang/lib/Parse/ParseCXXInlineMethods.cpp		patch \| blob \| history
clang/lib/Parse/ParseDecl.cpp		patch \| blob \| history
clang/lib/Parse/ParseStmt.cpp		patch \| blob \| history
clang/lib/Parse/Parser.cpp		patch \| blob \| history