From: Kadir Cetinkaya Date: Thu, 27 Feb 2020 14:10:54 +0000 (+0100) Subject: [clangd] Use tokenize instead of raw lexer in SourceCode/lex X-Git-Tag: llvmorg-12-init~13403 X-Git-Url: http://review.tizen.org/git/?a=commitdiff_plain;h=98bb094c1e0580833a79d60dbc853bd1a43aa1a6;p=platform%2Fupstream%2Fllvm.git [clangd] Use tokenize instead of raw lexer in SourceCode/lex Reviewers: hokein, sammccall Subscribers: ilya-biryukov, MaskRay, jkorous, arphaman, usaxena95, cfe-commits Tags: #clang Differential Revision: https://reviews.llvm.org/D75249 --- diff --git a/clang-tools-extra/clangd/SourceCode.cpp b/clang-tools-extra/clangd/SourceCode.cpp index 2b0a857..3feddd1 100644 --- a/clang-tools-extra/clangd/SourceCode.cpp +++ b/clang-tools-extra/clangd/SourceCode.cpp @@ -23,6 +23,7 @@ #include "clang/Lex/Preprocessor.h" #include "clang/Lex/Token.h" #include "clang/Tooling/Core/Replacement.h" +#include "clang/Tooling/Syntax/Tokens.h" #include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/None.h" #include "llvm/ADT/STLExtras.h" @@ -612,31 +613,26 @@ cleanupAndFormat(StringRef Code, const tooling::Replacements &Replaces, static void lex(llvm::StringRef Code, const LangOptions &LangOpts, - llvm::function_ref<void(const clang::Token &, const SourceManager &SM)> + llvm::function_ref<void(const syntax::Token &, const SourceManager &SM)> Action) { // FIXME: InMemoryFileAdapter crashes unless the buffer is null terminated! std::string NullTerminatedCode = Code.str(); SourceManagerForFile FileSM("dummy.cpp", NullTerminatedCode); auto &SM = FileSM.get(); - auto FID = SM.getMainFileID(); - // Create a raw lexer (with no associated preprocessor object). - Lexer Lex(FID, SM.getBuffer(FID), SM, LangOpts); - Token Tok; - - while (!Lex.LexFromRawLexer(Tok)) + for (const auto &Tok : syntax::tokenize(SM.getMainFileID(), SM, LangOpts)) Action(Tok, SM); - // LexFromRawLexer returns true after it lexes last token, so we still have - // one more token to report. 
- Action(Tok, SM); } llvm::StringMap<unsigned> collectIdentifiers(llvm::StringRef Content, const format::FormatStyle &Style) { llvm::StringMap<unsigned> Identifiers; auto LangOpt = format::getFormattingLangOpts(Style); - lex(Content, LangOpt, [&](const clang::Token &Tok, const SourceManager &) { - if (Tok.getKind() == tok::raw_identifier) - ++Identifiers[Tok.getRawIdentifier()]; + lex(Content, LangOpt, [&](const syntax::Token &Tok, const SourceManager &SM) { + if (Tok.kind() == tok::identifier) + ++Identifiers[Tok.text(SM)]; + // FIXME: Should this function really return keywords too ? + else if (const auto *Keyword = tok::getKeywordSpelling(Tok.kind())) + ++Identifiers[Keyword]; }); return Identifiers; } @@ -645,16 +641,13 @@ std::vector<Range> collectIdentifierRanges(llvm::StringRef Identifier, llvm::StringRef Content, const LangOptions &LangOpts) { std::vector<Range> Ranges; - lex(Content, LangOpts, [&](const clang::Token &Tok, const SourceManager &SM) { - if (Tok.getKind() != tok::raw_identifier) - return; - if (Tok.getRawIdentifier() != Identifier) - return; - auto Range = getTokenRange(SM, LangOpts, Tok.getLocation()); - if (!Range) - return; - Ranges.push_back(*Range); - }); + lex(Content, LangOpts, + [&](const syntax::Token &Tok, const SourceManager &SM) { + if (Tok.kind() != tok::identifier || Tok.text(SM) != Identifier) + return; + if (auto Range = getTokenRange(SM, LangOpts, Tok.location())) + Ranges.push_back(*Range); + }); return Ranges; } @@ -691,97 +684,113 @@ void parseNamespaceEvents(llvm::StringRef Code, NamespaceEvent Event; lex(Code, format::getFormattingLangOpts(Style), - [&](const clang::Token &Tok,const SourceManager &SM) { - Event.Pos = sourceLocToPosition(SM, Tok.getLocation()); - switch (Tok.getKind()) { - case tok::raw_identifier: - // In raw mode, this could be a keyword or a name. 
- switch (State) { - case UsingNamespace: - case UsingNamespaceName: - NSName.append(std::string(Tok.getRawIdentifier())); - State = UsingNamespaceName; - break; - case Namespace: - case NamespaceName: - NSName.append(std::string(Tok.getRawIdentifier())); - State = NamespaceName; - break; - case Using: - State = - (Tok.getRawIdentifier() == "namespace") ? UsingNamespace : Default; - break; - case Default: - NSName.clear(); - if (Tok.getRawIdentifier() == "namespace") - State = Namespace; - else if (Tok.getRawIdentifier() == "using") - State = Using; - break; - } - break; - case tok::coloncolon: - // This can come at the beginning or in the middle of a namespace name. - switch (State) { - case UsingNamespace: - case UsingNamespaceName: - NSName.append("::"); - State = UsingNamespaceName; - break; - case NamespaceName: - NSName.append("::"); - State = NamespaceName; - break; - case Namespace: // Not legal here. - case Using: - case Default: - State = Default; - break; - } - break; - case tok::l_brace: - // Record which { started a namespace, so we know when } ends one. - if (State == NamespaceName) { - // Parsed: namespace <name> { - BraceStack.push_back(true); - Enclosing.push_back(NSName); - Event.Trigger = NamespaceEvent::BeginNamespace; - Event.Payload = llvm::join(Enclosing, "::"); - Callback(Event); - } else { - // This case includes anonymous namespaces (State = Namespace). - // For our purposes, they're not namespaces and we ignore them. - BraceStack.push_back(false); - } - State = Default; - break; - case tok::r_brace: - // If braces are unmatched, we're going to be confused, but don't crash. 
- if (!BraceStack.empty()) { - if (BraceStack.back()) { - // Parsed: } // namespace - Enclosing.pop_back(); - Event.Trigger = NamespaceEvent::EndNamespace; - Event.Payload = llvm::join(Enclosing, "::"); - Callback(Event); + [&](const syntax::Token &Tok, const SourceManager &SM) { + Event.Pos = sourceLocToPosition(SM, Tok.location()); + switch (Tok.kind()) { + case tok::kw_using: + State = State == Default ? Using : Default; + break; + case tok::kw_namespace: + switch (State) { + case Using: + State = UsingNamespace; + break; + case Default: + State = Namespace; + break; + default: + State = Default; + break; + } + break; + case tok::identifier: + switch (State) { + case UsingNamespace: + NSName.clear(); + LLVM_FALLTHROUGH; + case UsingNamespaceName: + NSName.append(Tok.text(SM).str()); + State = UsingNamespaceName; + break; + case Namespace: + NSName.clear(); + LLVM_FALLTHROUGH; + case NamespaceName: + NSName.append(Tok.text(SM).str()); + State = NamespaceName; + break; + case Using: + case Default: + State = Default; + break; + } + break; + case tok::coloncolon: + // This can come at the beginning or in the middle of a namespace + // name. + switch (State) { + case UsingNamespace: + NSName.clear(); + LLVM_FALLTHROUGH; + case UsingNamespaceName: + NSName.append("::"); + State = UsingNamespaceName; + break; + case NamespaceName: + NSName.append("::"); + State = NamespaceName; + break; + case Namespace: // Not legal here. + case Using: + case Default: + State = Default; + break; + } + break; + case tok::l_brace: + // Record which { started a namespace, so we know when } ends one. + if (State == NamespaceName) { + // Parsed: namespace <name> { + BraceStack.push_back(true); + Enclosing.push_back(NSName); + Event.Trigger = NamespaceEvent::BeginNamespace; + Event.Payload = llvm::join(Enclosing, "::"); + Callback(Event); + } else { + // This case includes anonymous namespaces (State = Namespace). + // For our purposes, they're not namespaces and we ignore them. 
+ BraceStack.push_back(false); + } + State = Default; + break; + case tok::r_brace: + // If braces are unmatched, we're going to be confused, but don't + // crash. + if (!BraceStack.empty()) { + if (BraceStack.back()) { + // Parsed: } // namespace + Enclosing.pop_back(); + Event.Trigger = NamespaceEvent::EndNamespace; + Event.Payload = llvm::join(Enclosing, "::"); + Callback(Event); + } + BraceStack.pop_back(); + } + break; + case tok::semi: + if (State == UsingNamespaceName) { + // Parsed: using namespace <name>; + Event.Trigger = NamespaceEvent::UsingDirective; + Event.Payload = std::move(NSName); + Callback(Event); + } + State = Default; + break; + default: + State = Default; + break; } - BraceStack.pop_back(); - } - break; - case tok::semi: - if (State == UsingNamespaceName) { - // Parsed: using namespace <name>; - Event.Trigger = NamespaceEvent::UsingDirective; - Event.Payload = std::move(NSName); - Callback(Event); - } - State = Default; - break; - default: - State = Default; - break; - } - }); + }); } // Returns the prefix namespaces of NS: {"" ... NS}.