From 1ad15046dcf6ff8bafc4a1ea13f214f8d6ba7de6 Mon Sep 17 00:00:00 2001 From: Ilya Biryukov Date: Wed, 18 Dec 2019 11:52:43 +0100 Subject: [PATCH] [Syntax] Allow to mutate syntax trees Summary: This patch adds facilities to mutate the syntax trees and produce corresponding text replacements. The public interface of the syntax library now includes facilities to: 1. perform type-safe modifications of syntax trees, 2. compute textual replacements to apply the modifications, 3. create syntax trees not backed by the source code. For each of the three, we only add a few example transformations in this patch to illustrate the idea, support for more kinds of nodes and transformations will be done in follow-up patches. The high-level mutation operations are implemented on top of operations that allow to arbitrarily change the trees. They are considered to be implementation details and are not available to the users of the library. Reviewers: sammccall, gribozavr2 Reviewed By: gribozavr2 Subscribers: merge_guards_bot, mgorny, cfe-commits Tags: #clang Differential Revision: https://reviews.llvm.org/D64573 --- clang/include/clang/Tooling/Syntax/BuildTree.h | 9 ++ clang/include/clang/Tooling/Syntax/Mutations.h | 37 +++++++ clang/include/clang/Tooling/Syntax/Tokens.h | 4 + clang/include/clang/Tooling/Syntax/Tree.h | 44 +++++++- clang/lib/Tooling/Syntax/BuildTree.cpp | 31 ++++-- clang/lib/Tooling/Syntax/CMakeLists.txt | 3 + clang/lib/Tooling/Syntax/ComputeReplacements.cpp | 126 +++++++++++++++++++++++ clang/lib/Tooling/Syntax/Mutations.cpp | 84 +++++++++++++++ clang/lib/Tooling/Syntax/Synthesis.cpp | 38 +++++++ clang/lib/Tooling/Syntax/Tokens.cpp | 8 +- clang/lib/Tooling/Syntax/Tree.cpp | 82 ++++++++++++++- clang/unittests/Tooling/Syntax/CMakeLists.txt | 1 + clang/unittests/Tooling/Syntax/TreeTest.cpp | 125 +++++++++++++++++++++- 13 files changed, 572 insertions(+), 20 deletions(-) create mode 100644 clang/include/clang/Tooling/Syntax/Mutations.h create mode 100644 clang/lib/Tooling/Syntax/ComputeReplacements.cpp create mode 100644 clang/lib/Tooling/Syntax/Mutations.cpp create mode 100644 clang/lib/Tooling/Syntax/Synthesis.cpp diff --git a/clang/include/clang/Tooling/Syntax/BuildTree.h b/clang/include/clang/Tooling/Syntax/BuildTree.h index 055d646..b7ad50c 100644 --- a/clang/include/clang/Tooling/Syntax/BuildTree.h +++ b/clang/include/clang/Tooling/Syntax/BuildTree.h @@ -11,7 +11,9 @@ #define LLVM_CLANG_TOOLING_SYNTAX_TREE_H #include "clang/AST/Decl.h" +#include "clang/Basic/TokenKinds.h" #include "clang/Tooling/Syntax/Nodes.h" +#include "clang/Tooling/Syntax/Tree.h" namespace clang { namespace syntax { @@ -19,6 +21,13 @@ namespace syntax { /// Build a syntax tree for the main file. syntax::TranslationUnit *buildSyntaxTree(Arena &A, const clang::TranslationUnitDecl &TU); + +// Create syntax trees from subtrees not backed by the source code. + +clang::syntax::Leaf *createPunctuation(clang::syntax::Arena &A, + clang::tok::TokenKind K); +clang::syntax::EmptyStatement *createEmptyStatement(clang::syntax::Arena &A); + } // namespace syntax } // namespace clang #endif diff --git a/clang/include/clang/Tooling/Syntax/Mutations.h b/clang/include/clang/Tooling/Syntax/Mutations.h new file mode 100644 index 0000000..8fd58ae --- /dev/null +++ b/clang/include/clang/Tooling/Syntax/Mutations.h @@ -0,0 +1,37 @@ +//===- Mutations.h - mutate syntax trees --------------------*- C++ ---*-=====// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// Defines high-level APIs for transforming syntax trees and producing the +// corresponding textual replacements. +//===----------------------------------------------------------------------===// +#ifndef LLVM_CLANG_TOOLING_SYNTAX_MUTATIONS_H +#define LLVM_CLANG_TOOLING_SYNTAX_MUTATIONS_H + +#include "clang/Tooling/Core/Replacement.h" +#include "clang/Tooling/Syntax/Nodes.h" +#include "clang/Tooling/Syntax/Tree.h" + +namespace clang { +namespace syntax { + +/// Computes textual replacements required to mimic the tree modifications made +/// to the syntax tree. +tooling::Replacements computeReplacements(const Arena &A, + const syntax::TranslationUnit &TU); + +/// Removes a statement or replaces it with an empty statement where one is +/// required syntactically. E.g., in the following example: +/// if (cond) { foo(); } else bar(); +/// One can remove `foo();` completely and to remove `bar();` we would need to +/// replace it with an empty statement. +/// EXPECTS: S->canModify() == true +void removeStatement(syntax::Arena &A, syntax::Statement *S); + +} // namespace syntax +} // namespace clang + +#endif diff --git a/clang/include/clang/Tooling/Syntax/Tokens.h b/clang/include/clang/Tooling/Syntax/Tokens.h index 0dcb6a1..1f9eeae 100644 --- a/clang/include/clang/Tooling/Syntax/Tokens.h +++ b/clang/include/clang/Tooling/Syntax/Tokens.h @@ -78,6 +78,10 @@ struct FileRange { /// Gets the substring that this FileRange refers to. llvm::StringRef text(const SourceManager &SM) const; + /// Convert to the clang range. The returned range is always a char range, + /// never a token range. + CharSourceRange toCharRange(const SourceManager &SM) const; + friend bool operator==(const FileRange &L, const FileRange &R) { return std::tie(L.File, L.Begin, L.End) == std::tie(R.File, R.Begin, R.End); } diff --git a/clang/include/clang/Tooling/Syntax/Tree.h b/clang/include/clang/Tooling/Syntax/Tree.h index b667775..7e34ed2 100644 --- a/clang/include/clang/Tooling/Syntax/Tree.h +++ b/clang/include/clang/Tooling/Syntax/Tree.h @@ -65,6 +65,9 @@ private: class Tree; class TreeBuilder; +class FactoryImpl; +class MutationsImpl; + enum class NodeKind : uint16_t; enum class NodeRole : uint8_t; @@ -79,6 +82,23 @@ public: NodeKind kind() const { return static_cast(Kind); } NodeRole role() const { return static_cast(Role); } + /// Whether the node is detached from a tree, i.e. does not have a parent. + bool isDetached() const; + /// Whether the node was created from the AST backed by the source code + /// rather than added later through mutation APIs or created with factory + /// functions. + /// When this flag is true, all subtrees are also original. + /// This flag is set to false on any modifications to the node or any of its + /// subtrees, even if this simply involves swapping existing subtrees. + bool isOriginal() const { return Original; } + /// If this function return false, the tree cannot be modified because there + /// is no reasonable way to produce the corresponding textual replacements. + /// This can happen when the node crosses macro expansion boundaries. + /// + /// Note that even if the node is not modifiable, its child nodes can be + /// modifiable. + bool canModify() const { return CanModify; } + const Tree *parent() const { return Parent; } Tree *parent() { return Parent; } @@ -93,11 +113,17 @@ public: private: // Tree is allowed to change the Parent link and Role. friend class Tree; + // TreeBuilder is allowed to set the Original and CanModify flags. + friend class TreeBuilder; + // MutationsImpl sets roles and CanModify flag. + friend class MutationsImpl; Tree *Parent; Node *NextSibling; unsigned Kind : 16; unsigned Role : 8; + unsigned Original : 1; + unsigned CanModify : 1; }; /// A leaf node points to a single token inside the expanded token stream. @@ -121,6 +147,14 @@ public: Node *firstChild() { return FirstChild; } const Node *firstChild() const { return FirstChild; } + Leaf *firstLeaf(); + const Leaf *firstLeaf() const { + return const_cast(this)->firstLeaf(); + } + + Leaf *lastLeaf(); + const Leaf *lastLeaf() const { return const_cast(this)->lastLeaf(); } + protected: /// Find the first node with a corresponding role. syntax::Node *findChild(NodeRole R); @@ -128,10 +162,18 @@ protected: private: /// Prepend \p Child to the list of children and and sets the parent pointer. /// A very low-level operation that does not check any invariants, only used - /// by TreeBuilder. + /// by TreeBuilder and FactoryImpl. /// EXPECTS: Role != NodeRoleDetached. void prependChildLowLevel(Node *Child, NodeRole Role); friend class TreeBuilder; + friend class FactoryImpl; + + /// Replace a range of children [BeforeBegin->NextSibling, End) with a list of + /// new nodes starting at \p New. + /// Only used by MutationsImpl to implement higher-level mutation operations. + /// (!) \p New can be null to model removal of the child range. + void replaceChildRangeLowLevel(Node *BeforeBegin, Node *End, Node *New); + friend class MutationsImpl; Node *FirstChild = nullptr; }; diff --git a/clang/lib/Tooling/Syntax/BuildTree.cpp b/clang/lib/Tooling/Syntax/BuildTree.cpp index e13bb2d..6f8a42a 100644 --- a/clang/lib/Tooling/Syntax/BuildTree.cpp +++ b/clang/lib/Tooling/Syntax/BuildTree.cpp @@ -25,6 +25,7 @@ #include "llvm/Support/Casting.h" #include "llvm/Support/Compiler.h" #include "llvm/Support/FormatVariadic.h" +#include "llvm/Support/MemoryBuffer.h" #include "llvm/Support/raw_ostream.h" #include @@ -85,7 +86,7 @@ public: assert(Tokens.back().kind() == tok::eof); // Build the root of the tree, consuming all the children. - Pending.foldChildren(Tokens.drop_back(), + Pending.foldChildren(Arena, Tokens.drop_back(), new (Arena.allocator()) syntax::TranslationUnit); return cast(std::move(Pending).finalize()); @@ -156,9 +157,12 @@ private: assert(A.tokenBuffer().expandedTokens().back().kind() == tok::eof); // Create all leaf nodes. // Note that we do not have 'eof' in the tree. - for (auto &T : A.tokenBuffer().expandedTokens().drop_back()) - Trees.insert(Trees.end(), - {&T, NodeAndRole{new (A.allocator()) syntax::Leaf(&T)}}); + for (auto &T : A.tokenBuffer().expandedTokens().drop_back()) { + auto *L = new (A.allocator()) syntax::Leaf(&T); + L->Original = true; + L->CanModify = A.tokenBuffer().spelledForExpanded(T).hasValue(); + Trees.insert(Trees.end(), {&T, NodeAndRole{L}}); + } } ~Forest() { assert(DelayedFolds.empty()); } @@ -176,18 +180,19 @@ private: } /// Add \p Node to the forest and attach child nodes based on \p Tokens. - void foldChildren(llvm::ArrayRef Tokens, + void foldChildren(const syntax::Arena &A, + llvm::ArrayRef Tokens, syntax::Tree *Node) { // Execute delayed folds inside `Tokens`. auto BeginExecuted = DelayedFolds.lower_bound(Tokens.begin()); auto It = BeginExecuted; for (; It != DelayedFolds.end() && It->second.End <= Tokens.end(); ++It) - foldChildrenEager(llvm::makeArrayRef(It->first, It->second.End), + foldChildrenEager(A, llvm::makeArrayRef(It->first, It->second.End), It->second.Node); DelayedFolds.erase(BeginExecuted, It); // Attach children to `Node`. - foldChildrenEager(Tokens, Node); + foldChildrenEager(A, Tokens, Node); } /// Schedule a call to `foldChildren` that will only be executed when @@ -244,7 +249,8 @@ private: private: /// Implementation detail of `foldChildren`, does acutal folding ignoring /// delayed folds. - void foldChildrenEager(llvm::ArrayRef Tokens, + void foldChildrenEager(const syntax::Arena &A, + llvm::ArrayRef Tokens, syntax::Tree *Node) { assert(Node->firstChild() == nullptr && "node already has children"); @@ -263,6 +269,10 @@ private: Node->prependChildLowLevel(std::prev(It)->second.Node, std::prev(It)->second.Role); + // Mark that this node came from the AST and is backed by the source code. + Node->Original = true; + Node->CanModify = A.tokenBuffer().spelledForExpanded(Tokens).hasValue(); + Trees.erase(BeginChildren, EndChildren); Trees.insert({FirstToken, NodeAndRole(Node)}); } @@ -585,7 +595,7 @@ private: void syntax::TreeBuilder::foldNode(llvm::ArrayRef Range, syntax::Tree *New) { - Pending.foldChildren(Range, New); + Pending.foldChildren(Arena, Range, New); } void syntax::TreeBuilder::noticeDeclaratorRange( @@ -617,7 +627,8 @@ void syntax::TreeBuilder::markStmtChild(Stmt *Child, NodeRole Role) { Pending.assignRole(getExprRange(E), NodeRole::ExpressionStatement_expression); // (!) 'getRange(Stmt)' ensures this already covers a trailing semicolon. - Pending.foldChildren(Range, new (allocator()) syntax::ExpressionStatement); + Pending.foldChildren(Arena, Range, + new (allocator()) syntax::ExpressionStatement); } Pending.assignRole(Range, Role); } diff --git a/clang/lib/Tooling/Syntax/CMakeLists.txt b/clang/lib/Tooling/Syntax/CMakeLists.txt index fee5f5b..eb26641 100644 --- a/clang/lib/Tooling/Syntax/CMakeLists.txt +++ b/clang/lib/Tooling/Syntax/CMakeLists.txt @@ -2,7 +2,10 @@ set(LLVM_LINK_COMPONENTS Support) add_clang_library(clangToolingSyntax BuildTree.cpp + ComputeReplacements.cpp Nodes.cpp + Mutations.cpp + Synthesis.cpp Tokens.cpp Tree.cpp diff --git a/clang/lib/Tooling/Syntax/ComputeReplacements.cpp b/clang/lib/Tooling/Syntax/ComputeReplacements.cpp new file mode 100644 index 0000000..30b3ee1 --- /dev/null +++ b/clang/lib/Tooling/Syntax/ComputeReplacements.cpp @@ -0,0 +1,126 @@ +//===- ComputeReplacements.cpp --------------------------------*- C++ -*-=====// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +#include "clang/Tooling/Core/Replacement.h" +#include "clang/Tooling/Syntax/Mutations.h" +#include "clang/Tooling/Syntax/Tokens.h" +#include "llvm/Support/Error.h" + +using namespace clang; + +namespace { +using ProcessTokensFn = llvm::function_ref, + bool /*IsOriginal*/)>; +/// Enumerates spans of tokens from the tree consecutively laid out in memory. +void enumerateTokenSpans(const syntax::Tree *Root, ProcessTokensFn Callback) { + struct Enumerator { + Enumerator(ProcessTokensFn Callback) + : SpanBegin(nullptr), SpanEnd(nullptr), SpanIsOriginal(false), + Callback(Callback) {} + + void run(const syntax::Tree *Root) { + process(Root); + // Report the last span to the user. + if (SpanBegin) + Callback(llvm::makeArrayRef(SpanBegin, SpanEnd), SpanIsOriginal); + } + + private: + void process(const syntax::Node *N) { + if (auto *T = dyn_cast(N)) { + for (auto *C = T->firstChild(); C != nullptr; C = C->nextSibling()) + process(C); + return; + } + + auto *L = cast(N); + if (SpanEnd == L->token() && SpanIsOriginal == L->isOriginal()) { + // Extend the current span. + ++SpanEnd; + return; + } + // Report the current span to the user. + if (SpanBegin) + Callback(llvm::makeArrayRef(SpanBegin, SpanEnd), SpanIsOriginal); + // Start recording a new span. + SpanBegin = L->token(); + SpanEnd = SpanBegin + 1; + SpanIsOriginal = L->isOriginal(); + } + + const syntax::Token *SpanBegin; + const syntax::Token *SpanEnd; + bool SpanIsOriginal; + ProcessTokensFn Callback; + }; + + return Enumerator(Callback).run(Root); +} + +syntax::FileRange rangeOfExpanded(const syntax::Arena &A, + llvm::ArrayRef Expanded) { + auto &Buffer = A.tokenBuffer(); + auto &SM = A.sourceManager(); + + // Check that \p Expanded actually points into expanded tokens. + assert(Buffer.expandedTokens().begin() <= Expanded.begin()); + assert(Expanded.end() < Buffer.expandedTokens().end()); + + if (Expanded.empty()) + // (!) empty tokens must always point before end(). + return syntax::FileRange( + SM, SM.getExpansionLoc(Expanded.begin()->location()), /*Length=*/0); + + auto Spelled = Buffer.spelledForExpanded(Expanded); + assert(Spelled && "could not find spelled tokens for expanded"); + return syntax::Token::range(SM, Spelled->front(), Spelled->back()); +} +} // namespace + +tooling::Replacements +syntax::computeReplacements(const syntax::Arena &A, + const syntax::TranslationUnit &TU) { + auto &Buffer = A.tokenBuffer(); + auto &SM = A.sourceManager(); + + tooling::Replacements Replacements; + // Text inserted by the replacement we are building now. + std::string Replacement; + auto emitReplacement = [&](llvm::ArrayRef ReplacedRange) { + if (ReplacedRange.empty() && Replacement.empty()) + return; + llvm::cantFail(Replacements.add(tooling::Replacement( + SM, rangeOfExpanded(A, ReplacedRange).toCharRange(SM), Replacement))); + Replacement = ""; + }; + + const syntax::Token *NextOriginal = Buffer.expandedTokens().begin(); + enumerateTokenSpans( + &TU, [&](llvm::ArrayRef Tokens, bool IsOriginal) { + if (!IsOriginal) { + Replacement += + syntax::Token::range(SM, Tokens.front(), Tokens.back()).text(SM); + return; + } + assert(NextOriginal <= Tokens.begin()); + // We are looking at a span of original tokens. + if (NextOriginal != Tokens.begin()) { + // There is a gap, record a replacement or deletion. + emitReplacement(llvm::makeArrayRef(NextOriginal, Tokens.begin())); + } else { + // No gap, but we may have pending insertions. Emit them now. + emitReplacement(llvm::makeArrayRef(NextOriginal, /*Length=*/0)); + } + NextOriginal = Tokens.end(); + }); + + // We might have pending replacements at the end of file. If so, emit them. + emitReplacement(llvm::makeArrayRef( + NextOriginal, Buffer.expandedTokens().drop_back().end())); + + return Replacements; +} diff --git a/clang/lib/Tooling/Syntax/Mutations.cpp b/clang/lib/Tooling/Syntax/Mutations.cpp new file mode 100644 index 0000000..a34598c --- /dev/null +++ b/clang/lib/Tooling/Syntax/Mutations.cpp @@ -0,0 +1,84 @@ +//===- Mutations.cpp ------------------------------------------*- C++ -*-=====// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +#include "clang/Tooling/Syntax/Mutations.h" +#include "clang/Basic/LLVM.h" +#include "clang/Basic/SourceLocation.h" +#include "clang/Lex/Token.h" +#include "clang/Tooling/Core/Replacement.h" +#include "clang/Tooling/Syntax/BuildTree.h" +#include "clang/Tooling/Syntax/Nodes.h" +#include "clang/Tooling/Syntax/Tokens.h" +#include "clang/Tooling/Syntax/Tree.h" +#include "llvm/ADT/ArrayRef.h" +#include "llvm/ADT/Optional.h" +#include "llvm/ADT/STLExtras.h" +#include "llvm/Support/Casting.h" +#include +#include + +using namespace clang; + +// This class has access to the internals of tree nodes. Its sole purpose is to +// define helpers that allow implementing the high-level mutation operations. +class syntax::MutationsImpl { +public: + /// Add a new node with a specified role. + static void addAfter(syntax::Node *Anchor, syntax::Node *New, NodeRole Role) { + assert(New->Parent == nullptr); + assert(New->NextSibling == nullptr); + assert(!New->isDetached()); + assert(Role != NodeRole::Detached); + + New->Role = static_cast(Role); + Anchor->parent()->replaceChildRangeLowLevel(Anchor, Anchor, New); + } + + /// Replace the node, keeping the role. + static void replace(syntax::Node *Old, syntax::Node *New) { + assert(New->Parent == nullptr); + assert(New->NextSibling == nullptr); + assert(New->isDetached()); + + New->Role = Old->Role; + Old->parent()->replaceChildRangeLowLevel(findPrevious(Old), + Old->nextSibling(), New); + } + + /// Completely remove the node from its parent. + static void remove(syntax::Node *N) { + N->parent()->replaceChildRangeLowLevel(findPrevious(N), N->nextSibling(), + /*New=*/nullptr); + } + +private: + static syntax::Node *findPrevious(syntax::Node *N) { + if (N->parent()->firstChild() == N) + return nullptr; + for (syntax::Node *C = N->parent()->firstChild(); C != nullptr; + C = C->nextSibling()) { + if (C->nextSibling() == N) + return C; + } + llvm_unreachable("could not find a child node"); + } +}; + +void syntax::removeStatement(syntax::Arena &A, syntax::Statement *S) { + assert(S->canModify()); + + if (auto *Parent = llvm::dyn_cast(S->parent())) { + // A child of CompoundStatement can just be safely removed. + MutationsImpl::remove(S); + return; + } + // For the rest, we have to replace with an empty statement. + if (isa(S)) + return; // already an empty statement, nothing to do. + + MutationsImpl::replace(S, createEmptyStatement(A)); +} diff --git a/clang/lib/Tooling/Syntax/Synthesis.cpp b/clang/lib/Tooling/Syntax/Synthesis.cpp new file mode 100644 index 0000000..73ae71f --- /dev/null +++ b/clang/lib/Tooling/Syntax/Synthesis.cpp @@ -0,0 +1,38 @@ +//===- Synthesis.cpp ------------------------------------------*- C++ -*-=====// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +#include "clang/Tooling/Syntax/BuildTree.h" + +using namespace clang; + +/// Exposes private syntax tree APIs required to implement node synthesis. +/// Should not be used for anything else. +class syntax::FactoryImpl { +public: + static void prependChildLowLevel(syntax::Tree *T, syntax::Node *Child, + syntax::NodeRole R) { + T->prependChildLowLevel(Child, R); + } +}; + +clang::syntax::Leaf *syntax::createPunctuation(clang::syntax::Arena &A, + clang::tok::TokenKind K) { + auto Tokens = A.lexBuffer(llvm::MemoryBuffer::getMemBuffer( + clang::tok::getPunctuatorSpelling(K))) + .second; + assert(Tokens.size() == 1); + assert(Tokens.front().kind() == K); + return new (A.allocator()) clang::syntax::Leaf(Tokens.begin()); +} + +clang::syntax::EmptyStatement * +syntax::createEmptyStatement(clang::syntax::Arena &A) { + auto *S = new (A.allocator()) clang::syntax::EmptyStatement; + FactoryImpl::prependChildLowLevel(S, createPunctuation(A, clang::tok::semi), + NodeRole::Unknown); + return S; +} diff --git a/clang/lib/Tooling/Syntax/Tokens.cpp b/clang/lib/Tooling/Syntax/Tokens.cpp index 9659582..39d6dba 100644 --- a/clang/lib/Tooling/Syntax/Tokens.cpp +++ b/clang/lib/Tooling/Syntax/Tokens.cpp @@ -67,7 +67,7 @@ FileRange syntax::Token::range(const SourceManager &SM, auto F = First.range(SM); auto L = Last.range(SM); assert(F.file() == L.file() && "tokens from different files"); - assert(F.endOffset() <= L.beginOffset() && "wrong order of tokens"); + assert(F == L || F.endOffset() <= L.beginOffset() && "wrong order of tokens"); return FileRange(F.file(), F.beginOffset(), L.endOffset()); } @@ -135,6 +135,12 @@ llvm::ArrayRef TokenBuffer::expandedTokens(SourceRange R) const { return {Begin, End}; } +CharSourceRange FileRange::toCharRange(const SourceManager &SM) const { + return CharSourceRange( + SourceRange(SM.getComposedLoc(File, Begin), SM.getComposedLoc(File, End)), + /*IsTokenRange=*/false); +} + std::pair TokenBuffer::spelledForExpandedToken(const syntax::Token *Expanded) const { assert(Expanded); diff --git a/clang/lib/Tooling/Syntax/Tree.cpp b/clang/lib/Tooling/Syntax/Tree.cpp index a32d827..d7436ae 100644 --- a/clang/lib/Tooling/Syntax/Tree.cpp +++ b/clang/lib/Tooling/Syntax/Tree.cpp @@ -40,7 +40,10 @@ bool syntax::Leaf::classof(const Node *N) { syntax::Node::Node(NodeKind Kind) : Parent(nullptr), NextSibling(nullptr), Kind(static_cast(Kind)), - Role(static_cast(NodeRole::Detached)) {} + Role(static_cast(NodeRole::Detached)), Original(false), + CanModify(false) {} + +bool syntax::Node::isDetached() const { return role() == NodeRole::Detached; } bool syntax::Tree::classof(const Node *N) { return N->kind() > NodeKind::Leaf; } @@ -56,6 +59,48 @@ void syntax::Tree::prependChildLowLevel(Node *Child, NodeRole Role) { this->FirstChild = Child; } +void syntax::Tree::replaceChildRangeLowLevel(Node *BeforeBegin, Node *End, + Node *New) { + assert(!BeforeBegin || BeforeBegin->Parent == this); + +#ifndef NDEBUG + for (auto *N = New; N; N = N->nextSibling()) { + assert(N->Parent == nullptr); + assert(N->role() != NodeRole::Detached && "Roles must be set"); + // FIXME: sanity-check the role. + } +#endif + + // Detach old nodes. + for (auto *N = !BeforeBegin ? FirstChild : BeforeBegin->nextSibling(); + N != End;) { + auto *Next = N->NextSibling; + + N->Role = static_cast(NodeRole::Detached); + N->Parent = nullptr; + N->NextSibling = nullptr; + + N = Next; + } + + // Attach new nodes. + if (BeforeBegin) + BeforeBegin->NextSibling = New ? New : End; + else + FirstChild = New ? New : End; + + if (New) { + auto *Last = New; + while (auto *Next = Last->nextSibling()) + Last = Next; + Last->NextSibling = End; + } + + // Mark the node as modified. + for (auto *T = this; T && T->Original; T = T->Parent) + T->Original = false; +} + namespace { static void traverse(const syntax::Node *N, llvm::function_ref Visit) { @@ -85,9 +130,15 @@ static void dumpTokens(llvm::raw_ostream &OS, ArrayRef Tokens, static void dumpTree(llvm::raw_ostream &OS, const syntax::Node *N, const syntax::Arena &A, std::vector IndentMask) { + std::string Marks; + if (!N->isOriginal()) + Marks += "M"; if (N->role() == syntax::NodeRole::Detached) - OS << "*: "; - // FIXME: find a nice way to print other roles. + Marks += "*"; // FIXME: find a nice way to print other roles. + if (!N->canModify()) + Marks += "I"; + if (!Marks.empty()) + OS << Marks << ": "; if (auto *L = llvm::dyn_cast(N)) { dumpTokens(OS, *L->token(), A.sourceManager()); @@ -133,10 +184,35 @@ std::string syntax::Node::dumpTokens(const Arena &A) const { if (!L) return; ::dumpTokens(OS, *L->token(), A.sourceManager()); + OS << " "; }); return OS.str(); } +syntax::Leaf *syntax::Tree::firstLeaf() { + auto *T = this; + while (auto *C = T->firstChild()) { + if (auto *L = dyn_cast(C)) + return L; + T = cast(C); + } + return nullptr; +} + +syntax::Leaf *syntax::Tree::lastLeaf() { + auto *T = this; + while (auto *C = T->firstChild()) { + // Find the last child. + while (auto *Next = C->nextSibling()) + C = Next; + + if (auto *L = dyn_cast(C)) + return L; + T = cast(C); + } + return nullptr; +} + syntax::Node *syntax::Tree::findChild(NodeRole R) { for (auto *C = FirstChild; C; C = C->nextSibling()) { if (C->role() == R) diff --git a/clang/unittests/Tooling/Syntax/CMakeLists.txt b/clang/unittests/Tooling/Syntax/CMakeLists.txt index f9d079b..5de7d24 100644 --- a/clang/unittests/Tooling/Syntax/CMakeLists.txt +++ b/clang/unittests/Tooling/Syntax/CMakeLists.txt @@ -15,6 +15,7 @@ clang_target_link_libraries(SyntaxTests clangLex clangSerialization clangTooling + clangToolingCore clangToolingSyntax ) diff --git a/clang/unittests/Tooling/Syntax/TreeTest.cpp b/clang/unittests/Tooling/Syntax/TreeTest.cpp index e9c11c7..bcc6f29 100644 --- a/clang/unittests/Tooling/Syntax/TreeTest.cpp +++ b/clang/unittests/Tooling/Syntax/TreeTest.cpp @@ -9,14 +9,24 @@ #include "clang/Tooling/Syntax/Tree.h" #include "clang/AST/ASTConsumer.h" #include "clang/AST/Decl.h" +#include "clang/AST/Stmt.h" +#include "clang/Basic/LLVM.h" #include "clang/Frontend/CompilerInstance.h" +#include "clang/Frontend/CompilerInvocation.h" #include "clang/Frontend/FrontendAction.h" #include "clang/Lex/PreprocessorOptions.h" +#include "clang/Tooling/Core/Replacement.h" #include "clang/Tooling/Syntax/BuildTree.h" +#include "clang/Tooling/Syntax/Mutations.h" #include "clang/Tooling/Syntax/Nodes.h" +#include "clang/Tooling/Syntax/Tokens.h" #include "clang/Tooling/Tooling.h" +#include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/StringRef.h" +#include "llvm/Support/Casting.h" +#include "llvm/Support/Error.h" +#include "llvm/Testing/Support/Annotations.h" #include "gmock/gmock.h" #include "gtest/gtest.h" #include @@ -24,6 +34,15 @@ using namespace clang; namespace { +static llvm::ArrayRef tokens(syntax::Node *N) { + assert(N->isOriginal() && "tokens of modified nodes are not well-defined"); + if (auto *L = dyn_cast(N)) + return llvm::makeArrayRef(L->token(), 1); + auto *T = cast(N); + return llvm::makeArrayRef(T->firstLeaf()->token(), + T->lastLeaf()->token() + 1); +} + class SyntaxTreeTest : public ::testing::Test { protected: // Build a syntax tree for the code. @@ -80,13 +99,13 @@ protected: // Prepare to run a compiler. std::vector Args = {"syntax-test", "-std=c++11", "-fsyntax-only", FileName}; - auto CI = createInvocationFromCommandLine(Args, Diags, FS); - assert(CI); - CI->getFrontendOpts().DisableFree = false; - CI->getPreprocessorOpts().addRemappedFile( + Invocation = createInvocationFromCommandLine(Args, Diags, FS); + assert(Invocation); + Invocation->getFrontendOpts().DisableFree = false; + Invocation->getPreprocessorOpts().addRemappedFile( FileName, llvm::MemoryBuffer::getMemBufferCopy(Code).release()); CompilerInstance Compiler; - Compiler.setInvocation(std::move(CI)); + Compiler.setInvocation(Invocation); Compiler.setDiagnostics(Diags.get()); Compiler.setFileManager(FileMgr.get()); Compiler.setSourceManager(SourceMgr.get()); @@ -108,6 +127,27 @@ protected: } } + /// Finds the deepest node in the tree that covers exactly \p R. + /// FIXME: implement this efficiently and move to public syntax tree API. + syntax::Node *nodeByRange(llvm::Annotations::Range R, syntax::Node *Root) { + llvm::ArrayRef Toks = tokens(Root); + + if (Toks.front().location().isFileID() && + Toks.back().location().isFileID() && + syntax::Token::range(*SourceMgr, Toks.front(), Toks.back()) == + syntax::FileRange(SourceMgr->getMainFileID(), R.Begin, R.End)) + return Root; + + auto *T = dyn_cast(Root); + if (!T) + return nullptr; + for (auto *C = T->firstChild(); C != nullptr; C = C->nextSibling()) { + if (auto *Result = nodeByRange(R, C)) + return Result; + } + return nullptr; + } + // Data fields. llvm::IntrusiveRefCntPtr Diags = new DiagnosticsEngine(new DiagnosticIDs, new DiagnosticOptions); @@ -117,6 +157,7 @@ protected: new FileManager(FileSystemOptions(), FS); llvm::IntrusiveRefCntPtr SourceMgr = new SourceManager(*Diags, *FileMgr); + std::shared_ptr Invocation; // Set after calling buildTree(). std::unique_ptr Arena; }; @@ -695,6 +736,39 @@ extern "C" { int b; int c; } | `-; `-} )txt"}, + // Some nodes are non-modifiable, they are marked with 'I:'. + {R"cpp( +#define HALF_IF if (1+ +#define HALF_IF_2 1) {} +void test() { + HALF_IF HALF_IF_2 else {} +})cpp", + R"txt( +*: TranslationUnit +`-SimpleDeclaration + |-void + |-test + |-( + |-) + `-CompoundStatement + |-{ + |-IfStatement + | |-I: if + | |-I: ( + | |-I: UnknownExpression + | | |-I: 1 + | | |-I: + + | | `-I: 1 + | |-I: ) + | |-I: CompoundStatement + | | |-I: { + | | `-I: } + | |-else + | `-CompoundStatement + | |-{ + | `-} + `-} + )txt"}, }; for (const auto &T : Cases) { @@ -706,4 +780,45 @@ extern "C" { int b; int c; } EXPECT_EQ(Expected, Actual) << "the resulting dump is:\n" << Actual; } } + +TEST_F(SyntaxTreeTest, Mutations) { + using Transformation = std::function; + auto CheckTransformation = [this](std::string Input, std::string Expected, + Transformation Transform) -> void { + llvm::Annotations Source(Input); + auto *Root = buildTree(Source.code()); + + Transform(Source, Root); + + auto Replacements = syntax::computeReplacements(*Arena, *Root); + auto Output = tooling::applyAllReplacements(Source.code(), Replacements); + if (!Output) { + ADD_FAILURE() << "could not apply replacements: " + << llvm::toString(Output.takeError()); + return; + } + + EXPECT_EQ(Expected, *Output) << "input is:\n" << Input; + }; + + // Removes the selected statement. Input should have exactly one selected + // range and it should correspond to a single statement. + auto RemoveStatement = [this](const llvm::Annotations &Input, + syntax::TranslationUnit *TU) { + auto *S = cast(nodeByRange(Input.range(), TU)); + ASSERT_TRUE(S->canModify()) << "cannot remove a statement"; + syntax::removeStatement(*Arena, S); + }; + + std::vector> + Cases = { + {"void test() { [[100+100;]] test(); }", "void test() { test(); }"}, + {"void test() { if (true) [[{}]] else {} }", + "void test() { if (true) ; else {} }"}, + {"void test() { [[;]] }", "void test() { }"}}; + for (const auto &C : Cases) + CheckTransformation(C.first, C.second, RemoveStatement); +} + } // namespace -- 2.7.4