From 91dd67ef724c07f28d4cb52448539f12ad488f2a Mon Sep 17 00:00:00 2001 From: John McCall Date: Mon, 28 Oct 2019 09:45:53 -0700 Subject: [PATCH] Introduce some types and functions to make it easier to work with the tblgen AST node hierarchies. Not totally NFC because both of the emitters now emit in a different order. The type-nodes emitter now visits nodes in hierarchy order, which means we could use range checks in classof if we had any types that would benefit from that; currently we do not. The AST-nodes emitter now uses a multimap keyed by the name of the record; previously it was using `Record*`, which of course isn't stable across processes and may have led to non-reproducible builds in some circumstances. --- clang/utils/TableGen/ASTTableGen.cpp | 130 +++++++++++++++++++++++++ clang/utils/TableGen/ASTTableGen.h | 113 ++++++++++++++++++++- clang/utils/TableGen/CMakeLists.txt | 1 + clang/utils/TableGen/ClangASTNodesEmitter.cpp | 82 ++++++++-------- clang/utils/TableGen/ClangTypeNodesEmitter.cpp | 58 +++++------ 5 files changed, 307 insertions(+), 77 deletions(-) create mode 100644 clang/utils/TableGen/ASTTableGen.cpp diff --git a/clang/utils/TableGen/ASTTableGen.cpp b/clang/utils/TableGen/ASTTableGen.cpp new file mode 100644 index 0000000..c28d5ba --- /dev/null +++ b/clang/utils/TableGen/ASTTableGen.cpp @@ -0,0 +1,130 @@ +//=== ASTTableGen.cpp - Helper functions for working with AST records -----===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file defines some helper functions for working with tblgen records +// for the Clang AST: that is, the contents of files such as DeclNodes.td, +// StmtNodes.td, and TypeNodes.td. 
+// +//===----------------------------------------------------------------------===// + +#include "ASTTableGen.h" +#include "llvm/TableGen/Record.h" +#include "llvm/TableGen/Error.h" + +using namespace llvm; +using namespace clang; +using namespace clang::tblgen; + +static StringRef removeExpectedNodeNameSuffix(Record *node, StringRef suffix) { + StringRef nodeName = node->getName(); + if (!nodeName.endswith(suffix)) { + PrintFatalError(node->getLoc(), + Twine("name of node doesn't end in ") + suffix); + } + return nodeName.drop_back(suffix.size()); +} + +// Decl node names don't end in Decl for historical reasons, and it would +// be somewhat annoying to fix now. Conveniently, this means the ID +// is exactly the node name, and the class name is simply that plus Decl. +std::string clang::tblgen::DeclNode::getClassName() const { + return (Twine(getName()) + "Decl").str(); +} +StringRef clang::tblgen::DeclNode::getId() const { + return getName(); +} + +// Type nodes are all named ending in Type, just like the corresponding +// C++ class, and the ID just strips this suffix. +StringRef clang::tblgen::TypeNode::getClassName() const { + return getName(); +} +StringRef clang::tblgen::TypeNode::getId() const { + return removeExpectedNodeNameSuffix(getRecord(), "Type"); +} + +// Stmt nodes are named the same as the C++ class, which has no regular +// naming convention (all the non-expression statements end in Stmt, +// and *many* expressions end in Expr, but there are also several +// core expression classes like IntegerLiteral and BinaryOperator with +// no standard suffix). The ID adds "Class" for historical reasons. +StringRef clang::tblgen::StmtNode::getClassName() const { + return getName(); +} +std::string clang::tblgen::StmtNode::getId() const { + return (Twine(getName()) + "Class").str(); +} + +// A map from a node to each of its child nodes. 
+template +using ChildMap = std::multimap; + +template +static void visitASTNodeRecursive(NodeClass node, NodeClass base, + const ChildMap &map, + ASTNodeHierarchyVisitor visit) { + visit(node, base); + + auto i = map.lower_bound(node), e = map.upper_bound(node); + for (; i != e; ++i) { + visitASTNodeRecursive(i->second, node, map, visit); + } +} + +template +static void visitHierarchy(RecordKeeper &records, + StringRef nodeClassName, + ASTNodeHierarchyVisitor visit) { + // Check for the node class, just as a sanity check. + if (!records.getClass(nodeClassName)) { + PrintFatalError(Twine("cannot find definition for node class ") + + nodeClassName); + } + + // Find all the nodes in the hierarchy. + auto nodes = records.getAllDerivedDefinitions(nodeClassName); + + // Derive the child map. + ChildMap hierarchy; + NodeClass root; + for (NodeClass node : nodes) { + if (auto base = node.getBase()) + hierarchy.insert(std::make_pair(base, node)); + else if (root) + PrintFatalError(node.getLoc(), + "multiple root nodes in " + nodeClassName + " hierarchy"); + else + root = node; + } + if (!root) + PrintFatalError(Twine("no root node in ") + nodeClassName + " hierarchy"); + + // Now visit the map recursively, starting at the root node. 
+ visitASTNodeRecursive(root, NodeClass(), hierarchy, visit); +} + +void clang::tblgen::visitASTNodeHierarchy(RecordKeeper &records, + StringRef nodeClassName, + ASTNodeHierarchyVisitor visit) { + visitHierarchy(records, nodeClassName, visit); +} + +void clang::tblgen::visitDeclNodeHierarchy(RecordKeeper &records, + ASTNodeHierarchyVisitor visit) { + visitHierarchy(records, DeclNodeClassName, visit); +} + +void clang::tblgen::visitTypeNodeHierarchy(RecordKeeper &records, + ASTNodeHierarchyVisitor visit) { + visitHierarchy(records, TypeNodeClassName, visit); +} + +void clang::tblgen::visitStmtNodeHierarchy(RecordKeeper &records, + ASTNodeHierarchyVisitor visit) { + visitHierarchy(records, StmtNodeClassName, visit); +} diff --git a/clang/utils/TableGen/ASTTableGen.h b/clang/utils/TableGen/ASTTableGen.h index ae85fc9..3d623aa 100644 --- a/clang/utils/TableGen/ASTTableGen.h +++ b/clang/utils/TableGen/ASTTableGen.h @@ -14,9 +14,9 @@ // These are spellings in the tblgen files. -// The field name for the base-node property. -// Fortunately, this is common across all the hierarchies. +// Field names that are fortunately common across the hierarchies. #define BaseFieldName "Base" +#define AbstractFieldName "Abstract" // Comment node hierarchy. #define CommentNodeClassName "CommentNode" @@ -34,6 +34,113 @@ #define NeverCanonicalClassName "NeverCanonical" #define NeverCanonicalUnlessDependentClassName "NeverCanonicalUnlessDependent" #define LeafTypeClassName "LeafType" -#define AbstractFieldName "Abstract" + +// Property node hierarchy. +#define PropertyClassName "Property" +#define ClassFieldName "Class" + +namespace clang { +namespace tblgen { + +/// An (optional) reference to a TableGen node representing a class +/// in one of Clang's AST hierarchies. 
+class ASTNode { + llvm::Record *Record; +public: + ASTNode(llvm::Record *record = nullptr) : Record(record) {} + + explicit operator bool() const { return Record != nullptr; } + + llvm::Record *getRecord() const { return Record; } + llvm::StringRef getName() const { + assert(Record && "getting name of null record"); + return Record->getName(); + } + llvm::ArrayRef getLoc() const { + assert(Record && "getting location of null record"); + return Record->getLoc(); + } + + /// Return the node for the base, if there is one. + ASTNode getBase() const { + assert(Record && "getting base of null record"); + return Record->getValueAsOptionalDef(BaseFieldName); + } + + /// Is the corresponding class abstract? + bool isAbstract() const { + assert(Record && "querying null record"); + return Record->getValueAsBit(AbstractFieldName); + } + + /// Does the node inherit from the given TableGen class? + bool isSubClassOf(llvm::StringRef className) const { + assert(Record && "querying null record"); + return Record->isSubClassOf(className); + } + + friend bool operator<(ASTNode lhs, ASTNode rhs) { + assert(lhs && rhs && "sorting null nodes"); + return lhs.getName() < rhs.getName(); + } + friend bool operator>(ASTNode lhs, ASTNode rhs) { return rhs < lhs; } + friend bool operator<=(ASTNode lhs, ASTNode rhs) { return !(rhs < lhs); } + friend bool operator>=(ASTNode lhs, ASTNode rhs) { return !(lhs < rhs); } + + friend bool operator==(ASTNode lhs, ASTNode rhs) { + // This should handle null nodes. 
+ return lhs.getRecord() == rhs.getRecord(); + } + friend bool operator!=(ASTNode lhs, ASTNode rhs) { return !(lhs == rhs); } +}; + +class DeclNode : public ASTNode { +public: + DeclNode(llvm::Record *record = nullptr) : ASTNode(record) {} + + llvm::StringRef getId() const; + std::string getClassName() const; + DeclNode getBase() const { return DeclNode(ASTNode::getBase().getRecord()); } +}; + +class TypeNode : public ASTNode { +public: + TypeNode(llvm::Record *record = nullptr) : ASTNode(record) {} + + llvm::StringRef getId() const; + llvm::StringRef getClassName() const; + TypeNode getBase() const { return TypeNode(ASTNode::getBase().getRecord()); } +}; + +class StmtNode : public ASTNode { +public: + StmtNode(llvm::Record *record = nullptr) : ASTNode(record) {} + + std::string getId() const; + llvm::StringRef getClassName() const; + StmtNode getBase() const { return StmtNode(ASTNode::getBase().getRecord()); } +}; + +/// A visitor for an AST node hierarchy. Note that `base` can be null for +/// the root class. 
+template +using ASTNodeHierarchyVisitor = + llvm::function_ref; + +void visitASTNodeHierarchy(llvm::RecordKeeper &records, + llvm::StringRef nodeClassName, + ASTNodeHierarchyVisitor visit); + +void visitDeclNodeHierarchy(llvm::RecordKeeper &records, + ASTNodeHierarchyVisitor visit); + +void visitTypeNodeHierarchy(llvm::RecordKeeper &records, + ASTNodeHierarchyVisitor visit); + +void visitStmtNodeHierarchy(llvm::RecordKeeper &records, + ASTNodeHierarchyVisitor visit); + +} // end namespace clang::tblgen +} // end namespace clang #endif diff --git a/clang/utils/TableGen/CMakeLists.txt b/clang/utils/TableGen/CMakeLists.txt index c685a2c..84d5488 100644 --- a/clang/utils/TableGen/CMakeLists.txt +++ b/clang/utils/TableGen/CMakeLists.txt @@ -1,6 +1,7 @@ set(LLVM_LINK_COMPONENTS Support) add_tablegen(clang-tblgen CLANG + ASTTableGen.cpp ClangASTNodesEmitter.cpp ClangAttrEmitter.cpp ClangCommentCommandInfoEmitter.cpp diff --git a/clang/utils/TableGen/ClangASTNodesEmitter.cpp b/clang/utils/TableGen/ClangASTNodesEmitter.cpp index 0a4ce87..c1bb00b 100644 --- a/clang/utils/TableGen/ClangASTNodesEmitter.cpp +++ b/clang/utils/TableGen/ClangASTNodesEmitter.cpp @@ -21,6 +21,8 @@ #include #include using namespace llvm; +using namespace clang; +using namespace clang::tblgen; /// ClangASTNodesEmitter - The top-level class emits .inc files containing /// declarations of Clang statements. @@ -28,11 +30,11 @@ using namespace llvm; namespace { class ClangASTNodesEmitter { // A map from a node to each of its derived nodes. 
- typedef std::multimap ChildMap; + typedef std::multimap ChildMap; typedef ChildMap::const_iterator ChildIterator; RecordKeeper &Records; - Record *Root = nullptr; + ASTNode Root; const std::string &NodeClassName; const std::string &BaseSuffix; std::string MacroHierarchyName; @@ -49,23 +51,23 @@ class ClangASTNodesEmitter { const std::string &macroHierarchyName() { assert(Root && "root node not yet derived!"); if (MacroHierarchyName.empty()) - MacroHierarchyName = macroName(Root->getName()); + MacroHierarchyName = macroName(Root.getName()); return MacroHierarchyName; } // Return the name to be printed in the base field. Normally this is // the record's name plus the base suffix, but if it is the root node and // the suffix is non-empty, it's just the suffix. - std::string baseName(Record &R) { - if (&R == Root && !BaseSuffix.empty()) + std::string baseName(ASTNode node) { + if (node == Root && !BaseSuffix.empty()) return BaseSuffix; - return R.getName().str() + BaseSuffix; + return node.getName().str() + BaseSuffix; } void deriveChildTree(); - std::pair EmitNode(raw_ostream& OS, Record *Base); + std::pair EmitNode(raw_ostream& OS, ASTNode Base); public: explicit ClangASTNodesEmitter(RecordKeeper &R, const std::string &N, const std::string &S) @@ -82,60 +84,58 @@ public: // Returns the first and last non-abstract subrecords // Called recursively to ensure that nodes remain contiguous -std::pair ClangASTNodesEmitter::EmitNode(raw_ostream &OS, - Record *Base) { - std::string BaseName = macroName(Base->getName()); +std::pair ClangASTNodesEmitter::EmitNode(raw_ostream &OS, + ASTNode Base) { + std::string BaseName = macroName(Base.getName()); ChildIterator i = Tree.lower_bound(Base), e = Tree.upper_bound(Base); + bool HasChildren = (i != e); - Record *First = nullptr, *Last = nullptr; - if (!Base->getValueAsBit(AbstractFieldName)) + ASTNode First, Last; + if (!Base.isAbstract()) First = Last = Base; for (; i != e; ++i) { - Record *R = i->second; - bool Abstract = 
R->getValueAsBit(AbstractFieldName); - std::string NodeName = macroName(R->getName()); + ASTNode Child = i->second; + bool Abstract = Child.isAbstract(); + std::string NodeName = macroName(Child.getName()); OS << "#ifndef " << NodeName << "\n"; OS << "# define " << NodeName << "(Type, Base) " << BaseName << "(Type, Base)\n"; OS << "#endif\n"; - if (Abstract) - OS << "ABSTRACT_" << macroHierarchyName() << "(" << NodeName << "(" - << R->getName() << ", " << baseName(*Base) << "))\n"; - else - OS << NodeName << "(" << R->getName() << ", " - << baseName(*Base) << ")\n"; - - if (Tree.find(R) != Tree.end()) { - const std::pair &Result - = EmitNode(OS, R); - if (!First && Result.first) - First = Result.first; - if (Result.second) - Last = Result.second; - } else { - if (!Abstract) { - Last = R; - - if (!First) - First = R; - } - } + if (Abstract) OS << "ABSTRACT_" << macroHierarchyName() << "("; + OS << NodeName << "(" << Child.getName() << ", " << baseName(Base) << ")"; + if (Abstract) OS << ")"; + OS << "\n"; + + auto Result = EmitNode(OS, Child); + assert(Result.first && Result.second && "node didn't have children?"); + + // Update the range of Base. + if (!First) First = Result.first; + Last = Result.second; OS << "#undef " << NodeName << "\n\n"; } - if (First) { - assert (Last && "Got a first node but not a last node for a range!"); + // If there aren't first/last nodes, it must be because there were no + // children and this node was abstract, which is not a sensible combination. + if (!First) { + PrintFatalError(Base.getLoc(), "abstract node has no children"); + } + assert(Last && "set First without Last"); + + if (HasChildren) { + // Use FOO_RANGE unless this is the last of the ranges, in which case + // use LAST_FOO_RANGE. 
if (Base == Root) OS << "LAST_" << macroHierarchyName() << "_RANGE("; else OS << macroHierarchyName() << "_RANGE("; - OS << Base->getName() << ", " << First->getName() << ", " - << Last->getName() << ")\n\n"; + OS << Base.getName() << ", " << First.getName() << ", " + << Last.getName() << ")\n\n"; } return std::make_pair(First, Last); diff --git a/clang/utils/TableGen/ClangTypeNodesEmitter.cpp b/clang/utils/TableGen/ClangTypeNodesEmitter.cpp index 4a37729..ee12a6b 100644 --- a/clang/utils/TableGen/ClangTypeNodesEmitter.cpp +++ b/clang/utils/TableGen/ClangTypeNodesEmitter.cpp @@ -57,6 +57,8 @@ #include using namespace llvm; +using namespace clang; +using namespace clang::tblgen; // These are spellings in the generated output. #define TypeMacroName "TYPE" @@ -70,15 +72,6 @@ using namespace llvm; #define TypeClassName "Type" -static StringRef getIdForType(Record *type) { - // The record name is expected to be the full C++ class name, - // including "Type". Check for that and strip it off. - auto fullName = type->getName(); - if (!fullName.endswith("Type")) - PrintFatalError(type->getLoc(), "name of Type node doesn't end in Type"); - return fullName.drop_back(4); -} - namespace { class TypeNodeEmitter { RecordKeeper &Records; @@ -99,7 +92,7 @@ private: StringRef args); void emitNodeInvocations(); - void emitLastNodeInvocation(); + void emitLastNodeInvocation(TypeNode lastType); void emitLeafNodeInvocations(); void addMacroToUndef(StringRef macroName); @@ -124,7 +117,6 @@ void TypeNodeEmitter::emit() { // Invocations. emitNodeInvocations(); - emitLastNodeInvocation(); emitLeafNodeInvocations(); // Postmatter @@ -143,48 +135,48 @@ void TypeNodeEmitter::emitFallbackDefine(StringRef macroName, } void TypeNodeEmitter::emitNodeInvocations() { - for (auto type : Types) { - // The name without the Type suffix. - StringRef id = getIdForType(type); + TypeNode lastType; - // If this is the Type node itself, skip it. 
- if (id.empty()) continue; + visitTypeNodeHierarchy(Records, [&](TypeNode type, TypeNode base) { + // If this is the Type node itself, skip it; it can't be handled + // uniformly by metaprograms because it doesn't have a base. + if (!base) return; // Figure out which macro to use. StringRef macroName; auto setMacroName = [&](StringRef newName) { if (!macroName.empty()) - PrintFatalError(type->getLoc(), + PrintFatalError(type.getLoc(), Twine("conflict when computing macro name for " "Type node: trying to use both \"") + macroName + "\" and \"" + newName + "\""); macroName = newName; }; - if (type->isSubClassOf(AlwaysDependentClassName)) + if (type.isSubClassOf(AlwaysDependentClassName)) setMacroName(DependentTypeMacroName); - if (type->isSubClassOf(NeverCanonicalClassName)) + if (type.isSubClassOf(NeverCanonicalClassName)) setMacroName(NonCanonicalTypeMacroName); - if (type->isSubClassOf(NeverCanonicalUnlessDependentClassName)) + if (type.isSubClassOf(NeverCanonicalUnlessDependentClassName)) setMacroName(NonCanonicalUnlessDependentTypeMacroName); - if (type->getValueAsBit(AbstractFieldName)) + if (type.isAbstract()) setMacroName(AbstractTypeMacroName); if (macroName.empty()) macroName = TypeMacroName; - // Compute the base class. - StringRef baseName = TypeClassName; - if (auto base = type->getValueAsOptionalDef(BaseFieldName)) - baseName = base->getName(); - // Generate the invocation line. - Out << macroName << "(" << id << ", " << baseName << ")\n"; - } + Out << macroName << "(" << type.getId() << ", " + << base.getClassName() << ")\n"; + + lastType = type; + }); + + emitLastNodeInvocation(lastType); } -void TypeNodeEmitter::emitLastNodeInvocation() { +void TypeNodeEmitter::emitLastNodeInvocation(TypeNode type) { // We check that this is non-empty earlier. 
Out << "#ifdef " LastTypeMacroName "\n" - LastTypeMacroName "(" << getIdForType(Types.back()) << ")\n" + LastTypeMacroName "(" << type.getId() << ")\n" "#undef " LastTypeMacroName "\n" "#endif\n"; } @@ -192,9 +184,9 @@ void TypeNodeEmitter::emitLastNodeInvocation() { void TypeNodeEmitter::emitLeafNodeInvocations() { Out << "#ifdef " LeafTypeMacroName "\n"; - for (auto type : Types) { - if (!type->isSubClassOf(LeafTypeClassName)) continue; - Out << LeafTypeMacroName "(" << getIdForType(type) << ")\n"; + for (TypeNode type : Types) { + if (!type.isSubClassOf(LeafTypeClassName)) continue; + Out << LeafTypeMacroName "(" << type.getId() << ")\n"; } Out << "#undef " LeafTypeMacroName "\n" -- 2.7.4