From cd2292ef824591cc34cc299910a3098545c840c7 Mon Sep 17 00:00:00 2001 From: Haojian Wu Date: Tue, 24 May 2022 20:21:45 +0200 Subject: [PATCH] [pseudo] A basic implementation of compiling cxx grammar at build time. The main idea is to compile the cxx grammar at build time, and construct the core pieces (Grammar, LRTable) of the pseudoparser based on the compiled data sources. This is a tiny implementation, which is a good start: - defines what the public API should look like; - integrates the cxx grammar compilation workflow with the cmake system; - only nonterminal symbols of the C++ grammar are compiled; everything else still does the real compilation work at runtime, and we can opt in more bits in the future; - splits the monolithic clangPseudo library for better layering. Reviewed By: sammccall Differential Revision: https://reviews.llvm.org/D125667 --- clang-tools-extra/pseudo/CMakeLists.txt | 2 + clang-tools-extra/pseudo/gen/CMakeLists.txt | 10 +++ clang-tools-extra/pseudo/gen/Main.cpp | 89 ++++++++++++++++++++++ clang-tools-extra/pseudo/include/CMakeLists.txt | 29 +++++++ .../pseudo/include/clang-pseudo/cxx/CXX.h | 51 +++++++++++++ clang-tools-extra/pseudo/lib/CMakeLists.txt | 9 +-- clang-tools-extra/pseudo/lib/cxx/CMakeLists.txt | 9 +++ clang-tools-extra/pseudo/lib/cxx/CXX.cpp | 34 +++++++++ .../pseudo/lib/grammar/CMakeLists.txt | 18 +++++ .../pseudo/lib/{ => grammar}/Grammar.cpp | 0 .../pseudo/lib/{ => grammar}/GrammarBNF.cpp | 0 .../pseudo/lib/{ => grammar}/LRGraph.cpp | 0 .../pseudo/lib/{ => grammar}/LRTable.cpp | 0 .../pseudo/lib/{ => grammar}/LRTableBuild.cpp | 0 14 files changed, 246 insertions(+), 5 deletions(-) create mode 100644 clang-tools-extra/pseudo/gen/CMakeLists.txt create mode 100644 clang-tools-extra/pseudo/gen/Main.cpp create mode 100644 clang-tools-extra/pseudo/include/CMakeLists.txt create mode 100644 clang-tools-extra/pseudo/include/clang-pseudo/cxx/CXX.h create mode 100644 clang-tools-extra/pseudo/lib/cxx/CMakeLists.txt create mode 
100644 clang-tools-extra/pseudo/lib/cxx/CXX.cpp create mode 100644 clang-tools-extra/pseudo/lib/grammar/CMakeLists.txt rename clang-tools-extra/pseudo/lib/{ => grammar}/Grammar.cpp (100%) rename clang-tools-extra/pseudo/lib/{ => grammar}/GrammarBNF.cpp (100%) rename clang-tools-extra/pseudo/lib/{ => grammar}/LRGraph.cpp (100%) rename clang-tools-extra/pseudo/lib/{ => grammar}/LRTable.cpp (100%) rename clang-tools-extra/pseudo/lib/{ => grammar}/LRTableBuild.cpp (100%) diff --git a/clang-tools-extra/pseudo/CMakeLists.txt b/clang-tools-extra/pseudo/CMakeLists.txt index 0891cc0..24bc153 100644 --- a/clang-tools-extra/pseudo/CMakeLists.txt +++ b/clang-tools-extra/pseudo/CMakeLists.txt @@ -1,5 +1,7 @@ include_directories(include) include_directories(${CMAKE_CURRENT_BINARY_DIR}/include) +add_subdirectory(include) +add_subdirectory(gen) add_subdirectory(lib) add_subdirectory(tool) add_subdirectory(fuzzer) diff --git a/clang-tools-extra/pseudo/gen/CMakeLists.txt b/clang-tools-extra/pseudo/gen/CMakeLists.txt new file mode 100644 index 0000000..a104e05 --- /dev/null +++ b/clang-tools-extra/pseudo/gen/CMakeLists.txt @@ -0,0 +1,10 @@ +set(LLVM_LINK_COMPONENTS Support) + +add_clang_executable(pseudo-gen + Main.cpp + ) + +target_link_libraries(pseudo-gen + PRIVATE + clangPseudoGrammar + ) diff --git a/clang-tools-extra/pseudo/gen/Main.cpp b/clang-tools-extra/pseudo/gen/Main.cpp new file mode 100644 index 0000000..535f863 --- /dev/null +++ b/clang-tools-extra/pseudo/gen/Main.cpp @@ -0,0 +1,89 @@ +//===--- Main.cpp - Compile BNF grammar -----------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This is a tool to compile a BNF grammar; it is used by the build system to +// generate the necessary data to statically construct core pieces (Grammar, +// LRTable etc) of the LR parser. +// +//===----------------------------------------------------------------------===// + +#include "clang-pseudo/Grammar.h" +#include "llvm/ADT/StringExtras.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/FormatVariadic.h" +#include "llvm/Support/MemoryBuffer.h" +#include + +using llvm::cl::desc; +using llvm::cl::init; +using llvm::cl::opt; +using llvm::cl::values; + +namespace { +enum EmitType { + EmitSymbolList, + EmitGrammarContent, +}; + +opt Grammar("grammar", desc("Parse a BNF grammar file."), + init("")); +opt + Emit(desc("which information to emit:"), + values(clEnumValN(EmitSymbolList, "emit-symbol-list", + "Print nonterminal symbols (default)"), + clEnumValN(EmitGrammarContent, "emit-grammar-content", + "Print the BNF grammar content as a string"))); +std::string readOrDie(llvm::StringRef Path) { + llvm::ErrorOr> Text = + llvm::MemoryBuffer::getFile(Path); + if (std::error_code EC = Text.getError()) { + llvm::errs() << "Error: can't read grammar file '" << Path + << "': " << EC.message() << "\n"; + ::exit(1); + } + return Text.get()->getBuffer().str(); +} +} // namespace + +int main(int argc, char *argv[]) { + llvm::cl::ParseCommandLineOptions(argc, argv, ""); + if (!Grammar.getNumOccurrences()) { + llvm::errs() << "Grammar file must be provided!\n"; + return 1; + } + + std::string GrammarText = readOrDie(Grammar); + std::vector Diags; + auto G = clang::pseudo::Grammar::parseBNF(GrammarText, Diags); + + if (!Diags.empty()) { + llvm::errs() << llvm::join(Diags, "\n"); + return 1; + } + switch (Emit) { + + case EmitSymbolList: + for (clang::pseudo::SymbolID ID = 0; ID < G->table().Nonterminals.size(); 
+ ++ID) { + std::string Name = G->symbolName(ID).str(); + // translation-unit -> translation_unit + std::replace(Name.begin(), Name.end(), '-', '_'); + llvm::outs() << (llvm::formatv("NONTERMINAL({0}, {1})\n", Name, ID)); + } + break; + case EmitGrammarContent: + for (llvm::StringRef Line : llvm::split(GrammarText, '\n')) { + llvm::outs() << '"'; + llvm::outs().write_escaped((Line + "\n").str()); + llvm::outs() << "\"\n"; + } + break; + } + + return 0; +} diff --git a/clang-tools-extra/pseudo/include/CMakeLists.txt b/clang-tools-extra/pseudo/include/CMakeLists.txt new file mode 100644 index 0000000..e2a6f0e --- /dev/null +++ b/clang-tools-extra/pseudo/include/CMakeLists.txt @@ -0,0 +1,29 @@ +# The cxx.bnf grammar file +set(cxx_bnf ${CMAKE_CURRENT_SOURCE_DIR}/../lib/cxx.bnf) + +# Generate inc files. +set(cxx_symbols_inc ${CMAKE_CURRENT_BINARY_DIR}/CXXSymbols.inc) +add_custom_command(OUTPUT ${cxx_symbols_inc} + COMMAND "${CMAKE_RUNTIME_OUTPUT_DIRECTORY}/pseudo-gen" + --grammar ${cxx_bnf} + --emit-symbol-list + > ${cxx_symbols_inc} + COMMENT "Generating nonterminal symbol file for cxx grammar..." + DEPENDS pseudo-gen + VERBATIM) + +set(cxx_bnf_inc ${CMAKE_CURRENT_BINARY_DIR}/CXXBNF.inc) +add_custom_command(OUTPUT ${cxx_bnf_inc} + COMMAND "${CMAKE_RUNTIME_OUTPUT_DIRECTORY}/pseudo-gen" + --grammar ${cxx_bnf} + --emit-grammar-content + > ${cxx_bnf_inc} + COMMENT "Generating bnf string file for cxx grammar..." + DEPENDS pseudo-gen + VERBATIM) + +# add_custom_command does not create a new target; we need to define a target +# explicitly, so that other targets can depend on it. 
+add_custom_target(cxx_gen + DEPENDS ${cxx_symbols_inc} ${cxx_bnf_inc} + VERBATIM) diff --git a/clang-tools-extra/pseudo/include/clang-pseudo/cxx/CXX.h b/clang-tools-extra/pseudo/include/clang-pseudo/cxx/CXX.h new file mode 100644 index 0000000..edeeb63 --- /dev/null +++ b/clang-tools-extra/pseudo/include/clang-pseudo/cxx/CXX.h @@ -0,0 +1,51 @@ +//===--- CXX.h - Public interfaces for the C++ grammar -----------*- C++-*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file defines public interfaces for the C++ grammar +// (pseudo/lib/cxx.bnf). It provides a fast way to access core building pieces +// of the LR parser, e.g. Grammar, LRTable, rather than parsing the grammar +// file at runtime. +// +// We do a compilation of the C++ BNF grammar at build time, and generate +// critical data sources. The implementation of the interfaces is based on the +// generated data sources. +// +// FIXME: not everything is fully compiled yet. The implementation of the +// interfaces is still parsing the grammar file at runtime. +// +//===----------------------------------------------------------------------===// + +#ifndef CLANG_PSEUDO_CXX_CXX_H +#define CLANG_PSEUDO_CXX_CXX_H + +#include "clang-pseudo/Grammar.h" + +namespace clang { +namespace pseudo { +class LRTable; + +namespace cxx { +// Symbol represents nonterminal symbols in the C++ grammar. +// It provides a simple uniform way to access a particular nonterminal. +enum class Symbol : SymbolID { +#define NONTERMINAL(X, Y) X = Y, +#include "CXXSymbols.inc" +#undef NONTERMINAL +}; + +// Returns the C++ grammar. +const Grammar &getGrammar(); +// Returns the corresponding LRTable for the C++ grammar. 
+const LRTable &getLRTable(); + +} // namespace cxx + +} // namespace pseudo +} // namespace clang + +#endif // CLANG_PSEUDO_CXX_CXX_H diff --git a/clang-tools-extra/pseudo/lib/CMakeLists.txt b/clang-tools-extra/pseudo/lib/CMakeLists.txt index 6dc8ed5..f312b10 100644 --- a/clang-tools-extra/pseudo/lib/CMakeLists.txt +++ b/clang-tools-extra/pseudo/lib/CMakeLists.txt @@ -1,3 +1,6 @@ +add_subdirectory(cxx) +add_subdirectory(grammar) + set(LLVM_LINK_COMPONENTS Support) add_clang_library(clangPseudo @@ -5,15 +8,11 @@ add_clang_library(clangPseudo DirectiveTree.cpp Forest.cpp GLR.cpp - Grammar.cpp - GrammarBNF.cpp Lex.cpp - LRGraph.cpp - LRTable.cpp - LRTableBuild.cpp Token.cpp LINK_LIBS clangBasic clangLex + clangPseudoGrammar ) diff --git a/clang-tools-extra/pseudo/lib/cxx/CMakeLists.txt b/clang-tools-extra/pseudo/lib/cxx/CMakeLists.txt new file mode 100644 index 0000000..9e10f2b --- /dev/null +++ b/clang-tools-extra/pseudo/lib/cxx/CMakeLists.txt @@ -0,0 +1,9 @@ +add_clang_library(clangPseudoCXX + CXX.cpp + + DEPENDS + cxx_gen + + LINK_LIBS + clangPseudoGrammar + ) diff --git a/clang-tools-extra/pseudo/lib/cxx/CXX.cpp b/clang-tools-extra/pseudo/lib/cxx/CXX.cpp new file mode 100644 index 0000000..3d594b7 --- /dev/null +++ b/clang-tools-extra/pseudo/lib/cxx/CXX.cpp @@ -0,0 +1,34 @@ +//===--- CXX.cpp - Define public interfaces for C++ grammar ---------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "clang-pseudo/cxx/CXX.h" +#include "clang-pseudo/LRTable.h" + +namespace clang { +namespace pseudo { +namespace cxx { + +static const char *CXXBNF = +#include "CXXBNF.inc" + ; + +const Grammar &getGrammar() { + static std::vector Diags; + static Grammar *G = Grammar::parseBNF(CXXBNF, Diags).release(); + assert(Diags.empty()); + return *G; +} + +const LRTable &getLRTable() { + static LRTable *Table = new LRTable(LRTable::buildSLR(getGrammar())); + return *Table; +} + +} // namespace cxx +} // namespace pseudo +} // namespace clang diff --git a/clang-tools-extra/pseudo/lib/grammar/CMakeLists.txt b/clang-tools-extra/pseudo/lib/grammar/CMakeLists.txt new file mode 100644 index 0000000..d50cb7d --- /dev/null +++ b/clang-tools-extra/pseudo/lib/grammar/CMakeLists.txt @@ -0,0 +1,18 @@ +set(LLVM_LINK_COMPONENTS Support) + +# This library intends to keep as few dependencies as possible; it is a base +# library of the cxx generator, to avoid creating long dep paths in the build +# graph. +add_clang_library(clangPseudoGrammar + Grammar.cpp + GrammarBNF.cpp + LRGraph.cpp + LRTable.cpp + LRTableBuild.cpp + + # FIXME: can we get rid of the clangBasic dependency? We need it for the + # clang::tok::getTokenName and clang::tok::getPunctuatorSpelling functions; we + # could consider reimplementing these functions. 
+ LINK_LIBS + clangBasic + ) diff --git a/clang-tools-extra/pseudo/lib/Grammar.cpp b/clang-tools-extra/pseudo/lib/grammar/Grammar.cpp similarity index 100% rename from clang-tools-extra/pseudo/lib/Grammar.cpp rename to clang-tools-extra/pseudo/lib/grammar/Grammar.cpp diff --git a/clang-tools-extra/pseudo/lib/GrammarBNF.cpp b/clang-tools-extra/pseudo/lib/grammar/GrammarBNF.cpp similarity index 100% rename from clang-tools-extra/pseudo/lib/GrammarBNF.cpp rename to clang-tools-extra/pseudo/lib/grammar/GrammarBNF.cpp diff --git a/clang-tools-extra/pseudo/lib/LRGraph.cpp b/clang-tools-extra/pseudo/lib/grammar/LRGraph.cpp similarity index 100% rename from clang-tools-extra/pseudo/lib/LRGraph.cpp rename to clang-tools-extra/pseudo/lib/grammar/LRGraph.cpp diff --git a/clang-tools-extra/pseudo/lib/LRTable.cpp b/clang-tools-extra/pseudo/lib/grammar/LRTable.cpp similarity index 100% rename from clang-tools-extra/pseudo/lib/LRTable.cpp rename to clang-tools-extra/pseudo/lib/grammar/LRTable.cpp diff --git a/clang-tools-extra/pseudo/lib/LRTableBuild.cpp b/clang-tools-extra/pseudo/lib/grammar/LRTableBuild.cpp similarity index 100% rename from clang-tools-extra/pseudo/lib/LRTableBuild.cpp rename to clang-tools-extra/pseudo/lib/grammar/LRTableBuild.cpp -- 2.7.4