From 48a6aa6c513f471c5931c0f817beca5339ed2425 Mon Sep 17 00:00:00 2001 From: Lei Zhang Date: Fri, 12 Apr 2019 05:57:50 -0700 Subject: [PATCH] [TableGen] Better support for predicate and rewrite rule specification Currently predicates are written with positional placeholders `{N}` and rely on `formatv` as the engine to do substitution. The problem with this approach is that the definitions of those positional placeholders are not consistent; they are entirely up to the defining predicate in question. For example, `{0}` in various attribute constraints is used to mean the attribute, while it is used to mean the builder for certain attribute transformations. This can become very confusing. This CL introduces `tgfmt` as a new mechanism to better support predicate and rewrite rule specification. Instead of entirely relying on positional placeholders, `tgfmt` supports both positional and special placeholders. The former are used for DAG operands. The latter, including $_builder, $_op, $_self, are used as special "hooks" to entities in the context. With this, predicate and rewrite rule specifications can be more consistent and more readable. 
-- PiperOrigin-RevId: 243249671 --- mlir/include/mlir/TableGen/Format.h | 248 +++++++++++++++++++++++++++++++++ mlir/lib/TableGen/CMakeLists.txt | 1 + mlir/lib/TableGen/Format.cpp | 184 ++++++++++++++++++++++++ mlir/unittests/CMakeLists.txt | 1 + mlir/unittests/TableGen/CMakeLists.txt | 5 + mlir/unittests/TableGen/FormatTest.cpp | 166 ++++++++++++++++++++++ 6 files changed, 605 insertions(+) create mode 100644 mlir/include/mlir/TableGen/Format.h create mode 100644 mlir/lib/TableGen/Format.cpp create mode 100644 mlir/unittests/TableGen/CMakeLists.txt create mode 100644 mlir/unittests/TableGen/FormatTest.cpp diff --git a/mlir/include/mlir/TableGen/Format.h b/mlir/include/mlir/TableGen/Format.h new file mode 100644 index 0000000..75ace15 --- /dev/null +++ b/mlir/include/mlir/TableGen/Format.h @@ -0,0 +1,248 @@ +//===- Format.h - Utilities for String Format -------------------*- C++ -*-===// +// +// Copyright 2019 The MLIR Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// ============================================================================= +// +// This file declares utilities for formatting strings. They are specially +// tailored to the needs of TableGen'ing op definitions and rewrite rules, +// so they are not expected to be used as widely applicable utilities. 
+// +//===----------------------------------------------------------------------===// + +#ifndef MLIR_TABLEGEN_FORMAT_H_ +#define MLIR_TABLEGEN_FORMAT_H_ + +#include "mlir/Support/LLVM.h" +#include "llvm/ADT/DenseMap.h" +#include "llvm/Support/FormatVariadic.h" + +namespace mlir { +namespace tblgen { + +/// Format context containing substitutions for special placeholders. +/// +/// This context divides special placeholders into two categories: builtin ones +/// and custom ones. +/// +/// Builtin placeholders are baked into `FmtContext` and each one of them has a +/// dedicated setter. They can be used in all dialects. Their names follow the +/// convention of `$_`. The rationale of the leading underscore is to +/// avoid confusion and name collision: op arguments/attributes/results are +/// named as $, and we can potentially support referencing those entities +/// directly in the format template in the future. +// +/// Custom ones are registered by dialect-specific TablGen backends and use the +/// same unified setter. 
+class FmtContext { +public: + // Placeholder kinds + enum class PHKind : char { + None, + Custom, // For custom placeholders + Builder, // For the $_builder placeholder + Op, // For the $_op placeholder + Self, // For the $_self placeholder + }; + + FmtContext() = default; + + // Setter for custom placeholders + FmtContext &addSubst(StringRef placeholder, Twine subst); + + // Setters for builtin placeholders + FmtContext &withBuilder(Twine subst); + FmtContext &withOp(Twine subst); + FmtContext &withSelf(Twine subst); + + Optional getSubstFor(PHKind placeholder) const; + Optional getSubstFor(StringRef placeholder) const; + + static PHKind getPlaceHolderKind(StringRef str); + +private: + struct PHKindInfo : DenseMapInfo { + using CharInfo = DenseMapInfo; + + static inline PHKind getEmptyKey() { + return static_cast(CharInfo::getEmptyKey()); + } + static inline PHKind getTombstoneKey() { + return static_cast(CharInfo::getTombstoneKey()); + } + static unsigned getHashValue(const PHKind &val) { + return CharInfo::getHashValue(static_cast(val)); + } + + static bool isEqual(const PHKind &lhs, const PHKind &rhs) { + return lhs == rhs; + } + }; + + llvm::SmallDenseMap builtinSubstMap; + llvm::StringMap customSubstMap; +}; + +/// Struct representing a replacement segment for the formatted string. It can +/// be a segment of the formatting template (for `Literal`) or a replacement +/// parameter (for `PositionalPH` and `SpecialPH`). 
+struct FmtReplacement {
+  enum class Type { Empty, Literal, PositionalPH, SpecialPH };
+
+  FmtReplacement() = default;
+  // Literal segment: `literal` is emitted verbatim.
+  explicit FmtReplacement(StringRef literal)
+      : type(Type::Literal), spec(literal) {}
+  // Positional placeholder: `spec` is the placeholder text as written in the
+  // format string and `index` selects the replacement parameter.
+  FmtReplacement(StringRef spec, size_t index)
+      : type(Type::PositionalPH), spec(spec), index(index) {}
+  // Special placeholder: `spec` is the placeholder text as written in the
+  // format string and `placeholder` is its kind.
+  FmtReplacement(StringRef spec, FmtContext::PHKind placeholder)
+      : type(Type::SpecialPH), spec(spec), placeholder(placeholder) {}
+
+  Type type = Type::Empty;
+  StringRef spec;
+  size_t index = 0;
+  FmtContext::PHKind placeholder = FmtContext::PHKind::None;
+};
+
+/// Type-erased base of the object returned by `tgfmt`. It holds the parsed
+/// format string and the format context and performs the actual formatting.
+class FmtObjectBase {
+private:
+  static std::pair<FmtReplacement, StringRef> splitFmtSegment(StringRef fmt);
+  static std::vector<FmtReplacement> parseFormatString(StringRef fmt);
+
+protected:
+  // The parameters are stored in a std::tuple, which does not provide runtime
+  // indexing capabilities. In order to enable runtime indexing, we use this
+  // structure to put the parameters into a std::vector. Since the parameters
+  // are not all the same type, we use some type-erasure by wrapping the
+  // parameters in a template class that derives from a non-template superclass.
+  // Essentially, we are converting a std::tuple<Derived<Ts...>> to a
+  // std::vector<Base*>.
+  struct CreateAdapters {
+    template <typename... Ts>
+    std::vector<llvm::detail::format_adapter *> operator()(Ts &... items) {
+      return std::vector<llvm::detail::format_adapter *>{&items...};
+    }
+  };
+
+  StringRef fmt;
+  const FmtContext *context;
+  std::vector<llvm::detail::format_adapter *> adapters;
+  std::vector<FmtReplacement> replacements;
+
+public:
+  // `numParams` is kept for interface compatibility only: the adapters are
+  // installed by the derived FmtObject, which knows the parameter types.
+  FmtObjectBase(StringRef fmt, const FmtContext *ctx, size_t numParams)
+      : fmt(fmt), context(ctx), replacements(parseFormatString(fmt)) {
+    (void)numParams;
+  }
+
+  FmtObjectBase(const FmtObjectBase &that) = delete;
+
+  FmtObjectBase(FmtObjectBase &&that)
+      : fmt(std::move(that.fmt)), context(that.context),
+        adapters(), // adapters are initialized by FmtObject
+        replacements(std::move(that.replacements)) {}
+
+  /// Writes the formatted text to `s`.
+  void format(llvm::raw_ostream &s) const;
+
+  /// Returns the formatted text as a std::string.
+  std::string str() const {
+    std::string result;
+    llvm::raw_string_ostream s(result);
+    format(s);
+    return s.str();
+  }
+
+  /// Returns the formatted text as a SmallString with inline capacity `N`.
+  template <unsigned N> SmallString<N> sstr() const {
+    SmallString<N> result;
+    llvm::raw_svector_ostream s(result);
+    format(s);
+    return result;
+  }
+
+  template <unsigned N> operator SmallString<N>() const { return sstr<N>(); }
+
+  operator std::string() const { return str(); }
+};
+
+template <typename Tuple> class FmtObject : public FmtObjectBase {
+  // Storage for the parameter adapters. Since the base class erases the type
+  // of the parameters, we have to own the storage for the parameters here, and
+  // have the base class store type-erased pointers into this tuple.
+  Tuple parameters;
+
+public:
+  FmtObject(StringRef fmt, const FmtContext *ctx, Tuple &&params)
+      : FmtObjectBase(fmt, ctx, std::tuple_size<Tuple>::value),
+        parameters(std::move(params)) {
+    adapters = llvm::apply_tuple(CreateAdapters(), parameters);
+  }
+
+  FmtObject(FmtObject const &that) = delete;
+
+  FmtObject(FmtObject &&that)
+      : FmtObjectBase(std::move(that)), parameters(std::move(that.parameters)) {
+    // The adapters must point into *this* object's tuple, so rebuild them
+    // instead of taking over the pointers held by `that`.
+    adapters = llvm::apply_tuple(CreateAdapters(), parameters);
+  }
+};
+
+/// Formats text by substituting placeholders in format string with replacement
+/// parameters.
+/// +/// There are two categories of placeholders accepted, both led by a '$' sign: +/// +/// 1. Positional placeholder: $[0-9]+ +/// 2. Special placeholder: $[a-zA-Z_][a-zA-Z0-9_]* +/// +/// Replacement parameters for positional placeholders are supplied as the +/// `vals` parameter pack with 1:1 mapping. That is, $0 will be replaced by the +/// first parameter in `vals`, $1 by the second one, and so on. Note that you +/// can use the positional placeholders in any order and repeat any times, for +/// example, "$2 $1 $1 $0" is accepted. +/// +/// Replacement parameters for special placeholders are supplied using the `ctx` +/// format context. +/// +/// The `fmt` is recorded as a `StringRef` inside the returned `FmtObject`. +/// The caller needs to make sure the underlying data is available when the +/// `FmtObject` is used. +/// +/// `ctx` accepts a nullptr if there is no special placeholder is used. +/// +/// If no substitution is provided for a placeholder or any error happens during +/// format string parsing or replacement, the placeholder will be outputted +/// as-is with an additional marker '', to aid debugging. +/// +/// To print a '$' literally, escape it with '$$'. +/// +/// This utility function is inspired by LLVM formatv(), with modifications +/// specially tailored for TableGen C++ generation usage: +/// +/// 1. This utility use '$' instead of '{' and '}' for denoting the placeholder +/// because '{' and '}' are frequently used in C++ code. +/// 2. This utility does not support format layout because it is rarely needed +/// in C++ code generation. +template +inline auto tgfmt(StringRef fmt, const FmtContext *ctx, Ts &&... 
vals) + -> FmtObject(vals))...))> { + using ParamTuple = decltype(std::make_tuple( + llvm::detail::build_format_adapter(std::forward(vals))...)); + return FmtObject( + fmt, ctx, + std::make_tuple( + llvm::detail::build_format_adapter(std::forward(vals))...)); +} + +} // end namespace tblgen +} // end namespace mlir + +#endif // MLIR_TABLEGEN_FORMAT_H_ diff --git a/mlir/lib/TableGen/CMakeLists.txt b/mlir/lib/TableGen/CMakeLists.txt index 6522ab0..63b1e2e 100644 --- a/mlir/lib/TableGen/CMakeLists.txt +++ b/mlir/lib/TableGen/CMakeLists.txt @@ -2,6 +2,7 @@ add_llvm_library(LLVMMLIRTableGen Argument.cpp Attribute.cpp Constraint.cpp + Format.cpp Operator.cpp OpTrait.cpp Pattern.cpp diff --git a/mlir/lib/TableGen/Format.cpp b/mlir/lib/TableGen/Format.cpp new file mode 100644 index 0000000..6dd260a --- /dev/null +++ b/mlir/lib/TableGen/Format.cpp @@ -0,0 +1,184 @@ +//===- Format.cpp - Utilities for String Format ---------------------------===// +// +// Copyright 2019 The MLIR Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// ============================================================================= +// +// This file defines utilities for formatting strings. They are specially +// tailored to the needs of TableGen'ing op definitions and rewrite rules, +// so they are not expected to be used as widely applicable utilities. 
+// +//===----------------------------------------------------------------------===// + +#include "mlir/TableGen/Format.h" + +using namespace mlir; +using namespace mlir::tblgen; + +// Marker to indicate an error happened when replacing a placeholder. +const char *const kMarkerForNoSubst = ""; + +FmtContext &tblgen::FmtContext::addSubst(StringRef placeholder, Twine subst) { + customSubstMap[placeholder] = subst.str(); + return *this; +} + +FmtContext &tblgen::FmtContext::withBuilder(Twine subst) { + builtinSubstMap[PHKind::Builder] = subst.str(); + return *this; +} + +FmtContext &tblgen::FmtContext::withOp(Twine subst) { + builtinSubstMap[PHKind::Op] = subst.str(); + return *this; +} + +FmtContext &tblgen::FmtContext::withSelf(Twine subst) { + builtinSubstMap[PHKind::Self] = subst.str(); + return *this; +} + +Optional +tblgen::FmtContext::getSubstFor(FmtContext::PHKind placeholder) const { + if (placeholder == FmtContext::PHKind::None || + placeholder == FmtContext::PHKind::Custom) + return {}; + auto it = builtinSubstMap.find(placeholder); + if (it == builtinSubstMap.end()) + return {}; + return StringRef(it->second); +} + +Optional +tblgen::FmtContext::getSubstFor(StringRef placeholder) const { + auto it = customSubstMap.find(placeholder); + if (it == customSubstMap.end()) + return {}; + return StringRef(it->second); +} + +FmtContext::PHKind tblgen::FmtContext::getPlaceHolderKind(StringRef str) { + return llvm::StringSwitch(str) + .Case("_builder", FmtContext::PHKind::Builder) + .Case("_op", FmtContext::PHKind::Op) + .Case("_self", FmtContext::PHKind::Self) + .Case("", FmtContext::PHKind::None) + .Default(FmtContext::PHKind::Custom); +} + +std::pair +tblgen::FmtObjectBase::splitFmtSegment(StringRef fmt) { + size_t begin = fmt.find_first_of('$'); + if (begin == StringRef::npos) { + // No placeholders: the whole format string should be returned as a + // literal string. 
+ return {FmtReplacement{fmt}, StringRef()}; + } + if (begin != 0) { + // The first placeholder is not at the beginning: we can split the format + // string into a literal string and the rest. + return {FmtReplacement{fmt.substr(0, begin)}, fmt.substr(begin)}; + } + + // The first placeholder is at the beginning + + if (fmt.size() == 1) { + // The whole format string just contains '$': treat as literal. + return {FmtReplacement{fmt}, StringRef()}; + } + + // Allow escaping dollar with '$$' + if (fmt[1] == '$') { + return {FmtReplacement{fmt.substr(0, 1)}, fmt.substr(2)}; + } + + // First try to see if it's a positional placeholder, and then handle special + // placeholders. + + size_t end = fmt.find_if_not([](char c) { return std::isdigit(c); }, 1); + if (end != 1) { + // We have a positional placeholder. Parse the index. + size_t index = 0; + if (fmt.substr(1, end - 1).consumeInteger(0, index)) { + llvm_unreachable("invalid replacement sequence index"); + } + + if (end == StringRef::npos) { + // All the remaining characters are part of the positional placeholder. + return {FmtReplacement{fmt, index}, StringRef()}; + } + return {FmtReplacement{fmt.substr(0, end), index}, fmt.substr(end)}; + } + + end = fmt.find_if_not([](char c) { return std::isalnum(c) || c == '_'; }, 1); + auto placeholder = FmtContext::getPlaceHolderKind(fmt.substr(1, end - 1)); + if (end == StringRef::npos) { + // All the remaining characters are part of the special placeholder. 
+ return {FmtReplacement{fmt, placeholder}, StringRef()}; + } + return {FmtReplacement{fmt.substr(0, end), placeholder}, fmt.substr(end)}; +} + +std::vector FmtObjectBase::parseFormatString(StringRef fmt) { + std::vector replacements; + FmtReplacement repl; + while (!fmt.empty()) { + std::tie(repl, fmt) = splitFmtSegment(fmt); + if (repl.type != FmtReplacement::Type::Empty) + replacements.push_back(repl); + } + return replacements; +} + +void FmtObjectBase::format(raw_ostream &s) const { + for (auto &repl : replacements) { + if (repl.type == FmtReplacement::Type::Empty) + continue; + + if (repl.type == FmtReplacement::Type::Literal) { + s << repl.spec; + continue; + } + + if (repl.type == FmtReplacement::Type::SpecialPH) { + if (repl.placeholder == FmtContext::PHKind::None) { + s << repl.spec; + } else if (!context) { + // We need the context to replace special placeholders. + s << repl.spec << kMarkerForNoSubst; + } else { + Optional subst; + if (repl.placeholder == FmtContext::PHKind::Custom) { + // Skip the leading '$' sign for the custom placeholder + subst = context->getSubstFor(repl.spec.substr(1)); + } else { + subst = context->getSubstFor(repl.placeholder); + } + if (subst) + s << *subst; + else + s << repl.spec << kMarkerForNoSubst; + } + continue; + } + + assert(repl.type == FmtReplacement::Type::PositionalPH); + + if (repl.index >= adapters.size()) { + s << repl.spec << kMarkerForNoSubst; + continue; + } + adapters[repl.index]->format(s, /*Options=*/""); + } +} diff --git a/mlir/unittests/CMakeLists.txt b/mlir/unittests/CMakeLists.txt index e9fdd62..c5568f8 100644 --- a/mlir/unittests/CMakeLists.txt +++ b/mlir/unittests/CMakeLists.txt @@ -8,3 +8,4 @@ endfunction() add_subdirectory(Dialect) add_subdirectory(IR) add_subdirectory(Pass) +add_subdirectory(TableGen) diff --git a/mlir/unittests/TableGen/CMakeLists.txt b/mlir/unittests/TableGen/CMakeLists.txt new file mode 100644 index 0000000..c2c406e --- /dev/null +++ b/mlir/unittests/TableGen/CMakeLists.txt 
@@ -0,0 +1,5 @@
+add_mlir_unittest(MLIRTableGenTests
+  FormatTest.cpp
+)
+target_link_libraries(MLIRTableGenTests
+  PRIVATE LLVMMLIRTableGen)
diff --git a/mlir/unittests/TableGen/FormatTest.cpp b/mlir/unittests/TableGen/FormatTest.cpp
new file mode 100644
index 0000000..7338a8f
--- /dev/null
+++ b/mlir/unittests/TableGen/FormatTest.cpp
@@ -0,0 +1,166 @@
+//===- FormatTest.cpp - TableGen Format Utility Tests ---------------------===//
+//
+// Copyright 2019 The MLIR Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+// =============================================================================
+
+#include "mlir/TableGen/Format.h"
+#include "gmock/gmock.h"
+
+using mlir::tblgen::FmtContext;
+using mlir::tblgen::tgfmt;
+using ::testing::StrEq;
+
+// An empty format string produces an empty result
+TEST(FormatTest, EmptyFmtStr) {
+  FmtContext ctx;
+  std::string result = tgfmt("", &ctx);
+  EXPECT_TRUE(result.empty());
+}
+
+// Allow extra unused positional parameters
+TEST(FormatTest, EmptyFmtStrExtraParams) {
+  FmtContext ctx;
+  std::string result = tgfmt("", &ctx, "a", "b", "c");
+  EXPECT_TRUE(result.empty());
+}
+
+// Allow unused placeholder substitution in context
+TEST(FormatTest, EmptyFmtStrPopulatedCtx) {
+  FmtContext ctx;
+  ctx.withBuilder("builder");
+  std::string result = tgfmt("", &ctx);
+  EXPECT_TRUE(result.empty());
+}
+
+// A format string without any '$' is printed unchanged, braces included
+TEST(FormatTest, LiteralFmtStr) {
+  FmtContext ctx;
+  std::string result = tgfmt("void foo {}", &ctx);
+  EXPECT_THAT(result, StrEq("void foo {}"));
+}
+
+// Print single dollar literally
+TEST(FormatTest, AdjacentDollar) {
+  FmtContext ctx;
+  std::string result = tgfmt("$", &ctx);
+  EXPECT_THAT(result, StrEq("$"));
+}
+
+// Print dangling dollar literally
+TEST(FormatTest, DanglingDollar) {
+  FmtContext ctx;
+  std::string result = tgfmt("foo bar baz$", &ctx);
+  EXPECT_THAT(result, StrEq("foo bar baz$"));
+}
+
+// Allow escape dollars with '$$'
+TEST(FormatTest, EscapeDollars) {
+  FmtContext ctx;
+  std::string result =
+      tgfmt("$$ $$$$ $$$0 $$$_self", &ctx.withSelf("self"), "-0");
+  EXPECT_THAT(result, StrEq("$ $$ $-0 $self"));
+}
+
+// Positional placeholders accept parameters of different types
+TEST(FormatTest, PositionalFmtStr) {
+  FmtContext ctx;
+  std::string b = "b";
+  int c = 42;
+  char d = 'd';
+  std::string result = tgfmt("$0 $1 $2 $3", &ctx, "a", b, c + 1, d);
+  EXPECT_THAT(result, StrEq("a b 43 d"));
+}
+
+// Output the placeholder if missing substitution. Note that "%1" is not a
+// placeholder at all (only '$' introduces one), so it is printed as a literal.
+TEST(FormatTest, PositionalFmtStrMissingParams) {
+  FmtContext ctx;
+  std::string result = tgfmt("$0 %1 $2", &ctx);
+  EXPECT_THAT(result, StrEq("$0 %1 $2"));
+}
+
+// Allow flexible reference of positional parameters
+TEST(FormatTest, PositionalFmtStrFlexibleRef) {
+  FmtContext ctx;
+  std::string result = tgfmt("$2 $0 $2", &ctx, "a", "b", "c");
+  EXPECT_THAT(result, StrEq("c a c"));
+}
+
+// A positional placeholder ends at the first non-digit character
+TEST(FormatTest, PositionalFmtStrNoWhitespace) {
+  FmtContext ctx;
+  std::string result = tgfmt("foo$0bar", &ctx, "-");
+  EXPECT_THAT(result, StrEq("foo-bar"));
+}
+
+// Builtin placeholder $_self
+TEST(FormatTest, PlaceHolderFmtStrWithSelf) {
+  FmtContext ctx;
+  std::string result = tgfmt("$_self", &ctx.withSelf("sss"));
+  EXPECT_THAT(result, StrEq("sss"));
+}
+
+// Builtin placeholder $_builder
+TEST(FormatTest, PlaceHolderFmtStrWithBuilder) {
+  FmtContext ctx;
+
+  std::string result = tgfmt("$_builder", &ctx.withBuilder("bbb"));
+  EXPECT_THAT(result, StrEq("bbb"));
+}
+
+// Builtin placeholder $_op
+TEST(FormatTest, PlaceHolderFmtStrWithOp) {
+  FmtContext ctx;
+  std::string result = tgfmt("$_op", &ctx.withOp("ooo"));
+  EXPECT_THAT(result, StrEq("ooo"));
+}
+
+// Output the placeholder if no context is given at all
+TEST(FormatTest, PlaceHolderMissingCtx) {
+  std::string result = tgfmt("$_op", nullptr);
+  EXPECT_THAT(result, StrEq("$_op"));
+}
+
+// Output the placeholder if the context has no substitution for it
+TEST(FormatTest, PlaceHolderMissingSubst) {
+  FmtContext ctx;
+  std::string result = tgfmt("$_op", &ctx.withBuilder("builder"));
+  EXPECT_THAT(result, StrEq("$_op"));
+}
+
+// Test commonly used delimiters in C++
+TEST(FormatTest, PlaceHolderFmtStrDelimiter) {
+  FmtContext ctx;
+  ctx.addSubst("m", "");
+  std::string result = tgfmt("$m{$m($m[$m]$m)$m}$m|", &ctx);
+  EXPECT_THAT(result, StrEq("{([])}|"));
+}
+
+// Test allowed characters in placeholder symbol
+TEST(FormatTest, CustomPlaceHolderFmtStrPlaceHolderChars) {
+  FmtContext ctx;
+  ctx.addSubst("m", "0 ");
+  ctx.addSubst("m1", "1 ");
+  ctx.addSubst("m2C", "2 ");
+  ctx.addSubst("M_3", "3 ");
+  std::string result = tgfmt("$m$m1$m2C$M_3", &ctx);
+  EXPECT_THAT(result, StrEq("0 1 2 3 "));
+}
+
+// Output custom placeholders that were never registered in the context
+TEST(FormatTest, CustomPlaceHolderFmtStrUnregisteredPlaceHolders) {
+  FmtContext ctx;
+  std::string result = tgfmt("foo($awesome, $param)", &ctx);
+  EXPECT_THAT(result,
+              StrEq("foo($awesome, $param)"));
+}
+
+// Builtin and positional placeholders can be mixed in one format string
+TEST(FormatTest, MixedFmtStr) {
+  FmtContext ctx;
+  ctx.withBuilder("bbb");
+
+  std::string result = tgfmt("$_builder.build($_self, {$0, $1})",
+                             &ctx.withSelf("sss"), "a", "b");
+  EXPECT_THAT(result, StrEq("bbb.build(sss, {a, b})"));
+}
-- 
2.7.4