--- /dev/null
+//===- Markup.h -------------------------------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+///
+/// \file
+/// This file declares the log symbolizer markup data model and parser.
+///
+/// \todo Add a link to the reference documentation once added.
+///
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_DEBUGINFO_SYMBOLIZE_MARKUP_H
+#define LLVM_DEBUGINFO_SYMBOLIZE_MARKUP_H
+
+#include <iostream>
+
+#include "llvm/ADT/Optional.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/Support/Regex.h"
+
+namespace llvm {
+namespace symbolize {
+
+/// A single node of parsed symbolizer markup.
+///
+/// A node is either a markup element or a region of text outside any element.
+/// For a text region only the Text field is set. ANSI SGR control codes are
+/// reported as text regions as well: a detected control code makes up the
+/// entire Text field of its own node, and the text around it is reported in
+/// separate nodes before and after it.
+struct MarkupNode {
+  /// The exact input text covered by this node.
+  StringRef Text;
+
+  /// The tag if this node is an element; empty otherwise.
+  StringRef Tag;
+
+  /// The contents of each field if this node is an element with fields; empty
+  /// otherwise.
+  SmallVector<StringRef> Fields;
+
+  /// Two nodes are equal when their text, tag, and field lists all match.
+  bool operator==(const MarkupNode &Other) const {
+    if (Text != Other.Text || Tag != Other.Tag)
+      return false;
+    return Fields == Other.Fields;
+  }
+  bool operator!=(const MarkupNode &Other) const {
+    return !operator==(Other);
+  }
+};
+
+/// Parser that turns log lines containing symbolizer markup into a sequence of
+/// MarkupNode values.
+class MarkupParser {
+public:
+  MarkupParser();
+
+  /// Parses a single \p Line of input.
+  ///
+  /// Any nodes of the previous parseLine() call that were not yet retrieved
+  /// through nextNode() are thrown away. Returned nodes may point into
+  /// \p Line, so the caller has to keep that string alive until the nodes are
+  /// no longer used.
+  void parseLine(StringRef Line);
+
+  /// Hands out the nodes of the most recent parseLine() call one at a time.
+  ///
+  /// A call to nextNode() may invalidate the contents of the node returned by
+  /// the call before it.
+  ///
+  /// \returns the next markup node, or None once all nodes were handed out.
+  Optional<MarkupNode> nextNode() {
+    // An unset cursor means reading starts at the front of the buffer.
+    const size_t Idx = NextIdx ? *NextIdx : 0;
+    if (Idx == Buffer.size()) {
+      // Everything was handed out; drop the nodes and forget the cursor.
+      NextIdx.reset();
+      Buffer.clear();
+      return None;
+    }
+    NextIdx = Idx + 1;
+    return std::move(Buffer[Idx]);
+  }
+
+private:
+  Optional<MarkupNode> parseElement(StringRef Line);
+  void parseTextOutsideMarkup(StringRef Text);
+
+  // Nodes parsed from the current line.
+  SmallVector<MarkupNode> Buffer;
+
+  // Index into Buffer of the node to return next, or None if nextNode() has not
+  // been called yet.
+  Optional<size_t> NextIdx;
+
+  // Regular expression matching the supported ANSI SGR escape sequences.
+  const Regex SGRSyntax;
+};
+
+} // end namespace symbolize
+} // end namespace llvm
+
+#endif // LLVM_DEBUGINFO_SYMBOLIZE_MARKUP_H
--- /dev/null
+//===- lib/DebugInfo/Symbolize/Markup.cpp ------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+///
+/// \file
+/// This file defines the log symbolizer markup data model and parser.
+///
+//===----------------------------------------------------------------------===//
+
+#include "llvm/DebugInfo/Symbolize/Markup.h"
+
+#include "llvm/ADT/StringExtras.h"
+
+namespace llvm {
+namespace symbolize {
+
+// Pattern for the SGR control codes the parser recognizes: an ESC byte (the
+// "\033" C escape) and a literal '[', followed by a parameter of 0, 1, or 30
+// through 37, followed by 'm'. The '[' is backslash-escaped so the regular
+// expression treats it as a literal character.
+//
+// Matches the following:
+// "\033[0m"
+// "\033[1m"
+// "\033[30m" -- "\033[37m"
+static const char SGRSyntaxStr[] = "\033\\[([0-1]|3[0-7])m";
+
+// Builds the SGR regular expression from SGRSyntaxStr when the parser is
+// constructed.
+MarkupParser::MarkupParser() : SGRSyntax(SGRSyntaxStr) {}
+
+// Returns the leading part of Str that ends just before Pos. Pos is expected
+// to point into Str.
+static StringRef takeTo(StringRef Str, StringRef::iterator Pos) {
+  const size_t PrefixLen = Pos - Str.begin();
+  return Str.substr(0, PrefixLen);
+}
+// Shrinks Str in place so that it starts at Pos. Pos is expected to point into
+// Str.
+static void advanceTo(StringRef &Str, StringRef::iterator Pos) {
+  const size_t Skipped = Pos - Str.begin();
+  Str = Str.drop_front(Skipped);
+}
+
+// Splits Line into markup elements and the text between them, and stores the
+// resulting nodes, in input order, in Buffer for retrieval via nextNode().
+// Whatever was left in Buffer from an earlier line is discarded.
+void MarkupParser::parseLine(StringRef Line) {
+  Buffer.clear();
+  // The read cursor belongs to the nodes that were just discarded. Left in
+  // place, it would make nextNode() skip the first nodes of this line, or index
+  // past the end of Buffer if this line yields fewer nodes than were already
+  // read from the previous one.
+  NextIdx.reset();
+
+  while (!Line.empty()) {
+    // Find the first valid markup element, if any.
+    Optional<MarkupNode> Element = parseElement(Line);
+    if (!Element) {
+      // The line doesn't contain any more markup elements, so emit the rest of
+      // it as text.
+      parseTextOutsideMarkup(Line);
+      return;
+    }
+
+    // Remember the element's extent before the node is moved into the buffer,
+    // so nothing is read from a moved-from object afterwards.
+    const StringRef ElementText = Element->Text;
+    parseTextOutsideMarkup(takeTo(Line, ElementText.begin()));
+    Buffer.push_back(std::move(*Element));
+    advanceTo(Line, ElementText.end());
+  }
+}
+
+// Scans Line for the first well-formed markup element, i.e. a "{{{" ... "}}}"
+// span whose content starts with a non-empty tag, and returns it as a node.
+// Spans with an empty tag are skipped. Returns None when no such element is
+// left in the line.
+Optional<MarkupNode> MarkupParser::parseElement(StringRef Line) {
+  for (;;) {
+    // Locate the next candidate by its opening and closing markers.
+    const size_t OpenPos = Line.find("{{{");
+    if (OpenPos == StringRef::npos)
+      return None;
+    const size_t ClosePos = Line.find("}}}", OpenPos + 3);
+    if (ClosePos == StringRef::npos)
+      return None;
+    const size_t PastClose = ClosePos + 3;
+
+    MarkupNode Node;
+    Node.Text = Line.slice(OpenPos, PastClose);
+    const StringRef Inner = Node.Text.drop_front(3).drop_back(3);
+
+    // The candidate is consumed whether or not it turns out to be valid, so a
+    // rejected one is never looked at again.
+    Line = Line.substr(PastClose);
+
+    // The tag is everything up to the first ':'; it must not be empty.
+    const auto TagAndRest = Inner.split(':');
+    Node.Tag = TagAndRest.first;
+    if (Node.Tag.empty())
+      continue;
+
+    // The remainder holds the ':'-separated fields. A content that merely ends
+    // in ':' denotes exactly one empty field.
+    const StringRef Rest = TagAndRest.second;
+    if (Rest.empty()) {
+      if (Inner.back() == ':')
+        Node.Fields.push_back(Rest);
+    } else {
+      Rest.split(Node.Fields, ":");
+    }
+
+    return Node;
+  }
+}
+
+// Creates a node for plain text: only the Text member is set, the tag and the
+// field list stay empty.
+static MarkupNode textNode(StringRef Text) {
+  return MarkupNode{Text, /*Tag=*/StringRef(), /*Fields=*/{}};
+}
+
+// Handles a stretch of text that is known not to contain markup elements. The
+// text can still contain SGR control codes, so it is split further: each
+// control code becomes a text node of its own, and so does every non-empty run
+// of text before, between, and after the codes. All nodes are appended to
+// Buffer.
+void MarkupParser::parseTextOutsideMarkup(StringRef Text) {
+  if (Text.empty())
+    return;
+
+  SmallVector<StringRef> Captures;
+  while (SGRSyntax.match(Text, &Captures)) {
+    // The first capture is the complete control code.
+    const StringRef Code = Captures.front();
+
+    // Emit whatever precedes the control code, if anything.
+    const StringRef Leading = takeTo(Text, Code.begin());
+    if (!Leading.empty())
+      Buffer.push_back(textNode(Leading));
+
+    Buffer.push_back(textNode(Code));
+    advanceTo(Text, Code.end());
+  }
+
+  // Whatever follows the last control code is plain text.
+  if (!Text.empty())
+    Buffer.push_back(textNode(Text));
+}
+
+} // end namespace symbolize
+} // end namespace llvm
--- /dev/null
+
+//===- unittest/DebugInfo/Symbolizer/MarkupTest.cpp - Markup parser tests -===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/DebugInfo/Symbolize/Markup.h"
+
+#include "llvm/ADT/Optional.h"
+#include "llvm/ADT/SmallString.h"
+#include "llvm/ADT/Twine.h"
+#include "llvm/Support/FormatVariadic.h"
+
+#include "gmock/gmock.h"
+#include "gtest/gtest.h"
+
+namespace {
+
+using namespace llvm;
+using namespace llvm::symbolize;
+using namespace testing;
+
+// Builds a matcher for a MarkupNode whose Text, Tag, and Fields members match
+// the given arguments. With the defaults, the tag must be empty and the field
+// list must be empty, which is what a text node looks like.
+Matcher<MarkupNode> isNode(StringRef Text, StringRef Tag = "",
+                           Matcher<SmallVector<StringRef>> Fields = IsEmpty()) {
+  const auto TextIs = Field("Text", &MarkupNode::Text, Text);
+  const auto TagIs = Field("Tag", &MarkupNode::Tag, Tag);
+  const auto FieldsAre = Field("Fields", &MarkupNode::Fields, Fields);
+  return AllOf(TextIs, TagIs, FieldsAre);
+}
+
+// A parser that was never given a line has no nodes to hand out.
+TEST(SymbolizerMarkup, NoLines) {
+  MarkupParser Parser;
+  EXPECT_EQ(Parser.nextNode(), None);
+}
+
+// Lines without any valid element or supported SGR code must come back
+// unchanged as a single text node.
+TEST(SymbolizerMarkup, LinesWithoutMarkup) {
+  MarkupParser Parser;
+
+  // Parses Line and expects it back verbatim as the one and only node.
+  const auto ExpectPlainText = [&Parser](StringRef Line) {
+    Parser.parseLine(Line);
+    EXPECT_THAT(Parser.nextNode(), testing::Optional(isNode(Line)));
+    EXPECT_THAT(Parser.nextNode(), None);
+  };
+
+  ExpectPlainText("text");
+
+  // A line that is parsed but never read is replaced by the following line.
+  Parser.parseLine("discarded");
+  ExpectPlainText("kept");
+
+  // Unbalanced or incomplete element markers.
+  ExpectPlainText("{{{");
+  ExpectPlainText("{{{}}");
+  ExpectPlainText("{{}}}");
+
+  // Complete markers, but no tag.
+  ExpectPlainText("{{{}}}");
+  ExpectPlainText("{{{:field}}}");
+
+  // A tag, but the element is never closed properly.
+  ExpectPlainText("{{{tag:");
+  ExpectPlainText("{{{tag:field}}");
+
+  // Escape sequences outside the set accepted by the parser.
+  ExpectPlainText("a\033[2mb");
+  ExpectPlainText("a\033[38mb");
+  ExpectPlainText("a\033[4mb");
+}
+
+// Lines containing valid elements and/or supported SGR codes are split into
+// the expected sequence of nodes.
+TEST(SymbolizerMarkup, LinesWithMarkup) {
+  MarkupParser Parser;
+
+  // Expects the parser's next node to match Node.
+  const auto ExpectNext = [&Parser](const Matcher<MarkupNode> &Node) {
+    EXPECT_THAT(Parser.nextNode(), testing::Optional(Node));
+  };
+  // Expects the parser to have no nodes left for the current line.
+  const auto ExpectEnd = [&Parser] {
+    EXPECT_THAT(Parser.nextNode(), None);
+  };
+
+  // An element consisting of a tag only.
+  Parser.parseLine("{{{tag}}}");
+  ExpectNext(isNode("{{{tag}}}", "tag"));
+  ExpectEnd();
+
+  // An element with several fields.
+  Parser.parseLine("{{{tag:f1:f2:f3}}}");
+  ExpectNext(isNode("{{{tag:f1:f2:f3}}}", "tag", ElementsAre("f1", "f2", "f3")));
+  ExpectEnd();
+
+  // A trailing ':' yields a single empty field.
+  Parser.parseLine("{{{tag:}}}");
+  ExpectNext(isNode("{{{tag:}}}", "tag", ElementsAre("")));
+  ExpectEnd();
+
+  // Without the full closing marker the line is plain text.
+  Parser.parseLine("{{{tag:}}");
+  ExpectNext(isNode("{{{tag:}}"));
+  ExpectEnd();
+
+  // Digits and upper-case letters are accepted in tags.
+  Parser.parseLine("{{{t2g}}}");
+  ExpectNext(isNode("{{{t2g}}}", "t2g"));
+  ExpectEnd();
+
+  Parser.parseLine("{{{tAg}}}");
+  ExpectNext(isNode("{{{tAg}}}", "tAg"));
+  ExpectEnd();
+
+  // Elements interleaved with text.
+  Parser.parseLine("a{{{b}}}c{{{d}}}e");
+  ExpectNext(isNode("a"));
+  ExpectNext(isNode("{{{b}}}", "b"));
+  ExpectNext(isNode("c"));
+  ExpectNext(isNode("{{{d}}}", "d"));
+  ExpectNext(isNode("e"));
+  ExpectEnd();
+
+  // An element with an empty tag is reported as text in front of the valid
+  // element that follows it.
+  Parser.parseLine("{{{}}}{{{tag}}}");
+  ExpectNext(isNode("{{{}}}"));
+  ExpectNext(isNode("{{{tag}}}", "tag"));
+  ExpectEnd();
+
+  // Supported SGR codes become nodes of their own between the text runs.
+  Parser.parseLine("\033[0mA\033[1mB\033[30mC\033[37m");
+  ExpectNext(isNode("\033[0m"));
+  ExpectNext(isNode("A"));
+  ExpectNext(isNode("\033[1m"));
+  ExpectNext(isNode("B"));
+  ExpectNext(isNode("\033[30m"));
+  ExpectNext(isNode("C"));
+  ExpectNext(isNode("\033[37m"));
+  ExpectEnd();
+
+  // An SGR code inside an element is left alone as part of the field.
+  Parser.parseLine("{{{tag:\033[0m}}}");
+  ExpectNext(isNode("{{{tag:\033[0m}}}", "tag", ElementsAre("\033[0m")));
+  ExpectEnd();
+}
+
+} // namespace