Introduce a simple line-by-line iterator type into the Support library.
author: Chandler Carruth <chandlerc@gmail.com>
Fri, 27 Dec 2013 04:28:57 +0000 (04:28 +0000)
committer: Chandler Carruth <chandlerc@gmail.com>
Fri, 27 Dec 2013 04:28:57 +0000 (04:28 +0000)
This is an iterator which you can build around a MemoryBuffer. It will
iterate through the non-empty, non-comment lines of the buffer as
a forward iterator. It should be small and reasonably fast (although it
could be made much faster if anyone cares, I don't really...).

This will be used to more simply support the text-based sample
profile file format, and is largely based on the original patch by
Diego. I've re-worked the style of it and separated it from the work of
producing a MemoryBuffer from a file which both simplifies the interface
and makes it easier to test.

The style of the API follows the C++ standard naming conventions to fit
in better with iterators in general, much like the Path and FileSystem
interfaces follow standard-based naming conventions.

llvm-svn: 198068

llvm/include/llvm/Support/LineIterator.h [new file with mode: 0644]
llvm/lib/Support/CMakeLists.txt
llvm/lib/Support/LineIterator.cpp [new file with mode: 0644]
llvm/unittests/Support/CMakeLists.txt
llvm/unittests/Support/LineIteratorTest.cpp [new file with mode: 0644]

diff --git a/llvm/include/llvm/Support/LineIterator.h b/llvm/include/llvm/Support/LineIterator.h
new file mode 100644 (file)
index 0000000..861c198
--- /dev/null
@@ -0,0 +1,73 @@
+//===- LineIterator.h - Iterator to read a text buffer's lines --*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/ADT/StringRef.h"
+#include <iterator>
+
+namespace llvm {
+
+class MemoryBuffer;
+
+/// \brief A forward iterator which reads non-blank text lines from a buffer.
+///
+/// This class provides a forward iterator interface for reading one line at
+/// a time from a buffer. When default constructed the iterator will be the
+/// "end" iterator.
+///
+/// The iterator also is aware of what line number it is currently processing
+/// and can strip comment lines given the comment-starting character.
+///
+/// Note that this iterator requires the buffer to be nul terminated.
+class line_iterator
+    : public std::iterator<std::forward_iterator_tag, StringRef, ptrdiff_t> {
+  const MemoryBuffer *Buffer;
+  char CommentMarker;
+
+  unsigned LineNumber;
+  StringRef CurrentLine;
+
+public:
+  /// \brief Default construct an "end" iterator.
+  line_iterator() : Buffer(0) {}
+
+  /// \brief Construct a new iterator around some memory buffer.
+  explicit line_iterator(const MemoryBuffer &Buffer, char CommentMarker = '\0');
+
+  /// \brief Return true if we've reached EOF or are an "end" iterator.
+  bool is_at_eof() const { return !Buffer; }
+
+  /// \brief Return true if we're an "end" iterator or have reached EOF.
+  bool is_at_end() const { return is_at_eof(); }
+
+  /// \brief Return the current line number. May return any number at EOF.
+  int64_t line_number() const { return LineNumber; }
+
+  /// \brief Advance to the next (non-empty, non-comment) line.
+  line_iterator &operator++() {
+    advance();
+    return *this;
+  }
+
+  /// \brief Get the current line as a \c StringRef.
+  StringRef operator*() const { return CurrentLine; }
+
+  friend bool operator==(const line_iterator &LHS, const line_iterator &RHS) {
+    return LHS.Buffer == RHS.Buffer &&
+           LHS.CurrentLine.begin() == RHS.CurrentLine.begin();
+  }
+
+  friend bool operator!=(const line_iterator &LHS, const line_iterator &RHS) {
+    return !(LHS == RHS);
+  }
+
+private:
+  /// \brief Advance the iterator to the next line.
+  void advance();
+};
+}
index 3aecf3f..cf4edff 100644 (file)
@@ -30,6 +30,7 @@ add_llvm_library(LLVMSupport
   IntrusiveRefCntPtr.cpp
   IsInf.cpp
   IsNAN.cpp
+  LineIterator.cpp
   Locale.cpp
   LockFileManager.cpp
   ManagedStatic.cpp
diff --git a/llvm/lib/Support/LineIterator.cpp b/llvm/lib/Support/LineIterator.cpp
new file mode 100644 (file)
index 0000000..c14f96b
--- /dev/null
@@ -0,0 +1,68 @@
+//===- LineIterator.cpp - Implementation of line iteration ----------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Support/LineIterator.h"
+#include "llvm/Support/MemoryBuffer.h"
+
+using namespace llvm;
+
+/// Build an iterator positioned on the first non-blank, non-comment line of
+/// \p Buffer, or an "end" iterator when the buffer holds no such line.
+line_iterator::line_iterator(const MemoryBuffer &Buffer, char CommentMarker)
+    : Buffer(0), CommentMarker(CommentMarker), LineNumber(1), CurrentLine() {
+  // An empty buffer has no lines at all: remain in the "end" state that the
+  // initializer list just set up.
+  if (!Buffer.getBufferSize())
+    return;
+
+  // Scanning stops on a null character, so a non-empty buffer must have one
+  // immediately after its contents.
+  assert(Buffer.getBufferEnd()[0] == '\0');
+
+  // Start from a zero-length line at the very beginning of the buffer and let
+  // advance() move us onto the first real line.
+  this->Buffer = &Buffer;
+  CurrentLine = StringRef(Buffer.getBufferStart(), 0);
+  advance();
+}
+
+void line_iterator::advance() {
+  assert(Buffer && "Cannot advance past the end!");
+
+  const char *Pos = CurrentLine.end();
+  assert(Pos == Buffer->getBufferStart() || *Pos == '\n' || *Pos == '\0');
+  size_t Length = 0;
+
+  if (CommentMarker == '\0') {
+    // If we're not stripping comments, this is simpler.
+    while (Pos[Length] == '\n')
+      ++Length;
+    Pos += Length;
+    LineNumber += Length;
+    Length = 0;
+  } else {
+    // Skip comments and count line numbers, which is a bit more complex.
+    for (;;) {
+      if (*Pos == CommentMarker)
+        do {
+          ++Pos;
+        } while (*Pos != '\0' && *Pos != '\n');
+      if (*Pos != '\n')
+        break;
+      ++Pos;
+      ++LineNumber;
+    }
+  }
+
+  if (*Pos == '\0') {
+    // We've hit the end of the buffer, reset ourselves to the end state.
+    Buffer = 0;
+    CurrentLine = StringRef();
+    return;
+  }
+
+  // Measure the line.
+  do {
+    ++Length;
+  } while (Pos[Length] != '\0' && Pos[Length] != '\n');
+
+  CurrentLine = StringRef(Pos, Length);
+}
index 0abc2ff..b3f0622 100644 (file)
@@ -18,6 +18,7 @@ add_llvm_unittest(SupportTests
   ErrorOrTest.cpp
   FileOutputBufferTest.cpp
   LeakDetectorTest.cpp
+  LineIteratorTest.cpp
   LockFileManagerTest.cpp
   ManagedStatic.cpp
   MathExtrasTest.cpp
diff --git a/llvm/unittests/Support/LineIteratorTest.cpp b/llvm/unittests/Support/LineIteratorTest.cpp
new file mode 100644 (file)
index 0000000..d684e25
--- /dev/null
@@ -0,0 +1,115 @@
+//===- LineIteratorTest.cpp - Unit tests ----------------------------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Support/LineIterator.h"
+#include "llvm/ADT/OwningPtr.h"
+#include "llvm/Support/MemoryBuffer.h"
+#include "gtest/gtest.h"
+
+using namespace llvm;
+using namespace llvm::sys;
+
+namespace {
+
+TEST(LineIteratorTest, Basic) {
+  OwningPtr<MemoryBuffer> Buffer(MemoryBuffer::getMemBuffer("line 1\n"
+                                                            "line 2\n"
+                                                            "line 3"));
+
+  line_iterator I = line_iterator(*Buffer), E;
+
+  EXPECT_FALSE(I.is_at_eof());
+  EXPECT_NE(E, I);
+
+  EXPECT_EQ("line 1", *I);
+  EXPECT_EQ(1, I.line_number());
+  ++I;
+  EXPECT_EQ("line 2", *I);
+  EXPECT_EQ(2, I.line_number());
+  ++I;
+  EXPECT_EQ("line 3", *I);
+  EXPECT_EQ(3, I.line_number());
+  ++I;
+
+  EXPECT_TRUE(I.is_at_eof());
+  EXPECT_EQ(E, I);
+}
+
+TEST(LineIteratorTest, CommentSkipping) {
+  OwningPtr<MemoryBuffer> Buffer(MemoryBuffer::getMemBuffer("line 1\n"
+                                                            "line 2\n"
+                                                            "# Comment 1\n"
+                                                            "line 4\n"
+                                                            "# Comment 2"));
+
+  line_iterator I = line_iterator(*Buffer, '#'), E;
+
+  EXPECT_FALSE(I.is_at_eof());
+  EXPECT_NE(E, I);
+
+  EXPECT_EQ("line 1", *I);
+  EXPECT_EQ(1, I.line_number());
+  ++I;
+  EXPECT_EQ("line 2", *I);
+  EXPECT_EQ(2, I.line_number());
+  ++I;
+  EXPECT_EQ("line 4", *I);
+  EXPECT_EQ(4, I.line_number());
+  ++I;
+
+  EXPECT_TRUE(I.is_at_eof());
+  EXPECT_EQ(E, I);
+}
+
+TEST(LineIteratorTest, BlankSkipping) {
+  OwningPtr<MemoryBuffer> Buffer(MemoryBuffer::getMemBuffer("\n\n\n"
+                                                            "line 1\n"
+                                                            "\n\n\n"
+                                                            "line 2\n"
+                                                            "\n\n\n"));
+
+  line_iterator I = line_iterator(*Buffer), E;
+
+  EXPECT_FALSE(I.is_at_eof());
+  EXPECT_NE(E, I);
+
+  EXPECT_EQ("line 1", *I);
+  EXPECT_EQ(4, I.line_number());
+  ++I;
+  EXPECT_EQ("line 2", *I);
+  EXPECT_EQ(8, I.line_number());
+  ++I;
+
+  EXPECT_TRUE(I.is_at_eof());
+  EXPECT_EQ(E, I);
+}
+
+TEST(LineIteratorTest, EmptyBuffers) {
+  OwningPtr<MemoryBuffer> Buffer(MemoryBuffer::getMemBuffer(""));
+  EXPECT_TRUE(line_iterator(*Buffer).is_at_eof());
+  EXPECT_EQ(line_iterator(), line_iterator(*Buffer));
+
+  Buffer.reset(MemoryBuffer::getMemBuffer("\n\n\n"));
+  EXPECT_TRUE(line_iterator(*Buffer).is_at_eof());
+  EXPECT_EQ(line_iterator(), line_iterator(*Buffer));
+
+  Buffer.reset(MemoryBuffer::getMemBuffer("# foo\n"
+                                          "\n"
+                                          "# bar"));
+  EXPECT_TRUE(line_iterator(*Buffer, '#').is_at_eof());
+  EXPECT_EQ(line_iterator(), line_iterator(*Buffer, '#'));
+
+  Buffer.reset(MemoryBuffer::getMemBuffer("\n"
+                                          "# baz\n"
+                                          "\n"));
+  EXPECT_TRUE(line_iterator(*Buffer, '#').is_at_eof());
+  EXPECT_EQ(line_iterator(), line_iterator(*Buffer, '#'));
+}
+
+} // anonymous namespace