Handle zero-width and double-width characters in string literals and comments.

author Alexander Kornienko <alexfh@google.com>

Thu, 5 Sep 2013 14:08:34 +0000 (14:08 +0000)

committer Alexander Kornienko <alexfh@google.com>

Thu, 5 Sep 2013 14:08:34 +0000 (14:08 +0000)
author Alexander Kornienko <alexfh@google.com>
Thu, 5 Sep 2013 14:08:34 +0000 (14:08 +0000)
committer Alexander Kornienko <alexfh@google.com>
Thu, 5 Sep 2013 14:08:34 +0000 (14:08 +0000)
diff --git a/clang/include/clang/Format/Format.h b/clang/include/clang/Format/Format.h

index 6874d9e..6c6575f 100644 (file)
--- a/clang/include/clang/Format/Format.h
+++ b/clang/include/clang/Format/Format.h
@@ -149,9 +149,12 @@ struct FormatStyle {
    /// Otherwise puts them into the right-most column.
    bool AlignEscapedNewlinesLeft;
  
-  /// \brief The number of characters to use for indentation.
+  /// \brief The number of columns to use for indentation.
    unsigned IndentWidth;
  
+  /// \brief The number of columns used for tab stops.
+  unsigned TabWidth;
+
    /// \brief The number of characters to use for indentation of constructor
    /// initializer lists.
    unsigned ConstructorInitializerIndentWidth;
diff --git a/clang/lib/Format/BreakableToken.cpp b/clang/lib/Format/BreakableToken.cpp

index 3b29137..053be4b 100644 (file)
--- a/clang/lib/Format/BreakableToken.cpp
+++ b/clang/lib/Format/BreakableToken.cpp
@@ -41,6 +41,7 @@ static bool IsBlank(char C) {
  static BreakableToken::Split getCommentSplit(StringRef Text,
                                               unsigned ContentStartColumn,
                                               unsigned ColumnLimit,
+                                             unsigned TabWidth,
                                               encoding::Encoding Encoding) {
    if (ColumnLimit <= ContentStartColumn + 1)
      return BreakableToken::Split(StringRef::npos, 0);
@@ -49,9 +50,14 @@ static BreakableToken::Split getCommentSplit(StringRef Text,
    unsigned MaxSplitBytes = 0;
  
    for (unsigned NumChars = 0;
-       NumChars < MaxSplit && MaxSplitBytes < Text.size(); ++NumChars)
-    MaxSplitBytes +=
+       NumChars < MaxSplit && MaxSplitBytes < Text.size();) {
+    unsigned BytesInChar =
          encoding::getCodePointNumBytes(Text[MaxSplitBytes], Encoding);
+    NumChars +=
+        encoding::columnWidthWithTabs(Text.substr(MaxSplitBytes, BytesInChar),
+                                      ContentStartColumn, TabWidth, Encoding);
+    MaxSplitBytes += BytesInChar;
+  }
  
    StringRef::size_type SpaceOffset = Text.find_last_of(Blanks, MaxSplitBytes);
    if (SpaceOffset == StringRef::npos ||
@@ -78,6 +84,7 @@ static BreakableToken::Split getCommentSplit(StringRef Text,
  static BreakableToken::Split getStringSplit(StringRef Text,
                                              unsigned ContentStartColumn,
                                              unsigned ColumnLimit,
+                                            unsigned TabWidth,
                                              encoding::Encoding Encoding) {
    // FIXME: Reduce unit test case.
    if (Text.empty())
@@ -86,7 +93,9 @@ static BreakableToken::Split getStringSplit(StringRef Text,
      return BreakableToken::Split(StringRef::npos, 0);
    unsigned MaxSplit =
        std::min<unsigned>(ColumnLimit - ContentStartColumn,
-                         encoding::getCodePointCount(Text, Encoding) - 1);
+                         encoding::columnWidthWithTabs(Text, ContentStartColumn,
+                                                       TabWidth, Encoding) -
+                             1);
    StringRef::size_type SpaceOffset = 0;
    StringRef::size_type SlashOffset = 0;
    StringRef::size_type WordStartOffset = 0;
@@ -98,7 +107,9 @@ static BreakableToken::Split getStringSplit(StringRef Text,
        Chars += Advance;
      } else {
        Advance = encoding::getCodePointNumBytes(Text[0], Encoding);
-      Chars += 1;
+      Chars += encoding::columnWidthWithTabs(Text.substr(0, Advance),
+                                             ContentStartColumn + Chars,
+                                             TabWidth, Encoding);
      }
  
      if (Chars > MaxSplit)
@@ -131,14 +142,17 @@ unsigned BreakableSingleLineToken::getLineCount() const { return 1; }
  unsigned BreakableSingleLineToken::getLineLengthAfterSplit(
      unsigned LineIndex, unsigned Offset, StringRef::size_type Length) const {
    return StartColumn + Prefix.size() + Postfix.size() +
-         encoding::getCodePointCount(Line.substr(Offset, Length), Encoding);
+         encoding::columnWidthWithTabs(Line.substr(Offset, Length),
+                                       StartColumn + Prefix.size(),
+                                       Style.TabWidth, Encoding);
  }
  
  BreakableSingleLineToken::BreakableSingleLineToken(
      const FormatToken &Tok, unsigned StartColumn, StringRef Prefix,
-    StringRef Postfix, bool InPPDirective, encoding::Encoding Encoding)
-    : BreakableToken(Tok, InPPDirective, Encoding), StartColumn(StartColumn),
-      Prefix(Prefix), Postfix(Postfix) {
+    StringRef Postfix, bool InPPDirective, encoding::Encoding Encoding,
+    const FormatStyle &Style)
+    : BreakableToken(Tok, InPPDirective, Encoding, Style),
+      StartColumn(StartColumn), Prefix(Prefix), Postfix(Postfix) {
    assert(Tok.TokenText.startswith(Prefix) && Tok.TokenText.endswith(Postfix));
    Line = Tok.TokenText.substr(
        Prefix.size(), Tok.TokenText.size() - Prefix.size() - Postfix.size());
@@ -147,15 +161,16 @@ BreakableSingleLineToken::BreakableSingleLineToken(
  BreakableStringLiteral::BreakableStringLiteral(const FormatToken &Tok,
                                                 unsigned StartColumn,
                                                 bool InPPDirective,
-                                               encoding::Encoding Encoding)
+                                               encoding::Encoding Encoding,
+                                               const FormatStyle &Style)
      : BreakableSingleLineToken(Tok, StartColumn, "\"", "\"", InPPDirective,
-                               Encoding) {}
+                               Encoding, Style) {}
  
  BreakableToken::Split
  BreakableStringLiteral::getSplit(unsigned LineIndex, unsigned TailOffset,
                                   unsigned ColumnLimit) const {
    return getStringSplit(Line.substr(TailOffset), StartColumn + 2, ColumnLimit,
-                        Encoding);
+                        Style.TabWidth, Encoding);
  }
  
  void BreakableStringLiteral::insertBreak(unsigned LineIndex,
@@ -177,10 +192,11 @@ static StringRef getLineCommentPrefix(StringRef Comment) {
  BreakableLineComment::BreakableLineComment(const FormatToken &Token,
                                             unsigned StartColumn,
                                             bool InPPDirective,
-                                           encoding::Encoding Encoding)
+                                           encoding::Encoding Encoding,
+                                           const FormatStyle &Style)
      : BreakableSingleLineToken(Token, StartColumn,
                                 getLineCommentPrefix(Token.TokenText), "",
-                               InPPDirective, Encoding) {
+                               InPPDirective, Encoding, Style) {
    OriginalPrefix = Prefix;
    if (Token.TokenText.size() > Prefix.size() &&
        isAlphanumeric(Token.TokenText[Prefix.size()])) {
@@ -195,7 +211,7 @@ BreakableToken::Split
  BreakableLineComment::getSplit(unsigned LineIndex, unsigned TailOffset,
                                 unsigned ColumnLimit) const {
    return getCommentSplit(Line.substr(TailOffset), StartColumn + Prefix.size(),
-                         ColumnLimit, Encoding);
+                         ColumnLimit, Style.TabWidth, Encoding);
  }
  
  void BreakableLineComment::insertBreak(unsigned LineIndex, unsigned TailOffset,
@@ -216,10 +232,10 @@ BreakableLineComment::replaceWhitespaceBefore(unsigned LineIndex,
  }
  
  BreakableBlockComment::BreakableBlockComment(
-    const FormatStyle &Style, const FormatToken &Token, unsigned StartColumn,
+    const FormatToken &Token, unsigned StartColumn,
      unsigned OriginalStartColumn, bool FirstInLine, bool InPPDirective,
-    encoding::Encoding Encoding)
-    : BreakableToken(Token, InPPDirective, Encoding) {
+    encoding::Encoding Encoding, const FormatStyle &Style)
+    : BreakableToken(Token, InPPDirective, Encoding, Style) {
    StringRef TokenText(Token.TokenText);
    assert(TokenText.startswith("/*") && TokenText.endswith("*/"));
    TokenText.substr(2, TokenText.size() - 4).split(Lines, "\n");
@@ -229,7 +245,7 @@ BreakableBlockComment::BreakableBlockComment(
    StartOfLineColumn.resize(Lines.size());
    StartOfLineColumn[0] = StartColumn + 2;
    for (size_t i = 1; i < Lines.size(); ++i)
-    adjustWhitespace(Style, i, IndentDelta);
+    adjustWhitespace(i, IndentDelta);
  
    Decoration = "* ";
    if (Lines.size() == 1 && !FirstInLine) {
@@ -282,8 +298,7 @@ BreakableBlockComment::BreakableBlockComment(
    });
  }
  
-void BreakableBlockComment::adjustWhitespace(const FormatStyle &Style,
-                                             unsigned LineIndex,
+void BreakableBlockComment::adjustWhitespace(unsigned LineIndex,
                                               int IndentDelta) {
    // When in a preprocessor directive, the trailing backslash in a block comment
    // is not needed, but can serve a purpose of uniformity with necessary escaped
@@ -306,6 +321,7 @@ void BreakableBlockComment::adjustWhitespace(const FormatStyle &Style,
    if (StartOfLine == StringRef::npos)
      StartOfLine = Lines[LineIndex].size();
  
+  StringRef Whitespace = Lines[LineIndex].substr(0, StartOfLine);
    // Adjust Lines to only contain relevant text.
    Lines[LineIndex - 1] = Lines[LineIndex - 1].substr(0, EndOfPreviousLine);
    Lines[LineIndex] = Lines[LineIndex].substr(StartOfLine);
@@ -321,16 +337,19 @@ void BreakableBlockComment::adjustWhitespace(const FormatStyle &Style,
    // if leading tabs are intermixed with spaces, that is not a high priority.
  
    // Adjust the start column uniformly across all lines.
-  StartOfLineColumn[LineIndex] = std::max<int>(0, StartOfLine + IndentDelta);
+  StartOfLineColumn[LineIndex] =
+      std::max<int>(0, Whitespace.size() + IndentDelta);
  }
  
  unsigned BreakableBlockComment::getLineCount() const { return Lines.size(); }
  
  unsigned BreakableBlockComment::getLineLengthAfterSplit(
      unsigned LineIndex, unsigned Offset, StringRef::size_type Length) const {
-  return getContentStartColumn(LineIndex, Offset) +
-         encoding::getCodePointCount(Lines[LineIndex].substr(Offset, Length),
-                                     Encoding) +
+  unsigned ContentStartColumn = getContentStartColumn(LineIndex, Offset);
+  return ContentStartColumn +
+         encoding::columnWidthWithTabs(Lines[LineIndex].substr(Offset, Length),
+                                       ContentStartColumn, Style.TabWidth,
+                                       Encoding) +
           // The last line gets a "*/" postfix.
           (LineIndex + 1 == Lines.size() ? 2 : 0);
  }
@@ -340,7 +359,7 @@ BreakableBlockComment::getSplit(unsigned LineIndex, unsigned TailOffset,
                                  unsigned ColumnLimit) const {
    return getCommentSplit(Lines[LineIndex].substr(TailOffset),
                           getContentStartColumn(LineIndex, TailOffset),
-                         ColumnLimit, Encoding);
+                         ColumnLimit, Style.TabWidth, Encoding);
  }
  
  void BreakableBlockComment::insertBreak(unsigned LineIndex, unsigned TailOffset,
diff --git a/clang/lib/Format/BreakableToken.h b/clang/lib/Format/BreakableToken.h

index 90b78ac..65b9015 100644 (file)
--- a/clang/lib/Format/BreakableToken.h
+++ b/clang/lib/Format/BreakableToken.h
@@ -67,12 +67,14 @@ public:
  
  protected:
    BreakableToken(const FormatToken &Tok, bool InPPDirective,
-                 encoding::Encoding Encoding)
-      : Tok(Tok), InPPDirective(InPPDirective), Encoding(Encoding) {}
+                 encoding::Encoding Encoding, const FormatStyle &Style)
+      : Tok(Tok), InPPDirective(InPPDirective), Encoding(Encoding),
+        Style(Style) {}
  
    const FormatToken &Tok;
    const bool InPPDirective;
    const encoding::Encoding Encoding;
+  const FormatStyle &Style;
  };
  
  /// \brief Base class for single line tokens that can be broken.
@@ -88,7 +90,8 @@ public:
  protected:
    BreakableSingleLineToken(const FormatToken &Tok, unsigned StartColumn,
                             StringRef Prefix, StringRef Postfix,
-                           bool InPPDirective, encoding::Encoding Encoding);
+                           bool InPPDirective, encoding::Encoding Encoding,
+                           const FormatStyle &Style);
  
    // The column in which the token starts.
    unsigned StartColumn;
@@ -107,7 +110,8 @@ public:
    /// \p StartColumn specifies the column in which the token will start
    /// after formatting.
    BreakableStringLiteral(const FormatToken &Tok, unsigned StartColumn,
-                         bool InPPDirective, encoding::Encoding Encoding);
+                         bool InPPDirective, encoding::Encoding Encoding,
+                         const FormatStyle &Style);
  
    virtual Split getSplit(unsigned LineIndex, unsigned TailOffset,
                           unsigned ColumnLimit) const;
@@ -122,7 +126,8 @@ public:
    /// \p StartColumn specifies the column in which the comment will start
    /// after formatting.
    BreakableLineComment(const FormatToken &Token, unsigned StartColumn,
-                       bool InPPDirective, encoding::Encoding Encoding);
+                       bool InPPDirective, encoding::Encoding Encoding,
+                       const FormatStyle &Style);
  
    virtual Split getSplit(unsigned LineIndex, unsigned TailOffset,
                           unsigned ColumnLimit) const;
@@ -144,10 +149,10 @@ public:
    /// after formatting, while \p OriginalStartColumn specifies in which
    /// column the comment started before formatting.
    /// If the comment starts a line after formatting, set \p FirstInLine to true.
-  BreakableBlockComment(const FormatStyle &Style, const FormatToken &Token,
-                        unsigned StartColumn, unsigned OriginaStartColumn,
-                        bool FirstInLine, bool InPPDirective,
-                        encoding::Encoding Encoding);
+  BreakableBlockComment(const FormatToken &Token, unsigned StartColumn,
+                        unsigned OriginaStartColumn, bool FirstInLine,
+                        bool InPPDirective, encoding::Encoding Encoding,
+                        const FormatStyle &Style);
  
    virtual unsigned getLineCount() const;
    virtual unsigned getLineLengthAfterSplit(unsigned LineIndex,
@@ -172,8 +177,7 @@ private:
    // Sets StartOfLineColumn to the intended column in which the text at
    // Lines[LineIndex] starts (note that the decoration, if present, is not
    // considered part of the text).
-  void adjustWhitespace(const FormatStyle &Style, unsigned LineIndex,
-                        int IndentDelta);
+  void adjustWhitespace(unsigned LineIndex, int IndentDelta);
  
    // Returns the column at which the text in line LineIndex starts, when broken
    // at TailOffset. Note that the decoration (if present) is not considered part
diff --git a/clang/lib/Format/ContinuationIndenter.cpp b/clang/lib/Format/ContinuationIndenter.cpp

index c894a4b..9e84ea7 100644 (file)
--- a/clang/lib/Format/ContinuationIndenter.cpp
+++ b/clang/lib/Format/ContinuationIndenter.cpp
@@ -623,10 +623,10 @@ ContinuationIndenter::addMultilineStringLiteral(const FormatToken &Current,
      State.Stack[i].BreakBeforeParameter = true;
  
    unsigned ColumnsUsed =
-      State.Column - Current.CodePointCount + Current.CodePointsInFirstLine;
+      State.Column - Current.CodePointCount + Current.FirstLineColumnWidth;
    // We can only affect layout of the first and the last line, so the penalty
    // for all other lines is constant, and we ignore it.
-  State.Column = Current.CodePointsInLastLine;
+  State.Column = Current.LastLineColumnWidth;
  
    if (ColumnsUsed > getColumnLimit(State))
      return Style.PenaltyExcessCharacter * (ColumnsUsed - getColumnLimit(State));
@@ -659,14 +659,14 @@ unsigned ContinuationIndenter::breakProtrudingToken(const FormatToken &Current,
        return 0;
  
      Token.reset(new BreakableStringLiteral(
-        Current, StartColumn, State.Line->InPPDirective, Encoding));
+        Current, StartColumn, State.Line->InPPDirective, Encoding, Style));
    } else if (Current.Type == TT_BlockComment && Current.isTrailingComment()) {
      unsigned OriginalStartColumn =
          SourceMgr.getSpellingColumnNumber(Current.getStartOfNonWhitespace()) -
          1;
      Token.reset(new BreakableBlockComment(
-        Style, Current, StartColumn, OriginalStartColumn, !Current.Previous,
-        State.Line->InPPDirective, Encoding));
+        Current, StartColumn, OriginalStartColumn, !Current.Previous,
+        State.Line->InPPDirective, Encoding, Style));
    } else if (Current.Type == TT_LineComment &&
               (Current.Previous == NULL ||
                Current.Previous->Type != TT_ImplicitStringLiteral)) {
@@ -678,12 +678,12 @@ unsigned ContinuationIndenter::breakProtrudingToken(const FormatToken &Current,
      // leading whitespace in consecutive lines when changing indentation of
      // the first line similar to what we do with block comments.
      if (Current.isMultiline()) {
-      State.Column = StartColumn + Current.CodePointsInFirstLine;
+      State.Column = StartColumn + Current.FirstLineColumnWidth;
        return 0;
      }
  
-    Token.reset(new BreakableLineComment(Current, StartColumn,
-                                         State.Line->InPPDirective, Encoding));
+    Token.reset(new BreakableLineComment(
+        Current, StartColumn, State.Line->InPPDirective, Encoding, Style));
    } else {
      return 0;
    }
diff --git a/clang/lib/Format/Encoding.h b/clang/lib/Format/Encoding.h

index e9e9ae7..356334d 100644 (file)
--- a/clang/lib/Format/Encoding.h
+++ b/clang/lib/Format/Encoding.h
@@ -18,6 +18,7 @@
  
  #include "clang/Basic/LLVM.h"
  #include "llvm/Support/ConvertUTF.h"
+#include "llvm/Support/Unicode.h"
  
  namespace clang {
  namespace format {
@@ -57,6 +58,37 @@ inline unsigned getCodePointCount(StringRef Text, Encoding Encoding) {
    }
  }
  
+/// \brief Computes how many terminal columns are needed to render \p Text,
+/// interpreted per \p Encoding. Non-UTF-8 text, or a negative result from the
+/// UTF-8 width query, falls back to the byte length of \p Text.
+inline unsigned columnWidth(StringRef Text, Encoding Encoding) {
+  if (Encoding != Encoding_UTF8)
+    return Text.size();
+  int Utf8Width = llvm::sys::unicode::columnWidthUTF8(Text);
+  if (Utf8Width < 0)
+    return Text.size();
+  return Utf8Width;
+}
+
+/// \brief Returns the number of columns required to display the \p Text,
+/// starting from the \p StartColumn on a terminal with the \p TabWidth. The
+/// text uses the \p Encoding. With a zero \p TabWidth tabs take no columns.
+inline unsigned columnWidthWithTabs(StringRef Text, unsigned StartColumn,
+                                    unsigned TabWidth, Encoding Encoding) {
+  unsigned TotalWidth = 0;
+  StringRef Tail = Text;
+  for (;;) {
+    StringRef::size_type TabPos = Tail.find('\t');
+    if (TabPos == StringRef::npos)
+      return TotalWidth + columnWidth(Tail, Encoding);
+    TotalWidth += columnWidth(Tail.substr(0, TabPos), Encoding);
+    // TabWidth comes from user configuration; avoid a modulo by zero.
+    if (TabWidth)
+      TotalWidth += TabWidth - (TotalWidth + StartColumn) % TabWidth;
+    Tail = Tail.substr(TabPos + 1);
+  }
+}
+
  /// \brief Gets the number of bytes in a sequence representing a single
  /// codepoint and starting with FirstChar in the specified Encoding.
  inline unsigned getCodePointNumBytes(char FirstChar, Encoding Encoding) {
diff --git a/clang/lib/Format/Format.cpp b/clang/lib/Format/Format.cpp

index c482c40..02adc5a 100644 (file)
--- a/clang/lib/Format/Format.cpp
+++ b/clang/lib/Format/Format.cpp
@@ -136,6 +136,7 @@ template <> struct MappingTraits<clang::format::FormatStyle> {
      IO.mapOptional("Cpp11BracedListStyle", Style.Cpp11BracedListStyle);
      IO.mapOptional("Standard", Style.Standard);
      IO.mapOptional("IndentWidth", Style.IndentWidth);
+    IO.mapOptional("TabWidth", Style.TabWidth);
      IO.mapOptional("UseTab", Style.UseTab);
      IO.mapOptional("BreakBeforeBraces", Style.BreakBeforeBraces);
      IO.mapOptional("IndentFunctionDeclarationAfterType",
@@ -184,6 +185,7 @@ FormatStyle getLLVMStyle() {
    LLVMStyle.IndentCaseLabels = false;
    LLVMStyle.IndentFunctionDeclarationAfterType = false;
    LLVMStyle.IndentWidth = 2;
+  LLVMStyle.TabWidth = 8;
    LLVMStyle.MaxEmptyLinesToKeep = 1;
    LLVMStyle.NamespaceIndentation = FormatStyle::NI_None;
    LLVMStyle.ObjCSpaceBeforeProtocolList = true;
@@ -225,6 +227,7 @@ FormatStyle getGoogleStyle() {
    GoogleStyle.IndentCaseLabels = true;
    GoogleStyle.IndentFunctionDeclarationAfterType = true;
    GoogleStyle.IndentWidth = 2;
+  GoogleStyle.TabWidth = 8;
    GoogleStyle.MaxEmptyLinesToKeep = 1;
    GoogleStyle.NamespaceIndentation = FormatStyle::NI_None;
    GoogleStyle.ObjCSpaceBeforeProtocolList = false;
@@ -629,7 +632,7 @@ private:
            ++Column;
            break;
          case '\t':
-          Column += Style.IndentWidth - Column % Style.IndentWidth;
+          Column += Style.TabWidth - Column % Style.TabWidth;
            break;
          default:
            ++Column;
@@ -681,10 +684,12 @@ private:
        StringRef Text = FormatTok->TokenText;
        size_t FirstNewlinePos = Text.find('\n');
        if (FirstNewlinePos != StringRef::npos) {
-        FormatTok->CodePointsInFirstLine = encoding::getCodePointCount(
-            Text.substr(0, FirstNewlinePos), Encoding);
-        FormatTok->CodePointsInLastLine = encoding::getCodePointCount(
-            Text.substr(Text.find_last_of('\n') + 1), Encoding);
+        // FIXME: Handle embedded tabs.
+        FormatTok->FirstLineColumnWidth = encoding::columnWidthWithTabs(
+            Text.substr(0, FirstNewlinePos), 0, Style.TabWidth, Encoding);
+        FormatTok->LastLineColumnWidth = encoding::columnWidthWithTabs(
+            Text.substr(Text.find_last_of('\n') + 1), 0, Style.TabWidth,
+            Encoding);
        }
      }
      // FIXME: Add the CodePointCount to Column.
diff --git a/clang/lib/Format/FormatToken.h b/clang/lib/Format/FormatToken.h

index 0b770f3..e4342dd 100644 (file)
--- a/clang/lib/Format/FormatToken.h
+++ b/clang/lib/Format/FormatToken.h
@@ -83,7 +83,7 @@ class AnnotatedLine;
  struct FormatToken {
    FormatToken()
        : NewlinesBefore(0), HasUnescapedNewline(false), LastNewlineOffset(0),
-        CodePointCount(0), CodePointsInFirstLine(0), CodePointsInLastLine(0),
+        CodePointCount(0), FirstLineColumnWidth(0), LastLineColumnWidth(0),
          IsFirst(false), MustBreakBefore(false), IsUnterminatedLiteral(false),
          BlockKind(BK_Unknown), Type(TT_Unknown), SpacesRequiredBefore(0),
          CanBreakBefore(false), ClosesTemplateDeclaration(false),
@@ -120,15 +120,15 @@ struct FormatToken {
    /// \brief Contains the width in columns of the first line of a
    /// multi-line string literal or comment. Zero if there's no newline in the
    /// token.
-  unsigned CodePointsInFirstLine;
+  unsigned FirstLineColumnWidth;
  
    /// \brief Contains the width in columns of the last line of a
    /// multi-line string literal or comment. Can be zero for line comments.
-  unsigned CodePointsInLastLine;
+  unsigned LastLineColumnWidth;
  
    /// \brief Returns \c true if the token text contains newlines (escaped or
    /// not).
-  bool isMultiline() const { return CodePointsInFirstLine != 0; }
+  bool isMultiline() const { return FirstLineColumnWidth != 0; }
  
    /// \brief Indicates that this is the first token.
    bool IsFirst;
diff --git a/clang/lib/Format/WhitespaceManager.cpp b/clang/lib/Format/WhitespaceManager.cpp

index 41519b6..0f46e62 100644 (file)
--- a/clang/lib/Format/WhitespaceManager.cpp
+++ b/clang/lib/Format/WhitespaceManager.cpp
@@ -272,8 +272,8 @@ std::string WhitespaceManager::getIndentText(unsigned Spaces) {
    if (!Style.UseTab)
      return std::string(Spaces, ' ');
  
-  return std::string(Spaces / Style.IndentWidth, '\t') +
-         std::string(Spaces % Style.IndentWidth, ' ');
+  return std::string(Spaces / Style.TabWidth, '\t') +
+         std::string(Spaces % Style.TabWidth, ' ');
  }
  
  } // namespace format
diff --git a/clang/unittests/Format/FormatTest.cpp b/clang/unittests/Format/FormatTest.cpp

index 4c66ef0..2307b03 100644 (file)
--- a/clang/unittests/Format/FormatTest.cpp
+++ b/clang/unittests/Format/FormatTest.cpp
@@ -5638,9 +5638,41 @@ TEST_F(FormatTest, ConfigurableUseOfTab) {
                     "}",
                     21, 0, Tab));
  
+  Tab.TabWidth = 4;
+  Tab.IndentWidth = 8;
+  verifyFormat("class TabWidth4Indent8 {\n"
+               "\t\tvoid f() {\n"
+               "\t\t\t\tsomeFunction(parameter1,\n"
+               "\t\t\t\t\t\t\t parameter2);\n"
+               "\t\t}\n"
+               "};",
+               Tab);
+
+  Tab.TabWidth = 4;
+  Tab.IndentWidth = 4;
+  verifyFormat("class TabWidth4Indent4 {\n"
+               "\tvoid f() {\n"
+               "\t\tsomeFunction(parameter1,\n"
+               "\t\t\t\t\t parameter2);\n"
+               "\t}\n"
+               "};",
+               Tab);
+
+  Tab.TabWidth = 8;
+  Tab.IndentWidth = 4;
+  verifyFormat("class TabWidth8Indent4 {\n"
+               "    void f() {\n"
+               "\tsomeFunction(parameter1,\n"
+               "\t\t     parameter2);\n"
+               "    }\n"
+               "};",
+               Tab);
+
    // FIXME: To correctly count mixed whitespace we need to
    // also correctly count mixed whitespace in front of the comment.
-  //
+
+  // Tab.TabWidth = 8;
+  // Tab.IndentWidth = 8;
    // EXPECT_EQ("/*\n"
    //           "\t      a\t\tcomment\n"
    //           "\t      in multiple lines\n"
@@ -6074,15 +6106,15 @@ TEST_F(FormatTest, CountsUTF8CharactersProperly) {
    verifyFormat("\"Однажды в студёную зимнюю пору...\"",
                 getLLVMStyleWithColumns(35));
    verifyFormat("\"一 二 三 四 五 六 七 八 九 十\"",
-               getLLVMStyleWithColumns(21));
+               getLLVMStyleWithColumns(31));
    verifyFormat("// Однажды в студёную зимнюю пору...",
                 getLLVMStyleWithColumns(36));
    verifyFormat("// 一 二 三 四 五 六 七 八 九 十",
-               getLLVMStyleWithColumns(22));
+               getLLVMStyleWithColumns(32));
    verifyFormat("/* Однажды в студёную зимнюю пору... */",
                 getLLVMStyleWithColumns(39));
    verifyFormat("/* 一 二 三 四 五 六 七 八 九 十 */",
-               getLLVMStyleWithColumns(25));
+               getLLVMStyleWithColumns(35));
  }
  
  TEST_F(FormatTest, SplitsUTF8Strings) {
@@ -6093,11 +6125,29 @@ TEST_F(FormatTest, SplitsUTF8Strings) {
        "\"пору,\"",
        format("\"Однажды, в студёную зимнюю пору,\"",
               getLLVMStyleWithColumns(13)));
-  EXPECT_EQ("\"一 二 三 四 \"\n"
-            "\"五 六 七 八 \"\n"
-            "\"九 十\"",
-            format("\"一 二 三 四 五 六 七 八 九 十\"",
-                   getLLVMStyleWithColumns(10)));
+  EXPECT_EQ("\"一 二 三 \"\n"
+            "\"四 五六 \"\n"
+            "\"七 八 九 \"\n"
+            "\"十\"",
+            format("\"一 二 三 四 五六 七 八 九 十\"",
+                   getLLVMStyleWithColumns(11)));
+  EXPECT_EQ("\"一\t二 \"\n"
+            "\"\t三 \"\n"
+            "\"四 五\t六 \"\n"
+            "\"\t七 \"\n"
+            "\"八九十\tqq\"",
+            format("\"一\t二 \t三 四 五\t六 \t七 八九十\tqq\"",
+                   getLLVMStyleWithColumns(11)));
+}
+
+// A literal with an escaped newline is moved to its own line but not re-split.
+TEST_F(FormatTest, HandlesDoubleWidthCharsInMultiLineStrings) {
+  EXPECT_EQ("const char *sssss =\n"
+            "    \"一二三四五六七八\\\n"
+            " 九 十\";",
+            format("const char *sssss = \"一二三四五六七八\\\n"
+                   " 九 十\";",
+                   getLLVMStyleWithColumns(30)));
 }
  
  TEST_F(FormatTest, SplitsUTF8LineComments) {
@@ -6109,9 +6159,9 @@ TEST_F(FormatTest, SplitsUTF8LineComments) {
                     getLLVMStyleWithColumns(13)));
    EXPECT_EQ("// 一二三\n"
              "// 四五六七\n"
-            "// 八\n"
-            "// 九 十",
-            format("// 一二三 四五六七 八  九 十", getLLVMStyleWithColumns(6)));
+            "// 八  九\n"
+            "// 十",
+            format("// 一二三 四五六七 八  九 十", getLLVMStyleWithColumns(9)));
  }
  
  TEST_F(FormatTest, SplitsUTF8BlockComments) {
@@ -6126,18 +6176,20 @@ TEST_F(FormatTest, SplitsUTF8BlockComments) {
              format("/* Гляжу, поднимается медленно в гору\n"
                     " * Лошадка, везущая хворосту воз. */",
                     getLLVMStyleWithColumns(13)));
-  EXPECT_EQ("/* 一二三\n"
-            " * 四五六七\n"
-            " * 八\n"
-            " * 九 十\n"
-            " */",
-            format("/* 一二三 四五六七 八  九 十 */", getLLVMStyleWithColumns(6)));
+  EXPECT_EQ(
+      "/* 一二三\n"
+      " * 四五六七\n"
+      " * 八  九\n"
+      " * 十  */",
+      format("/* 一二三 四五六七 八  九 十  */", getLLVMStyleWithColumns(9)));
    EXPECT_EQ("/* 𝓣𝓮𝓼𝓽 𝔣𝔬𝔲𝔯\n"
              " * 𝕓𝕪𝕥𝕖\n"
              " * 𝖀𝕿𝕱-𝟠 */",
              format("/* 𝓣𝓮𝓼𝓽 𝔣𝔬𝔲𝔯 𝕓𝕪𝕥𝕖 𝖀𝕿𝕱-𝟠 */", getLLVMStyleWithColumns(12)));
  }
  
+#endif // _MSC_VER
+
  TEST_F(FormatTest, ConstructorInitializerIndentWidth) {
    FormatStyle Style = getLLVMStyle();
  
@@ -6185,8 +6237,6 @@ TEST_F(FormatTest, ConstructorInitializerIndentWidth) {
                 Style);
  }
  
-#endif
-
  TEST_F(FormatTest, FormatsWithWebKitStyle) {
    FormatStyle Style = getWebKitStyle();
author	Alexander Kornienko <alexfh@google.com>
	Thu, 5 Sep 2013 14:08:34 +0000 (14:08 +0000)
committer	Alexander Kornienko <alexfh@google.com>
	Thu, 5 Sep 2013 14:08:34 +0000 (14:08 +0000)
clang/include/clang/Format/Format.h		patch \| blob \| history
clang/lib/Format/BreakableToken.cpp		patch \| blob \| history
clang/lib/Format/BreakableToken.h		patch \| blob \| history
clang/lib/Format/ContinuationIndenter.cpp		patch \| blob \| history
clang/lib/Format/Encoding.h		patch \| blob \| history
clang/lib/Format/Format.cpp		patch \| blob \| history
clang/lib/Format/FormatToken.h		patch \| blob \| history
clang/lib/Format/WhitespaceManager.cpp		patch \| blob \| history
clang/unittests/Format/FormatTest.cpp		patch \| blob \| history