clang-format: Improve handling of unterminated string literals.

author Daniel Jasper <djasper@google.com>

Tue, 16 Jul 2013 20:28:33 +0000 (20:28 +0000)

committer Daniel Jasper <djasper@google.com>

Tue, 16 Jul 2013 20:28:33 +0000 (20:28 +0000)
author Daniel Jasper <djasper@google.com>
Tue, 16 Jul 2013 20:28:33 +0000 (20:28 +0000)
committer Daniel Jasper <djasper@google.com>
Tue, 16 Jul 2013 20:28:33 +0000 (20:28 +0000)
diff --git a/clang/lib/Format/Format.cpp b/clang/lib/Format/Format.cpp

index 27dd162..c6927ec 100644 (file)
--- a/clang/lib/Format/Format.cpp
+++ b/clang/lib/Format/Format.cpp
@@ -905,6 +905,10 @@ private:
        // Only break up default narrow strings.
        if (!Current.TokenText.startswith("\""))
          return 0;
+      // Exempts unterminated string literals from line breaking. The user will
+      // likely want to terminate the string before any line breaking is done.
+      if (Current.IsUnterminatedLiteral)
+         return 0;
  
        Token.reset(new BreakableStringLiteral(Current, StartColumn,
                                               Line.InPPDirective, Encoding));
@@ -1259,8 +1263,7 @@ private:
      }
  
      FormatTok = new (Allocator.Allocate()) FormatToken;
-    Lex.LexFromRawLexer(FormatTok->Tok);
-    StringRef Text = rawTokenText(FormatTok->Tok);
+    readRawToken(*FormatTok);
      SourceLocation WhitespaceStart =
          FormatTok->Tok.getLocation().getLocWithOffset(-TrailingWhitespace);
      if (SourceMgr.getFileOffset(WhitespaceStart) == 0)
@@ -1269,16 +1272,16 @@ private:
      // Consume and record whitespace until we find a significant token.
      unsigned WhitespaceLength = TrailingWhitespace;
      while (FormatTok->Tok.is(tok::unknown)) {
-      unsigned Newlines = Text.count('\n');
+      unsigned Newlines = FormatTok->TokenText.count('\n');
        if (Newlines > 0)
-        FormatTok->LastNewlineOffset = WhitespaceLength + Text.rfind('\n') + 1;
+        FormatTok->LastNewlineOffset =
+            WhitespaceLength + FormatTok->TokenText.rfind('\n') + 1;
        FormatTok->NewlinesBefore += Newlines;
-      unsigned EscapedNewlines = Text.count("\\\n");
+      unsigned EscapedNewlines = FormatTok->TokenText.count("\\\n");
        FormatTok->HasUnescapedNewline |= EscapedNewlines != Newlines;
        WhitespaceLength += FormatTok->Tok.getLength();
  
-      Lex.LexFromRawLexer(FormatTok->Tok);
-      Text = rawTokenText(FormatTok->Tok);
+      readRawToken(*FormatTok);
      }
  
      // In case the token starts with escaped newlines, we want to
@@ -1287,30 +1290,31 @@ private:
      // FIXME: What do we want to do with other escaped spaces, and escaped
      // spaces or newlines in the middle of tokens?
      // FIXME: Add a more explicit test.
-    while (Text.size() > 1 && Text[0] == '\\' && Text[1] == '\n') {
+    while (FormatTok->TokenText.size() > 1 && FormatTok->TokenText[0] == '\\' &&
+           FormatTok->TokenText[1] == '\n') {
        // FIXME: ++FormatTok->NewlinesBefore is missing...
        WhitespaceLength += 2;
-      Text = Text.substr(2);
+      FormatTok->TokenText = FormatTok->TokenText.substr(2);
      }
  
      TrailingWhitespace = 0;
      if (FormatTok->Tok.is(tok::comment)) {
-      StringRef UntrimmedText = Text;
-      Text = Text.rtrim();
-      TrailingWhitespace = UntrimmedText.size() - Text.size();
+      StringRef UntrimmedText = FormatTok->TokenText;
+      FormatTok->TokenText = FormatTok->TokenText.rtrim();
+      TrailingWhitespace = UntrimmedText.size() - FormatTok->TokenText.size();
      } else if (FormatTok->Tok.is(tok::raw_identifier)) {
-      IdentifierInfo &Info = IdentTable.get(Text);
+      IdentifierInfo &Info = IdentTable.get(FormatTok->TokenText);
        FormatTok->Tok.setIdentifierInfo(&Info);
        FormatTok->Tok.setKind(Info.getTokenID());
      } else if (FormatTok->Tok.is(tok::greatergreater)) {
        FormatTok->Tok.setKind(tok::greater);
-      Text = Text.substr(0, 1);
+      FormatTok->TokenText = FormatTok->TokenText.substr(0, 1);
        GreaterStashed = true;
      }
  
      // Now FormatTok is the next non-whitespace token.
-    FormatTok->TokenText = Text;
-    FormatTok->CodePointCount = encoding::getCodePointCount(Text, Encoding);
+    FormatTok->CodePointCount =
+        encoding::getCodePointCount(FormatTok->TokenText, Encoding);
  
      FormatTok->WhitespaceRange = SourceRange(
          WhitespaceStart, WhitespaceStart.getLocWithOffset(WhitespaceLength));
@@ -1327,10 +1331,18 @@ private:
    llvm::SpecificBumpPtrAllocator<FormatToken> Allocator;
    SmallVector<FormatToken *, 16> Tokens;
  
-  /// Returns the text of \c FormatTok.
-  StringRef rawTokenText(Token &Tok) {
-    return StringRef(SourceMgr.getCharacterData(Tok.getLocation()),
-                     Tok.getLength());
+  void readRawToken(FormatToken &Tok) {
+    Lex.LexFromRawLexer(Tok.Tok);
+    Tok.TokenText = StringRef(SourceMgr.getCharacterData(Tok.Tok.getLocation()),
+                              Tok.Tok.getLength());
+
+    // For formatting, treat unterminated string literals like normal string
+    // literals.
+    if (Tok.is(tok::unknown) && !Tok.TokenText.empty() &&
+        Tok.TokenText[0] == '"') {
+      Tok.Tok.setKind(tok::string_literal);
+      Tok.IsUnterminatedLiteral = true;
+    }
    }
  };
  
diff --git a/clang/lib/Format/FormatToken.h b/clang/lib/Format/FormatToken.h

index 03f0970..3841e49 100644 (file)
--- a/clang/lib/Format/FormatToken.h
+++ b/clang/lib/Format/FormatToken.h
@@ -77,11 +77,12 @@ struct FormatToken {
    FormatToken()
        : NewlinesBefore(0), HasUnescapedNewline(false), LastNewlineOffset(0),
          CodePointCount(0), IsFirst(false), MustBreakBefore(false),
-        BlockKind(BK_Unknown), Type(TT_Unknown), SpacesRequiredBefore(0),
-        CanBreakBefore(false), ClosesTemplateDeclaration(false),
-        ParameterCount(0), PackingKind(PPK_Inconclusive), TotalLength(0),
-        UnbreakableTailLength(0), BindingStrength(0), SplitPenalty(0),
-        LongestObjCSelectorName(0), FakeRParens(0), LastInChainOfCalls(false),
+        IsUnterminatedLiteral(false), BlockKind(BK_Unknown), Type(TT_Unknown),
+        SpacesRequiredBefore(0), CanBreakBefore(false),
+        ClosesTemplateDeclaration(false), ParameterCount(0),
+        PackingKind(PPK_Inconclusive), TotalLength(0), UnbreakableTailLength(0),
+        BindingStrength(0), SplitPenalty(0), LongestObjCSelectorName(0),
+        FakeRParens(0), LastInChainOfCalls(false),
          PartOfMultiVariableDeclStmt(false), MatchingParen(NULL), Previous(NULL),
          Next(NULL) {}
  
@@ -133,6 +134,9 @@ struct FormatToken {
    /// escaped newlines.
    StringRef TokenText;
  
+  /// \brief Set to \c true if this token is an unterminated literal.
+  bool IsUnterminatedLiteral;
+
    /// \brief Contains the kind of block if this token is a brace.
    BraceBlockKind BlockKind;
  
diff --git a/clang/lib/Format/TokenAnnotator.cpp b/clang/lib/Format/TokenAnnotator.cpp

index 7a2364c..6f6f468 100644 (file)
--- a/clang/lib/Format/TokenAnnotator.cpp
+++ b/clang/lib/Format/TokenAnnotator.cpp
@@ -948,13 +948,14 @@ void TokenAnnotator::calculateFormattingInformation(AnnotatedLine &Line) {
        Current->SpacesRequiredBefore =
            spaceRequiredBefore(Line, *Current) ? 1 : 0;
  
-    if (Current->MustBreakBefore) {
-    } else if (Current->is(tok::comment)) {
+    if (Current->is(tok::comment)) {
        Current->MustBreakBefore = Current->NewlinesBefore > 0;
      } else if (Current->Previous->isTrailingComment() ||
                 (Current->is(tok::string_literal) &&
                  Current->Previous->is(tok::string_literal))) {
        Current->MustBreakBefore = true;
+    } else if (Current->Previous->IsUnterminatedLiteral) {
+      Current->MustBreakBefore = true;
      } else if (Current->is(tok::lessless) && Current->Next &&
                 Current->Previous->is(tok::string_literal) &&
                 Current->Next->is(tok::string_literal)) {
@@ -969,8 +970,6 @@ void TokenAnnotator::calculateFormattingInformation(AnnotatedLine &Line) {
                 Current->getNextNonComment() &&
                 Current->getNextNonComment()->is(tok::string_literal)) {
        Current->MustBreakBefore = true;
-    } else {
-      Current->MustBreakBefore = false;
      }
      Current->CanBreakBefore =
          Current->MustBreakBefore || canBreakBefore(Line, *Current);
diff --git a/clang/unittests/Format/FormatTest.cpp b/clang/unittests/Format/FormatTest.cpp

index aa2a709..3d5c21a 100644 (file)
--- a/clang/unittests/Format/FormatTest.cpp
+++ b/clang/unittests/Format/FormatTest.cpp
@@ -5004,6 +5004,11 @@ TEST_F(FormatTest, SkipsUnknownStringLiterals) {
              format("L\"unsupported literal\";", getGoogleStyleWithColumns(15)));
    EXPECT_EQ("R\"x(raw literal)x\";",
              format("R\"x(raw literal)x\";", getGoogleStyleWithColumns(15)));
+  verifyFormat("string a = \"unterminated;");
+  EXPECT_EQ("function(\"unterminated,\n"
+            "         OtherParameter);",
+            format("function(  \"unterminated,\n"
+                   "    OtherParameter);"));
  }
  
  TEST_F(FormatTest, DoesNotTryToParseUDLiteralsInPreCpp11Code) {
author	Daniel Jasper <djasper@google.com>
	Tue, 16 Jul 2013 20:28:33 +0000 (20:28 +0000)
committer	Daniel Jasper <djasper@google.com>
	Tue, 16 Jul 2013 20:28:33 +0000 (20:28 +0000)
clang/lib/Format/Format.cpp		patch | blob | history
clang/lib/Format/FormatToken.h		patch | blob | history
clang/lib/Format/TokenAnnotator.cpp		patch | blob | history
clang/unittests/Format/FormatTest.cpp		patch | blob | history