From 1998ea2c7e3eee72b6622ef7caf8ae16b630f958 Mon Sep 17 00:00:00 2001 From: Manuel Klimek Date: Wed, 20 Feb 2013 10:15:13 +0000 Subject: [PATCH] Implements breaking of string literals if they stick out. An alternative strategy to calculating the break on demand when hitting a token that would need to be broken would be to put all possible breaks inside the token into the optimizer. Currently only supports breaking at spaces; more break points to come. llvm-svn: 175613 --- clang/lib/Format/Format.cpp | 114 +++++++++++++++++++++++++++++----- clang/unittests/Format/FormatTest.cpp | 60 ++++++++++++++++++ 2 files changed, 157 insertions(+), 17 deletions(-) diff --git a/clang/lib/Format/Format.cpp b/clang/lib/Format/Format.cpp index 8421a3b..21f32d2 100644 --- a/clang/lib/Format/Format.cpp +++ b/clang/lib/Format/Format.cpp @@ -135,8 +135,7 @@ public: // If this line does not have a trailing comment, align the stored comments. if (Tok.Children.empty() && !isTrailingComment(Tok)) alignComments(); - storeReplacement(Tok.FormatTok, - std::string(NewLines, '\n') + std::string(Spaces, ' ')); + storeReplacement(Tok.FormatTok, getNewLineText(NewLines, Spaces)); } /// \brief Like \c replaceWhitespace, but additionally adds right-aligned @@ -147,6 +146,47 @@ public: void replacePPWhitespace(const AnnotatedToken &Tok, unsigned NewLines, unsigned Spaces, unsigned WhitespaceStartColumn, const FormatStyle &Style) { + storeReplacement( + Tok.FormatTok, + getNewLineText(NewLines, Spaces, WhitespaceStartColumn, Style)); + } + + /// \brief Inserts a line break into the middle of a token. + /// + /// Will break at \p Offset inside \p Tok, putting \p Prefix before the line + /// break and \p Postfix before the rest of the token starts in the next line. + /// + /// \p InPPDirective, \p Spaces, \p WhitespaceStartColumn and \p Style are + /// used to generate the correct line break. + void breakToken(const AnnotatedToken &Tok, unsigned Offset, StringRef Prefix, + StringRef Postfix, bool InPPDirective, unsigned Spaces, + unsigned WhitespaceStartColumn, const FormatStyle &Style) { + std::string NewLineText; + if (!InPPDirective) + NewLineText = getNewLineText(1, Spaces); + else + NewLineText = getNewLineText(1, Spaces, WhitespaceStartColumn, Style); + std::string ReplacementText = (Prefix + NewLineText + Postfix).str(); + SourceLocation InsertAt = Tok.FormatTok.WhiteSpaceStart + .getLocWithOffset(Tok.FormatTok.WhiteSpaceLength + Offset); + Replaces.insert( + tooling::Replacement(SourceMgr, InsertAt, 0, ReplacementText)); + } + + /// \brief Returns all the \c Replacements created during formatting. + const tooling::Replacements &generateReplacements() { + alignComments(); + return Replaces; + } + +private: + std::string getNewLineText(unsigned NewLines, unsigned Spaces) { + return std::string(NewLines, '\n') + std::string(Spaces, ' '); + } + + std::string + getNewLineText(unsigned NewLines, unsigned Spaces, + unsigned WhitespaceStartColumn, const FormatStyle &Style) { std::string NewLineText; if (NewLines > 0) { unsigned Offset = @@ -157,16 +197,9 @@ public: Offset = 0; } } - storeReplacement(Tok.FormatTok, NewLineText + std::string(Spaces, ' ')); + return NewLineText + std::string(Spaces, ' '); } - /// \brief Returns all the \c Replacements created during formatting. - const tooling::Replacements &generateReplacements() { - alignComments(); - return Replaces; - } - -private: /// \brief Structure to store a comment for later layout and alignment. struct StoredComment { FormatToken Tok; @@ -258,7 +291,7 @@ public: }); // The first token has already been indented and thus consumed. - moveStateToNextToken(State); + moveStateToNextToken(State, /*DryRun=*/ false); // If everything fits on a single line, just put it there. if (Line.Last->TotalLength <= getColumnLimit() - FirstIndent) { @@ -419,7 +452,7 @@ private: /// /// If \p DryRun is \c false, also creates and stores the required /// \c Replacement. - void addTokenToState(bool Newline, bool DryRun, LineState &State) { + unsigned addTokenToState(bool Newline, bool DryRun, LineState &State) { const AnnotatedToken &Current = *State.NextToken; const AnnotatedToken &Previous = *State.NextToken->Parent; assert(State.Stack.size()); @@ -431,7 +464,7 @@ private: State.NextToken = NULL; else State.NextToken = &State.NextToken->Children[0]; - return; + return 0; } if (Newline) { @@ -576,12 +609,12 @@ private: } } - moveStateToNextToken(State); + return moveStateToNextToken(State, DryRun); } /// \brief Mark the next token as consumed in \p State and modify its stacks /// accordingly. - void moveStateToNextToken(LineState &State) { + unsigned moveStateToNextToken(LineState &State, bool DryRun) { const AnnotatedToken &Current = *State.NextToken; assert(State.Stack.size()); @@ -649,12 +682,59 @@ private: State.Stack.pop_back(); } + State.Column += Current.FormatTok.TokenLength; + if (State.NextToken->Children.empty()) State.NextToken = NULL; else State.NextToken = &State.NextToken->Children[0]; - State.Column += Current.FormatTok.TokenLength; + return breakProtrudingToken(Current, State, DryRun); + } + + /// \brief If the current token sticks out over the end of the line, break + /// it if possible. + unsigned breakProtrudingToken(const AnnotatedToken &Current, LineState &State, + bool DryRun) { + if (Current.isNot(tok::string_literal)) + return 0; + + unsigned Penalty = 0; + unsigned TailOffset = 0; + unsigned TailLength = Current.FormatTok.TokenLength; + unsigned StartColumn = State.Column - Current.FormatTok.TokenLength; + unsigned OffsetFromStart = 0; + while (StartColumn + TailLength > getColumnLimit()) { + StringRef Text = StringRef(Current.FormatTok.Tok.getLiteralData() + + TailOffset, TailLength); + StringRef::size_type SplitPoint = + getSplitPoint(Text, getColumnLimit() - StartColumn - 1); + if (SplitPoint == StringRef::npos) + break; + assert(SplitPoint != 0); + // +2, because 'Text' starts after the opening quotes, and does not + // include the closing quote we need to insert. + unsigned WhitespaceStartColumn = + StartColumn + OffsetFromStart + SplitPoint + 2; + State.Stack.back().LastSpace = StartColumn; + if (!DryRun) { + Whitespaces.breakToken(Current, TailOffset + SplitPoint + 1, "\"", "\"", + Line.InPPDirective, StartColumn, + WhitespaceStartColumn, Style); + } + TailOffset += SplitPoint + 1; + TailLength -= SplitPoint + 1; + OffsetFromStart = 1; + Penalty += 100; + } + State.Column = StartColumn + TailLength; + return Penalty; + } + + StringRef::size_type + getSplitPoint(StringRef Text, StringRef::size_type Offset) { + // FIXME: Implement more sophisticated splitting mechanism, and a fallback. + return Text.rfind(' ', Offset); } unsigned getColumnLimit() { @@ -767,7 +847,7 @@ private: StateNode *Node = new (Allocator.Allocate()) StateNode(PreviousNode->State, NewLine, PreviousNode); - addTokenToState(NewLine, true, Node->State); + Penalty += addTokenToState(NewLine, true, Node->State); if (Node->State.Column > getColumnLimit()) { unsigned ExcessCharacters = Node->State.Column - getColumnLimit(); Penalty += Style.PenaltyExcessCharacter * ExcessCharacters; diff --git a/clang/unittests/Format/FormatTest.cpp b/clang/unittests/Format/FormatTest.cpp index 05e5d37..4fefb96 100644 --- a/clang/unittests/Format/FormatTest.cpp +++ b/clang/unittests/Format/FormatTest.cpp @@ -2843,5 +2843,65 @@ TEST_F(FormatTest, ReformatRegionAdjustsIndent) { 13, 0, getLLVMStyle())); } +TEST_F(FormatTest, BreakStringLiterals) { + EXPECT_EQ("\"some text \"\n" + "\"other\";", + format("\"some text other\";", getLLVMStyleWithColumns(12))); + EXPECT_EQ( + "#define A \\\n" + " \"some \" \\\n" + " \"text \" \\\n" + " \"other\";", + format("#define A \"some text other\";", getLLVMStyleWithColumns(12))); + EXPECT_EQ( + "#define A \\\n" + " \"so \" \\\n" + " \"text \" \\\n" + " \"other\";", + format("#define A \"so text other\";", getLLVMStyleWithColumns(12))); + + EXPECT_EQ("\"some text\"", + format("\"some text\"", getLLVMStyleWithColumns(1))); + EXPECT_EQ("\"some text\"", + format("\"some text\"", getLLVMStyleWithColumns(11))); + EXPECT_EQ("\"some \"\n" + "\"text\"", + format("\"some text\"", getLLVMStyleWithColumns(10))); + EXPECT_EQ("\"some \"\n" + "\"text\"", + format("\"some text\"", getLLVMStyleWithColumns(7))); + EXPECT_EQ("\"some text\"", + format("\"some text\"", getLLVMStyleWithColumns(6))); + + EXPECT_EQ("variable =\n" + " \"long string \"\n" + " \"literal\";", + format("variable = \"long string literal\";", + getLLVMStyleWithColumns(20))); + + EXPECT_EQ("variable = f(\n" + " \"long string \"\n" + " \"literal\", short,\n" + " loooooooooooooooooooong);", + format("variable = f(\"long string literal\", short, " + "loooooooooooooooooooong);", + getLLVMStyleWithColumns(20))); + EXPECT_EQ( + "f(\"one two\".split(\n" + " variable));", + format("f(\"one two\".split(variable));", getLLVMStyleWithColumns(20))); + EXPECT_EQ("f(\"one two three four five six \"\n" + " \"seven\".split(\n" + " really_looooong_variable));", + format("f(\"one two three four five six seven\"." + "split(really_looooong_variable));", + getLLVMStyleWithColumns(33))); + + EXPECT_EQ("f(\"some \"\n" + " \"text\",\n" + " other);", + format("f(\"some text\", other);", getLLVMStyleWithColumns(10))); +} + } // end namespace tooling } // end namespace clang -- 2.7.4