From 9d7178139c4c8d28818cb48af7c0b01fda49d60e Mon Sep 17 00:00:00 2001 From: Martin Probst Date: Thu, 2 Aug 2018 11:52:08 +0000 Subject: [PATCH] clang-format: fix a crash in comment wraps. Summary: Previously, clang-format would crash if it tried to wrap an overlong single line comment, because two parts of the code inserted a break in the same location. /** heregoesalongcommentwithnospace */ This wasn't previously noticed as it could only trigger for an overlong single line comment that did have no breaking opportunities except for a whitespace at the very beginning. This also introduces a check for JavaScript to not ever wrap a comment before an opening curly brace: /** @mods {donotbreakbeforethecurly} */ This is because some machinery parsing these tags sometimes supports breaks before a possible `{`, but in some other cases does not. Previously clang-format was careful never to wrap a line with certain tags on it. The better solution is to specifically disable wrapping before the problematic token: this allows wrapping and aligning comments but still avoids the problem. Reviewers: krasimir Subscribers: cfe-commits Differential Revision: https://reviews.llvm.org/D50177 llvm-svn: 338706 --- clang/lib/Format/BreakableToken.cpp | 26 +++++++++++++--- clang/unittests/Format/FormatTestComments.cpp | 6 ++++ clang/unittests/Format/FormatTestJS.cpp | 45 +++++++++++++++------------ 3 files changed, 52 insertions(+), 25 deletions(-) diff --git a/clang/lib/Format/BreakableToken.cpp b/clang/lib/Format/BreakableToken.cpp index fc2f891..300e3f8 100644 --- a/clang/lib/Format/BreakableToken.cpp +++ b/clang/lib/Format/BreakableToken.cpp @@ -67,10 +67,11 @@ static BreakableToken::Split getCommentSplit(StringRef Text, unsigned ContentStartColumn, unsigned ColumnLimit, unsigned TabWidth, - encoding::Encoding Encoding) { - LLVM_DEBUG(llvm::dbgs() << "Comment split: \"" << Text << ", " << ColumnLimit - << "\", Content start: " << ContentStartColumn - << "\n"); + encoding::Encoding Encoding, + const FormatStyle &Style) { + LLVM_DEBUG(llvm::dbgs() << "Comment split: \"" << Text + << "\", Column limit: " << ColumnLimit + << ", Content start: " << ContentStartColumn << "\n"); if (ColumnLimit <= ContentStartColumn + 1) return BreakableToken::Split(StringRef::npos, 0); @@ -95,6 +96,13 @@ static BreakableToken::Split getCommentSplit(StringRef Text, if (SpaceOffset != StringRef::npos && kNumberedListRegexp->match(Text.substr(SpaceOffset).ltrim(Blanks))) SpaceOffset = Text.find_last_of(Blanks, SpaceOffset); + // In JavaScript, some @tags can be followed by {, and machinery that parses + // these comments will fail to understand the comment if followed by a line + // break. So avoid ever breaking before a {. + if (Style.Language == FormatStyle::LK_JavaScript && + SpaceOffset != StringRef::npos && SpaceOffset + 1 < Text.size() && + Text[SpaceOffset + 1] == '{') + SpaceOffset = Text.find_last_of(Blanks, SpaceOffset); if (SpaceOffset == StringRef::npos || // Don't break at leading whitespace. @@ -109,6 +117,12 @@ static BreakableToken::Split getCommentSplit(StringRef Text, Blanks, std::max(MaxSplitBytes, FirstNonWhitespace)); } if (SpaceOffset != StringRef::npos && SpaceOffset != 0) { + // adaptStartOfLine will break after lines starting with /** if the comment + // is broken anywhere. Avoid emitting this break twice here. + // Example: in /** longtextcomesherethatbreaks */ (with ColumnLimit 20) will + // insert a break after /**, so this code must not insert the same break. + if (SpaceOffset == 1 && Text[SpaceOffset - 1] == '*') + return BreakableToken::Split(StringRef::npos, 0); StringRef BeforeCut = Text.substr(0, SpaceOffset).rtrim(Blanks); StringRef AfterCut = Text.substr(SpaceOffset).ltrim(Blanks); return BreakableToken::Split(BeforeCut.size(), @@ -260,7 +274,7 @@ BreakableComment::getSplit(unsigned LineIndex, unsigned TailOffset, return Split(StringRef::npos, 0); return getCommentSplit(Content[LineIndex].substr(TailOffset), ContentStartColumn, ColumnLimit, Style.TabWidth, - Encoding); + Encoding, Style); } void BreakableComment::compressWhitespace( @@ -620,6 +634,8 @@ void BreakableBlockComment::adaptStartOfLine( if (DelimitersOnNewline) { // Since we're breaking at index 1 below, the break position and the // break length are the same. + // Note: this works because getCommentSplit is careful never to split at + // the beginning of a line. size_t BreakLength = Lines[0].substr(1).find_first_not_of(Blanks); if (BreakLength != StringRef::npos) insertBreak(LineIndex, 0, Split(1, BreakLength), /*ContentIndent=*/0, diff --git a/clang/unittests/Format/FormatTestComments.cpp b/clang/unittests/Format/FormatTestComments.cpp index 2b07ded..9f43677 100644 --- a/clang/unittests/Format/FormatTestComments.cpp +++ b/clang/unittests/Format/FormatTestComments.cpp @@ -1254,6 +1254,12 @@ TEST_F(FormatTestComments, SplitsLongLinesInComments) { " */", getLLVMStyleWithColumns(20))); + // This reproduces a crashing bug where both adaptStartOfLine and + // getCommentSplit were trying to wrap after the "/**". + EXPECT_EQ("/** multilineblockcommentwithnowrapopportunity */", + format("/** multilineblockcommentwithnowrapopportunity */", + getLLVMStyleWithColumns(20))); + EXPECT_EQ("/*\n" "\n" "\n" diff --git a/clang/unittests/Format/FormatTestJS.cpp b/clang/unittests/Format/FormatTestJS.cpp index e7808c6..fe14839 100644 --- a/clang/unittests/Format/FormatTestJS.cpp +++ b/clang/unittests/Format/FormatTestJS.cpp @@ -191,31 +191,32 @@ TEST_F(FormatTestJS, JSDocComments) { // Break a single line long jsdoc comment pragma. EXPECT_EQ("/**\n" - " * @returns\n" - " * {string}\n" - " * jsdoc line 12\n" + " * @returns {string} jsdoc line 12\n" " */", format("/** @returns {string} jsdoc line 12 */", getGoogleJSStyleWithColumns(20))); - EXPECT_EQ("/**\n" - " * @returns\n" - " * {string}\n" + " * @returns {string}\n" " * jsdoc line 12\n" " */", + format("/** @returns {string} jsdoc line 12 */", + getGoogleJSStyleWithColumns(25))); + + EXPECT_EQ("/**\n" + " * @returns {string} jsdoc line 12\n" + " */", format("/** @returns {string} jsdoc line 12 */", getGoogleJSStyleWithColumns(20))); // FIXME: this overcounts the */ as a continuation of the 12 when breaking. // Related to the FIXME in BreakableBlockComment::getRangeLength. EXPECT_EQ("/**\n" - " * @returns\n" - " * {string}\n" - " * jsdoc line\n" + " * @returns {string}\n" + " * jsdoc line line\n" " * 12\n" " */", - format("/** @returns {string} jsdoc line 12*/", - getGoogleJSStyleWithColumns(20))); + format("/** @returns {string} jsdoc line line 12*/", + getGoogleJSStyleWithColumns(25))); // Fix a multiline jsdoc comment ending in a comment pragma. EXPECT_EQ("/**\n" @@ -2038,27 +2039,32 @@ TEST_F(FormatTestJS, WrapAfterParen) { TEST_F(FormatTestJS, JSDocAnnotations) { verifyFormat("/**\n" - " * @exports\n" - " * {this.is.a.long.path.to.a.Type}\n" + " * @exports {this.is.a.long.path.to.a.Type}\n" " */", "/**\n" " * @exports {this.is.a.long.path.to.a.Type}\n" " */", getGoogleJSStyleWithColumns(20)); verifyFormat("/**\n" - " * @mods\n" - " * {this.is.a.long.path.to.a.Type}\n" + " * @mods {this.is.a.long.path.to.a.Type}\n" + " */", + "/**\n" + " * @mods {this.is.a.long.path.to.a.Type}\n" + " */", + getGoogleJSStyleWithColumns(20)); + verifyFormat("/**\n" + " * @mods {this.is.a.long.path.to.a.Type}\n" " */", "/**\n" " * @mods {this.is.a.long.path.to.a.Type}\n" " */", getGoogleJSStyleWithColumns(20)); verifyFormat("/**\n" - " * @param\n" - " * {this.is.a.long.path.to.a.Type}\n" + " * @param {canWrap\n" + " * onSpace}\n" " */", "/**\n" - " * @param {this.is.a.long.path.to.a.Type}\n" + " * @param {canWrap onSpace}\n" " */", getGoogleJSStyleWithColumns(20)); verifyFormat("/**\n" @@ -2083,8 +2089,7 @@ TEST_F(FormatTestJS, JSDocAnnotations) { " /**\n" " * long long long\n" " * long\n" - " * @param\n" - " * {this.is.a.long.path.to.a.Type}\n" + " * @param {this.is.a.long.path.to.a.Type}\n" " * a\n" " * long long long\n" " * long long\n" -- 2.7.4