if (Style.Language == FormatStyle::LK_TextProto)
tryParsePythonComment();
tryMergePreviousTokens();
+ if (Style.isCSharp())
+ // This needs to come after tokens have been merged so that C#
+ // string literals are correctly identified.
+ handleCSharpVerbatimAndInterpolatedStrings();
if (Tokens.back()->NewlinesBefore > 0 || Tokens.back()->IsMultiline)
FirstInLineIndex = Tokens.size() - 1;
} while (Tokens.back()->Tok.isNot(tok::eof));
// Search for verbatim or interpolated string literals @"ABC" or
// $"aaaaa{abc}aaaaa" and mark the token as TT_CSharpStringLiteral, to
// prevent splitting of @, $ and ".
+// Merging of multiline verbatim strings with embedded '"' is handled in
+// handleCSharpVerbatimAndInterpolatedStrings with lower-level lexing.
bool FormatTokenLexer::tryMergeCSharpStringLiteral() {
if (Tokens.size() < 2)
return false;
- auto &CSharpStringLiteral = *(Tokens.end() - 2);
-
// Interpolated strings could contain { } with " characters inside.
// $"{x ?? "null"}"
// should not be split into $"{x ?? ", null, "}" but should treated as a
}
}
- // verbatim strings could contain "" which C# sees as an escaped ".
- // @"""Hello""" will have been tokenized as @"" "Hello" "" and needs
- // merging into a single string literal.
+ // Look for @"aaaaaa" or $"aaaaaa".
auto &String = *(Tokens.end() - 1);
if (!String->is(tok::string_literal))
return false;
- if (CSharpStringLiteral->Type == TT_CSharpStringLiteral &&
- (CSharpStringLiteral->TokenText.startswith(R"(@")") ||
- CSharpStringLiteral->TokenText.startswith(R"($@")"))) {
- CSharpStringLiteral->TokenText = StringRef(
- CSharpStringLiteral->TokenText.begin(),
- String->TokenText.end() - CSharpStringLiteral->TokenText.begin());
- CSharpStringLiteral->ColumnWidth += String->ColumnWidth;
- Tokens.erase(Tokens.end() - 1);
- return true;
- }
-
auto &At = *(Tokens.end() - 2);
-
- // Look for @"aaaaaa" or $"aaaaaa".
if (!(At->is(tok::at) || At->TokenText == "$"))
return false;
resetLexer(SourceMgr.getFileOffset(Lex->getSourceLocation(Offset)));
}
+void FormatTokenLexer::handleCSharpVerbatimAndInterpolatedStrings() {
+ FormatToken *CSharpStringLiteral = Tokens.back();
+ // Only the last token in Tokens is inspected.
+ if (CSharpStringLiteral->Type != TT_CSharpStringLiteral)
+ return;
+
+ // Only literals starting with @" or $@" are re-scanned below.
+ if (!(CSharpStringLiteral->TokenText.startswith(R"(@")") ||
+ CSharpStringLiteral->TokenText.startswith(R"($@")")))
+ return;
+ // NOTE(review): assumes Lex is positioned just past this token's text.
+ const char *StrBegin =
+ Lex->getBufferLocation() - CSharpStringLiteral->TokenText.size();
+ const char *Offset = StrBegin;
+ if (CSharpStringLiteral->TokenText.startswith(R"(@")"))
+ Offset += 2;
+ else // CSharpStringLiteral->TokenText.startswith(R"($@")")
+ Offset += 3;
+
+ // Scan the raw file buffer for the terminating '"', starting just after the
+ // opening quote. No effort is made to format code inside the string.
+ for (; Offset != Lex->getBuffer().end(); ++Offset) {
+ if (Offset[0] == '"') {
+ // "" within a verbatim string is an escaped double quote: skip it.
+ if (Offset + 1 < Lex->getBuffer().end() && Offset[1] == '"')
+ ++Offset;
+ else
+ break;
+ }
+ }
+
+ // Make no attempt to format code properly if a verbatim string is
+ // unterminated: the token is left exactly as the lexer produced it.
+ if (Offset == Lex->getBuffer().end())
+ return;
+ // Offset points at the closing '"'; the + 1 includes it in the literal.
+ StringRef LiteralText(StrBegin, Offset - StrBegin + 1);
+ CSharpStringLiteral->TokenText = LiteralText;
+
+ // ColumnWidth covers the first line only; later lines are handled below.
+ size_t FirstBreak = LiteralText.find('\n');
+ StringRef FirstLineText = FirstBreak == StringRef::npos
+ ? LiteralText
+ : LiteralText.substr(0, FirstBreak);
+ CSharpStringLiteral->ColumnWidth = encoding::columnWidthWithTabs(
+ FirstLineText, CSharpStringLiteral->OriginalColumn, Style.TabWidth,
+ Encoding);
+ size_t LastBreak = LiteralText.rfind('\n');
+ if (LastBreak != StringRef::npos) {
+ CSharpStringLiteral->IsMultiline = true;
+ unsigned StartColumn = 0; // The literal's last line starts at column 0.
+ CSharpStringLiteral->LastLineColumnWidth = encoding::columnWidthWithTabs(
+ LiteralText.substr(LastBreak + 1, LiteralText.size()), StartColumn,
+ Style.TabWidth, Encoding);
+ }
+ // NOTE(review): Offset != end here (early return above); EOF arm looks dead.
+ SourceLocation loc = Offset < Lex->getBuffer().end()
+ ? Lex->getSourceLocation(Offset + 1)
+ : SourceMgr.getLocForEndOfFile(ID);
+ resetLexer(SourceMgr.getFileOffset(loc));
+}
+
void FormatTokenLexer::handleTemplateStrings() {
FormatToken *BacktickToken = Tokens.back();
TEST_F(FormatTestCSharp, CSharpEscapedQuotesInVerbatimStrings) {
FormatStyle Style = getGoogleStyle(FormatStyle::LK_CSharp);
- verifyFormat(R"(string str = @"""")", Style);
- verifyFormat(R"(string str = @"""Hello world""")", Style);
- verifyFormat(R"(string str = $@"""Hello {friend}""")", Style);
+ verifyFormat(R"(string str = @"""";)", Style);
+ verifyFormat(R"(string str = @"""Hello world""";)", Style);
+ verifyFormat(R"(string str = $@"""Hello {friend}""";)", Style);
}
TEST_F(FormatTestCSharp, CSharpQuotesInInterpolatedStrings) {
verifyFormat(R"(string str3 = $"{braceCount}}} braces";)", Style);
}
+TEST_F(FormatTestCSharp, CSharpNewlinesInVerbatimStrings) {
+ // Use Microsoft style here: Google style inserts a line break before
+ // multiline strings.
+ // verifyFormat does not understand multiline C# string literals, so each
+ // case below asserts explicitly that format() returns its input unchanged.
+
+ FormatStyle Style = getMicrosoftStyle(FormatStyle::LK_CSharp);
+
+ std::string Code = R"(string s1 = $@"some code:
+ class {className} {{
+ {className}() {{}}
+ }}";)";
+
+ EXPECT_EQ(Code, format(Code, Style));
+
+ // Multiline string in the middle of a function call.
+ Code = R"(
+var x = foo(className, $@"some code:
+ class {className} {{
+ {className}() {{}}
+ }}",
+ y);)"; // y aligned with `className` arg.
+
+ EXPECT_EQ(Code, format(Code, Style));
+
+ // Interpolated string with an embedded multiline verbatim string.
+ Code = R"(Console.WriteLine($"{string.Join(@",
+ ", values)}");)";
+
+ EXPECT_EQ(Code, format(Code, Style));
+}
+
} // namespace format
} // end namespace clang