From 64d31edef33097f3878a12285a6db4d1e9488454 Mon Sep 17 00:00:00 2001 From: Martin Probst Date: Tue, 8 Aug 2017 14:52:42 +0000 Subject: [PATCH] clang-format: [JS] handle single lines comments ending in `\\`. Summary: Previously, clang-format would consider the following code line to be part of the comment and incorrectly format the rest of the file. Reviewers: djasper Subscribers: klimek, cfe-commits Differential Revision: https://reviews.llvm.org/D36159 llvm-svn: 310365 --- clang/lib/Format/FormatTokenLexer.cpp | 28 ++++++++++++++++++++++++++++ clang/unittests/Format/FormatTestJS.cpp | 22 ++++++++++++++++++++++ 2 files changed, 50 insertions(+) diff --git a/clang/lib/Format/FormatTokenLexer.cpp b/clang/lib/Format/FormatTokenLexer.cpp index 45c3ae1..5e5bfc8 100644 --- a/clang/lib/Format/FormatTokenLexer.cpp +++ b/clang/lib/Format/FormatTokenLexer.cpp @@ -529,6 +529,34 @@ FormatToken *FormatTokenLexer::getNextToken() { readRawToken(*FormatTok); } + // JavaScript and Java do not allow to escape the end of the line with a + // backslash. Backslashes are syntax errors in plain source, but can occur in + // comments. When a single line comment ends with a \, it'll cause the next + // line of code to be lexed as a comment, breaking formatting. The code below + // finds comments that contain a backslash followed by a line break, truncates + // the comment token at the backslash, and resets the lexer to restart behind + // the backslash. + if ((Style.Language == FormatStyle::LK_JavaScript || + Style.Language == FormatStyle::LK_Java) && + FormatTok->is(tok::comment) && FormatTok->TokenText.startswith("//")) { + size_t BackslashPos = FormatTok->TokenText.find('\\'); + while (BackslashPos != StringRef::npos) { + if (BackslashPos + 1 < FormatTok->TokenText.size() && + FormatTok->TokenText[BackslashPos + 1] == '\n') { + const char *Offset = Lex->getBufferLocation(); + Offset -= FormatTok->TokenText.size(); + Offset += BackslashPos + 1; + resetLexer(SourceMgr.getFileOffset(Lex->getSourceLocation(Offset))); + FormatTok->TokenText = FormatTok->TokenText.substr(0, BackslashPos + 1); + FormatTok->ColumnWidth = encoding::columnWidthWithTabs( + FormatTok->TokenText, FormatTok->OriginalColumn, Style.TabWidth, + Encoding); + break; + } + BackslashPos = FormatTok->TokenText.find('\\', BackslashPos + 1); + } + } + // In case the token starts with escaped newlines, we want to // take them into account as whitespace - this pattern is quite frequent // in macro definitions. diff --git a/clang/unittests/Format/FormatTestJS.cpp b/clang/unittests/Format/FormatTestJS.cpp index 67004f2..12ff750 100644 --- a/clang/unittests/Format/FormatTestJS.cpp +++ b/clang/unittests/Format/FormatTestJS.cpp @@ -2074,5 +2074,27 @@ TEST_F(FormatTestJS, NestedLiterals) { "};", FourSpaces); } +TEST_F(FormatTestJS, BackslashesInComments) { + verifyFormat("// hello \\\n" + "if (x) foo();\n", + "// hello \\\n" + " if ( x) \n" + " foo();\n"); + verifyFormat("/* ignore \\\n" + " */\n" + "if (x) foo();\n", + "/* ignore \\\n" + " */\n" + " if ( x) foo();\n"); + verifyFormat("// st \\ art\\\n" + "// comment" + "// continue \\\n" + "formatMe();\n", + "// st \\ art\\\n" + "// comment" + "// continue \\\n" + "formatMe( );\n"); +} + } // end namespace tooling } // end namespace clang -- 2.7.4