From 5085d9b27594224856be21d2a4507269a3920e2a Mon Sep 17 00:00:00 2001 From: Manuel Klimek Date: Fri, 8 Mar 2013 18:59:48 +0000 Subject: [PATCH] Fixes breaking of string literals. 1. We now ignore all non-default string literals, including raw literals. 2. We do not break inside escape sequences any more. FIXME: We still break in trigraphs. llvm-svn: 176710 --- clang/lib/Format/Format.cpp | 58 +++++++++++++++++++++++++++++++++-- clang/unittests/Format/FormatTest.cpp | 39 +++++++++++++++++++++++ 2 files changed, 95 insertions(+), 2 deletions(-) diff --git a/clang/lib/Format/Format.cpp b/clang/lib/Format/Format.cpp index e738a5c3..0e556fe 100644 --- a/clang/lib/Format/Format.cpp +++ b/clang/lib/Format/Format.cpp @@ -729,6 +729,9 @@ private: bool DryRun) { if (Current.isNot(tok::string_literal)) return 0; + // Only break up default narrow strings. + if (StringRef(Current.FormatTok.Tok.getLiteralData()).find('"') != 0) + return 0; unsigned Penalty = 0; unsigned TailOffset = 0; @@ -774,12 +777,63 @@ private: StringRef::size_type SlashOffset = Text.rfind('/', Offset); if (SlashOffset != StringRef::npos && SlashOffset != 0) return SlashOffset; - if (Offset > 1) + StringRef::size_type Split = getStartOfCharacter(Text, Offset); + if (Split != StringRef::npos && Split > 1) // Do not split at 0. 
- return Offset - 1; + return Split - 1; return StringRef::npos; } + /* Returns Offset unchanged unless it lies strictly inside a backslash escape sequence of Text; in that case returns the index of that sequence's leading backslash, so a split there cannot cut the sequence in half. */ StringRef::size_type + getStartOfCharacter(StringRef Text, StringRef::size_type Offset) { + StringRef::size_type NextEscape = Text.find('\\'); + while (NextEscape != StringRef::npos && NextEscape < Offset) { + StringRef::size_type SequenceLength = + getEscapeSequenceLength(Text.substr(NextEscape)); + if (Offset < NextEscape + SequenceLength) + return NextEscape; + NextEscape = Text.find('\\', NextEscape + SequenceLength); + } + return Offset; + } + + /* Returns the length, in characters, of the escape sequence at the start of Text, which must begin with a backslash (asserted). A lone backslash yields 1, a lowercase-u escape 6, an uppercase-U escape 10; hex and octal escapes are measured by the helpers below; any other escape yields 2. The u and U lengths are fixed and are not clamped to Text.size(). */ unsigned getEscapeSequenceLength(StringRef Text) { + assert(Text[0] == '\\'); + if (Text.size() < 2) + return 1; + + switch (Text[1]) { + case 'u': + return 6; + case 'U': + return 10; + case 'x': + return getHexLength(Text); + default: + if (Text[1] >= '0' && Text[1] <= '7') + return getOctalLength(Text); + return 2; + } + } + + /* Returns the length of a hex escape: the two prefix characters plus every hex digit (0-9, a-f, A-F) that follows, with no upper bound on the digit count. The first two characters are not inspected here. */ unsigned getHexLength(StringRef Text) { + unsigned I = 2; // Point after '\x'. + while (I < Text.size() && ((Text[I] >= '0' && Text[I] <= '9') || + (Text[I] >= 'a' && Text[I] <= 'f') || + (Text[I] >= 'A' && Text[I] <= 'F'))) { + ++I; + } + return I; + } + + /* Returns the length of an octal escape: the leading backslash plus at most three octal digits (0-7), so the result never exceeds 4. */ unsigned getOctalLength(StringRef Text) { + unsigned I = 1; + while (I < Text.size() && I < 4 && (Text[I] >= '0' && Text[I] <= '7')) { + ++I; + } + return I; + } + unsigned getColumnLimit() { return Style.ColumnLimit - (Line.InPPDirective ? 
2 : 0); } diff --git a/clang/unittests/Format/FormatTest.cpp b/clang/unittests/Format/FormatTest.cpp index 1e92592..0ac3c10 100644 --- a/clang/unittests/Format/FormatTest.cpp +++ b/clang/unittests/Format/FormatTest.cpp @@ -3108,5 +3108,44 @@ TEST_F(FormatTest, BreakStringLiterals) { format("\"split/pathat/slashes\"", getLLVMStyleWithColumns(10))); } +TEST_F(FormatTest, DoNotBreakStringLiteralsInEscapeSequence) { /* Each case formats one string literal under a small column limit and expects the result either unchanged or split only between escape sequences, never inside one. Covered: simple, doubled-backslash, lowercase-u, uppercase-U, hex and octal escapes; the last case expects a raw string literal to come back unchanged. */ + EXPECT_EQ("\"\\a\"", + format("\"\\a\"", getLLVMStyleWithColumns(3))); + EXPECT_EQ("\"\\\"", + format("\"\\\"", getLLVMStyleWithColumns(2))); + EXPECT_EQ("\"test\"\n" + "\"\\n\"", + format("\"test\\n\"", getLLVMStyleWithColumns(7))); + EXPECT_EQ("\"tes\\\\\"\n" + "\"n\"", + format("\"tes\\\\n\"", getLLVMStyleWithColumns(7))); + EXPECT_EQ("\"\\\\\\\\\"\n" + "\"\\n\"", + format("\"\\\\\\\\\\n\"", getLLVMStyleWithColumns(7))); + EXPECT_EQ("\"\\uff01\"", + format("\"\\uff01\"", getLLVMStyleWithColumns(7))); + EXPECT_EQ("\"\\uff01\"\n" + "\"test\"", + format("\"\\uff01test\"", getLLVMStyleWithColumns(8))); + EXPECT_EQ("\"\\Uff01ff02\"", + format("\"\\Uff01ff02\"", getLLVMStyleWithColumns(11))); + EXPECT_EQ("\"\\x000000000001\"\n" + "\"next\"", + format("\"\\x000000000001next\"", getLLVMStyleWithColumns(16))); + EXPECT_EQ("\"\\x000000000001next\"", + format("\"\\x000000000001next\"", getLLVMStyleWithColumns(15))); + EXPECT_EQ("\"\\x000000000001\"", + format("\"\\x000000000001\"", getLLVMStyleWithColumns(7))); + EXPECT_EQ("\"test\"\n" + "\"\\000000\"\n" + "\"000001\"", + format("\"test\\000000000001\"", getLLVMStyleWithColumns(9))); + EXPECT_EQ("\"test\\000\"\n" + "\"000000001\"", + format("\"test\\000000000001\"", getLLVMStyleWithColumns(10))); + EXPECT_EQ("R\"(\\x\\x00)\"\n", + format("R\"(\\x\\x00)\"\n", getLLVMStyleWithColumns(7))); +} + } // end namespace tooling } // end namespace clang -- 2.7.4