From: David Turner Date: Sun, 5 Feb 2023 21:19:11 +0000 (-0800) Subject: [clang-format] Fix inconsistent annotation of operator& X-Git-Tag: upstream/17.0.6~18535 X-Git-Url: http://review.tizen.org/git/?a=commitdiff_plain;h=35f2ac1763adcbd5ee9b49a98de24c0c420c323e;p=platform%2Fupstream%2Fllvm.git [clang-format] Fix inconsistent annotation of operator& Token annotator incorrectly annotates operator& as a reference type in situations like Boost serialization archives: https://www.boost.org/doc/libs/1_81_0/libs/serialization/doc/tutorial.html Add annotation rules for standalone and chained operator& instances while preserving behavior for reference declarations at class scope. Add tests to validate annotation and formatting behavior. Differential Revision: https://reviews.llvm.org/D141959 --- diff --git a/clang/lib/Format/TokenAnnotator.cpp b/clang/lib/Format/TokenAnnotator.cpp index 1b1123f..161720e 100644 --- a/clang/lib/Format/TokenAnnotator.cpp +++ b/clang/lib/Format/TokenAnnotator.cpp @@ -111,14 +111,29 @@ static bool isCppAttribute(bool IsCpp, const FormatToken &Tok) { class AnnotatingParser { public: AnnotatingParser(const FormatStyle &Style, AnnotatedLine &Line, - const AdditionalKeywords &Keywords) + const AdditionalKeywords &Keywords, + SmallVector &Scopes) : Style(Style), Line(Line), CurrentToken(Line.First), AutoFound(false), - Keywords(Keywords) { + Keywords(Keywords), Scopes(Scopes) { Contexts.push_back(Context(tok::unknown, 1, /*IsExpression=*/false)); resetTokenMetadata(); } private: + ScopeType getScopeType(const FormatToken &Token) const { + switch (Token.getType()) { + case TT_FunctionLBrace: + case TT_LambdaLBrace: + return ST_Function; + case TT_ClassLBrace: + case TT_StructLBrace: + case TT_UnionLBrace: + return ST_Class; + default: + return ST_Other; + } + } + bool parseAngle() { if (!CurrentToken || !CurrentToken->Previous) return false; @@ -849,6 +864,9 @@ private: unsigned CommaCount = 0; while (CurrentToken) { if (CurrentToken->is(tok::r_brace)) { + assert(!Scopes.empty()); + assert(Scopes.back() == getScopeType(OpeningBrace)); + Scopes.pop_back(); assert(OpeningBrace.Optional == CurrentToken->Optional); OpeningBrace.MatchingParen = CurrentToken; CurrentToken->MatchingParen = &OpeningBrace; @@ -1148,6 +1166,7 @@ private: if (Previous && Previous->getType() != TT_DictLiteral) Previous->setType(TT_SelectorName); } + Scopes.push_back(getScopeType(*Tok)); if (!parseBrace()) return false; break; @@ -1178,6 +1197,9 @@ private: case tok::r_square: return false; case tok::r_brace: + // Don't pop scope when encountering unbalanced r_brace. + if (!Scopes.empty()) + Scopes.pop_back(); // Lines can start with '}'. if (Tok->Previous) return false; @@ -2448,6 +2470,28 @@ private: if (IsExpression && !Contexts.back().CaretFound) return TT_BinaryOperator; + // Opeartors at class scope are likely pointer or reference members. + if (!Scopes.empty() && Scopes.back() == ST_Class) + return TT_PointerOrReference; + + // Tokens that indicate member access or chained operator& use. + auto IsChainedOperatorAmpOrMember = [](const FormatToken *token) { + return !token || token->isOneOf(tok::amp, tok::period, tok::arrow, + tok::arrowstar, tok::periodstar); + }; + + // It's more likely that & represents operator& than an uninitialized + // reference. + if (Tok.is(tok::amp) && PrevToken && PrevToken->Tok.isAnyIdentifier() && + IsChainedOperatorAmpOrMember(PrevToken->getPreviousNonComment()) && + NextToken && NextToken->Tok.isAnyIdentifier()) { + if (auto NextNext = NextToken->getNextNonComment(); + NextNext && + (IsChainedOperatorAmpOrMember(NextNext) || NextNext->is(tok::semi))) { + return TT_BinaryOperator; + } + } + return TT_PointerOrReference; } @@ -2485,6 +2529,8 @@ private: bool AutoFound; const AdditionalKeywords &Keywords; + SmallVector &Scopes; + // Set of "<" tokens that do not open a template parameter list. If parseAngle // determines that a specific token can't be a template opener, it will make // same decision irrespective of the decisions for tokens leading up to it. @@ -2765,11 +2811,11 @@ static unsigned maxNestingDepth(const AnnotatedLine &Line) { return Result; } -void TokenAnnotator::annotate(AnnotatedLine &Line) const { +void TokenAnnotator::annotate(AnnotatedLine &Line) { for (auto &Child : Line.Children) annotate(*Child); - AnnotatingParser Parser(Style, Line, Keywords); + AnnotatingParser Parser(Style, Line, Keywords, Scopes); Line.Type = Parser.parseLine(); // With very deep nesting, ExpressionParser uses lots of stack and the diff --git a/clang/lib/Format/TokenAnnotator.h b/clang/lib/Format/TokenAnnotator.h index 354511b..0a6b4f6 100644 --- a/clang/lib/Format/TokenAnnotator.h +++ b/clang/lib/Format/TokenAnnotator.h @@ -34,6 +34,15 @@ enum LineType { LT_CommentAbovePPDirective, }; +enum ScopeType { + // Contained in class declaration/definition. + ST_Class, + // Contained within function definition. + ST_Function, + // Contained within other scope block (loop, if/else, etc). + ST_Other, +}; + class AnnotatedLine { public: AnnotatedLine(const UnwrappedLine &Line) @@ -178,7 +187,7 @@ public: // FIXME: Can/should this be done in the UnwrappedLineParser? void setCommentLineLevels(SmallVectorImpl &Lines) const; - void annotate(AnnotatedLine &Line) const; + void annotate(AnnotatedLine &Line); void calculateFormattingInformation(AnnotatedLine &Line) const; private: @@ -220,6 +229,8 @@ private: const FormatStyle &Style; const AdditionalKeywords &Keywords; + + SmallVector Scopes; }; } // end namespace format diff --git a/clang/unittests/Format/FormatTest.cpp b/clang/unittests/Format/FormatTest.cpp index e61fb5f..df8ce01 100644 --- a/clang/unittests/Format/FormatTest.cpp +++ b/clang/unittests/Format/FormatTest.cpp @@ -11288,6 +11288,13 @@ TEST_F(FormatTest, UnderstandsUsesOfStarAndAmp) { verifyFormat("int operator()(T (&&)[N]) { return 1; }"); verifyFormat("int operator()(T (&)[N]) { return 0; }"); + + verifyFormat("val1 & val2;"); + verifyFormat("val1 & val2 & val3;"); + verifyFormat("class c {\n" + " void func(type &a) { a & member; }\n" + " anotherType &member;\n" + "}"); } TEST_F(FormatTest, UnderstandsAttributes) { diff --git a/clang/unittests/Format/TokenAnnotatorTest.cpp b/clang/unittests/Format/TokenAnnotatorTest.cpp index 3ed6521..b27bb69 100644 --- a/clang/unittests/Format/TokenAnnotatorTest.cpp +++ b/clang/unittests/Format/TokenAnnotatorTest.cpp @@ -175,6 +175,73 @@ TEST_F(TokenAnnotatorTest, UnderstandsUsesOfStarAndAmp) { ASSERT_EQ(Tokens.size(), 17u) << Tokens; EXPECT_TOKEN(Tokens[9], tok::ampamp, TT_PointerOrReference); EXPECT_TOKEN(Tokens[12], tok::ampamp, TT_PointerOrReference); + + Tokens = annotate("Type1 &val1 = val2;"); + ASSERT_EQ(Tokens.size(), 7u) << Tokens; + EXPECT_TOKEN(Tokens[1], tok::amp, TT_PointerOrReference); + + Tokens = annotate("Type1 *val1 = &val2;"); + ASSERT_EQ(Tokens.size(), 8u) << Tokens; + EXPECT_TOKEN(Tokens[1], tok::star, TT_PointerOrReference); + EXPECT_TOKEN(Tokens[4], tok::amp, TT_UnaryOperator); + + Tokens = annotate("val1 & val2;"); + ASSERT_EQ(Tokens.size(), 5u) << Tokens; + EXPECT_TOKEN(Tokens[1], tok::amp, TT_BinaryOperator); + + Tokens = annotate("val1 & val2.member;"); + ASSERT_EQ(Tokens.size(), 7u) << Tokens; + EXPECT_TOKEN(Tokens[1], tok::amp, TT_BinaryOperator); + + Tokens = annotate("val1 & val2.*member;"); + ASSERT_EQ(Tokens.size(), 7u) << Tokens; + EXPECT_TOKEN(Tokens[1], tok::amp, TT_BinaryOperator); + + Tokens = annotate("val1.*member & val2;"); + ASSERT_EQ(Tokens.size(), 7u) << Tokens; + EXPECT_TOKEN(Tokens[3], tok::amp, TT_BinaryOperator); + + Tokens = annotate("val1 & val2->*member;"); + ASSERT_EQ(Tokens.size(), 7u) << Tokens; + EXPECT_TOKEN(Tokens[1], tok::amp, TT_BinaryOperator); + + Tokens = annotate("val1->member & val2;"); + ASSERT_EQ(Tokens.size(), 7u) << Tokens; + EXPECT_TOKEN(Tokens[3], tok::amp, TT_BinaryOperator); + + Tokens = annotate("val1 & val2 & val3;"); + ASSERT_EQ(Tokens.size(), 7u) << Tokens; + EXPECT_TOKEN(Tokens[1], tok::amp, TT_BinaryOperator); + EXPECT_TOKEN(Tokens[3], tok::amp, TT_BinaryOperator); + + Tokens = annotate("val1 & val2 // comment\n" + " & val3;"); + ASSERT_EQ(Tokens.size(), 8u) << Tokens; + EXPECT_TOKEN(Tokens[1], tok::amp, TT_BinaryOperator); + EXPECT_TOKEN(Tokens[4], tok::amp, TT_BinaryOperator); + + Tokens = + annotate("val1 & val2.member & val3.member() & val4 & val5->member;"); + ASSERT_EQ(Tokens.size(), 19u) << Tokens; + EXPECT_TOKEN(Tokens[1], tok::amp, TT_BinaryOperator); + EXPECT_TOKEN(Tokens[5], tok::amp, TT_BinaryOperator); + EXPECT_TOKEN(Tokens[11], tok::amp, TT_BinaryOperator); + EXPECT_TOKEN(Tokens[13], tok::amp, TT_BinaryOperator); + + Tokens = annotate("class c {\n" + " void func(type &a) { a & member; }\n" + " anotherType &member;\n" + "}"); + ASSERT_EQ(Tokens.size(), 22u) << Tokens; + EXPECT_TOKEN(Tokens[7], tok::amp, TT_PointerOrReference); + EXPECT_TOKEN(Tokens[12], tok::amp, TT_BinaryOperator); + EXPECT_TOKEN(Tokens[17], tok::amp, TT_PointerOrReference); + + Tokens = annotate("struct S {\n" + " auto Mem = C & D;\n" + "}"); + ASSERT_EQ(Tokens.size(), 12u) << Tokens; + EXPECT_TOKEN(Tokens[7], tok::amp, TT_BinaryOperator); } TEST_F(TokenAnnotatorTest, UnderstandsUsesOfPlusAndMinus) {