From: Daniel Jasper Date: Fri, 8 Feb 2013 15:28:42 +0000 (+0000) Subject: Implement a tiny expression parser to improve formatting decisions. X-Git-Url: http://review.tizen.org/git/?a=commitdiff_plain;h=400adc64dab61af2f90e5bbc09bb62ccc2a7a607;p=platform%2Fupstream%2Fllvm.git Implement a tiny expression parser to improve formatting decisions. With this patch, the formatter introduces 'fake' parenthesis according to the operator precedence of binary operators. Before: return aaaa & AAAAAAAAAAAAAAAAAAAAAAAAAAAAA || bbbb & BBBBBBBBBBBBBBBBBBBBBBBBBBBBB || cccc & CCCCCCCCCCCCCCCCCCCCCCCCCC || dddd & DDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDD; f(aaaaaaaaaaaaaaaaaaaa && aaaaaaaaaaaaaaaaaaaa, aaaaaaaaaaaaaaaaaaaa && aaaaaaaaaaaaaaaaaaaa, aaaaaaaaaaaaaaaaaaaa && aaaaaaaaaaaaaaaaaaaa); After: return aaaa & AAAAAAAAAAAAAAAAAAAAAAAAAAAAA || bbbb & BBBBBBBBBBBBBBBBBBBBBBBBBBBBB || cccc & CCCCCCCCCCCCCCCCCCCCCCCCCC || dddd & DDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDD; f(aaaaaaaaaaaaaaaaaaaa && aaaaaaaaaaaaaaaaaaaa, aaaaaaaaaaaaaaaaaaaa && aaaaaaaaaaaaaaaaaaaa, aaaaaaaaaaaaaaaaaaaa && aaaaaaaaaaaaaaaaaaaa); Future improvements: - Get rid of some of the hacky ways to nicely format certain constructs. - Merge this parser and the AnnotatingParser as we now have several parsers that analyze (), [], etc. llvm-svn: 174714 --- diff --git a/clang/lib/Format/Format.cpp b/clang/lib/Format/Format.cpp index 932f867..bef5f92 100644 --- a/clang/lib/Format/Format.cpp +++ b/clang/lib/Format/Format.cpp @@ -205,7 +205,7 @@ private: while (I != E) { unsigned Spaces = I->Spaces + Column - I->MinColumn; storeReplacement(I->Tok, std::string(I->NewLines, '\n') + - std::string(Spaces, ' ')); + std::string(Spaces, ' ')); ++I; } } @@ -251,6 +251,7 @@ public: /*HasMultiParameterLine=*/ false)); State.VariablePos = 0; State.LineContainsContinuedForLoopSection = false; + State.ParenLevel = 0; DEBUG({ DebugTokenState(*State.NextToken); @@ -287,8 +288,8 @@ private: struct ParenState { ParenState(unsigned Indent, unsigned LastSpace, bool AvoidBinPacking, bool HasMultiParameterLine) - : Indent(Indent), LastSpace(LastSpace), AssignmentColumn(0), - FirstLessLess(0), BreakBeforeClosingBrace(false), QuestionColumn(0), + : Indent(Indent), LastSpace(LastSpace), FirstLessLess(0), + BreakBeforeClosingBrace(false), QuestionColumn(0), AvoidBinPacking(AvoidBinPacking), BreakBeforeParameter(false), HasMultiParameterLine(HasMultiParameterLine), ColonPos(0) { } @@ -304,9 +305,6 @@ private: /// OtherParameter)); unsigned LastSpace; - /// \brief This is the column of the first token after an assignment. - unsigned AssignmentColumn; - /// \brief The position the first "<<" operator encountered on each level. /// /// Used to align "<<" operators. 0 if no such operator has been encountered @@ -342,8 +340,6 @@ private: return Indent < Other.Indent; if (LastSpace != Other.LastSpace) return LastSpace < Other.LastSpace; - if (AssignmentColumn != Other.AssignmentColumn) - return AssignmentColumn < Other.AssignmentColumn; if (FirstLessLess != Other.FirstLessLess) return FirstLessLess < Other.FirstLessLess; if (BreakBeforeClosingBrace != Other.BreakBeforeClosingBrace) @@ -380,6 +376,9 @@ private: /// \brief \c true if this line contains a continued for-loop section. bool LineContainsContinuedForLoopSection; + /// \brief The level of nesting inside (), [], <> and {}. + unsigned ParenLevel; + /// \brief A stack keeping track of properties applying to parenthesis /// levels. std::vector Stack; @@ -395,6 +394,8 @@ private: if (Other.LineContainsContinuedForLoopSection != LineContainsContinuedForLoopSection) return LineContainsContinuedForLoopSection; + if (Other.ParenLevel != ParenLevel) + return Other.ParenLevel < ParenLevel; return Other.Stack < Stack; } }; @@ -411,7 +412,6 @@ private: const AnnotatedToken &Current = *State.NextToken; const AnnotatedToken &Previous = *State.NextToken->Parent; assert(State.Stack.size()); - unsigned ParenLevel = State.Stack.size() - 1; if (Current.Type == TT_ImplicitStringLiteral) { State.Column += State.NextToken->FormatTok.WhiteSpaceLength + @@ -431,9 +431,9 @@ private: Previous.is(tok::string_literal)) { State.Column = State.Column - Previous.FormatTok.TokenLength; } else if (Current.is(tok::lessless) && - State.Stack[ParenLevel].FirstLessLess != 0) { - State.Column = State.Stack[ParenLevel].FirstLessLess; - } else if (ParenLevel != 0 && + State.Stack.back().FirstLessLess != 0) { + State.Column = State.Stack.back().FirstLessLess; + } else if (State.ParenLevel != 0 && (Previous.is(tok::equal) || Previous.is(tok::coloncolon) || Current.is(tok::period) || Current.is(tok::arrow) || Current.is(tok::question))) { @@ -445,15 +445,12 @@ private: } else if (Current.Type == TT_ConditionalExpr) { State.Column = State.Stack.back().QuestionColumn; } else if (Previous.is(tok::comma) && State.VariablePos != 0 && - ((RootToken.is(tok::kw_for) && ParenLevel == 1) || - ParenLevel == 0)) { + ((RootToken.is(tok::kw_for) && State.ParenLevel == 1) || + State.ParenLevel == 0)) { State.Column = State.VariablePos; } else if (State.NextToken->Parent->ClosesTemplateDeclaration || Current.Type == TT_StartOfName) { - State.Column = State.Stack[ParenLevel].Indent - 4; - } else if (Previous.Type == TT_BinaryOperator && - State.Stack.back().AssignmentColumn != 0) { - State.Column = State.Stack.back().AssignmentColumn; + State.Column = State.Stack.back().Indent - 4; } else if (Current.Type == TT_ObjCSelectorName) { if (State.Stack.back().ColonPos > Current.FormatTok.TokenLength) { State.Column = @@ -466,7 +463,7 @@ private: } else if (Previous.Type == TT_ObjCMethodExpr) { State.Column = State.Stack.back().Indent + 4; } else { - State.Column = State.Stack[ParenLevel].Indent; + State.Column = State.Stack.back().Indent; } if (Previous.is(tok::comma) && !State.Stack.back().AvoidBinPacking) @@ -484,12 +481,12 @@ private: WhitespaceStartColumn, Style); } - State.Stack[ParenLevel].LastSpace = State.Column; + State.Stack.back().LastSpace = State.Column; if (Current.is(tok::colon) && Current.Type != TT_ConditionalExpr) - State.Stack[ParenLevel].Indent += 2; + State.Stack.back().Indent += 2; } else { if (Current.is(tok::equal) && - (RootToken.is(tok::kw_for) || ParenLevel == 0)) + (RootToken.is(tok::kw_for) || State.ParenLevel == 0)) State.VariablePos = State.Column - Previous.FormatTok.TokenLength; unsigned Spaces = State.NextToken->SpaceRequiredBefore ? 1 : 0; @@ -510,26 +507,19 @@ private: State.Column + Spaces + Current.FormatTok.TokenLength; } - // FIXME: Do we need to do this for assignments nested in other - // expressions? - if (RootToken.isNot(tok::kw_for) && ParenLevel == 0 && - !isTrailingComment(Current) && - (getPrecedence(Previous) == prec::Assignment || - Previous.is(tok::kw_return))) - State.Stack.back().AssignmentColumn = State.Column + Spaces; if (Current.Type != TT_LineComment && (Previous.is(tok::l_paren) || Previous.is(tok::l_brace) || State.NextToken->Parent->Type == TT_TemplateOpener)) - State.Stack[ParenLevel].Indent = State.Column + Spaces; + State.Stack.back().Indent = State.Column + Spaces; if (Previous.is(tok::comma) && !isTrailingComment(Current)) - State.Stack[ParenLevel].HasMultiParameterLine = true; + State.Stack.back().HasMultiParameterLine = true; State.Column += Spaces; if (Current.is(tok::l_paren) && Previous.is(tok::kw_if)) // Treat the condition inside an if as if it was a second function // parameter, i.e. let nested calls have an indent of 4. State.Stack.back().LastSpace = State.Column + 1; // 1 is length of "(". - else if (Previous.is(tok::comma) && ParenLevel != 0) + else if (Previous.is(tok::comma) && State.ParenLevel != 0) // Top-level spaces are exempt as that mostly leads to better results. State.Stack.back().LastSpace = State.Column; else if ((Previous.Type == TT_BinaryOperator || @@ -551,7 +541,7 @@ private: State.Stack.back().BreakBeforeClosingBrace = true; if (State.Stack.back().AvoidBinPacking && Newline && - (Line.First.isNot(tok::kw_for) || ParenLevel != 1)) { + (Line.First.isNot(tok::kw_for) || State.ParenLevel != 1)) { // If we are breaking after '(', '{', '<', this is not bin packing unless // AllowAllParametersOfDeclarationOnNextLine is false. if ((Previous.isNot(tok::l_paren) && Previous.isNot(tok::l_brace) && @@ -587,6 +577,13 @@ private: State.Stack.back().BreakBeforeParameter = true; } + // Insert scopes created by fake parenthesis. + for (unsigned i = 0, e = Current.FakeLParens; i != e; ++i) { + ParenState NewParenState = State.Stack.back(); + NewParenState.Indent = std::max(State.Column, State.Stack.back().Indent); + State.Stack.push_back(NewParenState); + } + // If we encounter an opening (, [, { or <, we add a level to our stacks to // prepare for the following tokens. if (Current.is(tok::l_paren) || Current.is(tok::l_square) || @@ -604,6 +601,7 @@ private: State.Stack.push_back( ParenState(NewIndent, State.Stack.back().LastSpace, AvoidBinPacking, State.Stack.back().HasMultiParameterLine)); + ++State.ParenLevel; } // If this '[' opens an ObjC call, determine whether all parameters fit into @@ -620,6 +618,12 @@ private: (Current.is(tok::r_brace) && State.NextToken != &RootToken) || State.NextToken->Type == TT_TemplateCloser) { State.Stack.pop_back(); + --State.ParenLevel; + } + + // Remove scopes created by fake parenthesis. + for (unsigned i = 0, e = Current.FakeRParens; i != e; ++i) { + State.Stack.pop_back(); } if (State.NextToken->Children.empty()) @@ -760,7 +764,7 @@ private: return true; if ((State.NextToken->Type == TT_CtorInitializerColon || (State.NextToken->Parent->ClosesTemplateDeclaration && - State.Stack.size() == 1))) + State.ParenLevel == 0))) return true; return false; } @@ -885,8 +889,9 @@ public: ++CountBoundToType; } - if (Tok->Type == TT_TemplateCloser && Tok->Parent->Type == - TT_TemplateCloser && Tok->FormatTok.WhiteSpaceLength == 0) + if (Tok->Type == TT_TemplateCloser && + Tok->Parent->Type == TT_TemplateCloser && + Tok->FormatTok.WhiteSpaceLength == 0) HasCpp03IncompatibleFormat = true; Tok = &Tok->Children[0]; } diff --git a/clang/lib/Format/TokenAnnotator.cpp b/clang/lib/Format/TokenAnnotator.cpp index 8a38454..485624b 100644 --- a/clang/lib/Format/TokenAnnotator.cpp +++ b/clang/lib/Format/TokenAnnotator.cpp @@ -52,6 +52,12 @@ static const AnnotatedToken *getPreviousToken(const AnnotatedToken &Tok) { return getPreviousToken(const_cast(Tok)); } +static bool isTrailingComment(AnnotatedToken *Tok) { + return Tok != NULL && Tok->is(tok::comment) && + (Tok->Children.empty() || + Tok->Children[0].FormatTok.NewlinesBefore > 0); +} + // Returns the next token ignoring comments. static const AnnotatedToken *getNextToken(const AnnotatedToken &Tok) { if (Tok.Children.empty()) @@ -681,12 +687,91 @@ private: bool KeywordVirtualFound; }; +/// \brief Parses binary expressions by inserting fake parenthesis based on +/// operator precedence. +class ExpressionParser { +public: + ExpressionParser(AnnotatedLine &Line) : Current(&Line.First) {} + + /// \brief Parse expressions with the given operatore precedence. + void parse(unsigned Precedence = prec::Unknown) { + if (Precedence > prec::PointerToMember || Current == NULL) + return; + + // Skip over "return" until we can properly parse it. + if (Current->is(tok::kw_return)) + next(); + + // Eagerly consume trailing comments. + while (isTrailingComment(Current)) { + next(); + } + + AnnotatedToken *Start = Current; + bool OperatorFound = false; + + while (Current != NULL) { + // Consume operators with higher precedence. + parse(Precedence + 1); + + // At the end of the line or when an operator with higher precedence is + // found, insert fake parenthesis and return. + if (Current == NULL || Current->is(tok::semi) || closesScope(*Current) || + ((Current->Type == TT_BinaryOperator || Current->is(tok::comma)) && + getPrecedence(*Current) < Precedence)) { + if (OperatorFound) { + ++Start->FakeLParens; + if (Current != NULL) + ++Current->FakeRParens; + } + return; + } + + // Consume scopes: (), [], <> and {} + if (opensScope(*Current)) { + while (Current != NULL && !closesScope(*Current)) { + next(); + parse(); + } + next(); + } else { + // Operator found. + if (getPrecedence(*Current) == Precedence) + OperatorFound = true; + + next(); + } + } + } + +private: + void next() { + if (Current != NULL) + Current = Current->Children.empty() ? NULL : &Current->Children[0]; + } + + bool closesScope(const AnnotatedToken &Tok) { + return Current->is(tok::r_paren) || Current->Type == TT_TemplateCloser || + Current->is(tok::r_brace) || Current->is(tok::r_square); + } + + bool opensScope(const AnnotatedToken &Tok) { + return Current->is(tok::l_paren) || Current->Type == TT_TemplateOpener || + Current->is(tok::l_brace) || Current->is(tok::l_square); + } + + AnnotatedToken *Current; +}; + void TokenAnnotator::annotate(AnnotatedLine &Line) { AnnotatingParser Parser(SourceMgr, Lex, Line); Line.Type = Parser.parseLine(); if (Line.Type == LT_Invalid) return; + ExpressionParser ExprParser(Line); + ExprParser.parse(); + if (Line.First.Type == TT_ObjCMethodSpecifier) Line.Type = LT_ObjCMethodDecl; else if (Line.First.Type == TT_ObjCDecl) @@ -712,8 +797,7 @@ void TokenAnnotator::calculateFormattingInformation(AnnotatedLine &Line) { Current->MustBreakBefore = true; } else if (Current->Type == TT_LineComment) { Current->MustBreakBefore = Current->FormatTok.NewlinesBefore > 0; - } else if ((Current->Parent->is(tok::comment) && - Current->FormatTok.NewlinesBefore > 0) || + } else if (isTrailingComment(Current->Parent) || (Current->is(tok::string_literal) && Current->Parent->is(tok::string_literal))) { Current->MustBreakBefore = true; @@ -839,8 +923,7 @@ bool TokenAnnotator::spaceRequiredBetween(const AnnotatedLine &Line, (Left.isNot(tok::star) && Left.isNot(tok::amp) && !Style.PointerBindsToType); if (Left.is(tok::amp) || Left.is(tok::star)) - return Right.FormatTok.Tok.isLiteral() || - Style.PointerBindsToType; + return Right.FormatTok.Tok.isLiteral() || Style.PointerBindsToType; if (Right.is(tok::star) && Left.is(tok::l_paren)) return false; if (Left.is(tok::l_square) || Right.is(tok::r_square)) @@ -911,8 +994,9 @@ bool TokenAnnotator::spaceRequiredBefore(const AnnotatedLine &Line, (Tok.Parent->isNot(tok::colon) || Tok.Parent->Type != TT_ObjCMethodExpr); if (Tok.Parent->is(tok::greater) && Tok.is(tok::greater)) { - return Tok.Type == TT_TemplateCloser && Tok.Parent->Type == - TT_TemplateCloser && Style.Standard != FormatStyle::LS_Cpp11; + return Tok.Type == TT_TemplateCloser && + Tok.Parent->Type == TT_TemplateCloser && + Style.Standard != FormatStyle::LS_Cpp11; } if (Tok.Type == TT_BinaryOperator || Tok.Parent->Type == TT_BinaryOperator) return true; diff --git a/clang/lib/Format/TokenAnnotator.h b/clang/lib/Format/TokenAnnotator.h index 0a0699d..dc936e4 100644 --- a/clang/lib/Format/TokenAnnotator.h +++ b/clang/lib/Format/TokenAnnotator.h @@ -71,7 +71,8 @@ public: CanBreakBefore(false), MustBreakBefore(false), ClosesTemplateDeclaration(false), MatchingParen(NULL), ParameterCount(1), BindingStrength(0), SplitPenalty(0), - LongestObjCSelectorName(0), Parent(NULL) { + LongestObjCSelectorName(0), Parent(NULL), FakeLParens(0), + FakeRParens(0) { } bool is(tok::TokenKind Kind) const { return FormatTok.Tok.is(Kind); } @@ -118,6 +119,11 @@ public: std::vector Children; AnnotatedToken *Parent; + /// \brief Insert this many fake ( before this token for correct indentation. + unsigned FakeLParens; + /// \brief Insert this many fake ) before this token for correct indentation. + unsigned FakeRParens; + const AnnotatedToken *getPreviousNoneComment() const { AnnotatedToken *Tok = Parent; while (Tok != NULL && Tok->is(tok::comment)) diff --git a/clang/unittests/Format/FormatTest.cpp b/clang/unittests/Format/FormatTest.cpp index 29d2f99..7176fa5 100644 --- a/clang/unittests/Format/FormatTest.cpp +++ b/clang/unittests/Format/FormatTest.cpp @@ -1192,6 +1192,13 @@ TEST_F(FormatTest, BreaksAccordingToOperatorPrecedence) { verifyFormat( "if ((aaaaaaaaaaaaaaaaaaaaaaaaa || bbbbbbbbbbbbbbbbbbbbbbbbb) &&\n" " ccccccccccccccccccccccccc) {\n}"); + verifyFormat("return aaaa & AAAAAAAAAAAAAAAAAAAAAAAAAAAAA ||\n" + " bbbb & BBBBBBBBBBBBBBBBBBBBBBBBBBBBB ||\n" + " cccc & CCCCCCCCCCCCCCCCCCCCCCCCCC ||\n" + " dddd & DDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDD;"); + verifyFormat("if ((aaaaaaaaaa != aaaaaaaaaaaaaaa ||\n" + " aaaaaaaaaaaaaaaaaaaaaaaa() >= aaaaaaaaaaaaaaaaaaaa) &&\n" + " aaaaaaaaaaaaaaa != aa) {\n}"); } TEST_F(FormatTest, BreaksAfterAssignments) { @@ -1275,7 +1282,7 @@ TEST_F(FormatTest, DeclarationsOfMultipleVariables) { verifyFormat("bool aaaaaaaaaaaaaaaaaaaaaaaaa =\n" " aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa(aaaaaaaaaaaaaaaa),\n" " bbbbbbbbbbbbbbbbbbbbbbbbb =\n" - " bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb(bbbbbbbbbbbbbbbb);"); + " bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb(bbbbbbbbbbbbbbbb);"); // FIXME: This is bad as we hide "d". verifyFormat( @@ -1391,6 +1398,9 @@ TEST_F(FormatTest, WrapsTemplateDeclarations) { "virtual void loooooooooooongFunction(int Param1, int Param2);"); verifyFormat( "template \n" + "using comment_to_xml_conversion = comment_to_xml_conversion;"); + verifyFormat( + "template \n" "void f(int Paaaaaaaaaaaaaaaaaaaaaaaaaaaaaaram1,\n" " int Paaaaaaaaaaaaaaaaaaaaaaaaaaaaaaram2);"); verifyFormat(