From f79351157959adde9a65d99075590a9db613a77b Mon Sep 17 00:00:00 2001 From: Daniel Jasper Date: Mon, 3 Dec 2012 18:12:45 +0000 Subject: [PATCH] Initial version of formatting library. This formatting library will be used by a stand-alone clang-format tool and can also be used when writing other refactorings. Manuel's original design document: https://docs.google.com/a/google.com/document/d/1gpckL2U_6QuU9YW2L1ABsc4Fcogn5UngKk7fE5dDOoA/edit The library can already successfully format itself. Review: http://llvm-reviews.chandlerc.com/D80 llvm-svn: 169137 --- clang/include/clang/Format/Format.h | 75 ++++ clang/lib/CMakeLists.txt | 1 + clang/lib/Format/CMakeLists.txt | 24 ++ clang/lib/Format/Format.cpp | 704 +++++++++++++++++++++++++++++++ clang/lib/Format/Makefile | 13 + clang/lib/Format/UnwrappedLineParser.cpp | 341 +++++++++++++++ clang/lib/Format/UnwrappedLineParser.h | 119 ++++++ clang/lib/Makefile | 2 +- clang/unittests/CMakeLists.txt | 1 + clang/unittests/Format/CMakeLists.txt | 17 + clang/unittests/Format/FormatTest.cpp | 336 +++++++++++++++ clang/unittests/Format/Makefile | 19 + 12 files changed, 1651 insertions(+), 1 deletion(-) create mode 100644 clang/include/clang/Format/Format.h create mode 100644 clang/lib/Format/CMakeLists.txt create mode 100644 clang/lib/Format/Format.cpp create mode 100644 clang/lib/Format/Makefile create mode 100644 clang/lib/Format/UnwrappedLineParser.cpp create mode 100644 clang/lib/Format/UnwrappedLineParser.h create mode 100644 clang/unittests/Format/CMakeLists.txt create mode 100644 clang/unittests/Format/FormatTest.cpp create mode 100644 clang/unittests/Format/Makefile diff --git a/clang/include/clang/Format/Format.h b/clang/include/clang/Format/Format.h new file mode 100644 index 0000000..c2deccf --- /dev/null +++ b/clang/include/clang/Format/Format.h @@ -0,0 +1,75 @@ +//===--- Format.h - Format C++ code -----------------------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under 
the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +/// +/// \file +/// Various functions to configurably format source code. +/// +/// This is EXPERIMENTAL code under heavy development. It is not in a state yet, +/// where it can be used to format real code. +/// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_CLANG_FORMAT_FORMAT_H +#define LLVM_CLANG_FORMAT_FORMAT_H + +#include "clang/Frontend/FrontendAction.h" +#include "clang/Tooling/Refactoring.h" + +namespace clang { + +class Lexer; +class SourceManager; + +namespace format { + +/// \brief The \c FormatStyle is used to configure the formatting to follow +/// specific guidelines. +struct FormatStyle { + /// \brief The column limit. + unsigned ColumnLimit; + + /// \brief The maximum number of consecutive empty lines to keep. + unsigned MaxEmptyLinesToKeep; + + /// \brief Set whether & and * bind to the type as opposed to the variable. + bool PointerAndReferenceBindToType; + + /// \brief The extra indent or outdent of access modifiers (e.g.: public:). + int AccessModifierOffset; + + /// \brief Split two consecutive closing '>' by a space, i.e. use + /// A<A<int> > instead of A<A<int>>. + bool SplitTemplateClosingGreater; +}; + +/// \brief Returns a format style complying with the LLVM coding standards: +/// http://llvm.org/docs/CodingStandards.html. +FormatStyle getLLVMStyle(); + +/// \brief Returns a format style complying with Google's C++ style guide: +/// http://google-styleguide.googlecode.com/svn/trunk/cppguide.xml. +FormatStyle getGoogleStyle(); + +/// \brief Reformats the given \p Ranges in the token stream coming out of +/// \c Lex. +/// +/// Each range is extended on either end to its next bigger logic unit, i.e. +/// everything that might influence its formatting or might be influenced by its +/// formatting. 
+/// +/// Returns the \c Replacements necessary to make all \p Ranges comply with +/// \p Style. +tooling::Replacements reformat(const FormatStyle &Style, Lexer &Lex, + SourceManager &SourceMgr, + std::vector Ranges); + +} // end namespace format +} // end namespace clang + +#endif // LLVM_CLANG_FORMAT_FORMAT_H diff --git a/clang/lib/CMakeLists.txt b/clang/lib/CMakeLists.txt index 206c228..053320c 100644 --- a/clang/lib/CMakeLists.txt +++ b/clang/lib/CMakeLists.txt @@ -16,3 +16,4 @@ add_subdirectory(Frontend) add_subdirectory(FrontendTool) add_subdirectory(Tooling) add_subdirectory(StaticAnalyzer) +add_subdirectory(Format) diff --git a/clang/lib/Format/CMakeLists.txt b/clang/lib/Format/CMakeLists.txt new file mode 100644 index 0000000..60ab14d --- /dev/null +++ b/clang/lib/Format/CMakeLists.txt @@ -0,0 +1,24 @@ +set(LLVM_LINK_COMPONENTS support) + +add_clang_library(clangFormat + UnwrappedLineParser.cpp + Format.cpp + ) + +add_dependencies(clangFormat + ClangAttrClasses + ClangAttrList + ClangDeclNodes + ClangDiagnosticCommon + ClangDiagnosticFrontend + ClangStmtNodes + ) + +target_link_libraries(clangFormat + clangBasic + clangFrontend + clangAST + clangASTMatchers + clangRewriteCore + clangRewriteFrontend + ) diff --git a/clang/lib/Format/Format.cpp b/clang/lib/Format/Format.cpp new file mode 100644 index 0000000..a967768 --- /dev/null +++ b/clang/lib/Format/Format.cpp @@ -0,0 +1,704 @@ +//===--- Format.cpp - Format C++ code -------------------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +/// +/// \file +/// \brief This file implements functions declared in Format.h. This will be +/// split into separate files as we go. +/// +/// This is EXPERIMENTAL code under heavy development. 
It is not in a state yet, +/// where it can be used to format real code. +/// +//===----------------------------------------------------------------------===// + +#include "clang/Format/Format.h" + +#include "clang/Basic/SourceManager.h" +#include "clang/Lex/Lexer.h" + +#include "UnwrappedLineParser.h" + +namespace clang { +namespace format { + +// FIXME: Move somewhere sane. +struct TokenAnnotation { + enum TokenType { TT_Unknown, TT_TemplateOpener, TT_BinaryOperator, + TT_UnaryOperator, TT_OverloadedOperator, TT_PointerOrReference, + TT_ConditionalExpr, TT_LineComment, TT_BlockComment }; + + TokenType Type; + + /// \brief The current parenthesis level, i.e. the number of opening minus + /// the number of closing parenthesis left of the current position. + unsigned ParenLevel; + + bool SpaceRequiredBefore; + bool CanBreakBefore; + bool MustBreakBefore; +}; + +using llvm::MutableArrayRef; + +FormatStyle getLLVMStyle() { + FormatStyle LLVMStyle; + LLVMStyle.ColumnLimit = 80; + LLVMStyle.MaxEmptyLinesToKeep = 1; + LLVMStyle.PointerAndReferenceBindToType = false; + LLVMStyle.AccessModifierOffset = -2; + LLVMStyle.SplitTemplateClosingGreater = true; + return LLVMStyle; +} + +FormatStyle getGoogleStyle() { + FormatStyle GoogleStyle; + GoogleStyle.ColumnLimit = 80; + GoogleStyle.MaxEmptyLinesToKeep = 1; + GoogleStyle.PointerAndReferenceBindToType = true; + GoogleStyle.AccessModifierOffset = -1; + GoogleStyle.SplitTemplateClosingGreater = false; + return GoogleStyle; +} + +struct OptimizationParameters { + unsigned PenaltyExtraLine; + unsigned PenaltyIndentLevel; +}; + +class UnwrappedLineFormatter { +public: + UnwrappedLineFormatter(const FormatStyle &Style, SourceManager &SourceMgr, + const UnwrappedLine &Line, + const std::vector &Annotations, + tooling::Replacements &Replaces) + : Style(Style), + SourceMgr(SourceMgr), + Line(Line), + Annotations(Annotations), + Replaces(Replaces) { + Parameters.PenaltyExtraLine = 100; + Parameters.PenaltyIndentLevel = 5; + } + + void 
format() { + formatFirstToken(); + count = 0; + IndentState State; + State.Column = Line.Level * 2 + Line.Tokens[0].Tok.getLength(); + State.CtorInitializerOnNewLine = false; + State.InCtorInitializer = false; + State.ConsumedTokens = 1; + + //State.UsedIndent.push_back(Line.Level * 2); + State.Indent.push_back(Line.Level * 2 + 4); + State.LastSpace.push_back(Line.Level * 2); + + // Start iterating at 1 as we have correctly formatted of Token #0 above. + for (unsigned i = 1, n = Line.Tokens.size(); i != n; ++i) { + unsigned NoBreak = calcPenalty(State, false, UINT_MAX); + unsigned Break = calcPenalty(State, true, NoBreak); + addToken(Break < NoBreak, false, State); + } + } + +private: + /// \brief The current state when indenting a unwrapped line. + /// + /// As the indenting tries different combinations this is copied by value. + struct IndentState { + /// \brief The number of used columns in the current line. + unsigned Column; + + /// \brief The number of tokens already consumed. + unsigned ConsumedTokens; + + /// \brief The position to which a specific parenthesis level needs to be + /// indented. + std::vector Indent; + + std::vector LastSpace; + + bool CtorInitializerOnNewLine; + bool InCtorInitializer; + + /// \brief Comparison operator to be able to used \c IndentState in \c map. 
+ bool operator<(const IndentState &Other) const { + if (Other.ConsumedTokens != ConsumedTokens) + return Other.ConsumedTokens > ConsumedTokens; + if (Other.Column != Column) + return Other.Column > Column; + if (Other.Indent.size() != Indent.size()) + return Other.Indent.size() > Indent.size(); + for (int i = 0, e = Indent.size(); i != e; ++i) { + if (Other.Indent[i] != Indent[i]) + return Other.Indent[i] > Indent[i]; + } + if (Other.LastSpace.size() != LastSpace.size()) + return Other.LastSpace.size() > LastSpace.size(); + for (int i = 0, e = LastSpace.size(); i != e; ++i) { + if (Other.LastSpace[i] != LastSpace[i]) + return Other.LastSpace[i] > LastSpace[i]; + } + return false; + } + }; + + /// Append the next token to \p State. + void addToken(bool Newline, bool DryRun, IndentState &State) { + unsigned Index = State.ConsumedTokens; + const FormatToken &Current = Line.Tokens[Index]; + const FormatToken &Previous = Line.Tokens[Index - 1]; + unsigned ParenLevel = Annotations[Index].ParenLevel; + + if (Current.Tok.is(tok::l_paren) || Current.Tok.is(tok::l_square) || + Annotations[Index].Type == TokenAnnotation::TT_TemplateOpener) { + State.Indent.push_back(4 + State.LastSpace.back()); + State.LastSpace.push_back(State.LastSpace.back()); + } + + if (Newline) { + if (Current.Tok.is(tok::string_literal) && + Previous.Tok.is(tok::string_literal)) + State.Column = State.Column - Previous.Tok.getLength(); + else if (Previous.Tok.is(tok::equal) && ParenLevel != 0) + State.Column = State.Indent[ParenLevel] + 4; + else + State.Column = State.Indent[ParenLevel]; + if (!DryRun) + replaceWhitespace(Current, 1, State.Column); + + State.Column += Current.Tok.getLength(); + State.LastSpace[ParenLevel] = State.Indent[ParenLevel]; + if (Current.Tok.is(tok::colon) && + Annotations[Index].Type != TokenAnnotation::TT_ConditionalExpr) { + State.Indent[ParenLevel] += 2; + State.CtorInitializerOnNewLine = true; + State.InCtorInitializer = true; + } + } else { + unsigned Spaces = 
Annotations[Index].SpaceRequiredBefore ? 1 : 0; + if (Annotations[Index].Type == TokenAnnotation::TT_LineComment) + Spaces = 2; + if (!DryRun) + replaceWhitespace(Current, 0, Spaces); + if (Previous.Tok.is(tok::l_paren)) + State.Indent[ParenLevel] = State.Column; + if (Previous.Tok.is(tok::less) && + Annotations[Index - 1].Type == TokenAnnotation::TT_TemplateOpener) + State.Indent[ParenLevel] = State.Column; + if (Current.Tok.is(tok::colon)) { + State.Indent[ParenLevel] = State.Column + 3; + State.InCtorInitializer = true; + } + // Top-level spaces are exempt as that mostly leads to better results. + if (Spaces > 0 && ParenLevel != 0) + State.LastSpace[ParenLevel] = State.Column + Spaces; + State.Column += Current.Tok.getLength() + Spaces; + } + + if (Current.Tok.is(tok::r_paren) || Current.Tok.is(tok::r_square) || + Annotations[Index].Type == TokenAnnotation::TT_TemplateOpener) { + State.Indent.pop_back(); + State.LastSpace.pop_back(); + } + + ++State.ConsumedTokens; + } + + typedef std::map StateMap; + StateMap Memory; + + unsigned splitPenalty(const FormatToken &Token) { + if (Token.Tok.is(tok::semi)) + return 0; + if (Token.Tok.is(tok::comma)) + return 1; + if (Token.Tok.is(tok::equal) || Token.Tok.is(tok::l_paren) || + Token.Tok.is(tok::pipepipe) || Token.Tok.is(tok::ampamp)) + return 2; + return 3; + } + + /// \brief Calculate the number of lines needed to format the remaining part + /// of the unwrapped line. + /// + /// Assumes the formatting so far has led to + /// the \c IndentState \p State. If \p NewLine is set, a new line will be + /// added after the previous token. + /// + /// \param StopAt is used for optimization. If we can determine that we'll + /// definitely need at least \p StopAt additional lines, we already know of a + /// better solution. + unsigned calcPenalty(IndentState State, bool NewLine, unsigned StopAt) { + // We are at the end of the unwrapped line, so we don't need any more lines. 
+ if (State.ConsumedTokens >= Line.Tokens.size()) + return 0; + + if (!NewLine && Annotations[State.ConsumedTokens].MustBreakBefore) + return UINT_MAX; + if (NewLine && !Annotations[State.ConsumedTokens].CanBreakBefore) + return UINT_MAX; + + if (State.ConsumedTokens > 0 && !NewLine && + State.CtorInitializerOnNewLine && + Line.Tokens[State.ConsumedTokens - 1].Tok.is(tok::comma)) + return UINT_MAX; + + if (NewLine && State.InCtorInitializer && !State.CtorInitializerOnNewLine) + return UINT_MAX; + + addToken(NewLine, true, State); + + // Exceeding column limit is bad. + if (State.Column > Style.ColumnLimit) + return UINT_MAX; + + unsigned CurrentPenalty = 0; + if (NewLine) { + CurrentPenalty += Parameters.PenaltyIndentLevel * + Annotations[State.ConsumedTokens - 1].ParenLevel + + Parameters.PenaltyExtraLine + + splitPenalty(Line.Tokens[State.ConsumedTokens - 2]); + } + + if (StopAt <= CurrentPenalty) + return UINT_MAX; + StopAt -= CurrentPenalty; + + // Has this state already been examined? + StateMap::iterator I = Memory.find(State); + if (I != Memory.end()) + return I->second; + ++count; + + unsigned NoBreak = calcPenalty(State, false, StopAt); + unsigned WithBreak = calcPenalty(State, true, std::min(StopAt, NoBreak)); + unsigned Result = std::min(NoBreak, WithBreak); + if (Result != UINT_MAX) + Result += CurrentPenalty; + Memory[State] = Result; + assert(Memory.find(State) != Memory.end()); + return Result; + } + + /// \brief Replaces the whitespace in front of \p Tok. Only call once for + /// each \c FormatToken. + void replaceWhitespace(const FormatToken &Tok, unsigned NewLines, + unsigned Spaces) { + Replaces.insert(tooling::Replacement( + SourceMgr, Tok.WhiteSpaceStart, Tok.WhiteSpaceLength, + std::string(NewLines, '\n') + std::string(Spaces, ' '))); + } + + /// \brief Add a new line and the required indent before the first Token + /// of the \c UnwrappedLine. 
+ void formatFirstToken() { + const FormatToken &Token = Line.Tokens[0]; + if (Token.WhiteSpaceStart.isValid()) { + unsigned Newlines = + std::min(Token.NewlinesBefore, Style.MaxEmptyLinesToKeep + 1); + unsigned Offset = SourceMgr.getFileOffset(Token.WhiteSpaceStart); + if (Newlines == 0 && Offset != 0) + Newlines = 1; + unsigned Indent = Line.Level * 2; + if (Token.Tok.is(tok::kw_public) || Token.Tok.is(tok::kw_protected) || + Token.Tok.is(tok::kw_private)) + Indent += Style.AccessModifierOffset; + replaceWhitespace(Token, Newlines, Indent); + } + } + + FormatStyle Style; + SourceManager &SourceMgr; + const UnwrappedLine &Line; + const std::vector &Annotations; + tooling::Replacements &Replaces; + unsigned int count; + + OptimizationParameters Parameters; +}; + +/// \brief Determines extra information about the tokens comprising an +/// \c UnwrappedLine. +class TokenAnnotator { +public: + TokenAnnotator(const UnwrappedLine &Line, const FormatStyle &Style, + SourceManager &SourceMgr) + : Line(Line), + Style(Style), + SourceMgr(SourceMgr) { + } + + /// \brief A parser that gathers additional information about tokens. + /// + /// The \c TokenAnnotator tries to matches parenthesis and square brakets and + /// store a parenthesis levels. It also tries to resolve matching "<" and ">" + /// into template parameter lists. 
+ class AnnotatingParser { + public: + AnnotatingParser(const SourceManager &SourceMgr, + const SmallVector &Tokens, + std::vector &Annotations) + : SourceMgr(SourceMgr), + Tokens(Tokens), + Annotations(Annotations), + Index(0) { + } + + bool parseAngle(unsigned Level) { + while (Index < Tokens.size()) { + if (Tokens[Index].Tok.is(tok::greater)) { + Annotations[Index].Type = TokenAnnotation::TT_TemplateOpener; + Annotations[Index].ParenLevel = Level; + next(); + return true; + } + if (Tokens[Index].Tok.is(tok::r_paren) || + Tokens[Index].Tok.is(tok::r_square)) + return false; + if (Tokens[Index].Tok.is(tok::pipepipe) || + Tokens[Index].Tok.is(tok::ampamp) || + Tokens[Index].Tok.is(tok::question) || + Tokens[Index].Tok.is(tok::colon)) + return false; + consumeToken(Level); + } + return false; + } + + bool parseParens(unsigned Level) { + while (Index < Tokens.size()) { + if (Tokens[Index].Tok.is(tok::r_paren)) { + Annotations[Index].ParenLevel = Level; + next(); + return true; + } + if (Tokens[Index].Tok.is(tok::r_square)) + return false; + consumeToken(Level); + } + return false; + } + + bool parseSquare(unsigned Level) { + while (Index < Tokens.size()) { + if (Tokens[Index].Tok.is(tok::r_square)) { + Annotations[Index].ParenLevel = Level; + next(); + return true; + } + if (Tokens[Index].Tok.is(tok::r_paren)) + return false; + consumeToken(Level); + } + return false; + } + + bool parseConditional(unsigned Level) { + while (Index < Tokens.size()) { + if (Tokens[Index].Tok.is(tok::colon)) { + Annotations[Index].Type = TokenAnnotation::TT_ConditionalExpr; + next(); + return true; + } + consumeToken(Level); + } + return false; + } + + void consumeToken(unsigned Level) { + Annotations[Index].ParenLevel = Level; + unsigned CurrentIndex = Index; + next(); + switch (Tokens[CurrentIndex].Tok.getKind()) { + case tok::l_paren: + parseParens(Level + 1); + break; + case tok::l_square: + parseSquare(Level + 1); + break; + case tok::less: + if (parseAngle(Level + 1)) + 
Annotations[CurrentIndex].Type = TokenAnnotation::TT_TemplateOpener; + else { + Annotations[CurrentIndex].Type = TokenAnnotation::TT_BinaryOperator; + Index = CurrentIndex + 1; + } + break; + case tok::greater: + Annotations[CurrentIndex].Type = TokenAnnotation::TT_BinaryOperator; + break; + case tok::kw_operator: + if (Index < Tokens.size() && !Tokens[Index].Tok.is(tok::l_paren)) // 'operator' may be the last token of the line + Annotations[Index].Type = TokenAnnotation::TT_OverloadedOperator; + next(); + break; + case tok::question: + parseConditional(Level); + break; + default: + break; + } + } + + void parseLine() { + while (Index < Tokens.size()) { + consumeToken(0); + } + } + + void next() { + ++Index; + } + + private: + const SourceManager &SourceMgr; + const SmallVector &Tokens; + std::vector &Annotations; + unsigned Index; + }; + + void annotate() { + Annotations.clear(); + for (int i = 0, e = Line.Tokens.size(); i != e; ++i) { + Annotations.push_back(TokenAnnotation()); + } + + AnnotatingParser Parser(SourceMgr, Line.Tokens, Annotations); + Parser.parseLine(); + + determineTokenTypes(); + + for (int i = 1, e = Line.Tokens.size(); i != e; ++i) { + TokenAnnotation &Annotation = Annotations[i]; + + Annotation.CanBreakBefore = + canBreakBetween(Line.Tokens[i - 1], Line.Tokens[i]); + + if (Line.Tokens[i].Tok.is(tok::colon)) { + if (Line.Tokens[0].Tok.is(tok::kw_case) || i == e - 1) { + Annotation.SpaceRequiredBefore = false; + } else { + Annotation.SpaceRequiredBefore = true; // any other ':' is preceded by a space + } + } else if (Annotations[i - 1].Type == TokenAnnotation::TT_UnaryOperator) { + Annotation.SpaceRequiredBefore = false; + } else if (Annotation.Type == TokenAnnotation::TT_UnaryOperator) { + Annotation.SpaceRequiredBefore = + Line.Tokens[i - 1].Tok.isNot(tok::l_paren); + } else if (Line.Tokens[i - 1].Tok.is(tok::greater) && + Line.Tokens[i].Tok.is(tok::greater)) { + if (Annotation.Type == TokenAnnotation::TT_TemplateOpener && + Annotations[i - 1].Type == TokenAnnotation::TT_TemplateOpener) + Annotation.SpaceRequiredBefore = 
Style.SplitTemplateClosingGreater; + else + Annotation.SpaceRequiredBefore = false; + } else if ( + Annotation.Type == TokenAnnotation::TT_BinaryOperator || + Annotations[i - 1].Type == TokenAnnotation::TT_BinaryOperator) { + Annotation.SpaceRequiredBefore = true; + } else if ( + Annotations[i - 1].Type == TokenAnnotation::TT_TemplateOpener && + Line.Tokens[i].Tok.is(tok::l_paren)) { + Annotation.SpaceRequiredBefore = false; + } else { + Annotation.SpaceRequiredBefore = + spaceRequiredBetween(Line.Tokens[i - 1].Tok, Line.Tokens[i].Tok); + } + + if (Annotations[i - 1].Type == TokenAnnotation::TT_LineComment || + (Line.Tokens[i].Tok.is(tok::string_literal) && + Line.Tokens[i - 1].Tok.is(tok::string_literal))) { + Annotation.MustBreakBefore = true; + } + + if (Annotation.MustBreakBefore) + Annotation.CanBreakBefore = true; + } + } + + const std::vector &getAnnotations() { + return Annotations; + } + +private: + void determineTokenTypes() { + for (int i = 0, e = Line.Tokens.size(); i != e; ++i) { + TokenAnnotation &Annotation = Annotations[i]; + const FormatToken &Tok = Line.Tokens[i]; + + if (Tok.Tok.is(tok::star) || Tok.Tok.is(tok::amp)) + Annotation.Type = determineStarAmpUsage(i); + else if (Tok.Tok.is(tok::minus) && i != 0 && Line.Tokens[i - 1].Tok.is(tok::equal)) // the first token has no predecessor to inspect + Annotation.Type = TokenAnnotation::TT_UnaryOperator; + else if (isBinaryOperator(Line.Tokens[i])) + Annotation.Type = TokenAnnotation::TT_BinaryOperator; + else if (Tok.Tok.is(tok::comment)) { + StringRef Data(SourceMgr.getCharacterData(Tok.Tok.getLocation()), + Tok.Tok.getLength()); + if (Data.startswith("//")) + Annotation.Type = TokenAnnotation::TT_LineComment; + else + Annotation.Type = TokenAnnotation::TT_BlockComment; + } + } + } + + bool isBinaryOperator(const FormatToken &Tok) { + switch (Tok.Tok.getKind()) { + case tok::equal: + case tok::equalequal: + case tok::star: + //case tok::amp: + case tok::plus: + case tok::slash: + case tok::minus: + case tok::ampamp: + case tok::pipe: + case tok::pipepipe: + 
case tok::percent: + return true; + default: + return false; + } + } + + TokenAnnotation::TokenType determineStarAmpUsage(unsigned Index) { + if (Index == Annotations.size()) + return TokenAnnotation::TT_Unknown; + + if (Index == 0 || Line.Tokens[Index - 1].Tok.is(tok::l_paren) || + Line.Tokens[Index - 1].Tok.is(tok::comma) || + Annotations[Index - 1].Type == TokenAnnotation::TT_BinaryOperator) + return TokenAnnotation::TT_UnaryOperator; + + if (Line.Tokens[Index - 1].Tok.isLiteral() || + (Index + 1 < Line.Tokens.size() && Line.Tokens[Index + 1].Tok.isLiteral())) // '*' or '&' may be the last token of the line + return TokenAnnotation::TT_BinaryOperator; + + return TokenAnnotation::TT_PointerOrReference; + } + + bool isIfForOrWhile(Token Tok) { + return Tok.is(tok::kw_if) || Tok.is(tok::kw_for) || Tok.is(tok::kw_while); + } + + bool spaceRequiredBetween(Token Left, Token Right) { + if (Left.is(tok::kw_template) && Right.is(tok::less)) + return true; + if (Left.is(tok::arrow) || Right.is(tok::arrow)) + return false; + if (Left.is(tok::exclaim) || Left.is(tok::tilde)) + return false; + if (Left.is(tok::less) || Right.is(tok::greater) || Right.is(tok::less)) + return false; + if (Left.is(tok::amp) || Left.is(tok::star)) + return Right.isLiteral() || Style.PointerAndReferenceBindToType; + if (Right.is(tok::star) && Left.is(tok::l_paren)) + return false; + if (Right.is(tok::amp) || Right.is(tok::star)) + return Left.isLiteral() || !Style.PointerAndReferenceBindToType; + if (Left.is(tok::l_square) || Right.is(tok::l_square) || + Right.is(tok::r_square)) + return false; + if (Left.is(tok::coloncolon) || Right.is(tok::coloncolon)) + return false; + if (Left.is(tok::period) || Right.is(tok::period)) + return false; + if (Left.is(tok::colon) || Right.is(tok::colon)) + return true; + if ((Left.is(tok::plusplus) && Right.isAnyIdentifier()) || + (Left.isAnyIdentifier() && Right.is(tok::plusplus)) || + (Left.is(tok::minusminus) && Right.isAnyIdentifier()) || + (Left.isAnyIdentifier() && Right.is(tok::minusminus))) + return false; + if (Left.is(tok::l_paren)) + 
return false; + if (Left.is(tok::hash)) + return false; + if (Right.is(tok::r_paren) || Right.is(tok::semi) || Right.is(tok::comma)) + return false; + if (Right.is(tok::l_paren)) { + return !Left.isAnyIdentifier() || isIfForOrWhile(Left); + } + return true; + } + + bool canBreakBetween(const FormatToken &Left, const FormatToken &Right) { + if (Right.Tok.is(tok::r_paren)) + return false; + if (isBinaryOperator(Left)) + return true; + return Right.Tok.is(tok::colon) || Left.Tok.is(tok::comma) || Left.Tok.is( + tok::semi) || Left.Tok.is(tok::equal) || Left.Tok.is(tok::ampamp) || + (Left.Tok.is(tok::l_paren) && !Right.Tok.is(tok::r_paren)); + } + + const UnwrappedLine &Line; + FormatStyle Style; + SourceManager &SourceMgr; + std::vector Annotations; +}; + +class Formatter : public UnwrappedLineConsumer { +public: + Formatter(const FormatStyle &Style, Lexer &Lex, SourceManager &SourceMgr, + const std::vector &Ranges) + : Style(Style), + Lex(Lex), + SourceMgr(SourceMgr), + Ranges(Ranges) { + } + + tooling::Replacements format() { + UnwrappedLineParser Parser(Lex, SourceMgr, *this); + Parser.parse(); + return Replaces; + } + +private: + virtual void formatUnwrappedLine(const UnwrappedLine &TheLine) { + if (TheLine.Tokens.size() == 0) + return; + + CharSourceRange LineRange = + CharSourceRange::getTokenRange(TheLine.Tokens.front().Tok.getLocation(), + TheLine.Tokens.back().Tok.getLocation()); + + for (unsigned i = 0, e = Ranges.size(); i != e; ++i) { + if (SourceMgr.isBeforeInTranslationUnit(LineRange.getEnd(), + Ranges[i].getBegin()) || + SourceMgr.isBeforeInTranslationUnit(Ranges[i].getEnd(), + LineRange.getBegin())) + continue; + + TokenAnnotator Annotator(TheLine, Style, SourceMgr); + Annotator.annotate(); + UnwrappedLineFormatter Formatter(Style, SourceMgr, TheLine, + Annotator.getAnnotations(), Replaces); + Formatter.format(); + return; + } + } + + FormatStyle Style; + Lexer &Lex; + SourceManager &SourceMgr; + tooling::Replacements Replaces; + std::vector Ranges; +}; 
+ +tooling::Replacements reformat(const FormatStyle &Style, Lexer &Lex, + SourceManager &SourceMgr, + std::vector Ranges) { + Formatter formatter(Style, Lex, SourceMgr, Ranges); + return formatter.format(); +} + +} // namespace format +} // namespace clang diff --git a/clang/lib/Format/Makefile b/clang/lib/Format/Makefile new file mode 100644 index 0000000..0d2e7a2 --- /dev/null +++ b/clang/lib/Format/Makefile @@ -0,0 +1,13 @@ +##===- clang/lib/Format/Makefile ----------------------------*- Makefile -*-===## +# +# The LLVM Compiler Infrastructure +# +# This file is distributed under the University of Illinois Open Source +# License. See LICENSE.TXT for details. +# +##===----------------------------------------------------------------------===## + +CLANG_LEVEL := ../.. +LIBRARYNAME := clangFormat + +include $(CLANG_LEVEL)/Makefile diff --git a/clang/lib/Format/UnwrappedLineParser.cpp b/clang/lib/Format/UnwrappedLineParser.cpp new file mode 100644 index 0000000..8dc1278 --- /dev/null +++ b/clang/lib/Format/UnwrappedLineParser.cpp @@ -0,0 +1,341 @@ +//===--- UnwrappedLineParser.cpp - Format C++ code ------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +/// +/// \file +/// \brief This file contains the implementation of the UnwrappedLineParser, +/// which turns a stream of tokens into UnwrappedLines. +/// +/// This is EXPERIMENTAL code under heavy development. It is not in a state yet, +/// where it can be used to format real code. 
+/// +//===----------------------------------------------------------------------===// + +#include "UnwrappedLineParser.h" + +#include "llvm/Support/raw_ostream.h" + +namespace clang { +namespace format { + +UnwrappedLineParser::UnwrappedLineParser(Lexer &Lex, SourceManager &SourceMgr, + UnwrappedLineConsumer &Callback) + : GreaterStashed(false), + Lex(Lex), + SourceMgr(SourceMgr), + IdentTable(Lex.getLangOpts()), + Callback(Callback) { + Lex.SetKeepWhitespaceMode(true); +} + +void UnwrappedLineParser::parse() { + parseToken(); + parseLevel(); +} + +void UnwrappedLineParser::parseLevel() { + do { + switch (FormatTok.Tok.getKind()) { + case tok::hash: + parsePPDirective(); + break; + case tok::comment: + parseComment(); + break; + case tok::l_brace: + parseBlock(); + addUnwrappedLine(); + break; + case tok::r_brace: + return; + default: + parseStatement(); + break; + } + } while (!eof()); +} + +void UnwrappedLineParser::parseBlock() { + nextToken(); + + // FIXME: Remove this hack to handle namespaces. 
+ bool IsNamespace = Line.Tokens[0].Tok.is(tok::kw_namespace); + + addUnwrappedLine(); + + if (!IsNamespace) + ++Line.Level; + parseLevel(); + if (!IsNamespace) + --Line.Level; + assert(FormatTok.Tok.is(tok::r_brace) && "expected '}'"); + nextToken(); + if (FormatTok.Tok.is(tok::semi)) + nextToken(); +} + +void UnwrappedLineParser::parsePPDirective() { + while (!eof()) { + nextToken(); + if (FormatTok.NewlinesBefore > 0) { + addUnwrappedLine(); + return; + } + } +} + +void UnwrappedLineParser::parseComment() { + while (!eof()) { + nextToken(); + if (FormatTok.NewlinesBefore > 0) { + addUnwrappedLine(); + return; + } + } +} + +void UnwrappedLineParser::parseStatement() { + if (FormatTok.Tok.is(tok::kw_public) || FormatTok.Tok.is(tok::kw_protected) || + FormatTok.Tok.is(tok::kw_private)) { + parseAccessSpecifier(); + return; + } + if (FormatTok.Tok.is(tok::kw_enum)) { + parseEnum(); + return; + } + int TokenNumber = 0; + do { + ++TokenNumber; + switch (FormatTok.Tok.getKind()) { + case tok::semi: + nextToken(); + addUnwrappedLine(); + return; + case tok::l_paren: + parseParens(); + break; + case tok::l_brace: + parseBlock(); + addUnwrappedLine(); + return; + case tok::kw_if: + parseIfThenElse(); + return; + case tok::kw_do: + parseDoWhile(); + return; + case tok::kw_switch: + parseSwitch(); + return; + case tok::kw_default: + nextToken(); + parseLabel(); + return; + case tok::kw_case: + parseCaseLabel(); + return; + case tok::raw_identifier: + nextToken(); + break; + default: + nextToken(); + if (TokenNumber == 1 && FormatTok.Tok.is(tok::colon)) { + parseLabel(); + return; + } + break; + } + } while (!eof()); +} + +void UnwrappedLineParser::parseParens() { + assert(FormatTok.Tok.is(tok::l_paren) && "'(' expected."); + nextToken(); + do { + switch (FormatTok.Tok.getKind()) { + case tok::l_paren: + parseParens(); + break; + case tok::r_paren: + nextToken(); + return; + default: + nextToken(); + break; + } + } while (!eof()); +} + +void 
UnwrappedLineParser::parseIfThenElse() { + assert(FormatTok.Tok.is(tok::kw_if) && "'if' expected"); + nextToken(); + parseParens(); + bool NeedsUnwrappedLine = false; + if (FormatTok.Tok.is(tok::l_brace)) { + parseBlock(); + NeedsUnwrappedLine = true; + } else { + addUnwrappedLine(); + ++Line.Level; + parseStatement(); + --Line.Level; + } + if (FormatTok.Tok.is(tok::kw_else)) { + nextToken(); + if (FormatTok.Tok.is(tok::l_brace)) { + parseBlock(); + addUnwrappedLine(); + } else if (FormatTok.Tok.is(tok::kw_if)) { + parseIfThenElse(); + } else { + addUnwrappedLine(); + ++Line.Level; + parseStatement(); + --Line.Level; + } + } else if (NeedsUnwrappedLine) { + addUnwrappedLine(); + } +} + +void UnwrappedLineParser::parseDoWhile() { + assert(FormatTok.Tok.is(tok::kw_do) && "'do' expected"); + nextToken(); + if (FormatTok.Tok.is(tok::l_brace)) { + parseBlock(); + } else { + addUnwrappedLine(); + ++Line.Level; + parseStatement(); + --Line.Level; + } + + assert(FormatTok.Tok.is(tok::kw_while) && "'while' expected"); + nextToken(); + parseStatement(); +} + +void UnwrappedLineParser::parseLabel() { + // FIXME: remove all asserts. + assert(FormatTok.Tok.is(tok::colon) && "':' expected"); + nextToken(); + unsigned OldLineLevel = Line.Level; + if (Line.Level > 0) + --Line.Level; + if (FormatTok.Tok.is(tok::l_brace)) { + parseBlock(); + } + addUnwrappedLine(); + Line.Level = OldLineLevel; +} + +void UnwrappedLineParser::parseCaseLabel() { + assert(FormatTok.Tok.is(tok::kw_case) && "'case' expected"); + // FIXME: fix handling of complex expressions here. 
+  do {
+    nextToken();
+  } while (!eof() && !FormatTok.Tok.is(tok::colon));
+  parseLabel();
+}
+
+// Parses "switch (...) <body>". A braced body stays on the line of the
+// 'switch'; an unbraced body is a single statement one level deeper.
+void UnwrappedLineParser::parseSwitch() {
+  assert(FormatTok.Tok.is(tok::kw_switch) && "'switch' expected");
+  nextToken();
+  // parseParens() asserts on '('; tolerate incomplete code such as a bare
+  // "switch" instead of tripping that assert.
+  if (FormatTok.Tok.is(tok::l_paren))
+    parseParens();
+  if (FormatTok.Tok.is(tok::l_brace)) {
+    parseBlock();
+    addUnwrappedLine();
+  } else {
+    addUnwrappedLine();
+    ++Line.Level;
+    parseStatement();
+    --Line.Level;
+  }
+}
+
+// Parses "public:", "protected:" or "private:" into a line of its own.
+void UnwrappedLineParser::parseAccessSpecifier() {
+  // Consume the access keyword itself.
+  nextToken();
+  // Only consume the ':' if it is really there. Otherwise we do not know what
+  // follows (e.g. "public B {"), so leave that token for the next line.
+  if (FormatTok.Tok.is(tok::colon))
+    nextToken();
+  addUnwrappedLine();
+}
+
+// Consumes an enum declaration as one unwrapped line: everything up to and
+// including the next ';'. If the end of the file comes first, no line is
+// emitted here.
+void UnwrappedLineParser::parseEnum() {
+  do {
+    nextToken();
+    if (FormatTok.Tok.is(tok::semi)) {
+      nextToken();
+      addUnwrappedLine();
+      return;
+    }
+  } while (!eof());
+}
+
+// Hands the tokens collected in Line to the consumer and starts a new, empty
+// line. Line.Level is left untouched.
+void UnwrappedLineParser::addUnwrappedLine() {
+  // Consume trailing comments.
+  while (!eof() && FormatTok.NewlinesBefore == 0 &&
+         FormatTok.Tok.is(tok::comment)) {
+    nextToken();
+  }
+  Callback.formatUnwrappedLine(Line);
+  Line.Tokens.clear();
+}
+
+// True once the current token is the end-of-file token.
+bool UnwrappedLineParser::eof() const {
+  return FormatTok.Tok.is(tok::eof);
+}
+
+// Appends the current token to the line being built and reads the next one.
+// Does nothing at the end of the file, so the eof token is never appended.
+void UnwrappedLineParser::nextToken() {
+  if (eof())
+    return;
+  Line.Tokens.push_back(FormatTok);
+  parseToken();
+}
+
+// Reads the next significant token into FormatTok, recording the whitespace
+// that precedes it.
+void UnwrappedLineParser::parseToken() {
+  if (GreaterStashed) {
+    // The previous call turned a '>>' into a '>' and stashed the second half.
+    // Hand that second '>' out now: no newlines or whitespace before it, and
+    // its (empty) whitespace range starts one character after the first '>'.
+    FormatTok.NewlinesBefore = 0;
+    FormatTok.WhiteSpaceStart = FormatTok.Tok.getLocation().getLocWithOffset(1);
+    FormatTok.WhiteSpaceLength = 0;
+    GreaterStashed = false;
+    return;
+  }
+
+  FormatTok = FormatToken();
+  Lex.LexFromRawLexer(FormatTok.Tok);
+  FormatTok.WhiteSpaceStart = FormatTok.Tok.getLocation();
+
+  // Consume and record whitespace until we find a significant token.
+  while (FormatTok.Tok.is(tok::unknown)) {
+    // Accumulate the newline count and length of this whitespace token.
+    FormatTok.NewlinesBefore += tokenText().count('\n');
+    FormatTok.WhiteSpaceLength += FormatTok.Tok.getLength();
+
+    // NOTE(review): the token kind is tok::unknown at this point, so this
+    // eof() check looks unreachable as written -- confirm before relying on it.
+    if (eof())
+      return;
+    Lex.LexFromRawLexer(FormatTok.Tok);
+  }
+
+  // Raw identifiers are looked up in the identifier table so that the token
+  // carries the kind registered there.
+  if (FormatTok.Tok.is(tok::raw_identifier)) {
+    const IdentifierInfo &Info = IdentTable.get(tokenText());
+    FormatTok.Tok.setKind(Info.getTokenID());
+  }
+
+  // Split '>>' into two '>' tokens: return the first now and remember that
+  // the second one is still pending for the next call.
+  if (FormatTok.Tok.is(tok::greatergreater)) {
+    FormatTok.Tok.setKind(tok::greater);
+    GreaterStashed = true;
+  }
+}
+
+// Returns the source text of the current token: getLength() characters
+// starting at the token's location, as provided by the SourceManager.
+StringRef UnwrappedLineParser::tokenText() {
+  StringRef Data(SourceMgr.getCharacterData(FormatTok.Tok.getLocation()),
+                 FormatTok.Tok.getLength());
+  return Data;
+}
+
+} // end namespace format
+} // end namespace clang
diff --git a/clang/lib/Format/UnwrappedLineParser.h b/clang/lib/Format/UnwrappedLineParser.h
new file mode 100644
index 0000000..1e6899e
--- /dev/null
+++ b/clang/lib/Format/UnwrappedLineParser.h
@@ -0,0 +1,119 @@
+//===--- UnwrappedLineParser.h - Format C++ code --------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+///
+/// \file
+/// \brief This file contains the declaration of the UnwrappedLineParser,
+/// which turns a stream of tokens into UnwrappedLines.
+///
+/// This is EXPERIMENTAL code under heavy development. It is not yet in a state
+/// where it can be used to format real code.
+///
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_CLANG_FORMAT_UNWRAPPED_LINE_PARSER_H
+#define LLVM_CLANG_FORMAT_UNWRAPPED_LINE_PARSER_H
+
+#include "clang/Basic/SourceManager.h"
+#include "clang/Basic/IdentifierTable.h"
+#include "clang/Lex/Lexer.h"
+
+namespace clang {
+namespace format {
+
+/// \brief A wrapper around a \c Token storing information about the
+/// whitespace characters preceding it.
+struct FormatToken {
+  FormatToken() : NewlinesBefore(0), WhiteSpaceLength(0) {
+  }
+
+  /// \brief The \c Token.
+  Token Tok;
+
+  /// \brief The number of newlines immediately before the \c Token.
+  ///
+  /// This can be used to determine what the user wrote in the original code
+  /// and thereby e.g. leave an empty line between two function definitions.
+  unsigned NewlinesBefore;
+
+  /// \brief The location of the start of the whitespace immediately preceding
+  /// the \c Token.
+  ///
+  /// Used together with \c WhiteSpaceLength to create a \c Replacement.
+  SourceLocation WhiteSpaceStart;
+
+  /// \brief The length in characters of the whitespace immediately preceding
+  /// the \c Token.
+  unsigned WhiteSpaceLength;
+};
+
+/// \brief An unwrapped line is a sequence of \c Token, that we would like to
+/// put on a single line if there was no column limit.
+///
+/// This is used as a main interface between the \c UnwrappedLineParser and the
+/// \c UnwrappedLineFormatter. The key property is that changing the formatting
+/// within an unwrapped line does not affect any other unwrapped lines.
+struct UnwrappedLine {
+  UnwrappedLine() : Level(0) {
+  }
+
+  /// \brief The \c FormatToken%s comprising this \c UnwrappedLine.
+  ///
+  /// The parser appends \c FormatToken objects here; the inline capacity is
+  /// only a tuning knob and does not limit the number of tokens.
+  SmallVector<FormatToken, 16> Tokens;
+
+  /// \brief The indent level of the \c UnwrappedLine.
+  unsigned Level;
+};
+
+/// \brief Interface for receiving the \c UnwrappedLine%s produced by the
+/// \c UnwrappedLineParser.
+class UnwrappedLineConsumer {
+public:
+  /// Virtual so that implementations can be destroyed safely through a
+  /// pointer or reference to this interface.
+  virtual ~UnwrappedLineConsumer() {}
+
+  /// \brief Called once for every completed unwrapped line.
+  ///
+  /// The parser reuses and clears \p Line right after the call returns.
+  virtual void formatUnwrappedLine(const UnwrappedLine &Line) = 0;
+};
+
+/// \brief Splits the token stream of \c Lex into \c UnwrappedLine%s and passes
+/// each of them to an \c UnwrappedLineConsumer.
+class UnwrappedLineParser {
+public:
+  UnwrappedLineParser(Lexer &Lex, SourceManager &SourceMgr,
+                      UnwrappedLineConsumer &Callback);
+
+  void parse();
+
+private:
+  void parseLevel();
+  void parseBlock();
+  void parsePPDirective();
+  void parseComment();
+  void parseStatement();
+  void parseParens();
+  void parseIfThenElse();
+  void parseDoWhile();
+  void parseLabel();
+  void parseCaseLabel();
+  void parseSwitch();
+  void parseAccessSpecifier();
+  void parseEnum();
+
+  /// Passes \c Line to \c Callback and clears its tokens.
+  void addUnwrappedLine();
+
+  /// Returns true once \c FormatTok is the end-of-file token.
+  bool eof() const;
+
+  /// Appends \c FormatTok to \c Line and reads the next token.
+  void nextToken();
+
+  /// Reads the next significant token into \c FormatTok.
+  void parseToken();
+
+  /// Returns the text of \c FormatTok.
+  StringRef tokenText();
+
+  /// The line currently being assembled.
+  UnwrappedLine Line;
+
+  /// The current (not yet consumed) token.
+  FormatToken FormatTok;
+
+  /// True while the second '>' of a split '>>' still has to be handed out.
+  bool GreaterStashed;
+
+  Lexer &Lex;
+  SourceManager &SourceMgr;
+  IdentifierTable IdentTable;
+  UnwrappedLineConsumer &Callback;
+};
+
+} // end namespace format
+} // end namespace clang
+
+#endif // LLVM_CLANG_FORMAT_UNWRAPPED_LINE_PARSER_H
diff --git a/clang/lib/Makefile b/clang/lib/Makefile
index 1f14aa0..17db50e 100755
--- a/clang/lib/Makefile
+++ b/clang/lib/Makefile
@@ -10,7 +10,7 @@ CLANG_LEVEL := ..
PARALLEL_DIRS = Headers Basic Lex Parse AST ASTMatchers Sema CodeGen Analysis \ StaticAnalyzer Edit Rewrite ARCMigrate Serialization Frontend \ - FrontendTool Tooling Driver + FrontendTool Tooling Driver Format include $(CLANG_LEVEL)/Makefile diff --git a/clang/unittests/CMakeLists.txt b/clang/unittests/CMakeLists.txt index 989025a..334ea41 100644 --- a/clang/unittests/CMakeLists.txt +++ b/clang/unittests/CMakeLists.txt @@ -15,3 +15,4 @@ add_subdirectory(Basic) add_subdirectory(Lex) add_subdirectory(Frontend) add_subdirectory(Tooling) +add_subdirectory(Format) diff --git a/clang/unittests/Format/CMakeLists.txt b/clang/unittests/Format/CMakeLists.txt new file mode 100644 index 0000000..66dd534 --- /dev/null +++ b/clang/unittests/Format/CMakeLists.txt @@ -0,0 +1,17 @@ +set(LLVM_LINK_COMPONENTS + ${LLVM_TARGETS_TO_BUILD} + asmparser + support + mc + ) + +add_clang_unittest(FormatTests + FormatTest.cpp + ) + +target_link_libraries(FormatTests + clangAST + clangFormat + clangTooling + clangRewriteCore + ) diff --git a/clang/unittests/Format/FormatTest.cpp b/clang/unittests/Format/FormatTest.cpp new file mode 100644 index 0000000..9929eeb --- /dev/null +++ b/clang/unittests/Format/FormatTest.cpp @@ -0,0 +1,336 @@ +//===- unittest/Format/FormatTest.cpp - Formatting unit tests -------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+//
+//===----------------------------------------------------------------------===//
+
+#include "../Tooling/RewriterTestContext.h"
+#include "clang/Lex/Lexer.h"
+#include "clang/Format/Format.h"
+#include "gtest/gtest.h"
+
+namespace clang {
+namespace format {
+
+/// Test fixture that runs the formatter over in-memory code snippets.
+class FormatTest : public ::testing::Test {
+protected:
+  /// Reformats the \p Length characters of \p Code starting at \p Offset using
+  /// \p Style and returns the complete rewritten text.
+  std::string format(llvm::StringRef Code, unsigned Offset, unsigned Length,
+                     const FormatStyle &Style) {
+    RewriterTestContext Context;
+    FileID ID = Context.createInMemoryFile("input.cc", Code);
+    SourceLocation Start =
+        Context.Sources.getLocForStartOfFile(ID).getLocWithOffset(Offset);
+    // Exactly one range: [Start, Start + Length). The element type is spelled
+    // out because this code cannot rely on class template argument deduction.
+    std::vector<CharSourceRange> Ranges(
+        1,
+        CharSourceRange::getCharRange(Start, Start.getLocWithOffset(Length)));
+    LangOptions LangOpts;
+    LangOpts.CPlusPlus = 1;
+    Lexer Lex(ID, Context.Sources.getBuffer(ID), Context.Sources, LangOpts);
+    tooling::Replacements Replace =
+        reformat(Style, Lex, Context.Sources, Ranges);
+    EXPECT_TRUE(applyAllReplacements(Replace, Context.Rewrite));
+    return Context.getRewrittenText(ID);
+  }
+
+  /// Reformats all of \p Code; uses the LLVM style unless told otherwise.
+  std::string format(llvm::StringRef Code,
+                     const FormatStyle &Style = getLLVMStyle()) {
+    return format(Code, 0, Code.size(), Style);
+  }
+
+  /// Replaces every newline in \p Code by a space, formats the result with the
+  /// LLVM style and expects to get \p Code back.
+  void verifyFormat(llvm::StringRef Code) {
+    std::string WithoutFormat(Code.str());
+    for (unsigned i = 0, e = WithoutFormat.size(); i != e; ++i) {
+      if (WithoutFormat[i] == '\n')
+        WithoutFormat[i] = ' ';
+    }
+    EXPECT_EQ(Code.str(), format(WithoutFormat));
+  }
+
+  /// Same as verifyFormat(), but formats with the Google style.
+  void verifyGoogleFormat(llvm::StringRef Code) {
+    std::string WithoutFormat(Code.str());
+    for (unsigned i = 0, e = WithoutFormat.size(); i != e; ++i) {
+      if (WithoutFormat[i] == '\n')
+        WithoutFormat[i] = ' ';
+    }
+    EXPECT_EQ(Code.str(), format(WithoutFormat, getGoogleStyle()));
+  }
+};
+
+// NOTE(review): runs of spaces inside the string literals of this file look
+// collapsed to single spaces -- confirm the inputs/expectations against the
+// upstream test before relying on them.
+TEST_F(FormatTest, DoesNotChangeCorrectlyFormatedCode) {
+  EXPECT_EQ(";", format(";"));
+}
+
+TEST_F(FormatTest, FormatsGlobalStatementsAt0) {
+  EXPECT_EQ("int i;", format(" int i;"));
+  EXPECT_EQ("\nint i;", format(" \n\t \r int i;"));
+  EXPECT_EQ("int i;\nint j;",
format(" int i; int j;"));
+  EXPECT_EQ("int i;\nint j;", format(" int i;\n int j;"));
+}
+
+// NOTE(review): runs of spaces inside the string literals below look collapsed
+// to single spaces (no visible indentation in the expected output) -- confirm
+// against the upstream test before relying on them.
+
+// A declaration split across lines is joined into one line.
+TEST_F(FormatTest, FormatsUnwrappedLinesAtFirstFormat) {
+  EXPECT_EQ("int i;", format("int\ni;"));
+}
+
+// Every brace of directly nested blocks ends up on a line of its own.
+TEST_F(FormatTest, FormatsNestedBlockStatements) {
+  EXPECT_EQ("{\n {\n {\n }\n }\n}", format("{{{}}}"));
+}
+
+// A for-header that does not fit on one line is broken after a ';'.
+TEST_F(FormatTest, FormatsForLoop) {
+  verifyFormat(
+      "for (int VeryVeryLongLoopVariable = 0; VeryVeryLongLoopVariable < 10;\n"
+      " ++VeryVeryLongLoopVariable);");
+}
+
+TEST_F(FormatTest, FormatsWhileLoop) {
+  verifyFormat("while (true) {\n}");
+}
+
+TEST_F(FormatTest, FormatsNestedCall) {
+  verifyFormat("Method(f1, f2(f3));");
+  verifyFormat("Method(f1(f2, f3()));");
+}
+
+TEST_F(FormatTest, FormatsAwesomeMethodCall) {
+  verifyFormat(
+      "SomeLongMethodName(SomeReallyLongMethod(CallOtherReallyLongMethod(\n"
+      " parameter, parameter, parameter)), SecondLongCall(parameter));");
+}
+
+TEST_F(FormatTest, FormatsFunctionDefinition) {
+  verifyFormat("void f(int a, int b, int c, int d, int e, int f, int g,"
+               " int h, int j, int f,\n"
+               " int c, int ddddddddddddd) {\n"
+               "}");
+}
+
+// Unbraced if-bodies go on their own line; the statement after the if-chain
+// is not part of it.
+TEST_F(FormatTest, FormatIfWithoutCompountStatement) {
+  verifyFormat("if (true)\n f();\ng();");
+  verifyFormat("if (a)\n if (b)\n if (c)\n g();\nh();");
+  verifyFormat("if (a)\n if (b) {\n f();\n }\ng();");
+}
+
+// Nested if/else chains, without and with braces.
+TEST_F(FormatTest, ParseIfThenElse) {
+  verifyFormat("if (true)\n"
+               " if (true)\n"
+               " if (true)\n"
+               " f();\n"
+               " else\n"
+               " g();\n"
+               " else\n"
+               " h();\n"
+               "else\n"
+               " i();");
+  verifyFormat("if (true)\n"
+               " if (true)\n"
+               " if (true) {\n"
+               " if (true)\n"
+               " f();\n"
+               " } else {\n"
+               " g();\n"
+               " }\n"
+               " else\n"
+               " h();\n"
+               "else {\n"
+               " i();\n"
+               "}");
+}
+
+// Line comments before a function, inside a block and at the end of a line.
+TEST_F(FormatTest, UnderstandsSingleLineComments) {
+  EXPECT_EQ("// line 1\n// line 2\nvoid f() {\n}\n",
+            format("// line 1\n// line 2\nvoid f() {}\n"));
+
+  EXPECT_EQ("void f() {\n // Doesn't do anything\n}",
+            format("void f() {\n// Doesn't do anything\n}"));
+
+  EXPECT_EQ("int i // This is a fancy
variable\n = 5;",
+            format("int i // This is a fancy variable\n= 5;"));
+
+  verifyFormat("f(/*test=*/ true);");
+}
+
+// NOTE(review): runs of spaces inside the string literals below look collapsed
+// to single spaces (no visible indentation in the expected output) -- confirm
+// against the upstream test before relying on them.
+
+// The ';' after a class body stays on the line of the closing brace.
+TEST_F(FormatTest, DoesNotBreakSemiAfterClassDecl) {
+  verifyFormat("class A {\n};");
+}
+
+TEST_F(FormatTest, BreaksAsHighAsPossible) {
+  verifyFormat(
+      "if ((aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa && aaaaaaaaaaaaaaaaaaaaaaaaaa) ||\n"
+      " (bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb && bbbbbbbbbbbbbbbbbbbbbbbbbb))\n"
+      " f();");
+}
+
+// "else if" continues on the line of the 'else'.
+TEST_F(FormatTest, ElseIf) {
+  verifyFormat("if (a) {\n"
+               "} else if (b) {\n"
+               "}");
+  verifyFormat("if (a)\n"
+               " f();\n"
+               "else if (b)\n"
+               " g();\n"
+               "else\n"
+               " h();");
+}
+
+// Access specifiers get a line of their own, in both LLVM and Google style.
+TEST_F(FormatTest, UnderstandsAccessSpecifiers) {
+  verifyFormat("class A {\n"
+               "public:\n"
+               "protected:\n"
+               "private:\n"
+               " void f() {\n"
+               " }\n"
+               "};");
+  verifyGoogleFormat("class A {\n"
+                     " public:\n"
+                     " protected:\n"
+                     " private:\n"
+                     " void f() {\n"
+                     " }\n"
+                     "};");
+}
+
+// case/default labels, a braced case body and a switch without braces.
+TEST_F(FormatTest, SwitchStatement) {
+  verifyFormat("switch (x) {\n"
+               "case 1:\n"
+               " f();\n"
+               " break;\n"
+               "case kFoo:\n"
+               "case ns::kBar:\n"
+               "case kBaz:\n"
+               " break;\n"
+               "default:\n"
+               " g();\n"
+               " break;\n"
+               "}");
+  verifyFormat("switch (x) {\n"
+               "case 1: {\n"
+               " f();\n"
+               " break;\n"
+               "}\n"
+               "}");
+  verifyFormat("switch (test)\n"
+               " ;");
+}
+
+// Goto labels inside a function, inside a nested block and at the top level.
+TEST_F(FormatTest, Labels) {
+  verifyFormat("void f() {\n"
+               " some_code();\n"
+               "test_label:\n"
+               " some_other_code();\n"
+               " {\n"
+               " some_more_code();\n"
+               " another_label:\n"
+               " some_more_code();\n"
+               " }\n"
+               "}");
+  verifyFormat("some_code();\n"
+               "test_label:\n"
+               "some_other_code();");
+}
+
+TEST_F(FormatTest, DerivedClass) {
+  verifyFormat("class A : public B {\n"
+               "};");
+}
+
+// do-while with a braced body and with a single-statement body.
+TEST_F(FormatTest, DoWhile) {
+  verifyFormat("do {\n"
+               " do_something();\n"
+               "} while (something());");
+  verifyFormat("do\n"
+               " do_something();\n"
+               "while (something());");
+}
+
+TEST_F(FormatTest, BreaksDesireably) {
+  verifyFormat("if (aaaaaaaaaaaaaaaaaaa(aaaaaaaaaaaaaaa) ||\n"
+               " aaaaaaaaaaaaaaaaaaa(aaaaaaaaaaaaaaa) ||\n"
+               " aaaaaaaaaaaaaaaaaaa(aaaaaaaaaaaaaaa)) {\n};");
+
+  verifyFormat(
+      "aaaaaaaaaaaaaaaaaaaaa(aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa,\n"
+      " aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa) {\n}");
+
+  verifyFormat("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa(\n"
+               " aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa(\n"
+               " aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa));");
+}
+
+// NOTE(review): runs of spaces inside the string literals of these tests look
+// collapsed to single spaces, and template argument lists (text between angle
+// brackets) appear to be missing from some literals further down -- confirm
+// against the upstream test before relying on them.
+
+// Adjacent string literals in a call that has to be broken.
+TEST_F(FormatTest, AlignsStringLiterals) {
+  verifyFormat("loooooooooooooooooooooooooongFunction(\"short literal \"\n"
+               " \"short literal\");");
+  verifyFormat(
+      "looooooooooooooooooooooooongFunction(\n"
+      " \"short literal\"\n"
+      " \"looooooooooooooooooooooooooooooooooooooooooooooooong literal\");");
+}
+
+// Overlong assignments are broken after the '='.
+TEST_F(FormatTest, UnderstandsEquals) {
+  verifyFormat(
+      "aaaaaaaaaaaaaaaaa =\n"
+      " aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa;");
+  verifyFormat(
+      "if (aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa =\n"
+      " aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa) {\n"
+      "}");
+  verifyFormat(
+      "if (a) {\n"
+      "} else if (aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa =\n"
+      " aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa) {\n"
+      "}");
+
+  verifyFormat("if (int aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa =\n"
+               " 100000000 + 100000000) {\n}");
+}
+
+// '<' and '>' as template brackets versus comparison operators.
+TEST_F(FormatTest, UnderstandsTemplateParameters) {
+  verifyFormat("A a;");
+  verifyFormat("A > > a;");
+  verifyFormat("A, 3>, 4> a;");
+  verifyFormat("bool x = a < 1 || 2 > a;");
+  verifyFormat("bool x = 5 < f();");
+  verifyFormat("bool x = f() > 5;");
+  verifyFormat("bool x = 5 < a::x;");
+  verifyFormat("bool x = a < 4 ? a > 2 : false;");
+  verifyFormat("bool x = f() ?
a < 2 : a > 2;");
+
+  verifyGoogleFormat("A> a;");
+  verifyGoogleFormat("A>> a;");
+  verifyGoogleFormat("A>>> a;");
+
+  verifyFormat("test >> a >> b;");
+  verifyFormat("test << a >> b;");
+
+  verifyFormat("f();");
+  verifyFormat("template void f() {\n}");
+}
+
+// NOTE(review): template argument lists (text between angle brackets) appear
+// to be missing from several literals above -- confirm against the upstream
+// test before relying on them.
+
+TEST_F(FormatTest, UndestandsUnaryOperators) {
+  verifyFormat("int a = -2;");
+}
+
+TEST_F(FormatTest, UndestandsOverloadedOperators) {
+  verifyFormat("bool operator<() {\n}");
+}
+
+// '*' as pointer declarator, dereference and multiplication. The disabled
+// lines are cases that are not verified yet.
+TEST_F(FormatTest, UnderstandsUsesOfStar) {
+  verifyFormat("int *f(int *a) {\n}");
+  verifyFormat("f(a, *a);");
+  verifyFormat("f(*a);");
+  verifyFormat("int a = b * 10;");
+  verifyFormat("int a = 10 * b;");
+  // verifyFormat("int a = b * c;");
+  verifyFormat("int a = *b;");
+  // verifyFormat("int a = *b * c;");
+  // verifyFormat("int a = b * *c;");
+}
+
+//TEST_F(FormatTest, IncorrectDerivedClass) {
+//  verifyFormat("public B {\n"
+//               "};");
+//}
+
+} // end namespace format
+} // end namespace clang
diff --git a/clang/unittests/Format/Makefile b/clang/unittests/Format/Makefile
new file mode 100644
index 0000000..4218622
--- /dev/null
+++ b/clang/unittests/Format/Makefile
@@ -0,0 +1,19 @@
+##===- unittests/Format/Makefile ---------------------------*- Makefile -*-===##
+#
+# The LLVM Compiler Infrastructure
+#
+# This file is distributed under the University of Illinois Open Source
+# License. See LICENSE.TXT for details.
+#
+##===----------------------------------------------------------------------===##
+
+CLANG_LEVEL = ../..
+TESTNAME = Format
+include $(CLANG_LEVEL)/../../Makefile.config
+LINK_COMPONENTS := $(TARGETS_TO_BUILD) asmparser support mc
+USEDLIBS = clangFormat.a clangTooling.a clangFrontend.a clangSerialization.a \
+           clangDriver.a clangParse.a clangRewriteCore.a \
+           clangRewriteFrontend.a clangSema.a clangAnalysis.a clangEdit.a \
+           clangAST.a clangASTMatchers.a clangLex.a clangBasic.a
+
+include $(CLANG_LEVEL)/unittests/Makefile
-- 
2.7.4