From c2271926a4fc395e05cf75a8e57c2dfab1f02d3d Mon Sep 17 00:00:00 2001 From: Manuel Klimek Date: Fri, 19 Nov 2021 14:11:53 +0100 Subject: [PATCH] Make clang-format fuzz through Lexing with asserts enabled. Makes clang-format bail out if an in-memory source file with an unsupported BOM is handed in instead of creating source locations that are violating clang's assumptions. In the future, we should add support to better transport error messages like this through clang-format instead of printing to stderr and not creating any changes. --- clang/lib/Format/Format.cpp | 30 ++++++++++++----- clang/lib/Format/QualifierAlignmentFixer.cpp | 12 ++++--- clang/lib/Format/SortJavaScriptImports.cpp | 5 ++- clang/lib/Format/TokenAnalyzer.cpp | 49 ++++++++++++++++++++++++---- clang/lib/Format/TokenAnalyzer.h | 12 +++++-- 5 files changed, 85 insertions(+), 23 deletions(-) diff --git a/clang/lib/Format/Format.cpp b/clang/lib/Format/Format.cpp index 1d60f2b..085cca8 100644 --- a/clang/lib/Format/Format.cpp +++ b/clang/lib/Format/Format.cpp @@ -2989,8 +2989,10 @@ reformat(const FormatStyle &Style, StringRef Code, if (Style.isJson()) { std::vector Ranges(1, tooling::Range(0, Code.size())); auto Env = - std::make_unique(Code, FileName, Ranges, FirstStartColumn, + Environment::make(Code, FileName, Ranges, FirstStartColumn, NextStartColumn, LastStartColumn); + if (!Env) + return {}; // Perform the actual formatting pass. tooling::Replacements Replaces = Formatter(*Env, Style, Status).process().first; @@ -3046,9 +3048,10 @@ reformat(const FormatStyle &Style, StringRef Code, return TrailingCommaInserter(Env, Expanded).process(); }); - auto Env = - std::make_unique(Code, FileName, Ranges, FirstStartColumn, - NextStartColumn, LastStartColumn); + auto Env = Environment::make(Code, FileName, Ranges, FirstStartColumn, + NextStartColumn, LastStartColumn); + if (!Env) + return {}; llvm::Optional CurrentCode = None; tooling::Replacements Fixes; unsigned Penalty = 0; @@ -3061,10 +3064,12 @@ reformat(const FormatStyle &Style, StringRef Code, Penalty += PassFixes.second; if (I + 1 < E) { CurrentCode = std::move(*NewCode); - Env = std::make_unique( + Env = Environment::make( *CurrentCode, FileName, tooling::calculateRangesAfterReplacements(Fixes, Ranges), FirstStartColumn, NextStartColumn, LastStartColumn); + if (!Env) + return {}; } } } @@ -3090,7 +3095,10 @@ tooling::Replacements cleanup(const FormatStyle &Style, StringRef Code, // cleanups only apply to C++ (they mostly concern ctor commas etc.) if (Style.Language != FormatStyle::LK_Cpp) return tooling::Replacements(); - return Cleaner(Environment(Code, FileName, Ranges), Style).process().first; + auto Env = Environment::make(Code, FileName, Ranges); + if (!Env) + return {}; + return Cleaner(*Env, Style).process().first; } tooling::Replacements reformat(const FormatStyle &Style, StringRef Code, @@ -3107,7 +3115,10 @@ tooling::Replacements fixNamespaceEndComments(const FormatStyle &Style, StringRef Code, ArrayRef Ranges, StringRef FileName) { - return NamespaceEndCommentsFixer(Environment(Code, FileName, Ranges), Style) + auto Env = Environment::make(Code, FileName, Ranges); + if (!Env) + return {}; + return NamespaceEndCommentsFixer(*Env, Style) .process() .first; } @@ -3116,7 +3127,10 @@ tooling::Replacements sortUsingDeclarations(const FormatStyle &Style, StringRef Code, ArrayRef Ranges, StringRef FileName) { - return UsingDeclarationsSorter(Environment(Code, FileName, Ranges), Style) + auto Env = Environment::make(Code, FileName, Ranges); + if (!Env) + return {}; + return UsingDeclarationsSorter(*Env, Style) .process() .first; } diff --git a/clang/lib/Format/QualifierAlignmentFixer.cpp b/clang/lib/Format/QualifierAlignmentFixer.cpp index c70705a..5a89225 100644 --- a/clang/lib/Format/QualifierAlignmentFixer.cpp +++ b/clang/lib/Format/QualifierAlignmentFixer.cpp @@ -61,10 +61,10 @@ QualifierAlignmentFixer::QualifierAlignmentFixer( std::pair QualifierAlignmentFixer::analyze( TokenAnnotator &Annotator, SmallVectorImpl &AnnotatedLines, FormatTokenLexer &Tokens) { - - auto Env = - std::make_unique(Code, FileName, Ranges, FirstStartColumn, - NextStartColumn, LastStartColumn); + auto Env = Environment::make(Code, FileName, Ranges, FirstStartColumn, + NextStartColumn, LastStartColumn); + if (!Env) + return {}; llvm::Optional CurrentCode = None; tooling::Replacements Fixes; for (size_t I = 0, E = Passes.size(); I < E; ++I) { @@ -75,10 +75,12 @@ std::pair QualifierAlignmentFixer::analyze( Fixes = Fixes.merge(PassFixes.first); if (I + 1 < E) { CurrentCode = std::move(*NewCode); - Env = std::make_unique( + Env = Environment::make( *CurrentCode, FileName, tooling::calculateRangesAfterReplacements(Fixes, Ranges), FirstStartColumn, NextStartColumn, LastStartColumn); + if (!Env) + return {}; } } } diff --git a/clang/lib/Format/SortJavaScriptImports.cpp b/clang/lib/Format/SortJavaScriptImports.cpp index a5e3ce6..515cfce 100644 --- a/clang/lib/Format/SortJavaScriptImports.cpp +++ b/clang/lib/Format/SortJavaScriptImports.cpp @@ -550,7 +550,10 @@ tooling::Replacements sortJavaScriptImports(const FormatStyle &Style, ArrayRef Ranges, StringRef FileName) { // FIXME: Cursor support. - return JavaScriptImportSorter(Environment(Code, FileName, Ranges), Style) + auto Env = Environment::make(Code, FileName, Ranges); + if (!Env) + return {}; + return JavaScriptImportSorter(*Env, Style) .process() .first; } diff --git a/clang/lib/Format/TokenAnalyzer.cpp b/clang/lib/Format/TokenAnalyzer.cpp index f1459a8..a619c6d 100644 --- a/clang/lib/Format/TokenAnalyzer.cpp +++ b/clang/lib/Format/TokenAnalyzer.cpp @@ -26,26 +26,61 @@ #include "clang/Basic/SourceManager.h" #include "clang/Format/Format.h" #include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/SmallVector.h" #include "llvm/Support/Debug.h" +#include #define DEBUG_TYPE "format-formatter" namespace clang { namespace format { +// FIXME: Instead of printing the diagnostic we should store it and have a +// better way to return errors through the format APIs. +class FatalDiagnosticConsumer: public DiagnosticConsumer { +public: + void HandleDiagnostic(DiagnosticsEngine::Level DiagLevel, + const Diagnostic &Info) override { + if (DiagLevel == DiagnosticsEngine::Fatal) { + Fatal = true; + llvm::SmallVector Message; + Info.FormatDiagnostic(Message); + llvm::errs() << Message << "\n"; + } + } + + bool fatalError() const { return Fatal; } + +private: + bool Fatal = false; +}; + +std::unique_ptr +Environment::make(StringRef Code, StringRef FileName, + ArrayRef Ranges, unsigned FirstStartColumn, + unsigned NextStartColumn, unsigned LastStartColumn) { + auto Env = std::make_unique(Code, FileName, FirstStartColumn, + NextStartColumn, LastStartColumn); + FatalDiagnosticConsumer Diags; + Env->SM.getDiagnostics().setClient(&Diags, /*ShouldOwnClient=*/false); + SourceLocation StartOfFile = Env->SM.getLocForStartOfFile(Env->ID); + for (const tooling::Range &Range : Ranges) { + SourceLocation Start = StartOfFile.getLocWithOffset(Range.getOffset()); + SourceLocation End = Start.getLocWithOffset(Range.getLength()); + Env->CharRanges.push_back(CharSourceRange::getCharRange(Start, End)); + } + // Validate that we can get the buffer data without a fatal error. + Env->SM.getBufferData(Env->ID); + if (Diags.fatalError()) return nullptr; + return Env; +} + Environment::Environment(StringRef Code, StringRef FileName, - ArrayRef Ranges, unsigned FirstStartColumn, unsigned NextStartColumn, unsigned LastStartColumn) : VirtualSM(new SourceManagerForFile(FileName, Code)), SM(VirtualSM->get()), ID(VirtualSM->get().getMainFileID()), FirstStartColumn(FirstStartColumn), NextStartColumn(NextStartColumn), LastStartColumn(LastStartColumn) { - SourceLocation StartOfFile = SM.getLocForStartOfFile(ID); - for (const tooling::Range &Range : Ranges) { - SourceLocation Start = StartOfFile.getLocWithOffset(Range.getOffset()); - SourceLocation End = Start.getLocWithOffset(Range.getLength()); - CharRanges.push_back(CharSourceRange::getCharRange(Start, End)); - } } TokenAnalyzer::TokenAnalyzer(const Environment &Env, const FormatStyle &Style) diff --git a/clang/lib/Format/TokenAnalyzer.h b/clang/lib/Format/TokenAnalyzer.h index 5ce44a0..aaca518 100644 --- a/clang/lib/Format/TokenAnalyzer.h +++ b/clang/lib/Format/TokenAnalyzer.h @@ -29,6 +29,7 @@ #include "clang/Format/Format.h" #include "llvm/ADT/STLExtras.h" #include "llvm/Support/Debug.h" +#include namespace clang { namespace format { @@ -40,8 +41,7 @@ public: // that the next lines of \p Code should start at \p NextStartColumn, and // that \p Code should end at \p LastStartColumn if it ends in newline. // See also the documentation of clang::format::internal::reformat. - Environment(StringRef Code, StringRef FileName, - ArrayRef Ranges, unsigned FirstStartColumn = 0, + Environment(StringRef Code, StringRef FileName, unsigned FirstStartColumn = 0, unsigned NextStartColumn = 0, unsigned LastStartColumn = 0); FileID getFileID() const { return ID; } @@ -62,6 +62,14 @@ public: // environment should end if it ends in a newline. unsigned getLastStartColumn() const { return LastStartColumn; } + // Returns nullptr and prints a diagnostic to stderr if the environment + // can't be created. + static std::unique_ptr make(StringRef Code, StringRef FileName, + ArrayRef Ranges, + unsigned FirstStartColumn = 0, + unsigned NextStartColumn = 0, + unsigned LastStartColumn = 0); + private: // This is only set if constructed from string. std::unique_ptr VirtualSM; -- 2.7.4