Don't warn about Unicode characters in -E mode.

author Jordan Rose <jordan_rose@apple.com>
Wed, 30 Jan 2013 01:52:57 +0000 (01:52 +0000)
committer Jordan Rose <jordan_rose@apple.com>
Wed, 30 Jan 2013 01:52:57 +0000 (01:52 +0000)
diff --git a/clang/include/clang/Frontend/PreprocessorOutputOptions.h b/clang/include/clang/Frontend/PreprocessorOutputOptions.h

index 9793aa6..e273dd6 100644 (file)
--- a/clang/include/clang/Frontend/PreprocessorOutputOptions.h
+++ b/clang/include/clang/Frontend/PreprocessorOutputOptions.h
@@ -25,7 +25,7 @@ public:
  
  public:
    PreprocessorOutputOptions() {
-    ShowCPP = 1;
+    ShowCPP = 0;
      ShowComments = 0;
      ShowLineMarkers = 1;
      ShowMacroComments = 0;
diff --git a/clang/include/clang/Lex/Preprocessor.h b/clang/include/clang/Lex/Preprocessor.h

index 24b6a18..fc092e1 100644 (file)
--- a/clang/include/clang/Lex/Preprocessor.h
+++ b/clang/include/clang/Lex/Preprocessor.h
@@ -160,6 +160,9 @@ class Preprocessor : public RefCountedBase<Preprocessor> {
    /// \brief True if pragmas are enabled.
    bool PragmasEnabled : 1;
  
+  /// \brief True if the current build action is a preprocessing action.
+  bool PreprocessedOutput : 1;
+
    /// \brief True if we are currently preprocessing a #if or #elif directive
    bool ParsingIfOrElifDirective;
  
@@ -474,6 +477,16 @@ public:
      return SuppressIncludeNotFoundError;
    }
  
+  /// Sets whether the preprocessor is responsible for producing output or if
+  /// it is producing tokens to be consumed by Parse and Sema.
+  void setPreprocessedOutput(bool IsPreprocessedOutput) {
+    PreprocessedOutput = IsPreprocessedOutput;
+  }
+
+  /// Returns true if the preprocessor is responsible for generating output,
+  /// false if it is producing tokens to be consumed by Parse and Sema.
+  bool isPreprocessedOutput() const { return PreprocessedOutput; }
+
    /// isCurrentLexer - Return true if we are lexing directly from the specified
    /// lexer.
    bool isCurrentLexer(const PreprocessorLexer *L) const {
diff --git a/clang/lib/Frontend/CompilerInstance.cpp b/clang/lib/Frontend/CompilerInstance.cpp

index a7f0770..27f96b0 100644 (file)
--- a/clang/lib/Frontend/CompilerInstance.cpp
+++ b/clang/lib/Frontend/CompilerInstance.cpp
@@ -243,6 +243,8 @@ void CompilerInstance::createPreprocessor() {
  
    InitializePreprocessor(*PP, PPOpts, getHeaderSearchOpts(), getFrontendOpts());
  
+  PP->setPreprocessedOutput(getPreprocessorOutputOpts().ShowCPP);
+
    // Set up the module path, including the hash for the
    // module-creation options.
    SmallString<256> SpecificModuleCache(
diff --git a/clang/lib/Frontend/CompilerInvocation.cpp b/clang/lib/Frontend/CompilerInvocation.cpp

index f49f30d..b4b0ddb 100644 (file)
--- a/clang/lib/Frontend/CompilerInvocation.cpp
+++ b/clang/lib/Frontend/CompilerInvocation.cpp
@@ -1395,9 +1395,48 @@ static void ParsePreprocessorArgs(PreprocessorOptions &Opts, ArgList &Args,
  }
  
  static void ParsePreprocessorOutputArgs(PreprocessorOutputOptions &Opts,
-                                        ArgList &Args) {
+                                        ArgList &Args,
+                                        frontend::ActionKind Action) {
    using namespace options;
-  Opts.ShowCPP = !Args.hasArg(OPT_dM);
+
+  switch (Action) {
+  case frontend::ASTDeclList:
+  case frontend::ASTDump:
+  case frontend::ASTDumpXML:
+  case frontend::ASTPrint:
+  case frontend::ASTView:
+  case frontend::EmitAssembly:
+  case frontend::EmitBC:
+  case frontend::EmitHTML:
+  case frontend::EmitLLVM:
+  case frontend::EmitLLVMOnly:
+  case frontend::EmitCodeGenOnly:
+  case frontend::EmitObj:
+  case frontend::FixIt:
+  case frontend::GenerateModule:
+  case frontend::GeneratePCH:
+  case frontend::GeneratePTH:
+  case frontend::ParseSyntaxOnly:
+  case frontend::PluginAction:
+  case frontend::PrintDeclContext:
+  case frontend::RewriteObjC:
+  case frontend::RewriteTest:
+  case frontend::RunAnalysis:
+  case frontend::MigrateSource:
+    Opts.ShowCPP = 0;
+    break;
+
+  case frontend::DumpRawTokens:
+  case frontend::DumpTokens:
+  case frontend::InitOnly:
+  case frontend::PrintPreamble:
+  case frontend::PrintPreprocessedInput:
+  case frontend::RewriteMacros:
+  case frontend::RunPreprocessorOnly:
+    Opts.ShowCPP = !Args.hasArg(OPT_dM);
+    break;
+  }
+
    Opts.ShowComments = Args.hasArg(OPT_C);
    Opts.ShowLineMarkers = !Args.hasArg(OPT_P);
    Opts.ShowMacroComments = Args.hasArg(OPT_CC);
@@ -1478,7 +1517,8 @@ bool CompilerInvocation::CreateFromArgs(CompilerInvocation &Res,
    // parameters from the function and the "FileManager.h" #include.
    FileManager FileMgr(Res.getFileSystemOpts());
    ParsePreprocessorArgs(Res.getPreprocessorOpts(), *Args, FileMgr, Diags);
-  ParsePreprocessorOutputArgs(Res.getPreprocessorOutputOpts(), *Args);
+  ParsePreprocessorOutputArgs(Res.getPreprocessorOutputOpts(), *Args,
+                              Res.getFrontendOpts().ProgramAction);
    ParseTargetArgs(Res.getTargetOpts(), *Args);
  
    return Success;
diff --git a/clang/lib/Lex/Lexer.cpp b/clang/lib/Lex/Lexer.cpp

index 3e3aaae..08f406b 100644 (file)
--- a/clang/lib/Lex/Lexer.cpp
+++ b/clang/lib/Lex/Lexer.cpp
@@ -2811,14 +2811,13 @@ static bool isUnicodeWhitespace(uint32_t C) {
  }
  
  void Lexer::LexUnicode(Token &Result, uint32_t C, const char *CurPtr) {
-  if (isUnicodeWhitespace(C)) {
-    if (!isLexingRawMode()) {
-      CharSourceRange CharRange =
-        CharSourceRange::getCharRange(getSourceLocation(),
-                                      getSourceLocation(CurPtr));
-      Diag(BufferPtr, diag::ext_unicode_whitespace)
-        << CharRange;
-    }
+  if (!isLexingRawMode() && !PP->isPreprocessedOutput() &&
+      isUnicodeWhitespace(C)) {
+    CharSourceRange CharRange =
+      CharSourceRange::getCharRange(getSourceLocation(),
+                                    getSourceLocation(CurPtr));
+    Diag(BufferPtr, diag::ext_unicode_whitespace)
+      << CharRange;
  
      Result.setFlag(Token::LeadingSpace);
      if (SkipWhitespace(Result, CurPtr))
@@ -2832,7 +2831,8 @@ void Lexer::LexUnicode(Token &Result, uint32_t C, const char *CurPtr) {
      return LexIdentifier(Result, CurPtr);
    }
  
-  if (!isASCII(*BufferPtr) && !isAllowedIDChar(C)) {
+  if (!isLexingRawMode() && !PP->isPreprocessedOutput() &&
+      !isASCII(*BufferPtr) && !isAllowedIDChar(C)) {
      // Non-ASCII characters tend to creep into source code unintentionally.
      // Instead of letting the parser complain about the unknown token,
      // just drop the character.
@@ -2842,13 +2842,11 @@ void Lexer::LexUnicode(Token &Result, uint32_t C, const char *CurPtr) {
      // loophole in the mapping of Unicode characters to basic character set
      // characters that allows us to map these particular characters to, say,
      // whitespace.
-    if (!isLexingRawMode()) {
-      CharSourceRange CharRange =
-        CharSourceRange::getCharRange(getSourceLocation(),
-                                      getSourceLocation(CurPtr));
-      Diag(BufferPtr, diag::err_non_ascii)
-        << FixItHint::CreateRemoval(CharRange);
-    }
+    CharSourceRange CharRange =
+      CharSourceRange::getCharRange(getSourceLocation(),
+                                    getSourceLocation(CurPtr));
+    Diag(BufferPtr, diag::err_non_ascii)
+      << FixItHint::CreateRemoval(CharRange);
  
      BufferPtr = CurPtr;
      return LexTokenInternal(Result);
@@ -3537,11 +3535,15 @@ LexNextToken:
      if (Status == conversionOK)
        return LexUnicode(Result, CodePoint, CurPtr);
      
+    if (isLexingRawMode() || PP->isPreprocessedOutput()) {
+      Kind = tok::unknown;
+      break;
+    }
+
      // Non-ASCII characters tend to creep into source code unintentionally.
      // Instead of letting the parser complain about the unknown token,
      // just diagnose the invalid UTF-8, then drop the character.
-    if (!isLexingRawMode())
-      Diag(CurPtr, diag::err_invalid_utf8);
+    Diag(CurPtr, diag::err_invalid_utf8);
  
      BufferPtr = CurPtr+1;
      goto LexNextToken;
diff --git a/clang/test/Lexer/unicode.c b/clang/test/Lexer/unicode.c

index 1d7b53e..26e77f6 100644 (file)
--- a/clang/test/Lexer/unicode.c
+++ b/clang/test/Lexer/unicode.c
@@ -1,6 +1,15 @@
  // RUN: %clang_cc1 -fsyntax-only -verify %s
+// RUN: %clang_cc1 -E -DPP_ONLY=1 %s -o %t
+// RUN: FileCheck --strict-whitespace --input-file=%t %s
  
  // This file contains Unicode characters; please do not "fix" them!
  
  extern int x; // expected-warning {{treating Unicode character as whitespace}}
  extern int　x; // expected-warning {{treating Unicode character as whitespace}}
+
+// CHECK: extern int {{x}}
+// CHECK: extern int　{{x}}
+
+#if PP_ONLY
+CHECK: The preprocessor should not complain about Unicode characters like ©.
+#endif
author	Jordan Rose <jordan_rose@apple.com>
	Wed, 30 Jan 2013 01:52:57 +0000 (01:52 +0000)
committer	Jordan Rose <jordan_rose@apple.com>
	Wed, 30 Jan 2013 01:52:57 +0000 (01:52 +0000)
clang/include/clang/Frontend/PreprocessorOutputOptions.h		patch | blob | history
clang/include/clang/Lex/Preprocessor.h		patch | blob | history
clang/lib/Frontend/CompilerInstance.cpp		patch | blob | history
clang/lib/Frontend/CompilerInvocation.cpp		patch | blob | history
clang/lib/Lex/Lexer.cpp		patch | blob | history
clang/test/Lexer/unicode.c		patch | blob | history