As an extension, treat Unicode whitespace characters as whitespace.

author Jordan Rose <jordan_rose@apple.com>

Thu, 24 Jan 2013 20:50:50 +0000 (20:50 +0000)

committer Jordan Rose <jordan_rose@apple.com>

Thu, 24 Jan 2013 20:50:50 +0000 (20:50 +0000)
author Jordan Rose <jordan_rose@apple.com>
Thu, 24 Jan 2013 20:50:50 +0000 (20:50 +0000)
committer Jordan Rose <jordan_rose@apple.com>
Thu, 24 Jan 2013 20:50:50 +0000 (20:50 +0000)
diff --git a/clang/lib/Lex/Lexer.cpp b/clang/lib/Lex/Lexer.cpp

index e6ffca9..2a57e6f 100644 (file)
--- a/clang/lib/Lex/Lexer.cpp
+++ b/clang/lib/Lex/Lexer.cpp
@@ -2791,7 +2791,30 @@ uint32_t Lexer::tryReadUCN(const char *&StartPtr, const char *SlashLoc,
    return CodePoint;
  }
  
+static bool isUnicodeWhitespace(uint32_t C) { // True iff code point C is one of the non-ASCII Unicode whitespace values listed below.
+  return (C == 0x0085 || C == 0x00A0 || C == 0x1680 || // U+0085 NEXT LINE, U+00A0 NO-BREAK SPACE, U+1680 OGHAM SPACE MARK
+          C == 0x180E || (C >= 0x2000 && C <= 0x200A) || // U+180E MONGOLIAN VOWEL SEPARATOR; U+2000..U+200A EN QUAD through HAIR SPACE
+          C == 0x2028 || C == 0x2029 || C == 0x202F || // U+2028 LINE SEPARATOR, U+2029 PARAGRAPH SEPARATOR, U+202F NARROW NO-BREAK SPACE
+          C == 0x205F || C == 0x3000); // U+205F MEDIUM MATHEMATICAL SPACE, U+3000 IDEOGRAPHIC SPACE
+} // NOTE(review): U+180E was reclassified from Zs to Cf in Unicode 6.3 -- confirm whether it should stay in this list.
+
  void Lexer::LexUnicode(Token &Result, uint32_t C, const char *CurPtr) {
+  if (isUnicodeWhitespace(C)) {
+    if (!isLexingRawMode()) {
+      CharSourceRange CharRange =
+        CharSourceRange::getCharRange(getSourceLocation(),
+                                      getSourceLocation(CurPtr));
+      Diag(BufferPtr, diag::ext_unicode_whitespace)
+        << CharRange;
+    }
+
+    Result.setFlag(Token::LeadingSpace);
+    if (SkipWhitespace(Result, CurPtr))
+      return; // KeepWhitespaceMode
+
+    return LexTokenInternal(Result);
+  }
+
    if (isAllowedIDChar(C) && isAllowedInitiallyIDChar(C)) {
      MIOpt.ReadToken();
      return LexIdentifier(Result, CurPtr);
diff --git a/clang/test/Lexer/unicode.c b/clang/test/Lexer/unicode.c

new file mode 100644 (file)

index 0000000..1d7b53e
--- /dev/null
+++ b/clang/test/Lexer/unicode.c
@@ -0,0 +1,6 @@
+// RUN: %clang_cc1 -fsyntax-only -verify %s
+
+// This file contains Unicode characters; please do not "fix" them!
+
+extern int x; // expected-warning {{treating Unicode character as whitespace}}
+extern int　x; // expected-warning {{treating Unicode character as whitespace}}
author	Jordan Rose <jordan_rose@apple.com>
	Thu, 24 Jan 2013 20:50:50 +0000 (20:50 +0000)
committer	Jordan Rose <jordan_rose@apple.com>
	Thu, 24 Jan 2013 20:50:50 +0000 (20:50 +0000)
clang/lib/Lex/Lexer.cpp		patch \| blob \| history
clang/test/Lexer/unicode.c	[new file with mode: 0644]	patch \| blob