Add initial support for Unicode (UCN) escapes in wide string constants

author Fariborz Jahanian <fjahanian@apple.com>

Tue, 31 Aug 2010 23:34:27 +0000 (23:34 +0000)

committer Fariborz Jahanian <fjahanian@apple.com>

Tue, 31 Aug 2010 23:34:27 +0000 (23:34 +0000)
author Fariborz Jahanian <fjahanian@apple.com>
Tue, 31 Aug 2010 23:34:27 +0000 (23:34 +0000)
committer Fariborz Jahanian <fjahanian@apple.com>
Tue, 31 Aug 2010 23:34:27 +0000 (23:34 +0000)
diff --git a/clang/lib/Lex/LiteralSupport.cpp b/clang/lib/Lex/LiteralSupport.cpp

index a12c4ae0d4014655ba76edba0a8929508d887649..c758b40032567c6265b936b64287889ffca582f8 100644 (file)
--- a/clang/lib/Lex/LiteralSupport.cpp
+++ b/clang/lib/Lex/LiteralSupport.cpp
@@ -170,6 +170,7 @@ static unsigned ProcessCharEscape(const char *&ThisTokBuf,
  static void ProcessUCNEscape(const char *&ThisTokBuf, const char *ThisTokEnd,
                               char *&ResultBuf, bool &HadError,
                               SourceLocation Loc, Preprocessor &PP,
+                             bool wide,
                               bool Complain) {
    // FIXME: Add a warning - UCN's are only valid in C++ & C99.
    // FIXME: Handle wide strings.
@@ -190,6 +191,7 @@ static void ProcessUCNEscape(const char *&ThisTokBuf, const char *ThisTokEnd,
  
    UTF32 UcnVal = 0;
    unsigned short UcnLen = (ThisTokBuf[-1] == 'u' ? 4 : 8);
+  unsigned short UcnLenSave = UcnLen;
    for (; ThisTokBuf != ThisTokEnd && UcnLen; ++ThisTokBuf, UcnLen--) {
      int CharVal = HexDigitValue(ThisTokBuf[0]);
      if (CharVal == -1) break;
@@ -214,6 +216,16 @@ static void ProcessUCNEscape(const char *&ThisTokBuf, const char *ThisTokEnd,
      HadError = 1;
      return;
    }
+  if (wide) {
+    assert(UcnLenSave == 4 && 
+           "ProcessUCNEscape - only ucn length of 4 supported");
+    // little endian assumed.
+    *ResultBuf++ = (UcnVal & 0x000000FF);
+    *ResultBuf++ = (UcnVal & 0x0000FF00) >> 8;
+    *ResultBuf++ = (UcnVal & 0x00FF0000) >> 16;
+    *ResultBuf++ = (UcnVal & 0xFF000000) >> 24;
+    return;
+  }
    // Now that we've parsed/checked the UCN, we convert from UTF32->UTF8.
    // The conversion below was inspired by:
    //   http://www.unicode.org/Public/PROGRAMS/CVTUTF/ConvertUTF.c
@@ -830,12 +842,14 @@ StringLiteralParser(const Token *StringToks, unsigned NumStringToks,
      }
  
      const char *ThisTokEnd = ThisTokBuf+ThisTokLen-1;  // Skip end quote.
-
+    bool wide = false;
      // TODO: Input character set mapping support.
  
      // Skip L marker for wide strings.
-    if (ThisTokBuf[0] == 'L')
+    if (ThisTokBuf[0] == 'L') {
+      wide = true;
        ++ThisTokBuf;
+    }
  
      assert(ThisTokBuf[0] == '"' && "Expected quote, lexer broken?");
      ++ThisTokBuf;
@@ -880,7 +894,8 @@ StringLiteralParser(const Token *StringToks, unsigned NumStringToks,
        // Is this a Universal Character Name escape?
        if (ThisTokBuf[1] == 'u' || ThisTokBuf[1] == 'U') {
          ProcessUCNEscape(ThisTokBuf, ThisTokEnd, ResultPtr,
-                         hadError, StringToks[i].getLocation(), PP, Complain);
+                         hadError, StringToks[i].getLocation(), PP, wide, 
+                         Complain);
          continue;
        }
        // Otherwise, this is a non-UCN escape character.  Process it.
diff --git a/clang/test/CodeGenCXX/uncode-string.cpp b/clang/test/CodeGenCXX/uncode-string.cpp

new file mode 100644 (file)

index 0000000..e543149
--- /dev/null
+++ b/clang/test/CodeGenCXX/uncode-string.cpp
@@ -0,0 +1,6 @@
+// RUN: %clang_cc1 -triple x86_64-apple-darwin10 -emit-llvm -o - %s | FileCheck %s
+// rdar://8360841
+
+wchar_t s[] = L"\u2722";
+
+// CHECK: @s = global [8 x i8] c"\22'\00\00\00\00\00\00"
author	Fariborz Jahanian <fjahanian@apple.com>
	Tue, 31 Aug 2010 23:34:27 +0000 (23:34 +0000)
committer	Fariborz Jahanian <fjahanian@apple.com>
	Tue, 31 Aug 2010 23:34:27 +0000 (23:34 +0000)
clang/lib/Lex/LiteralSupport.cpp		patch \| blob \| history
clang/test/CodeGenCXX/uncode-string.cpp	[new file with mode: 0644]	patch \| blob