From: Fariborz Jahanian Date: Tue, 31 Aug 2010 23:34:27 +0000 (+0000) Subject: Some support for unicode string constants X-Git-Tag: llvmorg-2.8.0-rc0~341 X-Git-Url: http://review.tizen.org/git/?a=commitdiff_plain;h=abaae2b69291fdadb9ca0572a38808ced9831105;p=platform%2Fupstream%2Fllvm.git Some support for unicode string constants in wide strings. radar 8360841. llvm-svn: 112672 --- diff --git a/clang/lib/Lex/LiteralSupport.cpp b/clang/lib/Lex/LiteralSupport.cpp index a12c4ae0d401..c758b4003256 100644 --- a/clang/lib/Lex/LiteralSupport.cpp +++ b/clang/lib/Lex/LiteralSupport.cpp @@ -170,6 +170,7 @@ static unsigned ProcessCharEscape(const char *&ThisTokBuf, static void ProcessUCNEscape(const char *&ThisTokBuf, const char *ThisTokEnd, char *&ResultBuf, bool &HadError, SourceLocation Loc, Preprocessor &PP, + bool wide, bool Complain) { // FIXME: Add a warning - UCN's are only valid in C++ & C99. // FIXME: Handle wide strings. @@ -190,6 +191,7 @@ static void ProcessUCNEscape(const char *&ThisTokBuf, const char *ThisTokEnd, UTF32 UcnVal = 0; unsigned short UcnLen = (ThisTokBuf[-1] == 'u' ? 4 : 8); + unsigned short UcnLenSave = UcnLen; for (; ThisTokBuf != ThisTokEnd && UcnLen; ++ThisTokBuf, UcnLen--) { int CharVal = HexDigitValue(ThisTokBuf[0]); if (CharVal == -1) break; @@ -214,6 +216,16 @@ static void ProcessUCNEscape(const char *&ThisTokBuf, const char *ThisTokEnd, HadError = 1; return; } + if (wide) { + assert(UcnLenSave == 4 && + "ProcessUCNEscape - only ucn length of 4 supported"); + // little endian assumed. + *ResultBuf++ = (UcnVal & 0x000000FF); + *ResultBuf++ = (UcnVal & 0x0000FF00) >> 8; + *ResultBuf++ = (UcnVal & 0x00FF0000) >> 16; + *ResultBuf++ = (UcnVal & 0xFF000000) >> 24; + return; + } // Now that we've parsed/checked the UCN, we convert from UTF32->UTF8. // The conversion below was inspired by: // http://www.unicode.org/Public/PROGRAMS/CVTUTF/ConvertUTF.c @@ -830,12 +842,14 @@ StringLiteralParser(const Token *StringToks, unsigned NumStringToks, } const char *ThisTokEnd = ThisTokBuf+ThisTokLen-1; // Skip end quote. - + bool wide = false; // TODO: Input character set mapping support. // Skip L marker for wide strings. - if (ThisTokBuf[0] == 'L') + if (ThisTokBuf[0] == 'L') { + wide = true; ++ThisTokBuf; + } assert(ThisTokBuf[0] == '"' && "Expected quote, lexer broken?"); ++ThisTokBuf; @@ -880,7 +894,8 @@ StringLiteralParser(const Token *StringToks, unsigned NumStringToks, // Is this a Universal Character Name escape? if (ThisTokBuf[1] == 'u' || ThisTokBuf[1] == 'U') { ProcessUCNEscape(ThisTokBuf, ThisTokEnd, ResultPtr, - hadError, StringToks[i].getLocation(), PP, Complain); + hadError, StringToks[i].getLocation(), PP, wide, + Complain); continue; } // Otherwise, this is a non-UCN escape character. Process it. diff --git a/clang/test/CodeGenCXX/uncode-string.cpp b/clang/test/CodeGenCXX/uncode-string.cpp new file mode 100644 index 000000000000..e5431497479e --- /dev/null +++ b/clang/test/CodeGenCXX/uncode-string.cpp @@ -0,0 +1,6 @@ +// RUN: %clang_cc1 -triple x86_64-apple-darwin10 -emit-llvm -o - %s | FileCheck %s +// rdar://8360841 + +wchar_t s[] = L"\u2722"; + +// CHECK: @s = global [8 x i8] c"\22'\00\00\00\00\00\00"