UTF8 Marshaling support(UnmanagedType.LPUTF8Str)
authorTijoy Tom Kalathiparambil <tijoytk@microsoft.com>
Tue, 3 May 2016 21:25:22 +0000 (14:25 -0700)
committertijoytk <tijoytk@microsoft.com>
Sat, 7 May 2016 01:26:29 +0000 (18:26 -0700)
Usage: [MarshalAs(UnmanagedType.LPUTF8Str)] applied to string
and stringbuilder.

The implementation mostly uses the Encoding.UTF8 API to do the byte buffer
to string round-tripping. It introduces two new marshalers,
UTF8Marshaler and UTF8BufferMarshaler, which handle string
and StringBuilder respectively. [Out] StringBuilder marshaling uses the
builder capacity as the buffer size, i.e. (builder.Capacity + 1) * 3,
which is enough for any UTF8 char in the BMP plane; in fact the Encoding.UTF8
mscorlib APIs use the same length. All marshaling flags (ThrowOnUnmappableChar,
default char) are ignored since they do not make sense in a UTF16 to UTF8
context.

The public contracts are not yet updated; the public contracts and the
public marshaling APIs (Marshal.PtrToStringUtf8 and StringToHGlobalUtf8)
will be added once the implementation is in. The marshal APIs are going
to be wrappers around Encoding.GetBytes and GetChars anyway.

20 files changed:
src/inc/corhdr.h
src/mscorlib/model.xml
src/mscorlib/src/System/Runtime/InteropServices/Attributes.cs
src/mscorlib/src/System/String.cs
src/mscorlib/src/System/StubHelpers.cs
src/vm/fieldmarshaler.cpp
src/vm/fieldmarshaler.h
src/vm/ilmarshalers.cpp
src/vm/ilmarshalers.h
src/vm/methodtable.cpp
src/vm/mlinfo.cpp
src/vm/mscorlib.h
src/vm/mtypes.h
src/vm/nsenums.h
tests/src/Interop/CMakeLists.txt
tests/src/Interop/StringMarshalling/UTF8/CMakeLists.txt [new file with mode: 0644]
tests/src/Interop/StringMarshalling/UTF8/UTF8Test.cs [new file with mode: 0644]
tests/src/Interop/StringMarshalling/UTF8/UTF8Test.csproj [new file with mode: 0644]
tests/src/Interop/StringMarshalling/UTF8/UTF8TestNative.cpp [new file with mode: 0644]
tests/src/Interop/StringMarshalling/UTF8/project.json [new file with mode: 0644]

index 071490d..c194def 100644 (file)
@@ -1077,7 +1077,7 @@ typedef enum CorNativeType
 
     NATIVE_TYPE_IINSPECTABLE = 0x2e,
     NATIVE_TYPE_HSTRING     = 0x2f,
-
+    NATIVE_TYPE_LPUTF8STR   = 0x30, // utf-8 string
     NATIVE_TYPE_MAX         = 0x50, // first invalid element type
 } CorNativeType;
 
index d20c3c4..28ba1a6 100644 (file)
       <Member MemberType="Field" Name="LPStruct" />
       <Member MemberType="Field" Name="LPTStr" />
       <Member MemberType="Field" Name="LPWStr" />
+      <Member MemberType="Field" Name="LPUTF8Str" />
       <Member MemberType="Field" Name="R4" />
       <Member MemberType="Field" Name="R8" />
       <Member MemberType="Field" Name="SafeArray" />
       <Member Name="ConvertToManaged(System.IntPtr)" />
       <Member Name="ClearNative(System.IntPtr)" />
     </Type>
+     <Type Status="ImplRoot" Name="System.StubHelpers.UTF8Marshaler">
+      <Member Name="ConvertToNative(System.Int32,System.String,System.IntPtr)" />
+      <Member Name="ConvertToManaged(System.IntPtr)" />
+      <Member Name="ClearNative(System.IntPtr)" />
+    </Type>    
+      <Type Status="ImplRoot" Name="System.StubHelpers.UTF8BufferMarshaler">
+      <Member Name="ConvertToNative(System.Text.StringBuilder,System.IntPtr,System.Int32)" />
+      <Member Name="ConvertToManaged(System.Text.StringBuilder,System.IntPtr)" />      
+    </Type>    
     <Type Status="ApiFxInternal" Name="System.StubHelpers.EventArgsMarshaler" Condition="FEATURE_COMINTEROP">
       <Member Name="CreateNativeNCCEventArgsInstance(System.Int32,System.Object,System.Object,System.Int32,System.Int32)"/>
       <Member Name="CreateNativePCEventArgsInstance(System.String)" />
index 9e6ae6b..06c963a 100644 (file)
@@ -516,6 +516,9 @@ namespace System.Runtime.InteropServices{
         
         [System.Runtime.InteropServices.ComVisible(false)]
         HString          = 0x2f,        // Windows Runtime HSTRING
+
+        [System.Runtime.InteropServices.ComVisible(false)]
+        LPUTF8Str        = 0x30,        // UTF8 string
     }
 
     [AttributeUsage(AttributeTargets.Parameter | AttributeTargets.Field | AttributeTargets.ReturnValue, Inherited = false)]
index e00fae1..d2b5b96 100644 (file)
@@ -1441,6 +1441,16 @@ namespace System {
 
             return s;
         }
+                
+        // Encodes the characters of this string into the caller-supplied native buffer
+        // with the given encoding and returns whatever Encoding.GetBytes reports.
+        // pbNativeBuffer/cbNativeBuffer describe the destination and its size in bytes.
+        [System.Security.SecuritySafeCritical]
+        unsafe internal int GetBytesFromEncoding(byte* pbNativeBuffer, int cbNativeBuffer,Encoding encoding)
+        {
+            int cbWritten;
+
+            // Pin the first character so the encoder can read the string through a raw pointer.
+            fixed (char* pchFirst = &this.m_firstChar)
+            {
+                cbWritten = encoding.GetBytes(pchFirst, m_stringLength, pbNativeBuffer, cbNativeBuffer);
+            }
+
+            return cbWritten;
+        }
 
         [System.Security.SecuritySafeCritical]  // auto-generated
         unsafe internal int ConvertToAnsi(byte *pbNativeBuffer, int cbNativeBuffer, bool fBestFit, bool fThrowOnUnmappableChar)
index b189fa5..3a5ece6 100644 (file)
@@ -125,6 +125,112 @@ namespace  System.StubHelpers {
         }
     }  // class CSTRMarshaler
 
+    // Stub helper that converts System.String to and from a null-terminated UTF-8 native string.
+    [ReliabilityContract(Consistency.WillNotCorruptState, Cer.MayFail)]
+    internal static class UTF8Marshaler
+    {
+        // Bytes reserved per UTF-16 char when sizing the caller-provided buffer.
+        const int UTF8_CHAR_SIZE = 3;
+
+        // Encodes strManaged as UTF-8 and returns a pointer to the null-terminated result.
+        //   flags         - not consulted by this helper.
+        //   strManaged    - string to convert; null yields IntPtr.Zero.
+        //   pNativeBuffer - buffer supplied by the caller, sized (Length + 1) * 3 + 1 bytes,
+        //                   or IntPtr.Zero to have this helper allocate CoTaskMem memory.
+        static internal unsafe IntPtr ConvertToNative(int flags, string strManaged, IntPtr pNativeBuffer)
+        {
+            if (strManaged == null)
+                return IntPtr.Zero;
+
+            StubHelpers.CheckStringLength(strManaged.Length);
+
+            byte* pbDest = (byte*)pNativeBuffer;
+            int cbWritten;
+
+            if (pbDest == null)
+            {
+                // No caller buffer: measure first, then allocate exactly that many
+                // bytes plus one for the terminating null.
+                cbWritten = Encoding.UTF8.GetByteCount(strManaged);
+                pbDest = (byte*)Marshal.AllocCoTaskMem(cbWritten + 1);
+                strManaged.GetBytesFromEncoding(pbDest, cbWritten, Encoding.UTF8);
+            }
+            else
+            {
+                // "1-pass" mode: encode straight into the caller's buffer. The buffer is
+                // larger than needed, so the returned byte count marks where the string ends.
+                int cbCapacity = (strManaged.Length + 1) * UTF8_CHAR_SIZE;
+                cbCapacity += 1; // room for the '\0' appended below
+                cbWritten = strManaged.GetBytesFromEncoding(pbDest, cbCapacity, Encoding.UTF8);
+            }
+
+            pbDest[cbWritten] = 0x0;
+            return (IntPtr)pbDest;
+        }
+
+        // Decodes the null-terminated UTF-8 string at cstr; IntPtr.Zero yields null.
+        static internal unsafe string ConvertToManaged(IntPtr cstr)
+        {
+            if (cstr == IntPtr.Zero)
+            {
+                return null;
+            }
+
+            int cbUtf8 = StubHelpers.strlen((sbyte*)cstr);
+            return String.CreateStringFromEncoding((byte*)cstr, cbUtf8, Encoding.UTF8);
+        }
+
+        // Releases a buffer with CoTaskMemFree; IntPtr.Zero is ignored.
+        static internal void ClearNative(IntPtr pNative)
+        {
+            if (pNative == IntPtr.Zero)
+                return;
+
+            Win32Native.CoTaskMemFree(pNative);
+        }
+    }
+
+    // Stub helper that copies a StringBuilder's contents to and from a null-terminated
+    // UTF-8 native buffer.
+    [ReliabilityContract(Consistency.WillNotCorruptState, Cer.MayFail)]
+    internal static class UTF8BufferMarshaler
+    {
+        // Encodes the current contents of sb as UTF-8 into pNativeBuffer, appends a
+        // terminating null byte and returns the buffer pointer.
+        //   sb            - builder to read; null yields IntPtr.Zero.
+        //   pNativeBuffer - destination buffer; this helper does not allocate.
+        //   flags         - not consulted by this helper.
+        static internal unsafe IntPtr ConvertToNative(StringBuilder sb, IntPtr pNativeBuffer, int flags)
+        {
+            if (null == sb)
+            {
+                return IntPtr.Zero;
+            }
+
+            // Convert to string first  
+            string strManaged = sb.ToString();
+
+            // Get byte count 
+            int nb = Encoding.UTF8.GetByteCount(strManaged);
+
+            // EmitConvertSpaceCLRToNative allocates memory
+            byte* pbNativeBuffer = (byte*)pNativeBuffer;
+            nb = strManaged.GetBytesFromEncoding(pbNativeBuffer, nb, Encoding.UTF8);
+
+            pbNativeBuffer[nb] = 0x0;
+            return (IntPtr)pbNativeBuffer;
+        }
+
+        // Decodes the null-terminated UTF-8 string at pNative and replaces the
+        // contents of sb with it. A null native pointer leaves sb untouched.
+        static internal unsafe void ConvertToManaged(StringBuilder sb, IntPtr pNative)
+        {
+            // IntPtr is a value type, so "pNative == null" is always false and a null
+            // pointer would fall through to strlen. Compare against IntPtr.Zero.
+            if (pNative == IntPtr.Zero)
+                return;
+
+            int nbBytes = StubHelpers.strlen((sbyte*)pNative);
+            int numChar = Encoding.UTF8.GetCharCount((byte*)pNative, nbBytes);
+
+            // Allocate one spare char so the array is never empty: pinning a zero-length
+            // array yields a null pointer, which Encoding.GetChars rejects. This lets an
+            // empty native string flow through and empty the builder instead of
+            // returning early and leaving its previous contents in place.
+            char[] cCharBuffer = new char[numChar + 1];
+            fixed (char* pBuffer = cCharBuffer)
+            {
+                numChar = Encoding.UTF8.GetChars((byte*)pNative, nbBytes, pBuffer, numChar);
+                // replace string builder internal buffer
+                sb.ReplaceBufferInternal(pBuffer, numChar);
+            }
+        }
+    }
 
 #if FEATURE_COMINTEROP
 
index 37ad39e..39465c2 100644 (file)
@@ -770,6 +770,10 @@ do                                                      \
                         case NATIVE_TYPE_LPWSTR:
                             INITFIELDMARSHALER(NFT_STRINGUNI, FieldMarshaler_StringUni, ());
                             break;
+                        
+                        case NATIVE_TYPE_LPUTF8STR:
+                                                       INITFIELDMARSHALER(NFT_STRINGUTF8, FieldMarshaler_StringUtf8, ());
+                                                       break;
 
                         case NATIVE_TYPE_LPTSTR:
                             // We no longer support Win9x so LPTSTR always maps to a Unicode string.
@@ -3158,7 +3162,109 @@ VOID FieldMarshaler_StringAnsi::DestroyNativeImpl(LPVOID pNativeValue) const
         CoTaskMemFree(sz);
 }
 
+//=======================================================================
+// CoTask Utf8 <--> System.String
+// See FieldMarshaler for details.
+//=======================================================================
+VOID FieldMarshaler_StringUtf8::UpdateNativeImpl(OBJECTREF* pCLRValue, LPVOID pNativeValue, OBJECTREF *ppCleanupWorkListOnStack) const
+{
+    CONTRACTL
+    {
+        THROWS;
+        GC_TRIGGERS;
+        MODE_COOPERATIVE;
+        INJECT_FAULT(COMPlusThrowOM());
+        PRECONDITION(CheckPointer(pNativeValue));
+    }
+    CONTRACTL_END;
+
+    STRINGREF pString = (STRINGREF)(*pCLRValue);
+    if (pString == NULL)
+    {
+        MAYBE_UNALIGNED_WRITE(pNativeValue, _PTR, NULL);
+    }
+    else
+    {
+        DWORD nc = pString->GetStringLength();
+        if (nc > MAX_SIZE_FOR_INTEROP)
+            COMPlusThrow(kMarshalDirectiveException, IDS_EE_STRING_TOOLONG);
+
+        // Characters would be # of characters + 1 in case left over high surrogate is ?
+        // Max 3 bytes per char for basic multi-lingual plane.          
+        nc = (nc + 1) * 3;
+        // +1 for '\0'
+        LPUTF8  lpBuffer = (LPUTF8)CoTaskMemAlloc(nc + 1);
+        if (!lpBuffer)
+            COMPlusThrowOM();
+
+        // UTF8Marshaler.ConvertToNative
+        MethodDescCallSite convertToNative(METHOD__CUTF8MARSHALER__CONVERT_TO_NATIVE);
+        
+        ARG_SLOT args[] =
+        {
+            ((ARG_SLOT)(CLR_I4)0),
+            ObjToArgSlot(*pCLRValue),
+            PtrToArgSlot(lpBuffer)
+        };
+        convertToNative.Call(args);
+        MAYBE_UNALIGNED_WRITE(pNativeValue, _PTR, lpBuffer);
+    }
+}
+
+
+//=======================================================================
+// CoTask Utf8 <--> System.String
+// See FieldMarshaler for details.
+//=======================================================================
+VOID FieldMarshaler_StringUtf8::UpdateCLRImpl(const VOID *pNativeValue, OBJECTREF *ppProtectedCLRValue, OBJECTREF *ppProtectedOldCLRValue) const
+{
+    CONTRACTL
+    {
+        THROWS;
+    GC_TRIGGERS;
+    MODE_COOPERATIVE;
+    INJECT_FAULT(COMPlusThrowOM());
+    PRECONDITION(CheckPointer(pNativeValue));
+    PRECONDITION(CheckPointer(ppProtectedCLRValue));
+    }
+    CONTRACTL_END;
+
+    STRINGREF pString = NULL;
+    LPCUTF8  sz = (LPCUTF8)MAYBE_UNALIGNED_READ(pNativeValue, _PTR);
+    if (!sz)
+        pString = NULL;
+    else
+    {
+        MethodDescCallSite convertToManaged(METHOD__CUTF8MARSHALER__CONVERT_TO_MANAGED);
+        ARG_SLOT args[] =
+        {
+            PtrToArgSlot(pNativeValue),
+        };
+        pString = convertToManaged.Call_RetSTRINGREF(args);
+    }
+    *((STRINGREF*)ppProtectedCLRValue) = pString;
+}
+
+//=======================================================================
+// CoTask Utf8 <--> System.String: releases the native buffer referenced by a field.
+// Reads the pointer stored at pNativeValue, resets that slot to NULL, then frees the old pointer with CoTaskMemFree when it is non-NULL. See FieldMarshaler for details.
+//=======================================================================
+VOID FieldMarshaler_StringUtf8::DestroyNativeImpl(LPVOID pNativeValue) const
+{
+    CONTRACTL
+    {
+        NOTHROW;
+        GC_NOTRIGGER;
+        MODE_ANY;
+        PRECONDITION(CheckPointer(pNativeValue));
+    }
+    CONTRACTL_END;
 
+    LPSTR lpBuffer = (LPSTR)MAYBE_UNALIGNED_READ(pNativeValue, _PTR); // pointer currently stored in the field
+    MAYBE_UNALIGNED_WRITE(pNativeValue, _PTR, NULL); // clear the field before freeing the buffer
+    if (lpBuffer)
+        CoTaskMemFree(lpBuffer);
+}
 
 //=======================================================================
 // FixedString <--> System.String
@@ -4607,6 +4713,7 @@ VOID NStructFieldTypeToString(FieldMarshaler* pFM, SString& strNStructFieldType)
         switch (GetNStructFieldType()) { \
         case NFT_STRINGUNI: rettype ((FieldMarshaler_StringUni*)this)->name##Impl args; break; \
         case NFT_STRINGANSI: rettype ((FieldMarshaler_StringAnsi*)this)->name##Impl args; break; \
+        case NFT_STRINGUTF8: rettype ((FieldMarshaler_StringUtf8*)this)->name##Impl args; break; \
         case NFT_FIXEDSTRINGUNI: rettype ((FieldMarshaler_FixedStringUni*)this)->name##Impl args; break; \
         case NFT_FIXEDSTRINGANSI: rettype ((FieldMarshaler_FixedStringAnsi*)this)->name##Impl args; break; \
         case NFT_FIXEDCHARARRAYANSI: rettype ((FieldMarshaler_FixedCharArrayAnsi*)this)->name##Impl args; break; \
@@ -4649,6 +4756,7 @@ VOID NStructFieldTypeToString(FieldMarshaler* pFM, SString& strNStructFieldType)
         switch (GetNStructFieldType()) { \
         case NFT_STRINGUNI: rettype ((FieldMarshaler_StringUni*)this)->name##Impl args; break; \
         case NFT_STRINGANSI: rettype ((FieldMarshaler_StringAnsi*)this)->name##Impl args; break; \
+        case NFT_STRINGUTF8: rettype ((FieldMarshaler_StringUtf8*)this)->name##Impl args; break; \
         case NFT_FIXEDSTRINGUNI: rettype ((FieldMarshaler_FixedStringUni*)this)->name##Impl args; break; \
         case NFT_FIXEDSTRINGANSI: rettype ((FieldMarshaler_FixedStringAnsi*)this)->name##Impl args; break; \
         case NFT_FIXEDCHARARRAYANSI: rettype ((FieldMarshaler_FixedCharArrayAnsi*)this)->name##Impl args; break; \
index 3c06528..9ec7e87 100644 (file)
@@ -757,6 +757,19 @@ public:
     ELEMENT_SIZE_IMPL(sizeof(LPWSTR), sizeof(LPWSTR))
 };
 
+//=======================================================================
+// LPUTF8STR <--> System.String: field marshaler installed for NATIVE_TYPE_LPUTF8STR string fields (NFT_STRINGUTF8).
+//=======================================================================
+class FieldMarshaler_StringUtf8 : public FieldMarshaler
+{
+public:
+       // The *Impl methods below are reached through the NFT_STRINGUTF8 cases of the FieldMarshaler dispatch macros.
+       VOID UpdateNativeImpl(OBJECTREF* pCLRValue, LPVOID pNativeValue, OBJECTREF *ppCleanupWorkListOnStack) const; // managed string -> native UTF-8 buffer pointer
+       VOID UpdateCLRImpl(const VOID *pNativeValue, OBJECTREF *ppProtectedCLRValue, OBJECTREF *ppProtectedOldCLRValue) const; // native UTF-8 buffer pointer -> managed string
+       VOID DestroyNativeImpl(LPVOID pNativeValue) const; // frees the native buffer with CoTaskMemFree
+       // NOTE(review): both macro arguments are pointer-sized; presumably native size and alignment - confirm against ELEMENT_SIZE_IMPL.
+       ELEMENT_SIZE_IMPL(sizeof(LPSTR), sizeof(LPSTR))
+};
 
 //=======================================================================
 // LPSTR <--> System.String
index 8062cfd..ebd8250 100644 (file)
@@ -507,6 +507,155 @@ void ILOptimizedAllocMarshaler::EmitClearNative(ILCodeStream* pslILEmit)
     }
 }
 
+LocalDesc ILUTF8BufferMarshaler::GetManagedType() // describes the managed local this marshaler works on
+{
+    STANDARD_VM_CONTRACT;
+    return LocalDesc(MscorlibBinder::GetClass(CLASS__STRING_BUILDER)); // class looked up through the mscorlib binder (CLASS__STRING_BUILDER)
+}
+
+void ILUTF8BufferMarshaler::EmitConvertSpaceCLRToNative(ILCodeStream* pslILEmit)
+{
+    STANDARD_VM_CONTRACT;
+    // Emits IL that reserves the native buffer for a StringBuilder: the native value stays NULL for a null builder, otherwise it points at (Capacity + 1) * 3 + 1 bytes whose last byte is set to 0.
+    ILCodeLabel* pNullRefLabel = pslILEmit->NewCodeLabel();
+    // native value = NULL (what remains stored when the managed builder is null)
+    pslILEmit->EmitLoadNullPtr();
+    EmitStoreNativeValue(pslILEmit);
+    // if (managed builder == null) goto NullRef
+    EmitLoadManagedValue(pslILEmit);
+    pslILEmit->EmitBRFALSE(pNullRefLabel);
+
+    EmitLoadManagedValue(pslILEmit);
+    // int System.Text.StringBuilder.get_Capacity()
+    pslILEmit->EmitCALL(METHOD__STRING_BUILDER__GET_CAPACITY, 1, 1);
+    pslILEmit->EmitDUP();
+
+    // static void StubHelpers.CheckStringLength(int length) - consumes the duplicated capacity
+    pslILEmit->EmitCALL(METHOD__STUBHELPERS__CHECK_STRING_LENGTH, 1, 0);
+
+    // Max number of bytes for UTF8 string in BMP plane is ( StringBuilder.Capacity + 1 ) * 3 + 1
+    // first +1 if the high surrogate is '?' and second +1 for null byte.
+
+    // stack: capacity  ->  capacity + 1
+    pslILEmit->EmitLDC(1);
+    pslILEmit->EmitADD();
+
+    // stack: capacity + 1  ->  (capacity + 1) * 3
+    pslILEmit->EmitLDC(3);
+    pslILEmit->EmitMUL();
+
+    // stack: offset_of_null, i.e. (capacity + 1) * 3; a copy is saved in a local for the final store below
+    DWORD dwTmpOffsetOfSecretNull = pslILEmit->NewLocal(ELEMENT_TYPE_I4);
+    pslILEmit->EmitDUP();
+    pslILEmit->EmitSTLOC(dwTmpOffsetOfSecretNull); // make sure the stack is empty for localloc
+
+    // make space for '\0'
+    pslILEmit->EmitLDC(1);
+    pslILEmit->EmitADD();
+
+    // stack: alloc_size_in_bytes
+    ILCodeLabel *pAllocRejoin = pslILEmit->NewCodeLabel();
+    if (IsCLRToNative(m_dwMarshalFlags) && !IsByref(m_dwMarshalFlags))
+    {
+        ILCodeLabel *pNoOptimize = pslILEmit->NewCodeLabel();
+        m_dwLocalBuffer = pslILEmit->NewLocal(ELEMENT_TYPE_I);
+
+        // LocalBuffer = 0
+        pslILEmit->EmitLoadNullPtr();
+        pslILEmit->EmitSTLOC(m_dwLocalBuffer);
+
+        // if (alloc_size_in_bytes > MAX_LOCAL_BUFFER_LENGTH) goto NoOptimize
+        pslILEmit->EmitDUP();
+        pslILEmit->EmitLDC(MAX_LOCAL_BUFFER_LENGTH);
+        pslILEmit->EmitCGT_UN();
+        pslILEmit->EmitBRTRUE(pNoOptimize);
+        // small enough: LocalBuffer = localloc(alloc_size_in_bytes), then skip the CoTaskMem allocation
+        pslILEmit->EmitLOCALLOC();
+        pslILEmit->EmitDUP();
+        pslILEmit->EmitSTLOC(m_dwLocalBuffer);
+        pslILEmit->EmitBR(pAllocRejoin);
+
+        pslILEmit->EmitLabel(pNoOptimize);
+    }
+
+    // static IntPtr AllocCoTaskMem(int cb)
+    pslILEmit->EmitCALL(METHOD__MARSHAL__ALLOC_CO_TASK_MEM, 1, 1);
+
+    pslILEmit->EmitLabel(pAllocRejoin);
+
+    // stack: native_addr (from localloc or from AllocCoTaskMem)
+
+    pslILEmit->EmitDUP();
+    EmitStoreNativeValue(pslILEmit);
+
+    pslILEmit->EmitLDLOC(dwTmpOffsetOfSecretNull);
+
+    // stack: native_addr offset_of_null
+    pslILEmit->EmitADD();
+
+    // stack: addr_of_null; store a 0 byte at that address
+    pslILEmit->EmitLDC(0);
+    pslILEmit->EmitSTIND_I1();
+
+    pslILEmit->EmitLabel(pNullRefLabel);
+}
+
+void ILUTF8BufferMarshaler::EmitConvertContentsCLRToNative(ILCodeStream* pslILEmit)
+{
+    STANDARD_VM_CONTRACT;
+    DWORD dwUtf8MarshalFlags = // best-fit value in the low byte, throw-on-unmappable shifted left by 8
+        (m_pargs->m_pMarshalInfo->GetBestFitMapping() & 0xFF) |
+        (m_pargs->m_pMarshalInfo->GetThrowOnUnmappableChar() << 8);
+
+    // setup to call UTF8BufferMarshaler.ConvertToNative: push builder, native buffer, flags
+    EmitLoadManagedValue(pslILEmit);
+    EmitLoadNativeValue(pslILEmit);
+    pslILEmit->EmitLDC(dwUtf8MarshalFlags);
+
+    // IntPtr ConvertToNative(StringBuilder sb, IntPtr pNativeBuffer, int flags); the returned pointer becomes the native value
+    pslILEmit->EmitCALL(METHOD__UTF8BUFFERMARSHALER__CONVERT_TO_NATIVE, 3, 1);
+    EmitStoreNativeValue(pslILEmit);
+}
+
+void ILUTF8BufferMarshaler::EmitConvertSpaceNativeToCLR(ILCodeStream* pslILEmit)
+{
+    STANDARD_VM_CONTRACT;
+    // Emits IL that creates the managed StringBuilder for a non-NULL native pointer; for a NULL pointer the managed value is not assigned here.
+    ILCodeLabel* pNullRefLabel = pslILEmit->NewCodeLabel();
+    // if (native value == NULL) goto NullRef
+    EmitLoadNativeValue(pslILEmit);
+    pslILEmit->EmitBRFALSE(pNullRefLabel);
+
+    if (IsIn(m_dwMarshalFlags) || IsCLRToNative(m_dwMarshalFlags))
+    {
+        EmitLoadNativeValue(pslILEmit);
+        // static int System.StubHelpers.StubHelpers.strlen(sbyte* ptr)
+        pslILEmit->EmitCALL(METHOD__STUBHELPERS__STRLEN, 1, 1);
+    }
+    else
+    {
+        // don't touch the native buffer in the native->CLR out-only case
+        pslILEmit->EmitLDC(0);
+    }
+    // stack: the strlen result (a byte count) or 0; it is passed as the capacity to
+    // System.Text.StringBuilder..ctor(int capacity)
+    pslILEmit->EmitNEWOBJ(METHOD__STRING_BUILDER__CTOR_INT, 1);
+    EmitStoreManagedValue(pslILEmit);
+    pslILEmit->EmitLabel(pNullRefLabel);
+}
+
+void ILUTF8BufferMarshaler::EmitConvertContentsNativeToCLR(ILCodeStream* pslILEmit)
+{
+    STANDARD_VM_CONTRACT;
+    // Push (builder, native pointer); no null check is emitted here, so the helper receives the values as they are.
+    EmitLoadManagedValue(pslILEmit);
+    EmitLoadNativeValue(pslILEmit);
+
+    //void UTF8BufferMarshaler.ConvertToManaged(StringBuilder sb, IntPtr pNative)
+    pslILEmit->EmitCALL(METHOD__UTF8BUFFERMARSHALER__CONVERT_TO_MANAGED, 2, 0);
+}
+
+
 LocalDesc ILWSTRBufferMarshaler::GetManagedType()
 {
     STANDARD_VM_CONTRACT;
@@ -1924,6 +2073,99 @@ void ILHSTRINGMarshaler::EmitClearNative(ILCodeStream* pslILEmit)
 
 #endif // FEATURE_COMINTEROP
 
+LocalDesc ILCUTF8Marshaler::GetManagedType() // describes the managed local this marshaler works on
+{
+       LIMITED_METHOD_CONTRACT;
+
+       return LocalDesc(ELEMENT_TYPE_STRING); // the managed side is a string
+}
+
+void ILCUTF8Marshaler::EmitConvertContentsCLRToNative(ILCodeStream* pslILEmit)
+{
+       STANDARD_VM_CONTRACT;
+       // Emits IL that calls UTF8Marshaler.ConvertToNative, handing it a localloc'ed buffer when the string is by-value in-only and small enough, otherwise a null buffer.
+       DWORD dwUtf8MarshalFlags = // best-fit value in the low byte, throw-on-unmappable shifted left by 8
+               (m_pargs->m_pMarshalInfo->GetBestFitMapping() & 0xFF) |
+               (m_pargs->m_pMarshalInfo->GetThrowOnUnmappableChar() << 8);
+
+       bool bPassByValueInOnly = IsIn(m_dwMarshalFlags) && !IsOut(m_dwMarshalFlags) && !IsByref(m_dwMarshalFlags);
+       if (bPassByValueInOnly)
+       {
+               DWORD dwBufSize = pslILEmit->NewLocal(ELEMENT_TYPE_I4);
+               m_dwLocalBuffer = pslILEmit->NewLocal(ELEMENT_TYPE_I);
+
+               // LocalBuffer = 0
+               pslILEmit->EmitLoadNullPtr();
+               pslILEmit->EmitSTLOC(m_dwLocalBuffer);
+
+               ILCodeLabel* pNoOptimize = pslILEmit->NewCodeLabel();
+
+               // if == NULL, goto NoOptimize
+               EmitLoadManagedValue(pslILEmit);
+               pslILEmit->EmitBRFALSE(pNoOptimize);
+                                               
+               // (String.Length + 1)
+               // Characters would be # of characters + 1 in case left over high surrogate is ?
+               EmitLoadManagedValue(pslILEmit);
+               pslILEmit->EmitCALL(METHOD__STRING__GET_LENGTH, 1, 1);
+               pslILEmit->EmitLDC(1);
+               pslILEmit->EmitADD();
+
+               // Max 3 bytes per char.
+               // (String.Length + 1) * 3              
+               pslILEmit->EmitLDC(3);
+               pslILEmit->EmitMUL();
+
+               // +1 for the 0x0 that we put in.
+               // ((String.Length + 1) * 3) + 1
+               pslILEmit->EmitLDC(1);
+               pslILEmit->EmitADD();
+                               
+               // BufSize = ( (String.Length+1) * 3) + 1
+               pslILEmit->EmitSTLOC(dwBufSize);
+
+               // if (MAX_LOCAL_BUFFER_LENGTH < BufSize ) goto NoOptimize
+               pslILEmit->EmitLDC(MAX_LOCAL_BUFFER_LENGTH);
+               pslILEmit->EmitLDLOC(dwBufSize);
+               pslILEmit->EmitCLT();
+               pslILEmit->EmitBRTRUE(pNoOptimize);
+
+               // LocalBuffer = localloc(BufSize);
+               pslILEmit->EmitLDLOC(dwBufSize);
+               pslILEmit->EmitLOCALLOC();
+               pslILEmit->EmitSTLOC(m_dwLocalBuffer);
+
+               // NoOptimize: LocalBuffer is still 0 when the string is null or BufSize exceeds the limit
+               pslILEmit->EmitLabel(pNoOptimize);
+       }
+
+       // UTF8Marshaler.ConvertToNative(dwUtf8MarshalFlags,pManaged, pLocalBuffer)
+       pslILEmit->EmitLDC(dwUtf8MarshalFlags);
+       EmitLoadManagedValue(pslILEmit);
+       // third argument: the local buffer variable when one was created above, otherwise a null pointer
+       if (m_dwLocalBuffer != LOCAL_NUM_UNUSED)
+       {
+               pslILEmit->EmitLDLOC(m_dwLocalBuffer);
+       }
+       else
+       {
+               pslILEmit->EmitLoadNullPtr();
+       }
+
+       pslILEmit->EmitCALL(METHOD__CUTF8MARSHALER__CONVERT_TO_NATIVE, 3, 1);
+       // the pointer returned by ConvertToNative becomes the native value
+       EmitStoreNativeValue(pslILEmit);
+}
+
+void ILCUTF8Marshaler::EmitConvertContentsNativeToCLR(ILCodeStream* pslILEmit)
+{
+       STANDARD_VM_CONTRACT;
+       // managed value = UTF8Marshaler.ConvertToManaged(native value)
+       EmitLoadNativeValue(pslILEmit);
+       pslILEmit->EmitCALL(METHOD__CUTF8MARSHALER__CONVERT_TO_MANAGED, 1, 1);
+       EmitStoreManagedValue(pslILEmit);
+}
+
 
 LocalDesc ILCSTRMarshaler::GetManagedType()
 {
index d750de1..3ed74b4 100644 (file)
@@ -1996,6 +1996,35 @@ protected:
     DWORD m_dwLocalBuffer;      // localloc'ed temp buffer variable or -1 if not used
 };
 
+class ILUTF8BufferMarshaler : public ILOptimizedAllocMarshaler // IL marshaler whose managed type is StringBuilder and whose native side is a UTF-8 buffer
+{
+public:
+       enum
+       {
+               c_fInOnly = FALSE,
+               c_nativeSize = sizeof(void *), // native side is a pointer
+               c_CLRSize = sizeof(OBJECTREF), // managed side is an object reference
+       };
+       // Threshold used by EmitConvertSpaceCLRToNative to choose between localloc and AllocCoTaskMem.
+       enum
+       {
+               // If required buffer length > MAX_LOCAL_BUFFER_LENGTH, don't optimize by allocating memory on stack
+               MAX_LOCAL_BUFFER_LENGTH = MAX_PATH_FNAME + 1
+       };
+       // METHOD__WIN32NATIVE__COTASKMEMFREE is handed to the base class; NOTE(review): presumably used as the clear-native routine - confirm in ILOptimizedAllocMarshaler.
+       ILUTF8BufferMarshaler() :
+               ILOptimizedAllocMarshaler(METHOD__WIN32NATIVE__COTASKMEMFREE)
+       {
+               LIMITED_METHOD_CONTRACT;
+       }
+       // IL emission overrides; definitions live in ilmarshalers.cpp.
+       virtual LocalDesc GetManagedType();
+       virtual void EmitConvertSpaceCLRToNative(ILCodeStream* pslILEmit); // allocates the native buffer
+       virtual void EmitConvertContentsCLRToNative(ILCodeStream* pslILEmit); // calls UTF8BufferMarshaler.ConvertToNative
+       virtual void EmitConvertSpaceNativeToCLR(ILCodeStream* pslILEmit); // creates the StringBuilder
+       virtual void EmitConvertContentsNativeToCLR(ILCodeStream* pslILEmit); // calls UTF8BufferMarshaler.ConvertToManaged
+};
+
 class ILWSTRBufferMarshaler : public ILOptimizedAllocMarshaler
 {
 public:
@@ -2522,6 +2551,37 @@ protected:
 };
 #endif // FEATURE_COMINTEROP
 
+
+class ILCUTF8Marshaler : public ILOptimizedAllocMarshaler
+{
+public:
+       enum
+       {
+               c_fInOnly = TRUE,
+               c_nativeSize = sizeof(void *),
+               c_CLRSize = sizeof(OBJECTREF),
+       };
+
+       enum
+       {
+               // If required buffer length > MAX_LOCAL_BUFFER_LENGTH, don't optimize by allocating memory on stack
+               MAX_LOCAL_BUFFER_LENGTH = MAX_PATH_FNAME + 1
+       };
+
+       ILCUTF8Marshaler() :
+               ILOptimizedAllocMarshaler(METHOD__CSTRMARSHALER__CLEAR_NATIVE)
+       {
+               LIMITED_METHOD_CONTRACT;
+       }
+
+protected:
+       virtual LocalDesc GetManagedType();
+       virtual void EmitConvertContentsCLRToNative(ILCodeStream* pslILEmit);
+       virtual void EmitConvertContentsNativeToCLR(ILCodeStream* pslILEmit);
+};
+
+
+
 class ILCSTRMarshaler : public ILOptimizedAllocMarshaler
 {
 public:
index bc2597f..c620046 100644 (file)
@@ -2918,6 +2918,7 @@ bool MethodTable::ClassifyEightBytesWithNativeLayout(SystemVStructRegisterPassin
             case NFT_STRINGUNI:
             case NFT_STRINGANSI:
             case NFT_ANSICHAR:
+            case NFT_STRINGUTF8:
             case NFT_WINBOOL:
             case NFT_CBOOL:
             case NFT_DELEGATE:
index 25f33c2..ab25452 100644 (file)
@@ -2171,6 +2171,10 @@ MarshalInfo::MarshalInfo(Module* pModule,
                         case NATIVE_TYPE_LPSTR:
                             m_type = builder ? MARSHAL_TYPE_LPSTR_BUFFER : MARSHAL_TYPE_LPSTR;
                             break;
+
+                        case NATIVE_TYPE_LPUTF8STR:
+                            m_type = builder ? MARSHAL_TYPE_UTF8_BUFFER : MARSHAL_TYPE_LPUTF8STR;
+                            break;
     
                         case NATIVE_TYPE_LPTSTR:
                         {
@@ -4463,6 +4467,9 @@ VOID MarshalInfo::MarshalTypeToString(SString& strMarshalType, BOOL fSizeIsSpeci
             case MARSHAL_TYPE_LPSTR:
                 strRetVal = W("LPSTR");
                 break;
+            case MARSHAL_TYPE_LPUTF8STR:
+                strRetVal = W("LPUTF8STR");
+                break;
 #ifdef FEATURE_COMINTEROP
             case MARSHAL_TYPE_ANSIBSTR:
                 strRetVal = W("AnsiBStr");
@@ -4474,6 +4481,9 @@ VOID MarshalInfo::MarshalTypeToString(SString& strMarshalType, BOOL fSizeIsSpeci
             case MARSHAL_TYPE_LPSTR_BUFFER:
                 strRetVal = W("LPSTR buffer");
                 break;
+            case MARSHAL_TYPE_UTF8_BUFFER:
+                strRetVal = W("UTF8 buffer");
+                break;
             case MARSHAL_TYPE_ASANYA:
                 strRetVal = W("AsAnyA");
                 break;
index a64269f..9620627 100644 (file)
@@ -2207,6 +2207,15 @@ DEFINE_METHOD(ICASTABLE,        ISINSTANCEOF,       IsInstanceOfInterface, IM_Ru
 DEFINE_METHOD(ICASTABLE,        GETIMPLTYPE,        GetImplType, IM_RuntimeTypeHandle_RetRuntimeTypeHandle)
 #endif // FEATURE_ICASTABLE
 
+DEFINE_CLASS(CUTF8MARSHALER, StubHelpers, UTF8Marshaler)
+DEFINE_METHOD(CUTF8MARSHALER, CONVERT_TO_NATIVE, ConvertToNative, SM_Int_Str_IntPtr_RetIntPtr)
+DEFINE_METHOD(CUTF8MARSHALER, CONVERT_TO_MANAGED, ConvertToManaged, SM_IntPtr_RetStr)
+DEFINE_METHOD(CUTF8MARSHALER, CLEAR_NATIVE, ClearNative, SM_IntPtr_RetVoid)
+
+DEFINE_CLASS(UTF8BUFFERMARSHALER, StubHelpers, UTF8BufferMarshaler)
+DEFINE_METHOD(UTF8BUFFERMARSHALER, CONVERT_TO_NATIVE, ConvertToNative, NoSig)
+DEFINE_METHOD(UTF8BUFFERMARSHALER, CONVERT_TO_MANAGED, ConvertToManaged, NoSig)
+
 #undef DEFINE_CLASS
 #undef DEFINE_METHOD
 #undef DEFINE_FIELD
index 6237e3a..603409b 100644 (file)
@@ -41,6 +41,7 @@ DEFINE_MARSHALER_TYPE(MARSHAL_TYPE_DATE,            DateMarshaler,
  
 DEFINE_MARSHALER_TYPE(MARSHAL_TYPE_LPWSTR,          WSTRMarshaler,                       false)
 DEFINE_MARSHALER_TYPE(MARSHAL_TYPE_LPSTR,           CSTRMarshaler,                       false)
+DEFINE_MARSHALER_TYPE(MARSHAL_TYPE_LPUTF8STR,       CUTF8Marshaler,                      false)
 #ifdef FEATURE_COMINTEROP
 DEFINE_MARSHALER_TYPE(MARSHAL_TYPE_BSTR,            BSTRMarshaler,                       false)
 DEFINE_MARSHALER_TYPE(MARSHAL_TYPE_ANSIBSTR,        AnsiBSTRMarshaler,                   false)
@@ -53,6 +54,7 @@ DEFINE_MARSHALER_TYPE(MARSHAL_TYPE_PCEVENTARGS,     PCEventArgsMarshaler,
 
 DEFINE_MARSHALER_TYPE(MARSHAL_TYPE_LPWSTR_BUFFER,   WSTRBufferMarshaler,                 false)
 DEFINE_MARSHALER_TYPE(MARSHAL_TYPE_LPSTR_BUFFER,    CSTRBufferMarshaler,                 false)
+DEFINE_MARSHALER_TYPE(MARSHAL_TYPE_UTF8_BUFFER,     UTF8BufferMarshaler,                 false)
 
 #if defined(FEATURE_COMINTEROP) || !defined(FEATURE_CORECLR)
 // CoreCLR doesn't have any support for marshalling interface pointers.
index 960d2ae..f5facc7 100644 (file)
@@ -66,7 +66,7 @@ DEFINE_NFT(NFT_DATETIMEOFFSET,              sizeof(INT64),          true)
 DEFINE_NFT(NFT_SYSTEMTYPE,                  sizeof(TypeNameNative), true)  // System.Type -> Windows.UI.Xaml.Interop.TypeName
 DEFINE_NFT(NFT_WINDOWSFOUNDATIONHRESULT,    sizeof(int),            true)  // Windows.Foundation.HResult is marshaled to System.Exception.
 #endif // FEATURE_COMINTEROP
-
+DEFINE_NFT(NFT_STRINGUTF8,                  sizeof(LPVOID),         false)
 DEFINE_NFT(NFT_ILLEGAL,                     1,                      true)
 
 #ifdef FEATURE_COMINTEROP
index af51c5e..370b699 100644 (file)
@@ -14,5 +14,6 @@ add_subdirectory(RefInt)
 add_subdirectory(RefCharArray)
 add_subdirectory(StringMarshalling/LPSTR)
 add_subdirectory(StringMarshalling/LPTSTR)
+add_subdirectory(StringMarshalling/UTF8)
 add_subdirectory(MarshalAPI/FunctionPointer)
 add_subdirectory(MarshalAPI/IUnknown)
diff --git a/tests/src/Interop/StringMarshalling/UTF8/CMakeLists.txt b/tests/src/Interop/StringMarshalling/UTF8/CMakeLists.txt
new file mode 100644 (file)
index 0000000..1e8edbf
--- /dev/null
@@ -0,0 +1,9 @@
+cmake_minimum_required (VERSION 2.6)
+project (UTF8TestNative)
+set(SOURCES UTF8TestNative.cpp )
+
+# add the shared library built from UTF8TestNative.cpp
+add_library (UTF8TestNative SHARED ${SOURCES})
+
+# add the install targets
+install (TARGETS UTF8TestNative DESTINATION bin)
\ No newline at end of file
diff --git a/tests/src/Interop/StringMarshalling/UTF8/UTF8Test.cs b/tests/src/Interop/StringMarshalling/UTF8/UTF8Test.cs
new file mode 100644 (file)
index 0000000..a8f58b3
Binary files /dev/null and b/tests/src/Interop/StringMarshalling/UTF8/UTF8Test.cs differ
diff --git a/tests/src/Interop/StringMarshalling/UTF8/UTF8Test.csproj b/tests/src/Interop/StringMarshalling/UTF8/UTF8Test.csproj
new file mode 100644 (file)
index 0000000..8884419
--- /dev/null
@@ -0,0 +1,50 @@
+<?xml version="1.0" encoding="utf-8"?>
+<Project ToolsVersion="12.0" DefaultTargets="Build" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
+  <Import Project="$([MSBuild]::GetDirectoryNameOfFileAbove($(MSBuildThisFileDirectory), dir.props))\dir.props" />
+  <PropertyGroup>
+    <Configuration Condition=" '$(Configuration)' == '' ">Debug</Configuration>
+    <Platform Condition=" '$(Platform)' == '' ">AnyCPU</Platform>
+    <AssemblyName>UTF8Test</AssemblyName>
+    <SchemaVersion>2.0</SchemaVersion>
+    <ProjectGuid>{F1E66554-8C8E-4141-85CF-D0CD6A0CD0B0}</ProjectGuid>
+    <OutputType>exe</OutputType>
+    <AppDesignerFolder>Properties</AppDesignerFolder>
+    <FileAlignment>512</FileAlignment>
+    <ProjectTypeGuids>{786C830F-07A1-408B-BD7F-6EE04809D6DB};{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}</ProjectTypeGuids>
+    <ReferencePath>$(ProgramFiles)\Common Files\microsoft shared\VSTT\11.0\UITestExtensionPackages</ReferencePath>
+    <SolutionDir Condition="$(SolutionDir) == '' Or $(SolutionDir) == '*Undefined*'">..\..\</SolutionDir>
+    <AllowUnsafeBlocks>true</AllowUnsafeBlocks>  
+    <NuGetPackageImportStamp>7a9bfb7d</NuGetPackageImportStamp>
+    <DefineConstants>$(DefineConstants);STATIC</DefineConstants>
+    <ReferenceLocalMscorlib>true</ReferenceLocalMscorlib>
+  </PropertyGroup>
+  <!-- Default configurations to help VS understand the configurations -->
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)' == 'Debug|x64'">
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)' == 'Release|x64'">
+  </PropertyGroup>
+  <ItemGroup>
+    <CodeAnalysisDependentAssemblyPaths Condition=" '$(VS100COMNTOOLS)' != '' " Include="$(VS100COMNTOOLS)..\IDE\PrivateAssemblies">
+      <Visible>False</Visible>
+    </CodeAnalysisDependentAssemblyPaths>
+  </ItemGroup>
+  <ItemGroup>
+    <Compile Include="*.cs" />    
+    <Compile Include="..\..\common\Assertion.cs" />    
+  </ItemGroup>
+  <ItemGroup>
+    <None Include="project.json" />
+  </ItemGroup>
+  <ItemGroup>
+    <Service Include="{82A7F48D-3B50-4B1E-B82E-3ADA8210C358}" />
+  </ItemGroup>
+  <ItemGroup>
+    <ProjectReference Include="..\..\..\Common\CoreCLRTestLibrary\CoreCLRTestLibrary.csproj">
+      <Project>{c8c0dc74-fac4-45b1-81fe-70c4808366e0}</Project>
+      <Name>CoreCLRTestLibrary</Name>
+    </ProjectReference>
+    <ProjectReference Include="CMakeLists.txt">
+    </ProjectReference>
+  </ItemGroup>
+  <Import Project="$([MSBuild]::GetDirectoryNameOfFileAbove($(MSBuildThisFileDirectory), dir.targets))\dir.targets" />
+</Project>
\ No newline at end of file
diff --git a/tests/src/Interop/StringMarshalling/UTF8/UTF8TestNative.cpp b/tests/src/Interop/StringMarshalling/UTF8/UTF8TestNative.cpp
new file mode 100644 (file)
index 0000000..9759c1a
--- /dev/null
@@ -0,0 +1,319 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+// See the LICENSE file in the project root for more information.
+
+#include <xplatform.h>
+
+// helper functions
+#ifdef _WIN32  
+char* UTF16ToUTF8(wchar_t * pszTextUTF16)
+{
+    if ((pszTextUTF16 == NULL) || (*pszTextUTF16 == L'\0')) {
+        return 0;
+    }
+
+    size_t cchUTF16;
+    cchUTF16 = wcslen(pszTextUTF16) + 1;
+    int cbUTF8 = WideCharToMultiByte(CP_UTF8, 0,
+        pszTextUTF16,
+        (int)cchUTF16,
+        NULL,
+        0/* request buffer size*/,
+        NULL,
+        NULL);
+    
+    char *pszUTF8 = (char*)CoTaskMemAlloc(sizeof(char) * (cbUTF8 + 1));
+    int nc = WideCharToMultiByte(CP_UTF8, // convert to UTF-8
+        0,       //default flags 
+        pszTextUTF16, //source wide string
+        (int)cchUTF16,     // length of wide string
+        pszUTF8,      // destination buffer 
+        cbUTF8,       // destination buffer size
+        NULL,
+        NULL);
+    
+    if (!nc)
+    {
+        throw;
+    }
+
+    pszUTF8[nc] = '\0';
+    return pszUTF8;
+}
+
+wchar_t* UTF8ToUTF16(const char *utf8)
+{
+    // Special case of empty input string
+    //wszTextUTF16
+    wchar_t *wszTextUTF16 = 0;
+    if (!utf8 || !(*utf8))
+        return wszTextUTF16;
+    size_t szUtf8 = strlen(utf8);
+
+    //Get length (in wchar_t's) of resulting UTF-16 string
+    int cbUTF16 = ::MultiByteToWideChar(
+        CP_UTF8,            // convert from UTF-8
+        0,                  // default flags
+        utf8,        // source UTF-8 string
+        (int)szUtf8,      // length (in chars) of source UTF-8 string
+        NULL,               // unused - no conversion done in this step
+        0                   // request size of destination buffer, in wchar_t's
+    );
+    
+    wszTextUTF16 = (wchar_t*)(CoTaskMemAlloc((cbUTF16 + 1 )  * sizeof(wchar_t) ));
+    // Do the actual conversion from UTF-8 to UTF-16
+    int nc = ::MultiByteToWideChar(
+        CP_UTF8,            // convert from UTF-8
+        0,                  // default flags
+        utf8,        // source UTF-8 string
+        (int)szUtf8,      // length (in chars) of source UTF-8 string
+        wszTextUTF16,          // destination buffer
+        cbUTF16);  // size of destination buffer, in wchar_t's
+
+    if (!nc)
+    {
+        throw;
+    }
+    //MultiByteToWideChar do not null terminate the string when cbMultiByte is not -1
+    wszTextUTF16[nc] = '\0';
+    return wszTextUTF16;
+}
+#endif
+
+
+// Returns a CoTaskMemAlloc'ed copy of pReturn, or 0 when pReturn is null or empty.
+LPSTR build_return_string(const char* pReturn)
+{
+    if (pReturn == 0 || *pReturn == 0)
+    {
+        return 0;
+    }
+
+    const size_t length = strlen(pReturn);
+    const size_t bufferSize = length + 1;
+
+    LPSTR copy = (LPSTR)(CoTaskMemAlloc(sizeof(char) * bufferSize));
+
+    // Zero the whole buffer first, so the copy ends in a NUL no matter how
+    // strncpy_s treats a source of exactly `length` characters.
+    memset(copy, '\0', bufferSize);
+    strncpy_s(copy, bufferSize, pReturn, length);
+    return copy;
+}
+
+// This is the same set of strings as on the managed side, but here the
+// strings need to be escaped; even so, CL applied some locale and ended
+// up with a different byte sequence.
+
+const int NSTRINGS = 6;
+#ifdef _WIN32  
+wchar_t  *utf8Strings[] = { L"Managed",
+L"S\x00EEne kl\x00E2wen durh die wolken sint geslagen" ,
+L"\x0915\x093E\x091A\x0902 \x0936\x0915\x094D\x0928\x094B\x092E\x094D\x092F\x0924\x094D\x0924\x0941\x092E\x094D \x0964 \x0928\x094B\x092A\x0939\x093F\x0928\x0938\x094D\x0924\x093F \x092E\x093E\x092E\x094D",
+L"\x6211\x80FD\x541E\x4E0B\x73BB\x7483\x800C\x4E0D\x4F24\x8EAB\x4F53",
+L"\x10E6\x10DB\x10D4\x10E0\x10D7\x10E1\x10D8 \x10E8\x10D4\x10DB\x10D5\x10D4\x10D3\x10E0\x10D4,\x10E8\x10D4\x10DB\x10D5\x10D4\x10D3\x10E0\x10D4, \x10DC\x10E3\x10D7\x10E3 \x10D9\x10D5\x10DA\x10D0 \x10D3\x10D0\x10DB\x10EE\x10E1\x10DC\x10D0\x10E1 \x10E8\x10D4\x10DB\x10D5\x10D4\x10D3\x10E0\x10D4,\x10E1\x10DD\x10E4\x10DA\x10D8\x10E1\x10D0 \x10E8\x10D4\x10DB\x10D5\x10D4\x10D3\x10E0\x10D4, \x10E8\x10D4\x10DB\x10D5\x10D4\x10D3\x10E0\x10D4,\x10E8\x10D4\x10DB\x10D5\x10D4\x10D3\x10E0\x10D4,\x10E8\x10D4\x10DB\x10D5\x10D4\x10D3\x10E0\x10D4,\x10E8\x10E0\x10DD\x10DB\x10D0\x10E1\x10D0, \x10EA\x10D4\x10EA\x10EE\x10DA\x10E1, \x10EC\x10E7\x10D0\x10DA\x10E1\x10D0 \x10D3\x10D0 \x10DB\x10D8\x10EC\x10D0\x10E1\x10D0, \x10F0\x10D0\x10D4\x10E0\x10D7\x10D0 \x10D7\x10D0\x10DC\x10D0 \x10DB\x10E0\x10DD\x10DB\x10D0\x10E1\x10D0; \x10DB\x10DD\x10DB\x10EA\x10DC\x10D4\x10E1 \x10E4\x10E0\x10D7\x10D4\x10DC\x10D8 \x10D3\x10D0 \x10D0\x10E6\x10D5\x10E4\x10E0\x10D8\x10DC\x10D3\x10D4, \x10DB\x10D8\x10D5\x10F0\x10EE\x10D5\x10D3\x10D4 \x10DB\x10D0\x10E1 \x10E9\x10D4\x10DB\x10E1\x10D0 \x10DC\x10D3\x10DD\x10DB\x10D0\x10E1\x10D0, \x10D3\x10E6\x10D8\x10E1\x10D8\x10D7 \x10D3\x10D0 \x10E6\x10D0\x10DB\x10D8\x10D7 \x10D5\x10F0\x10EE\x10D4\x10D3\x10D5\x10D8\x10D3\x10D4 \x10DB\x10D6\x10D8\x10E1\x10D0 \x10D4\x10DA\x10D5\x10D0\x10D7\x10D0 \x10D9\x10E0\x10D7\x10DD\x10DB\x10D0\x10D0\x10E1\x10D0\x10E8\x10D4\x10DB\x10D5\x10D4\x10D3\x10E0\x10D4,\x10E8\x10D4\x10DB\x10D5\x10D4\x10D3\x10E0\x10D4,",
+L"\x03A4\x03B7 \x03B3\x03BB\x03CE\x03C3\x03C3\x03B1 \x03BC\x03BF\x03C5 \x03AD\x03B4\x03C9\x03C3\x03B1\x03BD \x03B5\x03BB\x03BB\x03B7\x03BD\x03B9\x03BA\x03AE",
+L"\0"
+};
+
+#else
+//test strings
+const char  *utf8Strings[] = { "Managed",
+"Sîne klâwen durh die wolken sint geslagen",
+"काचं शक्नोम्यत्तुम् । नोपहिनस्ति माम्",
+"我能吞下玻璃而不伤身体",
+"ღმერთსი შემვედრე,შემვედრე, ნუთუ კვლა დამხსნას შემვედრე,სოფლისა შემვედრე, შემვედრე,შემვედრე,შემვედრე,შრომასა, ცეცხლს, წყალსა და მიწასა, ჰაერთა თანა მრომასა; მომცნეს ფრთენი და აღვფრინდე, მივჰხვდე მას ჩემსა ნდომასა, დღისით და ღამით ვჰხედვიდე მზისა ელვათა კრთომაასაშემვედრე,შემვედრე,",
+"Τη γλώσσα μου έδωσαν ελληνική",
+"\0"
+};
+#endif
+
+// Modify the string builder in place, managed side validates.
+extern "C" DLL_EXPORT void __cdecl StringBuilderParameterInOut(/*[In,Out] StringBuilder*/ char *s, int index)
+{
+    // if string.empty 
+    if (s == 0 || *s == 0)
+        return;
+
+#ifdef _WIN32
+    char *pszTextutf8 = UTF16ToUTF8(utf8Strings[index]);
+#else
+    char *pszTextutf8 = (char*)utf8Strings[index];
+#endif
+
+    // do byte by byte validation of in string
+    size_t szLen = strlen(s);
+    for (size_t i = 0; i < szLen; i++) 
+    {
+        if (s[i] != pszTextutf8[i])
+        {
+            printf("[in] managed string do not match native string\n");
+            throw;
+        }
+    }  
+
+    // modify the string inplace 
+    size_t outLen = strlen(pszTextutf8);
+    for (size_t i = 0; i < outLen; i++) {
+        s[i] = pszTextutf8[i];
+    }
+    s[outLen] = '\0';
+#ifdef _WIN32
+    CoTaskMemFree(pszTextutf8);
+#endif
+}
+
+//out string builder
+extern "C" DLL_EXPORT void __cdecl  StringBuilderParameterOut(/*[In,Out] StringBuilder*/ char *s, int index)
+{
+
+#ifdef _WIN32
+    char *pszTextutf8 = UTF16ToUTF8(utf8Strings[index]);
+#else 
+    char *pszTextutf8 = (char*)utf8Strings[index];
+#endif
+    // modify the string inplace 
+    size_t outLen = strlen(pszTextutf8);
+    for (size_t i = 0; i < outLen; i++) {
+        s[i] = pszTextutf8[i];
+    }
+    s[outLen] = '\0';
+#ifdef _WIN32
+    CoTaskMemFree(pszTextutf8);
+#endif
+}
+
+// Returns a newly CoTaskMemAlloc'ed, NUL-terminated copy of utf8Strings[index].
+extern "C" DLL_EXPORT char* __cdecl  StringBuilderParameterReturn(int index) {
+
+#ifdef _WIN32
+    char *source = UTF16ToUTF8(utf8Strings[index]);
+#else
+    char *source = (char*)utf8Strings[index];
+#endif
+
+    const size_t byteCount = strlen(source);
+    LPSTR copy = (LPSTR)(CoTaskMemAlloc(sizeof(char) * (byteCount + 1)));
+    for (size_t k = 0; k != byteCount; ++k)
+    {
+        copy[k] = source[k];
+    }
+    copy[byteCount] = '\0';
+
+#ifdef _WIN32
+    // The UTF-8 temporary is no longer needed once it has been copied.
+    CoTaskMemFree(source);
+#endif
+
+    return copy;
+}
+
+// Returns the result of build_return_string(s), i.e. a newly allocated copy of
+// s, or 0 when s is null or empty. The index argument is not used.
+extern "C" DLL_EXPORT LPSTR __cdecl StringParameterOut(/*[Out]*/ char *s, int index)
+{
+    LPSTR copy = build_return_string(s);
+    return copy;
+}
+
+// String parameter test: returns the result of build_return_string(s), i.e. a
+// newly allocated copy of s, or 0 when s is null or empty. The index argument
+// is not used.
+extern "C" DLL_EXPORT LPSTR __cdecl StringParameterInOut(/*[In,Out]*/ char *s, int index)
+{
+    LPSTR copy = build_return_string(s);
+    return copy;
+}
+
+// Utf8 field
+// Struct passed by value to TestStructWithUtf8Field.
+typedef struct FieldWithUtf8
+{
+    char *pFirst;   // string that is compared byte by byte against utf8Strings[index]
+    int index;      // index into utf8Strings selecting the expected string
+}FieldWithUtf8;
+
+//utf8 struct field
+extern "C" DLL_EXPORT void _cdecl TestStructWithUtf8Field(struct FieldWithUtf8 fieldStruct)
+{
+    char *pszManagedutf8 = fieldStruct.pFirst;
+    int stringIndex = fieldStruct.index;
+    char *pszNative = 0;
+    size_t outLen = 0;
+
+    if (pszManagedutf8 == 0 || *pszManagedutf8 == 0)
+        return;
+
+#ifdef _WIN32
+    pszNative = UTF16ToUTF8(utf8Strings[stringIndex]);
+#else 
+    pszNative = (char*)utf8Strings[stringIndex];
+#endif
+    outLen = strlen(pszNative);
+    // do byte by byte comparision
+    for (size_t i = 0; i < outLen; i++) 
+    {
+        if (pszNative[i] != pszManagedutf8[i]) 
+        {
+            printf("Native and managed string do not match.\n");
+            throw;
+        }
+    }
+#ifdef _WIN32
+    CoTaskMemFree(pszNative);
+#endif
+}
+
+// Test for the C# `out` keyword: stores a newly CoTaskMemAlloc'ed,
+// NUL-terminated copy of utf8Strings[index] in *s.
+extern "C" DLL_EXPORT void __cdecl StringParameterRefOut(/*out*/ char **s, int index)
+{
+#ifdef _WIN32
+    char *source = UTF16ToUTF8(utf8Strings[index]);
+#else
+    char *source = (char*)utf8Strings[index];
+#endif
+
+    const size_t byteCount = strlen(source);
+    LPSTR copy = (LPSTR)(CoTaskMemAlloc(sizeof(char) * (byteCount + 1)));
+    *s = copy;
+    memcpy(copy, source, byteCount);
+    copy[byteCount] = '\0';
+
+#ifdef _WIN32
+    // On Windows the source string is a temporary produced by UTF16ToUTF8.
+    CoTaskMemFree(source);
+#endif
+}
+
+//c# ref
+extern "C" DLL_EXPORT void __cdecl StringParameterRef(/*ref*/ char **s, int index)
+{
+#ifdef _WIN32
+    char *pszTextutf8 = UTF16ToUTF8(utf8Strings[index]);
+#else
+    char *pszTextutf8 = (char*)utf8Strings[index];
+#endif
+    size_t strLength = strlen(pszTextutf8);
+    // do byte by byte validation of in string
+    size_t szLen = strlen(*s);
+    for (size_t i = 0; i < szLen; i++)
+    {
+        if ((*s)[i] != pszTextutf8[i])
+        {
+            printf("[in] managed string do not match native string\n");
+            throw;
+        }
+    }
+
+    // overwrite the orginal 
+    *s = (LPSTR)(CoTaskMemAlloc(sizeof(char)* (strLength + 1)));
+    memcpy(*s, pszTextutf8, strLength);
+    (*s)[strLength] = '\0';
+#ifdef _WIN32
+    CoTaskMemFree(pszTextutf8);
+#endif
+}
+
+// Stores a newly CoTaskMemAlloc'ed, NUL-terminated copy of the literal
+// "ManagedString" in *s.
+extern "C" DLL_EXPORT void __cdecl StringParameterLPStr(/*out*/ char **s)
+{
+    const char *literal = "ManagedString";
+    const size_t byteCount = strlen(literal);
+
+    LPSTR copy = (LPSTR)(CoTaskMemAlloc(sizeof(char) * (byteCount + 1)));
+    *s = copy;
+    memcpy(copy, literal, byteCount);
+    copy[byteCount] = '\0';
+}
+
+// delegate test
+typedef void (__cdecl * Callback)(char *text, int index);
+extern "C" DLL_EXPORT void _cdecl Utf8DelegateAsParameter(Callback managedCallback)
+{
+    for (int i = 0; i < NSTRINGS; ++i) 
+    {
+        char *pszNative = 0;
+#ifdef _WIN32
+        pszNative = UTF16ToUTF8(utf8Strings[i]);
+#else 
+        pszNative = (char*)utf8Strings[i];
+#endif
+        managedCallback(pszNative, i);
+    }
+}
\ No newline at end of file
diff --git a/tests/src/Interop/StringMarshalling/UTF8/project.json b/tests/src/Interop/StringMarshalling/UTF8/project.json
new file mode 100644 (file)
index 0000000..fc3f026
--- /dev/null
@@ -0,0 +1,15 @@
+{
+  "dependencies": {},
+  "frameworks": {
+    "dnxcore50": {}
+  },
+  "runtimes": {
+    "win7-x86": {},
+    "win7-x64": {},
+    "ubuntu.14.04-x64": {},
+    "osx.10.10-x64": {},
+    "centos.7-x64": {},
+    "rhel.7-x64": {},
+    "debian.8.2-x64": {}
+  }
+}