From 2c0a2c05ba82460a8d8a4b1e2d98e908e59d5d54 Mon Sep 17 00:00:00 2001 From: Tarek Mahmoud Sayed Date: Tue, 15 Nov 2016 17:35:36 -0800 Subject: [PATCH] Full support of idn mapping (#8134) * Enable full implementation of IdnMapping in corelib * Add missing resources --- .../System.Globalization.Native/Interop.Idna.cs | 21 +++ .../corefx/Interop/Windows/mincore/Interop.Idna.cs | 37 +++++ src/mscorlib/corefx/SR.cs | 25 ++++ .../corefx/System/Globalization/IdnMapping.Unix.cs | 134 ++++++++++++++++++ .../System/Globalization/IdnMapping.Windows.cs | 113 +++++++++++++++ .../corefx/System/Globalization/IdnMapping.cs | 152 +++++++++++++++++++++ src/mscorlib/corefx/System/Globalization/STUBS.cs | 17 --- src/mscorlib/mscorlib.shared.sources.props | 4 +- 8 files changed, 485 insertions(+), 18 deletions(-) create mode 100644 src/mscorlib/corefx/Interop/Unix/System.Globalization.Native/Interop.Idna.cs create mode 100644 src/mscorlib/corefx/Interop/Windows/mincore/Interop.Idna.cs create mode 100644 src/mscorlib/corefx/System/Globalization/IdnMapping.Unix.cs create mode 100644 src/mscorlib/corefx/System/Globalization/IdnMapping.Windows.cs create mode 100644 src/mscorlib/corefx/System/Globalization/IdnMapping.cs delete mode 100644 src/mscorlib/corefx/System/Globalization/STUBS.cs diff --git a/src/mscorlib/corefx/Interop/Unix/System.Globalization.Native/Interop.Idna.cs b/src/mscorlib/corefx/Interop/Unix/System.Globalization.Native/Interop.Idna.cs new file mode 100644 index 0000000..43c7228 --- /dev/null +++ b/src/mscorlib/corefx/Interop/Unix/System.Globalization.Native/Interop.Idna.cs @@ -0,0 +1,21 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. +// See the LICENSE file in the project root for more information. 
+
+using System;
+using System.Runtime.InteropServices;
+
+internal static partial class Interop
+{
+    /// <summary>
+    /// P/Invoke declarations for the IDNA entry points of the native library
+    /// identified by <c>Libraries.GlobalizationInterop</c>.
+    /// </summary>
+    internal static partial class GlobalizationInterop
+    {
+        // Option bits that callers OR together to build the "flags" argument below.
+        internal const int AllowUnassigned = 1 << 0;      // 0x1
+        internal const int UseStd3AsciiRules = 1 << 1;    // 0x2
+
+        // NOTE(review): the managed callers treat a return value of 0 as failure and a
+        // value larger than dstBufferCapacity as the required buffer size - confirm
+        // against the native implementation.
+
+        /// <summary>Binds to the native export <c>GlobalizationNative_ToAscii</c>.</summary>
+        [DllImport(Libraries.GlobalizationInterop, EntryPoint = "GlobalizationNative_ToAscii", CharSet = CharSet.Unicode)]
+        internal static extern unsafe int ToAscii(uint flags, char* src, int srcLen, char* dstBuffer, int dstBufferCapacity);
+
+        /// <summary>Binds to the native export <c>GlobalizationNative_ToUnicode</c>.</summary>
+        [DllImport(Libraries.GlobalizationInterop, EntryPoint = "GlobalizationNative_ToUnicode", CharSet = CharSet.Unicode)]
+        internal static extern unsafe int ToUnicode(uint flags, char* src, int srcLen, char* dstBuffer, int dstBufferCapacity);
+    }
+}
diff --git a/src/mscorlib/corefx/Interop/Windows/mincore/Interop.Idna.cs b/src/mscorlib/corefx/Interop/Windows/mincore/Interop.Idna.cs
new file mode 100644
index 0000000..e14f16b
--- /dev/null
+++ b/src/mscorlib/corefx/Interop/Windows/mincore/Interop.Idna.cs
@@ -0,0 +1,37 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+// See the LICENSE file in the project root for more information.
+
+using System;
+using System.Runtime.InteropServices;
+
+internal partial class Interop
+{
+    internal partial class mincore
+    {
+        //
+        // Idn APIs
+        //
+
+        // Option bits for the dwFlags argument of IdnToAscii / IdnToUnicode.
+        internal const int IDN_ALLOW_UNASSIGNED = 0x1;
+        internal const int IDN_USE_STD3_ASCII_RULES = 0x2;
+
+        /// <summary>
+        /// Win32 IdnToAscii. SetLastError is enabled, so the error code can be read
+        /// with Marshal.GetLastWin32Error after the call.
+        /// </summary>
+        [DllImport("api-ms-win-core-localization-l1-2-0.dll", CharSet = CharSet.Unicode, SetLastError = true)]
+        internal static extern int IdnToAscii(
+            uint dwFlags,
+            IntPtr lpUnicodeCharStr,
+            int cchUnicodeChar,
+            [Out] IntPtr lpASCIICharStr,
+            int cchASCIIChar);
+
+        /// <summary>
+        /// Win32 IdnToUnicode. SetLastError is enabled, so the error code can be read
+        /// with Marshal.GetLastWin32Error after the call.
+        /// </summary>
+        [DllImport("api-ms-win-core-localization-l1-2-0.dll", CharSet = CharSet.Unicode, SetLastError = true)]
+        internal static extern int IdnToUnicode(
+            uint dwFlags,
+            IntPtr lpASCIICharStr,
+            int cchASCIIChar,
+            [Out] IntPtr lpUnicodeCharStr,
+            int cchUnicodeChar);
+    }
+}
diff --git a/src/mscorlib/corefx/SR.cs b/src/mscorlib/corefx/SR.cs
index e963e47..d234c9d 100644
--- a/src/mscorlib/corefx/SR.cs
+++ b/src/mscorlib/corefx/SR.cs
@@ -96,6 +96,11 @@ internal static class SR
             get { return Environment.GetResourceString("ArgumentOutOfRange_Index"); }
         }
 
+        public static string ArgumentOutOfRange_IndexCountBuffer
+        {
+            get { return Environment.GetResourceString("ArgumentOutOfRange_IndexCountBuffer"); }
+        }
+
         public static string ArgumentOutOfRange_InvalidEraValue
         {
             get { return Environment.GetResourceString("ArgumentOutOfRange_InvalidEraValue"); }
@@ -176,6 +181,21 @@ internal static class SR
             get { return Environment.GetResourceString("Argument_EmptyDecString"); }
         }
 
+        public static string Argument_IdnBadLabelSize
+        {
+            get { return Environment.GetResourceString("Argument_IdnBadLabelSize"); }
+        }
+
+        public static string Argument_IdnBadPunycode
+        {
+            get { return Environment.GetResourceString("Argument_IdnBadPunycode"); }
+        }
+
+        public static string Argument_IdnIllegalName
+        {
+            get { return Environment.GetResourceString("Argument_IdnIllegalName"); }
+        }
+
         public static
string Argument_InvalidArrayLength
         {
             get { return Environment.GetResourceString("Argument_InvalidArrayLength"); }
@@ -186,6 +206,11 @@ internal static class SR
             get { return Environment.GetResourceString("Argument_InvalidCalendar"); }
         }
 
+        public static string Argument_InvalidCharSequence
+        {
+            get { return Environment.GetResourceString("Argument_InvalidCharSequence"); }
+        }
+
         public static string Argument_InvalidCultureName
         {
             get { return Environment.GetResourceString("Argument_InvalidCultureName"); }
diff --git a/src/mscorlib/corefx/System/Globalization/IdnMapping.Unix.cs b/src/mscorlib/corefx/System/Globalization/IdnMapping.Unix.cs
new file mode 100644
index 0000000..58f4cca
--- /dev/null
+++ b/src/mscorlib/corefx/System/Globalization/IdnMapping.Unix.cs
@@ -0,0 +1,134 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+// See the LICENSE file in the project root for more information.
+
+namespace System.Globalization
+{
+    sealed partial class IdnMapping
+    {
+        /// <summary>
+        /// Converts <paramref name="count"/> chars starting at <paramref name="unicode"/> to
+        /// their ASCII form through the native ToAscii entry point.
+        /// </summary>
+        /// <exception cref="ArgumentException">
+        /// The input contains a rejected control character, or the native call returned 0.
+        /// </exception>
+        private unsafe string GetAsciiCore(char* unicode, int count)
+        {
+            const int StackLimit = 512;
+
+            uint options = Flags;
+            CheckInvalidIdnCharacters(unicode, count, options, nameof(unicode));
+
+            // Worst case: every input char becomes up to 3 ASCII chars, and the
+            // whole string gains the "xn--" prefix (length 4).
+            long worstCase = count * 3L + 4;
+
+            int needed;
+            if (worstCase >= StackLimit)
+            {
+                // Too large to try on the stack: call with no buffer; the result is
+                // used below as the heap buffer size.
+                needed = Interop.GlobalizationInterop.ToAscii(options, unicode, count, null, 0);
+            }
+            else
+            {
+                int stackCapacity = (int)worstCase;
+                char* stackBuffer = stackalloc char[stackCapacity];
+                needed = Interop.GlobalizationInterop.ToAscii(options, unicode, count, stackBuffer, stackCapacity);
+
+                bool fitsOnStack = needed > 0 && needed <= stackCapacity;
+                if (fitsOnStack)
+                {
+                    return new string(stackBuffer, 0, needed);
+                }
+            }
+
+            if (needed == 0)
+            {
+                throw new ArgumentException(SR.Argument_IdnIllegalName, nameof(unicode));
+            }
+
+            // Second attempt with a heap buffer of the size reported by the first call.
+            char[] heapBuffer = new char[needed];
+            fixed (char* pHeapBuffer = heapBuffer)
+            {
+                int written = Interop.GlobalizationInterop.ToAscii(options, unicode, count, pHeapBuffer, needed);
+                if (written == 0 || written > heapBuffer.Length)
+                {
+                    throw new ArgumentException(SR.Argument_IdnIllegalName, nameof(unicode));
+                }
+
+                return new string(pHeapBuffer, 0, written);
+            }
+        }
+
+        /// <summary>
+        /// Converts <paramref name="count"/> chars starting at <paramref name="ascii"/> to
+        /// their Unicode form, starting with an output buffer of <paramref name="count"/> chars.
+        /// </summary>
+        private unsafe string GetUnicodeCore(char* ascii, int count)
+        {
+            const int StackLimit = 512;
+
+            uint options = Flags;
+            CheckInvalidIdnCharacters(ascii, count, options, nameof(ascii));
+
+            if (count >= StackLimit)
+            {
+                char[] heapBuffer = new char[count];
+                fixed (char* pHeapBuffer = heapBuffer)
+                {
+                    return GetUnicodeCore(ascii, count, options, pHeapBuffer, count, reattempt: true);
+                }
+            }
+
+            char* stackBuffer = stackalloc char[count];
+            return GetUnicodeCore(ascii, count, options, stackBuffer, count, reattempt: true);
+        }
+
+        /// <summary>
+        /// Calls the native ToUnicode entry point with the supplied buffer. When the reported
+        /// length exceeds <paramref name="outputLength"/> and <paramref name="reattempt"/> is
+        /// true, retries exactly once with a heap buffer of the reported length.
+        /// </summary>
+        /// <exception cref="ArgumentException">
+        /// The native call returned 0, or the retry still did not fit.
+        /// </exception>
+        private unsafe string GetUnicodeCore(char* ascii, int count, uint flags, char* output, int outputLength, bool reattempt)
+        {
+            int produced = Interop.GlobalizationInterop.ToUnicode(flags, ascii, count, output, outputLength);
+
+            if (produced == 0)
+            {
+                throw new ArgumentException(SR.Argument_IdnIllegalName, nameof(ascii));
+            }
+
+            if (produced <= outputLength)
+            {
+                return new string(output, 0, produced);
+            }
+
+            if (!reattempt)
+            {
+                throw new ArgumentException(SR.Argument_IdnIllegalName, nameof(ascii));
+            }
+
+            char[] largerBuffer = new char[produced];
+            fixed (char* pLargerBuffer = largerBuffer)
+            {
+                return GetUnicodeCore(ascii, count, flags, pLargerBuffer, produced, reattempt: false);
+            }
+        }
+
+        // -----------------------------
+        // ---- PAL layer ends here ----
+        // -----------------------------
+
+        /// <summary>
+        /// The AllowUnassigned / UseStd3AsciiRules properties encoded as the option bits
+        /// expected by the native entry points.
+        /// </summary>
+        private uint Flags
+        {
+            get
+            {
+                int bits = 0;
+                if (AllowUnassigned)
+                {
+                    bits |= Interop.GlobalizationInterop.AllowUnassigned;
+                }
+                if (UseStd3AsciiRules)
+                {
+                    bits |= Interop.GlobalizationInterop.UseStd3AsciiRules;
+                }
+                return (uint)bits;
+            }
+        }
+
+        /// <summary>
+        /// Rejects control characters (0x00-0x1F and 0x7F) when the STD3 rules bit is not set.
+        ///
+        /// Per the original author's note, ICU only checks for invalid characters when the
+        /// STD3 rules option is enabled, so the string is walked here to keep throwing
+        /// ArgumentException in these cases, matching Windows behavior.
+        /// </summary>
+        private static unsafe void CheckInvalidIdnCharacters(char* s, int count, uint flags, string paramName)
+        {
+            bool std3Enabled = (flags & Interop.GlobalizationInterop.UseStd3AsciiRules) != 0;
+            if (std3Enabled)
+            {
+                return;
+            }
+
+            for (int position = 0; position < count; position++)
+            {
+                char current = s[position];
+
+                // These characters are prohibited regardless of the UseStd3AsciiRules property.
+                // See https://msdn.microsoft.com/en-us/library/system.globalization.idnmapping.usestd3asciirules(v=vs.110).aspx
+                bool isProhibited = current < 0x20 || current == 0x7F;
+                if (isProhibited)
+                {
+                    throw new ArgumentException(SR.Argument_IdnIllegalName, paramName);
+                }
+            }
+        }
+    }
+}
diff --git a/src/mscorlib/corefx/System/Globalization/IdnMapping.Windows.cs b/src/mscorlib/corefx/System/Globalization/IdnMapping.Windows.cs
new file mode 100644
index 0000000..f39457b
--- /dev/null
+++ b/src/mscorlib/corefx/System/Globalization/IdnMapping.Windows.cs
@@ -0,0 +1,113 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+// See the LICENSE file in the project root for more information.
+
+using System.Diagnostics;
+using System.Runtime.InteropServices;
+
+namespace System.Globalization
+{
+    sealed partial class IdnMapping
+    {
+        /// <summary>
+        /// Converts <paramref name="count"/> chars starting at <paramref name="unicode"/> to
+        /// their ASCII form using the Win32 IdnToAscii API.
+        /// </summary>
+        /// <exception cref="ArgumentException">IdnToAscii returned 0.</exception>
+        private unsafe string GetAsciiCore(char* unicode, int count)
+        {
+            uint flags = Flags;
+
+            // Determine the required length (no output buffer supplied)
+            int length = Interop.mincore.IdnToAscii(flags, new IntPtr(unicode), count, IntPtr.Zero, 0);
+            if (length == 0)
+            {
+                ThrowForZeroLength(unicode: true);
+            }
+
+            // Do the conversion
+            const int StackAllocThreshold = 512; // arbitrary limit to switch from stack to heap allocation
+            if (length < StackAllocThreshold)
+            {
+                char* output = stackalloc char[length];
+                return GetAsciiCore(unicode, count, flags, output, length);
+            }
+            else
+            {
+                char[] output = new char[length];
+                fixed (char* pOutput = output)
+                {
+                    return GetAsciiCore(unicode, count, flags, pOutput, length);
+                }
+            }
+        }
+
+        /// <summary>
+        /// Runs IdnToAscii into the caller-supplied buffer and wraps the result in a string.
+        /// </summary>
+        /// <exception cref="ArgumentException">IdnToAscii returned 0.</exception>
+        private unsafe string GetAsciiCore(char* unicode, int count, uint flags, char* output, int outputLength)
+        {
+            int length = Interop.mincore.IdnToAscii(flags, new IntPtr(unicode), count, new IntPtr(output), outputLength);
+            if (length == 0)
+            {
+                ThrowForZeroLength(unicode: true);
+            }
+            // The buffer was sized by the length query, so the two calls are expected to agree.
+            Debug.Assert(length == outputLength);
+            return new string(output, 0, length);
+        }
+
+        /// <summary>
+        /// Converts <paramref name="count"/> chars starting at <paramref name="ascii"/> to
+        /// their Unicode form using the Win32 IdnToUnicode API.
+        /// </summary>
+        /// <exception cref="ArgumentException">IdnToUnicode returned 0.</exception>
+        private unsafe string GetUnicodeCore(char* ascii, int count)
+        {
+            uint flags = Flags;
+
+            // Determine the required length (no output buffer supplied)
+            int length = Interop.mincore.IdnToUnicode(flags, new IntPtr(ascii), count, IntPtr.Zero, 0);
+            if (length == 0)
+            {
+                ThrowForZeroLength(unicode: false);
+            }
+
+            // Do the conversion
+            const int StackAllocThreshold = 512; // arbitrary limit to switch from stack to heap allocation
+            if (length < StackAllocThreshold)
+            {
+                char* output = stackalloc char[length];
+                return GetUnicodeCore(ascii, count, flags, output, length);
+            }
+            else
+            {
+                char[] output = new char[length];
+                fixed (char* pOutput = output)
+                {
+                    return GetUnicodeCore(ascii, count, flags, pOutput, length);
+                }
+            }
+        }
+
+        /// <summary>
+        /// Runs IdnToUnicode into the caller-supplied buffer and wraps the result in a string.
+        /// </summary>
+        /// <exception cref="ArgumentException">IdnToUnicode returned 0.</exception>
+        private unsafe string GetUnicodeCore(char* ascii, int count, uint flags, char* output, int outputLength)
+        {
+            int length = Interop.mincore.IdnToUnicode(flags, new IntPtr(ascii), count, new IntPtr(output), outputLength);
+            if (length == 0)
+            {
+                ThrowForZeroLength(unicode: false);
+            }
+            // The buffer was sized by the length query, so the two calls are expected to agree.
+            Debug.Assert(length == outputLength);
+            return new string(output, 0, length);
+        }
+
+        // -----------------------------
+        // ---- PAL layer ends here ----
+        // -----------------------------
+
+        /// <summary>
+        /// The AllowUnassigned / UseStd3AsciiRules properties encoded as the IDN_* option
+        /// bits passed as dwFlags.
+        /// </summary>
+        private uint Flags
+        {
+            get
+            {
+                int flags =
+                    (AllowUnassigned ? Interop.mincore.IDN_ALLOW_UNASSIGNED : 0) |
+                    (UseStd3AsciiRules ? Interop.mincore.IDN_USE_STD3_ASCII_RULES : 0);
+                return (uint)flags;
+            }
+        }
+
+        /// <summary>
+        /// Throws the ArgumentException for a failed IdnToAscii (<paramref name="unicode"/> is
+        /// true) or IdnToUnicode (<paramref name="unicode"/> is false) call. Must be called
+        /// directly after the failing P/Invoke.
+        /// </summary>
+        /// <param name="unicode">
+        /// True when the failing input was the "unicode" argument, false when it was "ascii".
+        /// Selects both the fallback message and the reported parameter name.
+        /// </param>
+        private static void ThrowForZeroLength(bool unicode)
+        {
+            // Read the error code before any SR lookup. The SR getters go through
+            // Environment.GetResourceString, which may itself make native calls that
+            // overwrite the saved last error, so the messages must not be evaluated
+            // by the caller and passed in.
+            int lastError = Marshal.GetLastWin32Error();
+
+            string message;
+            if (lastError == Interop.ERROR_INVALID_NAME)
+            {
+                message = SR.Argument_IdnIllegalName;
+            }
+            else
+            {
+                message = unicode ? SR.Argument_InvalidCharSequenceNoIndex : SR.Argument_IdnBadPunycode;
+            }
+
+            throw new ArgumentException(message, unicode ? "unicode" : "ascii");
+        }
+    }
+}
+
diff --git a/src/mscorlib/corefx/System/Globalization/IdnMapping.cs b/src/mscorlib/corefx/System/Globalization/IdnMapping.cs
new file mode 100644
index 0000000..8424472
--- /dev/null
+++ b/src/mscorlib/corefx/System/Globalization/IdnMapping.cs
@@ -0,0 +1,152 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+// See the LICENSE file in the project root for more information.
+
+// This file contains the IDN functions and implementation.
+//
+//  This allows encoding of non-ASCII domain names in a "punycode" form,
+//  for example:
+//
+//     \u5B89\u5BA4\u5948\u7F8E\u6075-with-SUPER-MONKEYS
+//
+//  is encoded as:
+//
+//     xn---with-SUPER-MONKEYS-pc58ag80a8qai00g7n9n
+//
+//  Additional options are provided to allow unassigned IDN characters and
+//  to validate according to the Std3ASCII Rules (like DNS names).
+//
+//  There are also rules regarding bidirectionality of text and the length
+//  of segments.
+//
+//  For additional rules see also:
+//      RFC 3490 - Internationalizing Domain Names in Applications (IDNA)
+//      RFC 3491 - Nameprep: A Stringprep Profile for Internationalized Domain Names (IDN)
+//      RFC 3492 - Punycode: A Bootstring encoding of Unicode for Internationalized Domain Names in Applications (IDNA)
+
+using System.Diagnostics.Contracts;
+
+namespace System.Globalization
+{
+    /// <summary>
+    /// Maps domain names between their Unicode and ASCII (Punycode) forms.
+    /// The actual conversion is done by the platform-specific GetAsciiCore /
+    /// GetUnicodeCore members of this partial class.
+    /// </summary>
+    public sealed partial class IdnMapping
+    {
+        private bool _allowUnassigned;
+        private bool _useStd3AsciiRules;
+
+        public IdnMapping()
+        {
+        }
+
+        /// <summary>Option passed on to the platform conversion as a flag bit.</summary>
+        public bool AllowUnassigned
+        {
+            get
+            {
+                return _allowUnassigned;
+            }
+            set
+            {
+                _allowUnassigned = value;
+            }
+        }
+
+        /// <summary>Option passed on to the platform conversion as a flag bit.</summary>
+        public bool UseStd3AsciiRules
+        {
+            get
+            {
+                return _useStd3AsciiRules;
+            }
+            set
+            {
+                _useStd3AsciiRules = value;
+            }
+        }
+
+        /// <summary>Gets the ASCII (Punycode) version of the whole string.</summary>
+        public string GetAscii(string unicode)
+        {
+            return GetAscii(unicode, 0);
+        }
+
+        /// <summary>Gets the ASCII version of the string from <paramref name="index"/> to its end.</summary>
+        /// <exception cref="ArgumentNullException"><paramref name="unicode"/> is null.</exception>
+        public string GetAscii(string unicode, int index)
+        {
+            if (unicode == null)
+            {
+                throw new ArgumentNullException(nameof(unicode));
+            }
+            Contract.EndContractBlock();
+
+            int remaining = unicode.Length - index;
+            return GetAscii(unicode, index, remaining);
+        }
+
+        /// <summary>
+        /// Gets the ASCII version of <paramref name="count"/> chars of
+        /// <paramref name="unicode"/> starting at <paramref name="index"/>.
+        /// </summary>
+        /// <exception cref="ArgumentNullException"><paramref name="unicode"/> is null.</exception>
+        /// <exception cref="ArgumentOutOfRangeException">
+        /// <paramref name="index"/> or <paramref name="count"/> is negative, or the range
+        /// does not lie within the string.
+        /// </exception>
+        /// <exception cref="ArgumentException">
+        /// <paramref name="count"/> is 0, or the last char of the range is a null char.
+        /// </exception>
+        public string GetAscii(string unicode, int index, int count)
+        {
+            if (unicode == null)
+            {
+                throw new ArgumentNullException(nameof(unicode));
+            }
+            if (index < 0)
+            {
+                throw new ArgumentOutOfRangeException(nameof(index), SR.ArgumentOutOfRange_NeedNonNegNum);
+            }
+            if (count < 0)
+            {
+                throw new ArgumentOutOfRangeException(nameof(count), SR.ArgumentOutOfRange_NeedNonNegNum);
+            }
+            if (index > unicode.Length)
+            {
+                throw new ArgumentOutOfRangeException(nameof(index), SR.ArgumentOutOfRange_Index);
+            }
+            if (index > unicode.Length - count)
+            {
+                throw new ArgumentOutOfRangeException(nameof(unicode), SR.ArgumentOutOfRange_IndexCountBuffer);
+            }
+            Contract.EndContractBlock();
+
+            if (count == 0)
+            {
+                throw new ArgumentException(SR.Argument_IdnBadLabelSize, nameof(unicode));
+            }
+
+            // A null char at the end of the requested range is rejected up front.
+            int lastPosition = index + count - 1;
+            if (unicode[lastPosition] == '\0')
+            {
+                throw new ArgumentException(SR.Format(SR.Argument_InvalidCharSequence, lastPosition), nameof(unicode));
+            }
+
+            unsafe
+            {
+                fixed (char* pUnicode = unicode)
+                {
+                    char* start = pUnicode + index;
+                    return GetAsciiCore(start, count);
+                }
+            }
+        }
+
+        /// <summary>Gets the Unicode version of the whole string. Normalized and limited to IDNA characters.</summary>
+        public string GetUnicode(string ascii)
+        {
+            return GetUnicode(ascii, 0);
+        }
+
+        /// <summary>Gets the Unicode version of the string from <paramref name="index"/> to its end.</summary>
+        /// <exception cref="ArgumentNullException"><paramref name="ascii"/> is null.</exception>
+        public string GetUnicode(string ascii, int index)
+        {
+            if (ascii == null)
+            {
+                throw new ArgumentNullException(nameof(ascii));
+            }
+            Contract.EndContractBlock();
+
+            int remaining = ascii.Length - index;
+            return GetUnicode(ascii, index, remaining);
+        }
+
+        /// <summary>
+        /// Gets the Unicode version of <paramref name="count"/> chars of
+        /// <paramref name="ascii"/> starting at <paramref name="index"/>.
+        /// </summary>
+        /// <exception cref="ArgumentNullException"><paramref name="ascii"/> is null.</exception>
+        /// <exception cref="ArgumentOutOfRangeException">
+        /// <paramref name="index"/> or <paramref name="count"/> is negative, or the range
+        /// does not lie within the string.
+        /// </exception>
+        /// <exception cref="ArgumentException">The last char of a non-empty range is a null char.</exception>
+        public string GetUnicode(string ascii, int index, int count)
+        {
+            if (ascii == null)
+            {
+                throw new ArgumentNullException(nameof(ascii));
+            }
+            if (index < 0)
+            {
+                throw new ArgumentOutOfRangeException(nameof(index), SR.ArgumentOutOfRange_NeedNonNegNum);
+            }
+            if (count < 0)
+            {
+                throw new ArgumentOutOfRangeException(nameof(count), SR.ArgumentOutOfRange_NeedNonNegNum);
+            }
+            if (index > ascii.Length)
+            {
+                throw new ArgumentOutOfRangeException(nameof(index), SR.ArgumentOutOfRange_Index);
+            }
+            if (index > ascii.Length - count)
+            {
+                throw new ArgumentOutOfRangeException(nameof(ascii), SR.ArgumentOutOfRange_IndexCountBuffer);
+            }
+
+            // Explicitly null-terminated input is a case where .NET and Win32 intentionally differ:
+            // the .NET APIs should (and did in v4.0 and earlier) throw an ArgumentException when the
+            // input includes a terminating null, whereas the Win32 APIs fail on an embedded null
+            // but not on a terminating one. So the terminating null is rejected here.
+            bool endsWithNull = count > 0 && ascii[index + count - 1] == '\0';
+            if (endsWithNull)
+            {
+                throw new ArgumentException(SR.Argument_IdnBadPunycode, nameof(ascii));
+            }
+            Contract.EndContractBlock();
+
+            unsafe
+            {
+                fixed (char* pAscii = ascii)
+                {
+                    char* start = pAscii + index;
+                    return GetUnicodeCore(start, count);
+                }
+            }
+        }
+
+        /// <summary>Two mappings are equal when both option values match.</summary>
+        public override bool Equals(object obj)
+        {
+            IdnMapping other = obj as IdnMapping;
+            if (other == null)
+            {
+                return false;
+            }
+
+            return _allowUnassigned == other._allowUnassigned
+                && _useStd3AsciiRules == other._useStd3AsciiRules;
+        }
+
+        /// <summary>Hash code derived from the two option values only.</summary>
+        public override int GetHashCode()
+        {
+            int hash = _allowUnassigned ? 100 : 200;
+            hash += _useStd3AsciiRules ? 1000 : 2000;
+            return hash;
+        }
+    }
+}
diff --git a/src/mscorlib/corefx/System/Globalization/STUBS.cs b/src/mscorlib/corefx/System/Globalization/STUBS.cs
deleted file mode 100644
index 988d837..0000000
--- a/src/mscorlib/corefx/System/Globalization/STUBS.cs
+++ /dev/null
@@ -1,17 +0,0 @@
-namespace System.Globalization
-{
-    public sealed partial class IdnMapping
-    {
-        public IdnMapping() { }
-        public bool AllowUnassigned { get { throw new NotImplementedException(); } set { throw new NotImplementedException(); } }
-        public bool UseStd3AsciiRules { get { throw new NotImplementedException(); } set { throw new NotImplementedException(); } }
-        public override bool Equals(object obj) { throw new NotImplementedException(); }
-        public string GetAscii(string unicode) { throw new NotImplementedException(); }
-        public string GetAscii(string unicode, int index) { throw new NotImplementedException(); }
-        public string GetAscii(string unicode, int index, int count) { throw new NotImplementedException(); }
-        public override int GetHashCode() { throw new NotImplementedException(); }
-        public string GetUnicode(string ascii) { throw new NotImplementedException(); }
-        public string GetUnicode(string ascii, int index) { throw new NotImplementedException(); }
-        public string GetUnicode(string ascii, int index, int count) { throw new NotImplementedException(); }
-    }
-}
\ No newline at end of file
diff --git
a/src/mscorlib/mscorlib.shared.sources.props b/src/mscorlib/mscorlib.shared.sources.props index 1ace0ce..8253994 100644 --- a/src/mscorlib/mscorlib.shared.sources.props +++ b/src/mscorlib/mscorlib.shared.sources.props @@ -620,7 +620,6 @@ - @@ -643,6 +642,7 @@ + @@ -672,6 +672,7 @@ + @@ -683,6 +684,7 @@ + -- 2.7.4