1 // Licensed to the .NET Foundation under one or more agreements.
2 // The .NET Foundation licenses this file to you under the MIT license.
3 // See the LICENSE file in the project root for more information.
5 using System.Diagnostics;
6 using System.Runtime.CompilerServices;
7 using System.Runtime.InteropServices;
10 namespace System.Globalization
12 public partial class CompareInfo
// Native sort handle. It is produced by Interop.Globalization.GetSortHandle in InitSort and is
// passed as the first argument to the native collation calls made by this class.
15 private Interop.Globalization.SafeSortHandle _sortHandle;
// Gate for the managed ordinal fast paths: the search/prefix/suffix methods below only take
// them when this flag is true (each call site applies additional checks of its own).
18 private bool _isAsciiEqualityOrdinal;
// Initializes the sort-related state of this instance from the given culture: stores its sort
// name, obtains the native sort handle for that name, and sets _isAsciiEqualityOrdinal.
// Throws OutOfMemoryException or ExternalException when the native handle cannot be obtained.
20 private void InitSort(CultureInfo culture)
22 _sortName = culture.SortName;
// NOTE(review): presumably this assignment is the globalization-invariant branch — confirm.
26 _isAsciiEqualityOrdinal = true;
// The sort name is handed to the native layer as a null-terminated UTF-8 byte array.
30 Interop.Globalization.ResultCode resultCode = Interop.Globalization.GetSortHandle(GetNullTerminatedUtf8String(_sortName), out _sortHandle);
31 if (resultCode != Interop.Globalization.ResultCode.Success)
// Dispose the handle before reporting the failure.
33 _sortHandle.Dispose();
35 if (resultCode == Interop.Globalization.ResultCode.OutOfMemory)
36 throw new OutOfMemoryException();
// Every other non-success code is reported as a generic external error.
38 throw new ExternalException(SR.Arg_ExternalException);
// Only the "en-US" and empty sort names enable the ordinal fast paths.
40 _isAsciiEqualityOrdinal = (_sortName == "en-US" || _sortName == "");
// Ordinal search for value inside the window of source that starts at startIndex and spans
// count characters. Must not be used in globalization-invariant mode; source and value must
// be non-null (asserted).
// Two strategies appear below: a native call to Interop.Globalization.IndexOfOrdinalIgnoreCase
// and a managed character-by-character scan.
44 internal static unsafe int IndexOfOrdinalCore(string source, string value, int startIndex, int count, bool ignoreCase)
46 Debug.Assert(!GlobalizationMode.Invariant);
48 Debug.Assert(source != null);
49 Debug.Assert(value != null);
// Special case: empty value.
51 if (value.Length == 0)
// Special case: the search window is shorter than value.
56 if (count < value.Length)
63 fixed (char* pSource = source)
// The native search is given the window starting at pSource + startIndex, count chars long.
65 int index = Interop.Globalization.IndexOfOrdinalIgnoreCase(value, value.Length, pSource + startIndex, count, findLast: false);
// Last position in source at which a full-length match can still begin.
72 int endIndex = startIndex + (count - value.Length);
73 for (int i = startIndex; i <= endIndex; i++)
75 int valueIndex, sourceIndex;
// Advance while characters are equal; the loop body is intentionally empty.
77 for (valueIndex = 0, sourceIndex = i;
78 valueIndex < value.Length && source[sourceIndex] == value[valueIndex];
79 valueIndex++, sourceIndex++) ;
// All of value was consumed, so every character matched at candidate position i.
81 if (valueIndex == value.Length)
// Span-based ordinal search for value inside source. Must not be used in
// globalization-invariant mode; both spans must be non-empty (asserted).
// Two strategies appear below: a native call to Interop.Globalization.IndexOfOrdinalIgnoreCase
// and a managed character-by-character scan.
90 internal static unsafe int IndexOfOrdinalCore(ReadOnlySpan<char> source, ReadOnlySpan<char> value, bool ignoreCase)
92 Debug.Assert(!GlobalizationMode.Invariant);
94 Debug.Assert(source.Length != 0);
95 Debug.Assert(value.Length != 0);
// Special case: source is shorter than value.
97 if (source.Length < value.Length)
// Pin both spans so raw pointers can be passed to the native search.
104 fixed (char* pSource = &MemoryMarshal.GetReference(source))
105 fixed (char* pValue = &MemoryMarshal.GetReference(value))
107 int index = Interop.Globalization.IndexOfOrdinalIgnoreCase(pValue, value.Length, pSource, source.Length, findLast: false);
// Last position in source at which a full-length match can still begin.
112 int endIndex = source.Length - value.Length;
113 for (int i = 0; i <= endIndex; i++)
115 int valueIndex, sourceIndex;
// Advance while characters are equal.
117 for (valueIndex = 0, sourceIndex = i;
118 valueIndex < value.Length && source[sourceIndex] == value[valueIndex];
119 valueIndex++, sourceIndex++)
// All of value was consumed, so every character matched at candidate position i.
122 if (valueIndex == value.Length)
// Ordinal backwards search for value inside source. startIndex is where the backwards search
// begins and count is the number of characters to examine. Must not be used in
// globalization-invariant mode; source and value must be non-null (asserted).
// Two strategies appear below: a native call to Interop.Globalization.IndexOfOrdinalIgnoreCase
// with findLast: true, and a managed right-to-left scan.
131 internal static unsafe int LastIndexOfOrdinalCore(string source, string value, int startIndex, int count, bool ignoreCase)
133 Debug.Assert(!GlobalizationMode.Invariant);
135 Debug.Assert(source != null);
136 Debug.Assert(value != null);
// Special case: empty value.
138 if (value.Length == 0)
// Special case: the search window is shorter than value.
143 if (count < value.Length)
148 // startIndex is the index into source where we start search backwards from.
149 // leftStartIndex is the index into source of the start of the string that is
150 // count characters away from startIndex.
151 int leftStartIndex = startIndex - count + 1;
155 fixed (char* pSource = source)
// The native result is relative to pSource + leftStartIndex, hence the rebasing below.
157 int lastIndex = Interop.Globalization.IndexOfOrdinalIgnoreCase(value, value.Length, pSource + leftStartIndex, count, findLast: true);
158 return lastIndex != -1 ?
159 leftStartIndex + lastIndex :
// Candidates run from the right-most position where value still fits down to leftStartIndex.
164 for (int i = startIndex - value.Length + 1; i >= leftStartIndex; i--)
166 int valueIndex, sourceIndex;
// Advance while characters are equal; the loop body is intentionally empty.
168 for (valueIndex = 0, sourceIndex = i;
169 valueIndex < value.Length && source[sourceIndex] == value[valueIndex];
170 valueIndex++, sourceIndex++) ;
// All of value was consumed, so every character matched at candidate position i.
172 if (valueIndex == value.Length) {
// Compares two character buffers with the native ordinal, case-insensitive comparison and
// returns the native result unchanged. Must not be used in globalization-invariant mode.
private static unsafe int CompareStringOrdinalIgnoreCase(char* string1, int count1, char* string2, int count2)
{
    Debug.Assert(!GlobalizationMode.Invariant);

    int comparison = Interop.Globalization.CompareStringOrdinalIgnoreCase(string1, count1, string2, count2);
    return comparison;
}
187 // TODO https://github.com/dotnet/coreclr/issues/13827:
188 // This method shouldn't be necessary, as we should be able to just use the overload
189 // that takes two spans. But due to this issue, that's adding significant overhead.
// Culture-sensitive comparison of a span against a string using this instance's sort handle.
// Ordinal and OrdinalIgnoreCase must already have been handled by the caller (asserted).
private unsafe int CompareString(ReadOnlySpan<char> string1, string string2, CompareOptions options)
{
    Debug.Assert(!_invariantMode);
    Debug.Assert(string2 != null);
    Debug.Assert((options & (CompareOptions.Ordinal | CompareOptions.OrdinalIgnoreCase)) == 0);

    int comparison;

    // Pin both operands so raw pointers can be handed to the native comparison.
    fixed (char* pLeft = &MemoryMarshal.GetReference(string1))
    fixed (char* pRight = &string2.GetRawStringData())
    {
        comparison = Interop.Globalization.CompareString(_sortHandle, pLeft, string1.Length, pRight, string2.Length, options);
    }

    return comparison;
}
// Culture-sensitive comparison of two spans using this instance's sort handle.
// Ordinal and OrdinalIgnoreCase must already have been handled by the caller (asserted).
private unsafe int CompareString(ReadOnlySpan<char> string1, ReadOnlySpan<char> string2, CompareOptions options)
{
    Debug.Assert(!_invariantMode);
    Debug.Assert((options & (CompareOptions.Ordinal | CompareOptions.OrdinalIgnoreCase)) == 0);

    int comparison;

    // Pin both operands so raw pointers can be handed to the native comparison.
    fixed (char* pLeft = &MemoryMarshal.GetReference(string1))
    fixed (char* pRight = &MemoryMarshal.GetReference(string2))
    {
        comparison = Interop.Globalization.CompareString(_sortHandle, pLeft, string1.Length, pRight, string2.Length, options);
    }

    return comparison;
}
// Culture-sensitive search for target inside the window of source that starts at startIndex
// and spans count characters. matchLengthPtr is optional: the managed paths write
// target.Length through it when it is non-null, and the native call receives the pointer as-is.
// Preconditions (asserted): not in invariant mode, source is non-empty, target is non-null and
// OrdinalIgnoreCase is not set in options.
215 internal unsafe int IndexOfCore(string source, string target, int startIndex, int count, CompareOptions options, int* matchLengthPtr)
217 Debug.Assert(!_invariantMode);
219 Debug.Assert(!string.IsNullOrEmpty(source));
220 Debug.Assert(target != null);
221 Debug.Assert((options & CompareOptions.OrdinalIgnoreCase) == 0);
// Special case: empty target.
225 if (target.Length == 0)
227 if (matchLengthPtr != null)
// Pure ordinal search needs no collation.
232 if (options == CompareOptions.Ordinal)
234 index = IndexOfOrdinal(source, target, startIndex, count, ignoreCase: false);
237 if (matchLengthPtr != null)
238 *matchLengthPtr = target.Length;
// Fast path: the flag is set, the options allow it and both strings report IsFastSort(),
// so the search is redone with the equivalent ordinal options.
243 if (_isAsciiEqualityOrdinal && CanUseAsciiOrdinalForOptions(options) && source.IsFastSort() && target.IsFastSort())
245 index = IndexOf(source, target, startIndex, count, GetOrdinalCompareOptions(options));
248 if (matchLengthPtr != null)
249 *matchLengthPtr = target.Length;
254 fixed (char* pSource = source)
// The native search runs on the window starting at pSource + startIndex.
256 index = Interop.Globalization.IndexOf(_sortHandle, target, target.Length, pSource + startIndex, count, options, matchLengthPtr);
// Rebase the window-relative result onto source; -1 is passed through unchanged.
258 return index != -1 ? index + startIndex : -1;
262 // For now, this method is only called from Span APIs with either options == CompareOptions.None or CompareOptions.IgnoreCase
// Span-based culture-sensitive search for target inside source.
// When the ordinal fast path is allowed, the work goes to one of the two managed helpers
// (case-insensitive or case-sensitive); otherwise it goes to the native IndexOf.
internal unsafe int IndexOfCore(ReadOnlySpan<char> source, ReadOnlySpan<char> target, CompareOptions options, int* matchLengthPtr)
{
    Debug.Assert(!_invariantMode);
    Debug.Assert(source.Length != 0);
    Debug.Assert(target.Length != 0);

    bool useManagedFastPath = _isAsciiEqualityOrdinal && CanUseAsciiOrdinalForOptions(options);
    if (!useManagedFastPath)
    {
        // Pin both spans so raw pointers can be handed to the native search.
        fixed (char* pHaystack = &MemoryMarshal.GetReference(source))
        fixed (char* pNeedle = &MemoryMarshal.GetReference(target))
        {
            return Interop.Globalization.IndexOf(_sortHandle, pNeedle, target.Length, pHaystack, source.Length, options, matchLengthPtr);
        }
    }

    bool ignoreCase = (options & CompareOptions.IgnoreCase) == CompareOptions.IgnoreCase;
    return ignoreCase
        ? IndexOfOrdinalIgnoreCaseHelper(source, target, options, matchLengthPtr)
        : IndexOfOrdinalHelper(source, target, options, matchLengthPtr);
}
// Managed fast path for a case-insensitive IndexOf over spans. It scans source for target,
// folding 'a'..'z' to upper case, and ends with a call to the native
// Interop.Globalization.IndexOf.
// NOTE(review): presumably the native call is the fallback taken when a character is >= 0x80
// or flagged in s_highCharTable — confirm against the full control flow.
// Preconditions (asserted): not in invariant mode, both spans non-empty, and
// _isAsciiEqualityOrdinal is true.
290 private unsafe int IndexOfOrdinalIgnoreCaseHelper(ReadOnlySpan<char> source, ReadOnlySpan<char> target, CompareOptions options, int* matchLengthPtr)
292 Debug.Assert(!_invariantMode);
294 Debug.Assert(!source.IsEmpty);
295 Debug.Assert(!target.IsEmpty);
296 Debug.Assert(_isAsciiEqualityOrdinal);
// Pin both spans for pointer-based access.
298 fixed (char* ap = &MemoryMarshal.GetReference(source))
299 fixed (char* bp = &MemoryMarshal.GetReference(target))
// Last position in source at which a full-length match can still begin.
303 int endIndex = source.Length - target.Length;
// Pre-scan of target for characters the managed scan cannot handle.
308 for (int j = 0; j < target.Length; j++)
310 char targetChar = *(b + j);
311 if (targetChar >= 0x80 || s_highCharTable[targetChar])
// Try each candidate start position in source.
316 for (; i <= endIndex; i++)
321 for (; targetIndex < target.Length; targetIndex++)
323 char valueChar = *(a + sourceIndex);
324 char targetChar = *(b + targetIndex);
// Identical characters that are ASCII and not flagged in the table need no case folding.
326 if (valueChar == targetChar && valueChar < 0x80 && !s_highCharTable[valueChar])
332 // uppercase both chars - notice that we need just one compare per char
333 if ((uint)(valueChar - 'a') <= ('z' - 'a'))
334 valueChar = (char)(valueChar - 0x20);
335 if ((uint)(targetChar - 'a') <= ('z' - 'a'))
336 targetChar = (char)(targetChar - 0x20);
338 if (valueChar >= 0x80 || s_highCharTable[valueChar])
340 else if (valueChar != targetChar)
// The whole target was consumed at this position.
345 if (targetIndex == target.Length)
347 if (matchLengthPtr != null)
348 *matchLengthPtr = target.Length;
355 return Interop.Globalization.IndexOf(_sortHandle, b, target.Length, a, source.Length, options, matchLengthPtr);
// Managed fast path for a case-sensitive IndexOf over spans. It scans source for target
// character by character and ends with a call to the native Interop.Globalization.IndexOf.
// NOTE(review): presumably the native call is the fallback taken when a character is >= 0x80
// or flagged in s_highCharTable — confirm against the full control flow.
// Preconditions (asserted): not in invariant mode, both spans non-empty, and
// _isAsciiEqualityOrdinal is true.
359 private unsafe int IndexOfOrdinalHelper(ReadOnlySpan<char> source, ReadOnlySpan<char> target, CompareOptions options, int* matchLengthPtr)
361 Debug.Assert(!_invariantMode);
363 Debug.Assert(!source.IsEmpty);
364 Debug.Assert(!target.IsEmpty);
365 Debug.Assert(_isAsciiEqualityOrdinal);
// Pin both spans for pointer-based access.
367 fixed (char* ap = &MemoryMarshal.GetReference(source))
368 fixed (char* bp = &MemoryMarshal.GetReference(target))
// Last position in source at which a full-length match can still begin.
372 int endIndex = source.Length - target.Length;
// Pre-scan of target for characters the managed scan cannot handle.
377 for (int j = 0; j < target.Length; j++)
379 char targetChar = *(b + j);
380 if (targetChar >= 0x80 || s_highCharTable[targetChar])
// Try each candidate start position in source.
385 for (; i <= endIndex; i++)
390 for (; targetIndex < target.Length; targetIndex++)
392 char valueChar = *(a + sourceIndex);
393 char targetChar = *(b + targetIndex);
394 if (valueChar >= 0x80 || s_highCharTable[valueChar])
396 else if (valueChar != targetChar)
// The whole target was consumed at this position.
401 if (targetIndex == target.Length)
403 if (matchLengthPtr != null)
404 *matchLengthPtr = target.Length;
411 return Interop.Globalization.IndexOf(_sortHandle, b, target.Length, a, source.Length, options, matchLengthPtr);
// Culture-sensitive backwards search for target inside source. startIndex is where the
// backwards search begins and count is the number of characters to examine.
// Preconditions (asserted): not in invariant mode, source is non-empty, target is non-null and
// OrdinalIgnoreCase is not set in options.
415 private unsafe int LastIndexOfCore(string source, string target, int startIndex, int count, CompareOptions options)
417 Debug.Assert(!_invariantMode);
419 Debug.Assert(!string.IsNullOrEmpty(source));
420 Debug.Assert(target != null);
421 Debug.Assert((options & CompareOptions.OrdinalIgnoreCase) == 0);
// Special case: empty target.
423 if (target.Length == 0)
// Pure ordinal search needs no collation.
428 if (options == CompareOptions.Ordinal)
430 return LastIndexOfOrdinalCore(source, target, startIndex, count, ignoreCase: false);
// Fast path: the flag is set, the options allow it and both strings report IsFastSort(),
// so the search is redone with the equivalent ordinal options.
433 if (_isAsciiEqualityOrdinal && CanUseAsciiOrdinalForOptions(options) && source.IsFastSort() && target.IsFastSort())
435 return LastIndexOf(source, target, startIndex, count, GetOrdinalCompareOptions(options));
438 // startIndex is the index into source where we start search backwards from. leftStartIndex is the index into source
439 // of the start of the string that is count characters away from startIndex.
440 int leftStartIndex = (startIndex - count + 1);
442 fixed (char* pSource = source)
// The pointer offset (startIndex - count + 1) is the same value as leftStartIndex.
444 int lastIndex = Interop.Globalization.LastIndexOf(_sortHandle, target, target.Length, pSource + (startIndex - count + 1), count, options);
// Rebase the window-relative result onto source; -1 is passed through unchanged.
446 return lastIndex != -1 ? lastIndex + leftStartIndex : -1;
// Culture-sensitive test of whether source begins with prefix.
// Preconditions (asserted): not in invariant mode, both strings non-empty, and neither Ordinal
// nor OrdinalIgnoreCase set in options.
private bool StartsWith(string source, string prefix, CompareOptions options)
{
    Debug.Assert(!_invariantMode);

    Debug.Assert(!string.IsNullOrEmpty(source));
    Debug.Assert(!string.IsNullOrEmpty(prefix));
    Debug.Assert((options & (CompareOptions.Ordinal | CompareOptions.OrdinalIgnoreCase)) == 0);

    // The ordinal shortcut applies only when the flag is set, the options allow it and both
    // strings report IsFastSort().
    bool ordinalShortcutApplies = _isAsciiEqualityOrdinal
        && CanUseAsciiOrdinalForOptions(options)
        && source.IsFastSort()
        && prefix.IsFastSort();

    return ordinalShortcutApplies
        ? IsPrefix(source, prefix, GetOrdinalCompareOptions(options))
        : Interop.Globalization.StartsWith(_sortHandle, prefix, prefix.Length, source, source.Length, options);
}
// Span-based culture-sensitive test of whether source begins with prefix.
// When the ordinal fast path is allowed, the work goes to one of the two managed helpers;
// otherwise it goes to the native StartsWith.
// Preconditions (asserted): not in invariant mode, both spans non-empty, and neither Ordinal
// nor OrdinalIgnoreCase set in options.
466 private unsafe bool StartsWith(ReadOnlySpan<char> source, ReadOnlySpan<char> prefix, CompareOptions options)
468 Debug.Assert(!_invariantMode);
470 Debug.Assert(!source.IsEmpty);
471 Debug.Assert(!prefix.IsEmpty);
472 Debug.Assert((options & (CompareOptions.Ordinal | CompareOptions.OrdinalIgnoreCase)) == 0);
474 if (_isAsciiEqualityOrdinal && CanUseAsciiOrdinalForOptions(options))
// The helpers assert source.Length >= prefix.Length, so a shorter source is handled here.
476 if (source.Length < prefix.Length)
481 if ((options & CompareOptions.IgnoreCase) == CompareOptions.IgnoreCase)
483 return StartsWithOrdinalIgnoreCaseHelper(source, prefix, options);
487 return StartsWithOrdinalHelper(source, prefix, options);
// Pin both spans so raw pointers can be handed to the native call.
492 fixed (char* pSource = &MemoryMarshal.GetReference(source))
493 fixed (char* pPrefix = &MemoryMarshal.GetReference(prefix))
495 return Interop.Globalization.StartsWith(_sortHandle, pPrefix, prefix.Length, pSource, source.Length, options);
// Managed fast path for a case-insensitive StartsWith over spans. It compares characters while
// both sides are below 0x80 and not flagged in s_highCharTable, folding 'a'..'z' to upper case.
// It returns true once the whole prefix has been consumed; otherwise it ends with a call to the
// native StartsWith.
// Preconditions (asserted): not in invariant mode, both spans non-empty,
// _isAsciiEqualityOrdinal is true, and source is at least as long as prefix.
500 private unsafe bool StartsWithOrdinalIgnoreCaseHelper(ReadOnlySpan<char> source, ReadOnlySpan<char> prefix, CompareOptions options)
502 Debug.Assert(!_invariantMode);
504 Debug.Assert(!source.IsEmpty);
505 Debug.Assert(!prefix.IsEmpty);
506 Debug.Assert(_isAsciiEqualityOrdinal);
507 Debug.Assert(source.Length >= prefix.Length);
// Number of prefix characters still to be compared.
509 int length = prefix.Length;
511 fixed (char* ap = &MemoryMarshal.GetReference(source))
512 fixed (char* bp = &MemoryMarshal.GetReference(prefix))
// The loop stops at the first character on either side that is >= 0x80 or flagged in the table.
517 while (length != 0 && (*a < 0x80) && (*b < 0x80) && (!s_highCharTable[*a]) && (!s_highCharTable[*b]))
529 // uppercase both chars - notice that we need just one compare per char
530 if ((uint)(charA - 'a') <= (uint)('z' - 'a')) charA -= 0x20;
531 if ((uint)(charB - 'a') <= (uint)('z' - 'a')) charB -= 0x20;
541 if (length == 0) return true;
// NOTE(review): presumably a and b point at the first characters not yet compared — confirm.
542 return Interop.Globalization.StartsWith(_sortHandle, b, length, a, length, options);
// Managed fast path for a case-sensitive StartsWith over spans. It compares characters while
// both sides are below 0x80 and not flagged in s_highCharTable. It returns true once the whole
// prefix has been consumed; otherwise it ends with a call to the native StartsWith.
// Preconditions (asserted): not in invariant mode, both spans non-empty,
// _isAsciiEqualityOrdinal is true, and source is at least as long as prefix.
546 private unsafe bool StartsWithOrdinalHelper(ReadOnlySpan<char> source, ReadOnlySpan<char> prefix, CompareOptions options)
548 Debug.Assert(!_invariantMode);
550 Debug.Assert(!source.IsEmpty);
551 Debug.Assert(!prefix.IsEmpty);
552 Debug.Assert(_isAsciiEqualityOrdinal);
553 Debug.Assert(source.Length >= prefix.Length);
// Number of prefix characters still to be compared.
555 int length = prefix.Length;
557 fixed (char* ap = &MemoryMarshal.GetReference(source))
558 fixed (char* bp = &MemoryMarshal.GetReference(prefix))
// The loop stops at the first character on either side that is >= 0x80 or flagged in the table.
563 while (length != 0 && (*a < 0x80) && (*b < 0x80) && (!s_highCharTable[*a]) && (!s_highCharTable[*b]))
576 if (length == 0) return true;
// NOTE(review): presumably a and b point at the first characters not yet compared — confirm.
577 return Interop.Globalization.StartsWith(_sortHandle, b, length, a, length, options);
// Culture-sensitive test of whether source ends with suffix.
// Preconditions (asserted): not in invariant mode, both strings non-empty, and neither Ordinal
// nor OrdinalIgnoreCase set in options.
private bool EndsWith(string source, string suffix, CompareOptions options)
{
    Debug.Assert(!_invariantMode);

    Debug.Assert(!string.IsNullOrEmpty(source));
    Debug.Assert(!string.IsNullOrEmpty(suffix));
    Debug.Assert((options & (CompareOptions.Ordinal | CompareOptions.OrdinalIgnoreCase)) == 0);

    // The ordinal shortcut applies only when the flag is set, the options allow it and both
    // strings report IsFastSort().
    bool ordinalShortcutApplies = _isAsciiEqualityOrdinal
        && CanUseAsciiOrdinalForOptions(options)
        && source.IsFastSort()
        && suffix.IsFastSort();

    return ordinalShortcutApplies
        ? IsSuffix(source, suffix, GetOrdinalCompareOptions(options))
        : Interop.Globalization.EndsWith(_sortHandle, suffix, suffix.Length, source, source.Length, options);
}
// Span-based culture-sensitive test of whether source ends with suffix.
// When the ordinal fast path is allowed, the work goes to one of the two managed helpers;
// otherwise it goes to the native EndsWith.
// Preconditions (asserted): not in invariant mode, both spans non-empty, and neither Ordinal
// nor OrdinalIgnoreCase set in options.
597 private unsafe bool EndsWith(ReadOnlySpan<char> source, ReadOnlySpan<char> suffix, CompareOptions options)
599 Debug.Assert(!_invariantMode);
601 Debug.Assert(!source.IsEmpty);
602 Debug.Assert(!suffix.IsEmpty);
603 Debug.Assert((options & (CompareOptions.Ordinal | CompareOptions.OrdinalIgnoreCase)) == 0);
605 if (_isAsciiEqualityOrdinal && CanUseAsciiOrdinalForOptions(options))
// The helpers assert source.Length >= suffix.Length, so a shorter source is handled here.
607 if (source.Length < suffix.Length)
612 if ((options & CompareOptions.IgnoreCase) == CompareOptions.IgnoreCase)
614 return EndsWithOrdinalIgnoreCaseHelper(source, suffix, options);
618 return EndsWithOrdinalHelper(source, suffix, options);
// Pin both spans so raw pointers can be handed to the native call.
623 fixed (char* pSource = &MemoryMarshal.GetReference(source))
624 fixed (char* pSuffix = &MemoryMarshal.GetReference(suffix))
626 return Interop.Globalization.EndsWith(_sortHandle, pSuffix, suffix.Length, pSource, source.Length, options);
// Managed fast path for a case-insensitive EndsWith over spans. Starting at the last character
// of each span, it compares characters while both sides are below 0x80 and not flagged in
// s_highCharTable, folding 'a'..'z' to upper case. It returns true once the whole suffix has
// been consumed; otherwise it ends with a call to the native EndsWith.
// Preconditions (asserted): not in invariant mode, both spans non-empty,
// _isAsciiEqualityOrdinal is true, and source is at least as long as suffix.
631 private unsafe bool EndsWithOrdinalIgnoreCaseHelper(ReadOnlySpan<char> source, ReadOnlySpan<char> suffix, CompareOptions options)
633 Debug.Assert(!_invariantMode);
635 Debug.Assert(!source.IsEmpty);
636 Debug.Assert(!suffix.IsEmpty);
637 Debug.Assert(_isAsciiEqualityOrdinal);
638 Debug.Assert(source.Length >= suffix.Length);
// Number of suffix characters still to be compared.
640 int length = suffix.Length;
642 fixed (char* ap = &MemoryMarshal.GetReference(source))
643 fixed (char* bp = &MemoryMarshal.GetReference(suffix))
// Both cursors start on the last character of their span.
645 char* a = ap + source.Length - 1;
646 char* b = bp + suffix.Length - 1;
// The loop stops at the first character on either side that is >= 0x80 or flagged in the table.
648 while (length != 0 && (*a < 0x80) && (*b < 0x80) && (!s_highCharTable[*a]) && (!s_highCharTable[*b]))
660 // uppercase both chars - notice that we need just one compare per char
661 if ((uint)(charA - 'a') <= (uint)('z' - 'a')) charA -= 0x20;
662 if ((uint)(charB - 'a') <= (uint)('z' - 'a')) charB -= 0x20;
672 if (length == 0) return true;
// The native call gets the length-char segments that end at the current cursors.
673 return Interop.Globalization.EndsWith(_sortHandle, b - length + 1, length, a - length + 1, length, options);
// Managed fast path for a case-sensitive EndsWith over spans. Starting at the last character
// of each span, it compares characters while both sides are below 0x80 and not flagged in
// s_highCharTable. It returns true once the whole suffix has been consumed; otherwise it ends
// with a call to the native EndsWith.
// Preconditions (asserted): not in invariant mode, both spans non-empty,
// _isAsciiEqualityOrdinal is true, and source is at least as long as suffix.
677 private unsafe bool EndsWithOrdinalHelper(ReadOnlySpan<char> source, ReadOnlySpan<char> suffix, CompareOptions options)
679 Debug.Assert(!_invariantMode);
681 Debug.Assert(!source.IsEmpty);
682 Debug.Assert(!suffix.IsEmpty);
683 Debug.Assert(_isAsciiEqualityOrdinal);
684 Debug.Assert(source.Length >= suffix.Length);
// Number of suffix characters still to be compared.
686 int length = suffix.Length;
688 fixed (char* ap = &MemoryMarshal.GetReference(source))
689 fixed (char* bp = &MemoryMarshal.GetReference(suffix))
// Both cursors start on the last character of their span.
691 char* a = ap + source.Length - 1;
692 char* b = bp + suffix.Length - 1;
// The loop stops at the first character on either side that is >= 0x80 or flagged in the table.
694 while (length != 0 && (*a < 0x80) && (*b < 0x80) && (!s_highCharTable[*a]) && (!s_highCharTable[*b]))
707 if (length == 0) return true;
// The native call gets the length-char segments that end at the current cursors.
708 return Interop.Globalization.EndsWith(_sortHandle, b - length + 1, length, a - length + 1, length, options);
// Builds the SortKey for source under the given options.
// Throws ArgumentNullException when source is null and ArgumentException when options contains
// a flag covered by ValidSortkeyCtorMaskOffFlags.
// An empty source gets an empty key; otherwise the key bytes come from the native GetSortKey.
712 private unsafe SortKey CreateSortKey(String source, CompareOptions options)
714 Debug.Assert(!_invariantMode);
716 if (source==null) { throw new ArgumentNullException(nameof(source)); }
718 if ((options & ValidSortkeyCtorMaskOffFlags) != 0)
720 throw new ArgumentException(SR.Argument_InvalidFlag, nameof(options));
724 if (source.Length == 0)
726 keyData = Array.Empty<Byte>();
// First call passes a null buffer of size 0; its result is used as the required key length.
730 int sortKeyLength = Interop.Globalization.GetSortKey(_sortHandle, source, source.Length, null, 0, options);
731 keyData = new byte[sortKeyLength];
// Second call fills the pinned buffer with the key bytes.
733 fixed (byte* pSortKey = keyData)
735 Interop.Globalization.GetSortKey(_sortHandle, source, source.Length, pSortKey, sortKeyLength, options);
739 return new SortKey(Name, source, options, keyData);
// Walks the length UTF-16 code units at text and inspects each character (or surrogate pair).
// It returns false for an unpaired surrogate and checks every character's Unicode category
// against PrivateUse and OtherNotAssigned.
// NOTE(review): presumably those two categories also make the text unsortable — confirm.
// Must not be used in globalization-invariant mode (asserted).
742 private static unsafe bool IsSortable(char *text, int length)
744 Debug.Assert(!GlobalizationMode.Invariant);
749 while (index < length)
751 if (Char.IsHighSurrogate(text[index]))
// A high surrogate must be followed by a low surrogate inside the buffer.
753 if (index == length - 1 || !Char.IsLowSurrogate(text[index+1]))
754 return false; // unpaired surrogate
// Categorize the full code point formed by the surrogate pair.
756 uc = CharUnicodeInfo.GetUnicodeCategory(Char.ConvertToUtf32(text[index], text[index+1]));
757 if (uc == UnicodeCategory.PrivateUse || uc == UnicodeCategory.OtherNotAssigned)
// A low surrogate that was not consumed as part of a pair is unpaired.
764 if (Char.IsLowSurrogate(text[index]))
766 return false; // unpaired surrogate
769 uc = CharUnicodeInfo.GetUnicodeCategory(text[index]);
770 if (uc == UnicodeCategory.PrivateUse || uc == UnicodeCategory.OtherNotAssigned)
781 // -----------------------------
782 // ---- PAL layer ends here ----
783 // -----------------------------
// Computes a culture-sensitive hash code for source: it obtains the sort key for source under
// the given options and hashes the key bytes with InternalHashSortKey.
// Preconditions (asserted): not in invariant mode, source is non-null, and neither Ordinal nor
// OrdinalIgnoreCase set in options.
785 internal unsafe int GetHashCodeOfStringCore(string source, CompareOptions options)
787 Debug.Assert(!_invariantMode);
789 Debug.Assert(source != null);
790 Debug.Assert((options & (CompareOptions.Ordinal | CompareOptions.OrdinalIgnoreCase)) == 0);
// Special case: empty source.
792 if (source.Length == 0)
// First call passes a null buffer of size 0; its result is used as the required key length.
797 int sortKeyLength = Interop.Globalization.GetSortKey(_sortHandle, source, source.Length, null, 0, options);
799 // As an optimization, for small sort keys we allocate the buffer on the stack.
800 if (sortKeyLength <= 256)
802 byte* pSortKey = stackalloc byte[sortKeyLength];
803 Interop.Globalization.GetSortKey(_sortHandle, source, source.Length, pSortKey, sortKeyLength, options);
804 return InternalHashSortKey(pSortKey, sortKeyLength);
// Larger keys use a heap-allocated array, pinned for the duration of the native calls.
807 byte[] sortKey = new byte[sortKeyLength];
809 fixed (byte* pSortKey = sortKey)
811 Interop.Globalization.GetSortKey(_sortHandle, source, source.Length, pSortKey, sortKeyLength, options);
812 return InternalHashSortKey(pSortKey, sortKeyLength);
// Hashes the sortKeyLength bytes at sortKey. The method is extern and bound through DllImport
// on JitHelpers.QCall, so its implementation lives outside this file.
816 [DllImport(JitHelpers.QCall)]
817 private static extern unsafe int InternalHashSortKey(byte* sortKey, int sortKeyLength);
// Maps culture-sensitive options to their ordinal counterpart: OrdinalIgnoreCase when the
// IgnoreCase flag is set, Ordinal otherwise.
private static CompareOptions GetOrdinalCompareOptions(CompareOptions options)
{
    bool ignoreCase = (options & CompareOptions.IgnoreCase) == CompareOptions.IgnoreCase;
    return ignoreCase ? CompareOptions.OrdinalIgnoreCase : CompareOptions.Ordinal;
}
// Tells whether the ordinal fast paths may be used for the given options.
private static bool CanUseAsciiOrdinalForOptions(CompareOptions options)
{
    // Unlike the other Ignore options, IgnoreSymbols impacts ASCII characters (e.g. ').
    CompareOptions symbolsFlag = options & CompareOptions.IgnoreSymbols;
    return symbolsFlag == CompareOptions.None;
}
// Encodes s as UTF-8 into a byte array that has one extra trailing byte, left at zero, so the
// encoded text is null-terminated.
837 private static byte[] GetNullTerminatedUtf8String(string s)
// Exact number of UTF-8 bytes needed for s, not counting the terminator.
839 int byteLen = System.Text.Encoding.UTF8.GetByteCount(s);
841 // Allocate an extra byte (which defaults to 0) as the null terminator.
842 byte[] buffer = new byte[byteLen + 1];
844 int bytesWritten = System.Text.Encoding.UTF8.GetBytes(s, 0, s.Length, buffer, 0);
// The encoder must have written exactly the number of bytes computed above.
846 Debug.Assert(bytesWritten == byteLen);
// Builds a SortVersion from the native sort version of this instance's sort handle and LCID.
// The Guid starts with the sort version, and its trailing bytes are taken from LCID.
851 private SortVersion GetSortVersion()
853 Debug.Assert(!_invariantMode);
855 int sortVersion = Interop.Globalization.GetSortVersion(_sortHandle);
856 return new SortVersion(sortVersion, LCID, new Guid(sortVersion, 0, 0, 0, 0, 0, 0,
// LCID bytes, from the more significant ones to the least significant one.
858 (byte) ((LCID & 0x00FF0000) >> 16),
859 (byte) ((LCID & 0x0000FF00) >> 8),
860 (byte) (LCID & 0xFF)));
863 // See https://github.com/dotnet/coreclr/blob/master/src/utilcode/util_nodependencies.cpp#L970
864 private static readonly bool[] s_highCharTable = new bool[0x80]