Use IndexOfAnyValues in Xml (#78664)
author Miha Zupan <mihazupan.zupan1@gmail.com>
Wed, 23 Nov 2022 05:04:53 +0000 (05:04 +0000)
committer GitHub <noreply@github.com>
Wed, 23 Nov 2022 05:04:53 +0000 (21:04 -0800)
* Use IndexOfAnyValues in Xml

* Avoid checking whitespace char twice

* More spans

src/libraries/System.Private.DataContractSerialization/src/System/Xml/XmlBaseReader.cs
src/libraries/System.Private.DataContractSerialization/src/System/Xml/XmlBaseWriter.cs
src/libraries/System.Private.DataContractSerialization/src/System/Xml/XmlBufferReader.cs
src/libraries/System.Private.DataContractSerialization/src/System/Xml/XmlConverter.cs
src/libraries/System.Private.Xml/src/System/Xml/XmlCharType.cs
src/libraries/System.Private.Xml/src/System/Xml/XmlConvert.cs

index f797800..5c178ea 100644 (file)
@@ -1514,20 +1514,14 @@ namespace System.Xml
                 catch (FormatException exception)
                 {
                     // Something was wrong with the format, see if we can strip the spaces
-                    int i = 0;
-                    int j = 0;
-                    while (true)
+                    int newCount = XmlConverter.StripWhitespace(chars.AsSpan(0, charCount));
+                    if (newCount == charCount)
                     {
-                        while (j < charCount && XmlConverter.IsWhitespace(chars[j]))
-                            j++;
-                        if (j == charCount)
-                            break;
-                        chars[i++] = chars[j++];
-                    }
-                    // No spaces, so don't try again
-                    if (i == charCount)
+                        // No spaces, so don't try again
                         throw DiagnosticUtility.ExceptionUtility.ThrowHelperError(new XmlException(exception.Message, exception.InnerException));
-                    charCount = i;
+                    }
+
+                    charCount = newCount;
                 }
             }
         }
index 784033b..5ced565 100644 (file)
@@ -750,27 +750,20 @@ namespace System.Xml
 
         private static void VerifyWhitespace(char ch)
         {
-            if (!IsWhitespace(ch))
-                throw System.Runtime.Serialization.DiagnosticUtility.ExceptionUtility.ThrowHelperError(new InvalidOperationException(SR.XmlIllegalOutsideRoot));
+            if (!XmlConverter.IsWhitespace(ch))
+                throw DiagnosticUtility.ExceptionUtility.ThrowHelperError(new InvalidOperationException(SR.XmlIllegalOutsideRoot));
         }
 
         private static void VerifyWhitespace(string s)
         {
-            for (int i = 0; i < s.Length; i++)
-                if (!IsWhitespace(s[i]))
-                    throw System.Runtime.Serialization.DiagnosticUtility.ExceptionUtility.ThrowHelperError(new InvalidOperationException(SR.XmlIllegalOutsideRoot));
+            if (!XmlConverter.IsWhitespace(s))
+                throw DiagnosticUtility.ExceptionUtility.ThrowHelperError(new InvalidOperationException(SR.XmlIllegalOutsideRoot));
         }
 
         private static void VerifyWhitespace(char[] chars, int offset, int count)
         {
-            for (int i = 0; i < count; i++)
-                if (!IsWhitespace(chars[offset + i]))
-                    throw System.Runtime.Serialization.DiagnosticUtility.ExceptionUtility.ThrowHelperError(new InvalidOperationException(SR.XmlIllegalOutsideRoot));
-        }
-
-        private static bool IsWhitespace(char ch)
-        {
-            return (ch == ' ' || ch == '\n' || ch == '\r' || ch == 't');
+            if (!XmlConverter.IsWhitespace(chars.AsSpan(offset, count)))
+                throw DiagnosticUtility.ExceptionUtility.ThrowHelperError(new InvalidOperationException(SR.XmlIllegalOutsideRoot));
         }
 
         protected static void EndContent()
index 57b7181..995d64b 100644 (file)
@@ -768,23 +768,12 @@ namespace System.Xml
         public bool IsWhitespaceKey(int key)
         {
             string s = GetDictionaryString(key).Value;
-            for (int i = 0; i < s.Length; i++)
-            {
-                if (!XmlConverter.IsWhitespace(s[i]))
-                    return false;
-            }
-            return true;
+            return XmlConverter.IsWhitespace(s);
         }
 
         public bool IsWhitespaceUTF8(int offset, int length)
         {
-            byte[] buffer = _buffer;
-            for (int i = 0; i < length; i++)
-            {
-                if (!XmlConverter.IsWhitespace((char)buffer[offset + i]))
-                    return false;
-            }
-            return true;
+            return XmlConverter.IsWhitespace(_buffer.AsSpan(offset, length));
         }
 
         public bool IsWhitespaceUnicode(int offset, int length)
index 52df284..71867bf 100644 (file)
@@ -14,7 +14,7 @@ using System.Globalization;
 using System.Runtime.Serialization;
 using System.Collections.Generic;
 using System.Collections.ObjectModel;
-
+using System.Buffers;
 
 namespace System.Xml
 {
@@ -30,6 +30,10 @@ namespace System.Xml
         public const int MaxUInt64Chars = 32;
         public const int MaxPrimitiveChars = MaxDateTimeChars;
 
+        // Matches IsWhitespace below
+        private static readonly IndexOfAnyValues<char> s_whitespaceChars = IndexOfAnyValues.Create(" \t\r\n");
+        private static readonly IndexOfAnyValues<byte> s_whitespaceBytes = IndexOfAnyValues.Create(" \t\r\n"u8);
+
         public static bool ToBoolean(string value)
         {
             try
@@ -1082,45 +1086,62 @@ namespace System.Xml
             return offset - offsetMin;
         }
 
-        public static bool IsWhitespace(string s)
+        public static bool IsWhitespace(ReadOnlySpan<char> chars) =>
+            chars.IndexOfAnyExcept(s_whitespaceChars) < 0;
+
+        public static bool IsWhitespace(ReadOnlySpan<byte> bytes) =>
+            bytes.IndexOfAnyExcept(s_whitespaceBytes) < 0;
+
+        public static bool IsWhitespace(char ch) =>
+            ch is <= ' ' and (' ' or '\t' or '\r' or '\n');
+
+        public static int StripWhitespace(Span<char> chars)
         {
-            for (int i = 0; i < s.Length; i++)
+            int count = chars.IndexOfAny(s_whitespaceChars);
+            if (count < 0)
             {
-                if (!IsWhitespace(s[i]))
-                    return false;
+                return chars.Length;
             }
-            return true;
-        }
 
-        public static bool IsWhitespace(char ch)
-        {
-            return (ch <= ' ' && (ch == ' ' || ch == '\t' || ch == '\r' || ch == '\n'));
+            foreach (char c in chars.Slice(count + 1))
+            {
+                if (!IsWhitespace(c))
+                {
+                    chars[count++] = c;
+                }
+            }
+
+            return count;
         }
 
         public static string StripWhitespace(string s)
         {
-            int count = s.Length;
-            for (int i = 0; i < s.Length; i++)
+            int indexOfWhitespace = s.AsSpan().IndexOfAny(s_whitespaceChars);
+            if (indexOfWhitespace < 0)
             {
-                if (IsWhitespace(s[i]))
+                return s;
+            }
+
+            int count = s.Length - 1;
+            foreach (char c in s.AsSpan(indexOfWhitespace + 1))
+            {
+                if (IsWhitespace(c))
                 {
                     count--;
                 }
             }
-            if (count == s.Length)
-                return s;
 
-            return string.Create(count, s, (chars, s) =>
+            return string.Create(count, s, static (chars, s) =>
             {
                 int count = 0;
-                for (int i = 0; i < s.Length; i++)
+                foreach (char c in s)
                 {
-                    char ch = s[i];
-                    if (!IsWhitespace(ch))
+                    if (!IsWhitespace(c))
                     {
-                        chars[count++] = ch;
+                        chars[count++] = c;
                     }
                 }
+                Debug.Assert(count == chars.Length);
             });
         }
     }
index 21233b4..72547a9 100644 (file)
@@ -1,6 +1,7 @@
 // Licensed to the .NET Foundation under one or more agreements.
 // The .NET Foundation licenses this file to you under the MIT license.
 
+using System.Buffers;
 using System.Diagnostics;
 using System.Runtime.CompilerServices;
 using System.Runtime.InteropServices;
@@ -13,6 +14,19 @@ namespace System.Xml
     /// </summary>
     internal static class XmlCharType
     {
+#if DEBUG
+        static XmlCharType()
+        {
+            for (int i = 0; i < 128; i++)
+            {
+                char c = (char)i;
+                Debug.Assert(PublicIdChars.Contains(c) == IsPubidChar(c));
+                Debug.Assert(AsciiCharDataChars.Contains(c) == IsCharData(c));
+                Debug.Assert(WhiteSpaceChars.Contains(c) == IsWhiteSpace(c));
+            }
+        }
+#endif
+
         // Surrogate constants
         internal const int SurHighStart = 0xd800;    // 1101 10xx
         internal const int SurHighEnd = 0xdbff;
@@ -39,6 +53,13 @@ namespace System.Xml
         // bitmap for public ID characters - 1 bit per character 0x0 - 0x80; no character > 0x80 is a PUBLIC ID char
         private const string PublicIdBitmap = "\u2400\u0000\uffbb\uafff\uffff\u87ff\ufffe\u07ff";
 
+        private const string PublicIdChars = "\n\r !#$%'()*+,-./0123456789:;=?@ABCDEFGHIJKLMNOPQRSTUVWXYZ_abcdefghijklmnopqrstuvwxyz";
+        private const string AsciiCharDataChars = "\t\n\r !\"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~\u007f";
+        private const string WhiteSpaceChars = "\t\n\r ";
+
+        private static readonly IndexOfAnyValues<char> s_publicIdChars = IndexOfAnyValues.Create(PublicIdChars);
+        private static readonly IndexOfAnyValues<char> s_asciiCharDataChars = IndexOfAnyValues.Create(AsciiCharDataChars);
+        private static readonly IndexOfAnyValues<char> s_whitespaceChars = IndexOfAnyValues.Create(WhiteSpaceChars);
 
         [MethodImpl(MethodImplOptions.AggressiveInlining)]
         public static bool IsWhiteSpace(char ch) => (GetCharProperties(ch) & Whitespace) != 0u;
@@ -109,46 +130,36 @@ namespace System.Xml
             highChar = (char)(SurHighStart + v / 1024);
         }
 
-        internal static bool IsOnlyWhitespace(string? str)
-        {
-            return IsOnlyWhitespaceWithPos(str) == -1;
-        }
+        internal static bool IsOnlyWhitespace(ReadOnlySpan<char> str) =>
+            IsOnlyWhitespaceWithPos(str) < 0;
 
         // Character checking on strings
-        internal static int IsOnlyWhitespaceWithPos(string? str)
+        internal static int IsOnlyWhitespaceWithPos(ReadOnlySpan<char> str) =>
+            str.IndexOfAnyExcept(s_whitespaceChars);
+
+        internal static int IsOnlyCharData(ReadOnlySpan<char> str)
         {
-            if (str != null)
+            int i = str.IndexOfAnyExcept(s_asciiCharDataChars);
+            if (i < 0)
             {
-                for (int i = 0; i < str.Length; i++)
-                {
-                    if ((GetCharProperties(str[i]) & Whitespace) == 0u)
-                    {
-                        return i;
-                    }
-                }
+                // Fast-path: All ASCII CharData chars
+                return -1;
             }
-            return -1;
-        }
 
-        internal static int IsOnlyCharData(string str)
-        {
-            if (str != null)
+            for (; (uint)i < (uint)str.Length; i++)
             {
-                for (int i = 0; i < str.Length; i++)
+                char c = str[i];
+                if (!IsCharData(c))
                 {
-                    if ((GetCharProperties(str[i]) & CharData) == 0u)
+                    if ((uint)(i + 1) >= (uint)str.Length || !char.IsSurrogatePair(c, str[i + 1]))
                     {
-                        if (i + 1 >= str.Length || !(XmlCharType.IsHighSurrogate(str[i]) && XmlCharType.IsLowSurrogate(str[i + 1])))
-                        {
-                            return i;
-                        }
-                        else
-                        {
-                            i++;
-                        }
+                        return i;
                     }
+
+                    i++;
                 }
             }
+
             return -1;
         }
 
@@ -161,20 +172,8 @@ namespace System.Xml
             return str.AsSpan(startPos, len).IndexOfAnyExceptInRange('0', '9') < 0;
         }
 
-        internal static int IsPublicId(string str)
-        {
-            if (str != null)
-            {
-                for (int i = 0; i < str.Length; i++)
-                {
-                    if (!IsPubidChar(str[i]))
-                    {
-                        return i;
-                    }
-                }
-            }
-            return -1;
-        }
+        internal static int IsPublicId(string str) =>
+            str.AsSpan().IndexOfAnyExcept(s_publicIdChars);
 
         // This method tests whether a value is in a given range with just one test; start and end should be constants
         private static bool InRange(int value, int start, int end)
@@ -4286,6 +4285,5 @@ namespace System.Xml
             /* FFE0 */ 0xD0, 0xD0, 0xD0, 0xD0, 0xD0, 0xD0, 0xD0, 0xD0, 0xD0, 0xD0, 0xD0, 0xD0, 0xD0, 0xD0, 0xD0, 0xD0,
             /* FFF0 */ 0xD0, 0xD0, 0xD0, 0xD0, 0xD0, 0xD0, 0xD0, 0xD0, 0xD0, 0xD0, 0xD0, 0xD0, 0xD0, 0xD0, 0x00, 0x00,
         };
-
     }
 }
index 6f3d099..c465eb9 100644 (file)
@@ -516,7 +516,7 @@ namespace System.Xml
 
             // returns the position of invalid character or -1
             int pos = XmlCharType.IsPublicId(publicId);
-            if (pos != -1)
+            if (pos >= 0)
             {
                 throw CreateInvalidCharException(publicId, pos, ExceptionType.XmlException);
             }
@@ -572,7 +572,7 @@ namespace System.Xml
             return XmlCharType.IsHighSurrogate(highChar) && XmlCharType.IsLowSurrogate(lowChar);
         }
 
-        // Valid PUBLIC ID character - as defined in XML 1.0 spec (fifth edition) production [13] PublidChar
+        // Valid PUBLIC ID character - as defined in XML 1.0 spec (fifth edition) production [13] PubidChar
         public static bool IsPublicIdChar(char ch)
         {
             return XmlCharType.IsPubidChar(ch);