Use IndexOfAnyValues in System.Net.Http (#78660)
author Miha Zupan <mihazupan.zupan1@gmail.com>
Tue, 22 Nov 2022 02:20:42 +0000 (02:20 +0000)
committer GitHub <noreply@github.com>
Tue, 22 Nov 2022 02:20:42 +0000 (21:20 -0500)
src/libraries/System.Net.Http/src/System/Net/Http/Headers/AltSvcHeaderParser.cs
src/libraries/System.Net.Http/src/System/Net/Http/Headers/HeaderUtilities.cs
src/libraries/System.Net.Http/src/System/Net/Http/Headers/KnownHeader.cs
src/libraries/System.Net.Http/src/System/Net/Http/HttpMethod.cs
src/libraries/System.Net.Http/src/System/Net/Http/HttpRuleParser.cs
src/libraries/System.Net.Http/tests/UnitTests/HttpRuleParserTest.cs
src/libraries/System.Net.Primitives/src/System/Net/Cookie.cs

index beef884..2293596 100644 (file)
@@ -406,12 +406,10 @@ namespace System.Net.Http.Headers
                 return false;
             }
 
-            if (HttpRuleParser.IsTokenChar(value[startIndex]))
+            int tokenLength = HttpRuleParser.GetTokenLength(value, startIndex);
+            if (tokenLength > 0)
             {
                 // No reason for integers to be quoted, so this should be the hot path.
-
-                int tokenLength = HttpRuleParser.GetTokenLength(value, startIndex);
-
                 readLength = tokenLength;
                 return HeaderUtilities.TryParseInt32(value, startIndex, tokenLength, out result);
             }
@@ -471,9 +469,10 @@ namespace System.Net.Http.Headers
                 return false;
             }
 
-            if (HttpRuleParser.IsTokenChar(value[startIndex]))
+            int tokenLength = HttpRuleParser.GetTokenLength(value, startIndex);
+            if (tokenLength > 0)
             {
-                readLength = HttpRuleParser.GetTokenLength(value, startIndex);
+                readLength = tokenLength;
                 return true;
             }
 
index 14b1075..711c8a2 100644 (file)
@@ -21,6 +21,11 @@ namespace System.Net.Http.Headers
 
         internal const string BytesUnit = "bytes";
 
+        // attr-char = ALPHA / DIGIT / "!" / "#" / "$" / "&" / "+" / "-" / "." / "^" / "_" / "`" / "|" / "~"
+        //      ; token except ( "*" / "'" / "%" )
+        private static readonly IndexOfAnyValues<byte> s_rfc5987AttrBytes =
+            IndexOfAnyValues.Create("!#$&+-.0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ^_`abcdefghijklmnopqrstuvwxyz|~"u8);
+
         internal static void SetQuality(UnvalidatedObjectCollection<NameValueHeaderValue> parameters, double? value)
         {
             Debug.Assert(parameters != null);
@@ -76,36 +81,45 @@ namespace System.Net.Http.Headers
         // encoding'lang'PercentEncodedSpecials
         internal static string Encode5987(string input)
         {
-            // Encode a string using RFC 5987 encoding.
-            // encoding'lang'PercentEncodedSpecials
             var builder = new ValueStringBuilder(stackalloc char[256]);
             byte[] utf8bytes = ArrayPool<byte>.Shared.Rent(Encoding.UTF8.GetMaxByteCount(input.Length));
             int utf8length = Encoding.UTF8.GetBytes(input, 0, input.Length, utf8bytes, 0);
 
             builder.Append("utf-8\'\'");
-            for (int i = 0; i < utf8length; i++)
+
+            ReadOnlySpan<byte> utf8 = utf8bytes.AsSpan(0, utf8length);
+            do
             {
-                byte utf8byte = utf8bytes[i];
+                int length = utf8.IndexOfAnyExcept(s_rfc5987AttrBytes);
+                if (length < 0)
+                {
+                    length = utf8.Length;
+                }
+
+                Encoding.ASCII.GetChars(utf8.Slice(0, length), builder.AppendSpan(length));
 
-                // attr-char = ALPHA / DIGIT / "!" / "#" / "$" / "&" / "+" / "-" / "." / "^" / "_" / "`" / "|" / "~"
-                //      ; token except ( "*" / "'" / "%" )
-                if (utf8byte > 0x7F) // Encodes as multiple utf-8 bytes
+                utf8 = utf8.Slice(length);
+
+                if (utf8.IsEmpty)
                 {
-                    AddHexEscaped(utf8byte, ref builder);
+                    break;
                 }
-                else if (!HttpRuleParser.IsTokenChar((char)utf8byte) || utf8byte == '*' || utf8byte == '\'' || utf8byte == '%')
+
+                length = utf8.IndexOfAny(s_rfc5987AttrBytes);
+                if (length < 0)
                 {
-                    // ASCII - Only one encoded byte.
-                    AddHexEscaped(utf8byte, ref builder);
+                    length = utf8.Length;
                 }
-                else
+
+                foreach (byte b in utf8.Slice(0, length))
                 {
-                    builder.Append((char)utf8byte);
+                    AddHexEscaped(b, ref builder);
                 }
 
+                utf8 = utf8.Slice(length);
             }
+            while (!utf8.IsEmpty);
 
-            Array.Clear(utf8bytes, 0, utf8length);
             ArrayPool<byte>.Shared.Return(utf8bytes);
 
             return builder.ToString();
index 0163db0..ab78688 100644 (file)
@@ -12,13 +12,13 @@ namespace System.Net.Http.Headers
             this(name, HttpHeaderType.Custom, parser: null, knownValues: null, http2StaticTableIndex, http3StaticTableIndex)
         {
             Debug.Assert(!string.IsNullOrEmpty(name));
-            Debug.Assert(name[0] == ':' || HttpRuleParser.GetTokenLength(name, 0) == name.Length);
+            Debug.Assert(name[0] == ':' || HttpRuleParser.IsToken(name));
         }
 
         public KnownHeader(string name, HttpHeaderType headerType, HttpHeaderParser? parser, string[]? knownValues = null, int? http2StaticTableIndex = null, int? http3StaticTableIndex = null)
         {
             Debug.Assert(!string.IsNullOrEmpty(name));
-            Debug.Assert(name[0] == ':' || HttpRuleParser.GetTokenLength(name, 0) == name.Length);
+            Debug.Assert(name[0] == ':' || HttpRuleParser.IsToken(name));
 
             Name = name;
             HeaderType = headerType;
index 1ef905c..cda7a9f 100644 (file)
@@ -82,7 +82,7 @@ namespace System.Net.Http
             {
                 throw new ArgumentException(SR.net_http_argument_empty_string, nameof(method));
             }
-            if (HttpRuleParser.GetTokenLength(method, 0) != method.Length)
+            if (!HttpRuleParser.IsToken(method))
             {
                 throw new FormatException(SR.net_http_httpmethod_format_error);
             }
index f7c5564..ab0a01a 100644 (file)
@@ -1,6 +1,7 @@
 // Licensed to the .NET Foundation under one or more agreements.
 // The .NET Foundation licenses this file to you under the MIT license.
 
+using System.Buffers;
 using System.Diagnostics;
 using System.Text;
 
@@ -8,7 +9,17 @@ namespace System.Net.Http
 {
     internal static class HttpRuleParser
     {
-        private static readonly bool[] s_tokenChars = CreateTokenChars();
+        // token = 1*<any CHAR except CTLs or separators>
+        // CTL = <any US-ASCII control character (octets 0 - 31) and DEL (127)>
+        private static readonly IndexOfAnyValues<char> s_tokenChars =
+            IndexOfAnyValues.Create("!#$%&'*+-.0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ^_`abcdefghijklmnopqrstuvwxyz|~");
+
+        private static readonly IndexOfAnyValues<byte> s_tokenBytes =
+            IndexOfAnyValues.Create("!#$%&'*+-.0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ^_`abcdefghijklmnopqrstuvwxyz|~"u8);
+
+        private static readonly IndexOfAnyValues<char> s_hostDelimiterChars =
+            IndexOfAnyValues.Create("/ \t\r,");
+
         private const int MaxNestedCount = 5;
 
         internal const char CR = (char)13;
@@ -18,98 +29,22 @@ namespace System.Net.Http
 
         internal static Encoding DefaultHttpEncoding => Encoding.Latin1;
 
-        private static bool[] CreateTokenChars()
-        {
-            // token = 1*<any CHAR except CTLs or separators>
-            // CTL = <any US-ASCII control character (octets 0 - 31) and DEL (127)>
-
-            var tokenChars = new bool[128]; // All elements default to "false".
-
-            for (int i = 33; i < 127; i++) // Skip Space (32) & DEL (127).
-            {
-                tokenChars[i] = true;
-            }
-
-            // Remove separators: these are not valid token characters.
-            tokenChars[(byte)'('] = false;
-            tokenChars[(byte)')'] = false;
-            tokenChars[(byte)'<'] = false;
-            tokenChars[(byte)'>'] = false;
-            tokenChars[(byte)'@'] = false;
-            tokenChars[(byte)','] = false;
-            tokenChars[(byte)';'] = false;
-            tokenChars[(byte)':'] = false;
-            tokenChars[(byte)'\\'] = false;
-            tokenChars[(byte)'"'] = false;
-            tokenChars[(byte)'/'] = false;
-            tokenChars[(byte)'['] = false;
-            tokenChars[(byte)']'] = false;
-            tokenChars[(byte)'?'] = false;
-            tokenChars[(byte)'='] = false;
-            tokenChars[(byte)'{'] = false;
-            tokenChars[(byte)'}'] = false;
-
-            return tokenChars;
-        }
-
-        internal static bool IsTokenChar(char character)
-        {
-            // Must be between 'space' (32) and 'DEL' (127).
-            if (character > 127)
-            {
-                return false;
-            }
-
-            return s_tokenChars[character];
-        }
-
         internal static int GetTokenLength(string input, int startIndex)
         {
-            Debug.Assert(input != null);
+            Debug.Assert(input is not null);
 
-            if (startIndex >= input.Length)
-            {
-                return 0;
-            }
-
-            int current = startIndex;
-
-            while (current < input.Length)
-            {
-                if (!IsTokenChar(input[current]))
-                {
-                    return current - startIndex;
-                }
-                current++;
-            }
-            return input.Length - startIndex;
-        }
+            ReadOnlySpan<char> slice = input.AsSpan(startIndex);
 
-        internal static bool IsToken(string input)
-        {
-            for (int i = 0; i < input.Length; i++)
-            {
-                if (!IsTokenChar(input[i]))
-                {
-                    return false;
-                }
-            }
+            int index = slice.IndexOfAnyExcept(s_tokenChars);
 
-            return true;
+            return index < 0 ? slice.Length : index;
         }
 
-        internal static bool IsToken(ReadOnlySpan<byte> input)
-        {
-            for (int i = 0; i < input.Length; i++)
-            {
-                if (!IsTokenChar((char)input[i]))
-                {
-                    return false;
-                }
-            }
+        internal static bool IsToken(ReadOnlySpan<char> input) =>
+            input.IndexOfAnyExcept(s_tokenChars) < 0;
 
-            return true;
-        }
+        internal static bool IsToken(ReadOnlySpan<byte> input) =>
+            input.IndexOfAnyExcept(s_tokenBytes) < 0;
 
         internal static string GetTokenString(ReadOnlySpan<byte> input)
         {
@@ -147,10 +82,8 @@ namespace System.Net.Http
             return input.Length - startIndex;
         }
 
-        internal static bool ContainsNewLine(string value, int startIndex = 0)
-        {
-            return value.AsSpan(startIndex).IndexOfAny('\r', '\n') != -1;
-        }
+        internal static bool ContainsNewLine(string value, int startIndex = 0) =>
+            value.AsSpan(startIndex).IndexOfAny('\r', '\n') >= 0;
 
         internal static int GetNumberLength(string input, int startIndex, bool allowDecimal)
         {
@@ -206,41 +139,33 @@ namespace System.Net.Http
                 return 0;
             }
 
+            ReadOnlySpan<char> slice = input.AsSpan(startIndex);
+
             // A 'host' is either a token (if 'allowToken' == true) or a valid host name as defined by the URI RFC.
             // So we first iterate through the string and search for path delimiters and whitespace. When found, stop
             // and try to use the substring as token or URI host name. If it works, we have a host name, otherwise not.
-            int current = startIndex;
-            bool isToken = true;
-            while (current < input.Length)
+            int index = slice.IndexOfAny(s_hostDelimiterChars);
+            if (index >= 0)
             {
-                char c = input[current];
-                if (c == '/')
+                if (index == 0)
                 {
-                    return 0; // Host header must not contain paths.
+                    return 0;
                 }
 
-                if ((c == ' ') || (c == '\t') || (c == '\r') || (c == ','))
+                if (slice[index] == '/')
                 {
-                    break; // We hit a delimiter (',' or whitespace). Stop here.
+                    return 0; // Host header must not contain paths.
                 }
 
-                isToken = isToken && IsTokenChar(c);
-
-                current++;
+                slice = slice.Slice(0, index);
             }
 
-            int length = current - startIndex;
-            if (length == 0)
+            if ((allowToken && IsToken(slice)) || IsValidHostName(slice))
             {
-                return 0;
-            }
-
-            if ((!allowToken || !isToken) && !IsValidHostName(input.AsSpan(startIndex, length)))
-            {
-                return 0;
+                return slice.Length;
             }
 
-            return length;
+            return 0;
         }
 
         internal static HttpParseResult GetCommentLength(string input, int startIndex, out int length)
index 35b3f60..29e4994 100644 (file)
@@ -41,16 +41,18 @@ namespace System.Net.Http.Tests
 
         [Theory]
         [MemberData(nameof(ValidTokenCharsArguments))]
-        public void IsTokenChar_ValidTokenChars_ConsideredValid(char token)
+        public void IsToken_ValidTokenChars_ConsideredValid(char token)
         {
-            Assert.True(HttpRuleParser.IsTokenChar(token));
+            Assert.True(HttpRuleParser.IsToken(stackalloc[] { token }));
+            Assert.True(HttpRuleParser.IsToken(new ReadOnlySpan<byte>((byte)token)));
         }
 
         [Theory]
         [MemberData(nameof(InvalidTokenCharsArguments))]
-        public void IsTokenChar_InvalidTokenChars_ConsideredInvalid(char token)
+        public void IsToken_InvalidTokenChars_ConsideredInvalid(char token)
         {
-            Assert.False(HttpRuleParser.IsTokenChar(token));
+            Assert.False(HttpRuleParser.IsToken(stackalloc[] { token }));
+            Assert.False(HttpRuleParser.IsToken(new ReadOnlySpan<byte>((byte)token)));
         }
 
         [Fact]
index ee319ca..12ca4d8 100644 (file)
@@ -1,6 +1,7 @@
 // Licensed to the .NET Foundation under one or more agreements.
 // The .NET Foundation licenses this file to you under the MIT license.
 
+using System.Buffers;
 using System.Collections.Generic;
 using System.Diagnostics;
 using System.Diagnostics.CodeAnalysis;
@@ -41,7 +42,7 @@ namespace System.Net
 
         internal static readonly char[] PortSplitDelimiters = new char[] { ' ', ',', '\"' };
         // Space (' ') should be reserved as well per RFCs, but major web browsers support it and some web sites use it - so we support it too
-        internal const string ReservedToName = "\t\r\n=;,";
+        private static readonly IndexOfAnyValues<char> s_reservedToNameChars = IndexOfAnyValues.Create("\t\r\n=;,");
 
         private string m_comment = string.Empty; // Do not rename (binary serialization)
         private Uri? m_commentUri; // Do not rename (binary serialization)
@@ -238,7 +239,7 @@ namespace System.Net
                 || value.StartsWith('$')
                 || value.StartsWith(' ')
                 || value.EndsWith(' ')
-                || value.AsSpan().IndexOfAny(ReservedToName) >= 0)
+                || value.AsSpan().IndexOfAny(s_reservedToNameChars) >= 0)
             {
                 m_name = string.Empty;
                 return false;
@@ -346,7 +347,7 @@ namespace System.Net
                 m_name.StartsWith('$') ||
                 m_name.StartsWith(' ') ||
                 m_name.EndsWith(' ') ||
-                m_name.AsSpan().IndexOfAny(ReservedToName) >= 0)
+                m_name.AsSpan().IndexOfAny(s_reservedToNameChars) >= 0)
             {
                 if (shouldThrow)
                 {