break;
}
- UriParser? syntax = null;
- if (CheckSchemeSyntax(relativeStr.AsSpan(0, i), ref syntax) == ParsingError.None)
+ ParsingError error = ParsingError.None;
+ UriParser? syntax = CheckSchemeSyntax(relativeStr.AsSpan(0, i), ref error);
+
+ if (error == ParsingError.None)
{
if (baseUri.Syntax == syntax)
{
if (length >= c_MaxUriBufferSize)
return ParsingError.SizeLimit;
- //STEP1: parse scheme, lookup this Uri Syntax or create one using UnknownV1SyntaxFlags uri syntax template
- fixed (char* pUriString = uriString)
+ // Fast path for valid http(s) schemes with no leading whitespace that are expected to be very common.
+ if (uriString.StartsWith("https:", StringComparison.OrdinalIgnoreCase))
+ {
+ syntax = UriParser.HttpsUri;
+ flags |= (Flags)6;
+ }
+ else if (uriString.StartsWith("http:", StringComparison.OrdinalIgnoreCase))
{
+ syntax = UriParser.HttpUri;
+ flags |= (Flags)5;
+ }
+ else
+ {
+ // STEP1: parse scheme, lookup this Uri Syntax or create one using UnknownV1SyntaxFlags uri syntax template
ParsingError err = ParsingError.None;
- int idx = ParseSchemeCheckImplicitFile(pUriString, length, ref err, ref flags, ref syntax);
+ int idx = ParseSchemeCheckImplicitFile(uriString, ref err, ref flags, ref syntax);
+ Debug.Assert((err is ParsingError.None) == (syntax is not null));
if (err != ParsingError.None)
return err;
flags |= (Flags)idx;
}
+
return ParsingError.None;
}
InterlockedSetFlags(cF);
}
- //
// verifies the syntax of the scheme part
// Checks on implicit File: scheme due to simple Dos/Unc path passed
// returns the start of the next component position
- // throws UriFormatException if invalid scheme
- //
- private static unsafe int ParseSchemeCheckImplicitFile(char* uriString, int length,
- ref ParsingError err, ref Flags flags, ref UriParser? syntax)
+ private static int ParseSchemeCheckImplicitFile(string uriString, ref ParsingError err, ref Flags flags, ref UriParser? syntax)
{
+ Debug.Assert(err == ParsingError.None);
Debug.Assert((flags & Flags.Debug_LeftConstructor) == 0);
- int idx = 0;
+ int i = 0;
- //skip whitespace
- while (idx < length && UriHelper.IsLWS(uriString[idx]))
+ // skip whitespace
+ while ((uint)i < (uint)uriString.Length && UriHelper.IsLWS(uriString[i]))
{
- ++idx;
+ i++;
}
// Unix: Unix path?
// A path starting with 2 / or \ (including mixed) is treated as UNC and will be matched below
- if (!OperatingSystem.IsWindows() && idx < length && uriString[idx] == '/' &&
- (idx + 1 == length || (uriString[idx + 1] != '/' && uriString[idx + 1] != '\\')))
+ if (!OperatingSystem.IsWindows() &&
+ (uint)i < (uint)uriString.Length && uriString[i] == '/' &&
+ ((uint)(i + 1) >= (uint)uriString.Length || uriString[i + 1] is not ('/' or '\\')))
{
flags |= (Flags.UnixPath | Flags.ImplicitFile | Flags.AuthorityFound);
syntax = UriParser.UnixFileUri;
- return idx;
+ return i;
}
- // sets the recognizer for well known registered schemes
- // file, ftp, http, https, uuid, etc
+ // Find the colon.
// Note that we don't support one-letter schemes that will be put into a DOS path bucket
+ int colonOffset = uriString.AsSpan(i).IndexOf(':');
- int end = idx;
- while (end < length && uriString[end] != ':')
- {
- ++end;
- }
-
- // NB: On 64-bits we will use less optimized code from CheckSchemeSyntax()
- //
- if (IntPtr.Size == 4)
- {
- // long = 4chars: The minimal size of a known scheme is 2 + ':'
- if (end != length && end >= idx + 2 &&
- CheckKnownSchemes((long*)(uriString + idx), end - idx, ref syntax))
- {
- return end + 1;
- }
- }
-
- //NB: A string must have at least 3 characters and at least 1 before ':'
- if (idx + 2 >= length || end == idx)
+ // NB: A string must have at least 3 characters and at least 1 before ':'
+ if ((uint)(i + 2) >= (uint)uriString.Length ||
+ colonOffset == 0 ||
+ // Redundant checks to eliminate range checks below
+ (uint)i >= (uint)uriString.Length ||
+ (uint)(i + 1) >= (uint)uriString.Length)
{
err = ParsingError.BadFormat;
return 0;
}
- //Check for supported special cases like a DOS file path OR a UNC share path
- //NB: A string may not have ':' if this is a UNC path
+ // Check for supported special cases like a DOS file path OR a UNC share path
+ // NB: A string may not have ':' if this is a UNC path
+ if (uriString[i + 1] is ':' or '|')
{
- char c;
- if ((c = uriString[idx + 1]) == ':' || c == '|')
+ // DOS-like path?
+ if (char.IsAsciiLetter(uriString[i]))
{
- //DOS-like path?
- if (char.IsAsciiLetter(uriString[idx]))
+ if (uriString[i + 2] is '\\' or '/')
{
- if ((c = uriString[idx + 2]) == '\\' || c == '/')
- {
- flags |= (Flags.DosPath | Flags.ImplicitFile | Flags.AuthorityFound);
- syntax = UriParser.FileUri;
- return idx;
- }
- err = ParsingError.MustRootedPath;
- return 0;
+ flags |= (Flags.DosPath | Flags.ImplicitFile | Flags.AuthorityFound);
+ syntax = UriParser.FileUri;
+ return i;
}
- if (c == ':')
- err = ParsingError.BadScheme;
- else
- err = ParsingError.BadFormat;
+
+ err = ParsingError.MustRootedPath;
return 0;
}
- else if ((c = uriString[idx]) == '/' || c == '\\')
+
+ err = uriString[i + 1] == ':' ? ParsingError.BadScheme : ParsingError.BadFormat;
+ return 0;
+ }
+ else if (uriString[i] is '/' or '\\')
+ {
+ // UNC share?
+ if (uriString[i + 1] is '\\' or '/')
{
- //UNC share?
- if ((c = uriString[idx + 1]) == '\\' || c == '/')
- {
- flags |= (Flags.UncPath | Flags.ImplicitFile | Flags.AuthorityFound);
- syntax = UriParser.FileUri;
- idx += 2;
- // V1.1 compat this will simply eat any slashes prepended to a UNC path
- while (idx < length && ((c = uriString[idx]) == '/' || c == '\\'))
- ++idx;
+ flags |= (Flags.UncPath | Flags.ImplicitFile | Flags.AuthorityFound);
+ syntax = UriParser.FileUri;
+ i += 2;
- return idx;
+ // V1.1 compat this will simply eat any slashes prepended to a UNC path
+ while ((uint)i < (uint)uriString.Length && uriString[i] is '/' or '\\')
+ {
+ i++;
}
- err = ParsingError.BadFormat;
- return 0;
+
+ return i;
}
+
+ err = ParsingError.BadFormat;
+ return 0;
}
- if (end == length)
+ if (colonOffset < 0)
{
err = ParsingError.BadFormat;
return 0;
// This is a potentially valid scheme, but we have not identified it yet.
// Check for illegal characters, canonicalize, and check the length.
- err = CheckSchemeSyntax(new ReadOnlySpan<char>(uriString + idx, end - idx), ref syntax!);
- if (err != ParsingError.None)
+ syntax = CheckSchemeSyntax(uriString.AsSpan(i, colonOffset), ref err);
+ if (syntax is null)
{
return 0;
}
- return end + 1;
+ return i + colonOffset + 1;
}
- //
- // Quickly parses well known schemes.
- // nChars does not include the last ':'. Assuming there is one at the end of passed buffer
- private static unsafe bool CheckKnownSchemes(long* lptr, int nChars, ref UriParser? syntax)
+ // This will check whether a scheme string follows the rules
+ private static UriParser? CheckSchemeSyntax(ReadOnlySpan<char> scheme, ref ParsingError error)
{
- //NOTE beware of too short input buffers!
-
- const long _HTTP_Mask0 = 'h' | ('t' << 16) | ((long)'t' << 32) | ((long)'p' << 48);
- const char _HTTPS_Mask1 = 's';
- const int _WS_Mask = 'w' | ('s' << 16);
- const long _WSS_Mask = 'w' | ('s' << 16) | ((long)'s' << 32) | ((long)':' << 48);
- const long _FTP_Mask = 'f' | ('t' << 16) | ((long)'p' << 32) | ((long)':' << 48);
- const long _FILE_Mask0 = 'f' | ('i' << 16) | ((long)'l' << 32) | ((long)'e' << 48);
- const long _GOPHER_Mask0 = 'g' | ('o' << 16) | ((long)'p' << 32) | ((long)'h' << 48);
- const int _GOPHER_Mask1 = 'e' | ('r' << 16);
- const long _MAILTO_Mask0 = 'm' | ('a' << 16) | ((long)'i' << 32) | ((long)'l' << 48);
- const int _MAILTO_Mask1 = 't' | ('o' << 16);
- const long _NEWS_Mask0 = 'n' | ('e' << 16) | ((long)'w' << 32) | ((long)'s' << 48);
- const long _NNTP_Mask0 = 'n' | ('n' << 16) | ((long)'t' << 32) | ((long)'p' << 48);
- const long _UUID_Mask0 = 'u' | ('u' << 16) | ((long)'i' << 32) | ((long)'d' << 48);
-
- const long _TELNET_Mask0 = 't' | ('e' << 16) | ((long)'l' << 32) | ((long)'n' << 48);
- const int _TELNET_Mask1 = 'e' | ('t' << 16);
-
- const long _NETXXX_Mask0 = 'n' | ('e' << 16) | ((long)'t' << 32) | ((long)'.' << 48);
- const long _NETTCP_Mask1 = 't' | ('c' << 16) | ((long)'p' << 32) | ((long)':' << 48);
- const long _NETPIPE_Mask1 = 'p' | ('i' << 16) | ((long)'p' << 32) | ((long)'e' << 48);
-
- const long _LDAP_Mask0 = 'l' | ('d' << 16) | ((long)'a' << 32) | ((long)'p' << 48);
-
-
- const long _LOWERCASE_Mask = 0x0020002000200020L;
- const int _INT_LOWERCASE_Mask = 0x00200020;
-
- if (nChars == 2)
- {
- // This is the only known scheme of length 2
- if ((unchecked((int)*lptr) | _INT_LOWERCASE_Mask) == _WS_Mask)
- {
- syntax = UriParser.WsUri;
- return true;
- }
- return false;
- }
+ Debug.Assert(error == ParsingError.None);
- //Map to a known scheme if possible
- //upgrade 4 letters to ASCII lower case, keep a false case to stay false
- switch (*lptr | _LOWERCASE_Mask)
+ switch (scheme.Length)
{
- case _HTTP_Mask0:
- if (nChars == 4)
- {
- syntax = UriParser.HttpUri;
- return true;
- }
- if (nChars == 5 && ((*(char*)(lptr + 1)) | 0x20) == _HTTPS_Mask1)
- {
- syntax = UriParser.HttpsUri;
- return true;
- }
- break;
- case _WSS_Mask:
- if (nChars == 3)
- {
- syntax = UriParser.WssUri;
- return true;
- }
- break;
- case _FILE_Mask0:
- if (nChars == 4)
- {
- syntax = UriParser.FileUri;
- return true;
- }
- break;
- case _FTP_Mask:
- if (nChars == 3)
- {
- syntax = UriParser.FtpUri;
- return true;
- }
- break;
-
- case _NEWS_Mask0:
- if (nChars == 4)
- {
- syntax = UriParser.NewsUri;
- return true;
- }
- break;
-
- case _NNTP_Mask0:
- if (nChars == 4)
- {
- syntax = UriParser.NntpUri;
- return true;
- }
+ case 2:
+ if (scheme.Equals("ws", StringComparison.OrdinalIgnoreCase)) return UriParser.WsUri;
break;
- case _UUID_Mask0:
- if (nChars == 4)
- {
- syntax = UriParser.UuidUri;
- return true;
- }
+ case 3:
+ if (scheme.Equals("wss", StringComparison.OrdinalIgnoreCase)) return UriParser.WssUri;
+ if (scheme.Equals("ftp", StringComparison.OrdinalIgnoreCase)) return UriParser.FtpUri;
break;
- case _GOPHER_Mask0:
- if (nChars == 6 && (*(int*)(lptr + 1) | _INT_LOWERCASE_Mask) == _GOPHER_Mask1)
- {
- syntax = UriParser.GopherUri;
- return true;
- }
- break;
- case _MAILTO_Mask0:
- if (nChars == 6 && (*(int*)(lptr + 1) | _INT_LOWERCASE_Mask) == _MAILTO_Mask1)
- {
- syntax = UriParser.MailToUri;
- return true;
- }
+ case 4:
+ if (scheme.Equals("http", StringComparison.OrdinalIgnoreCase)) return UriParser.HttpUri;
+ if (scheme.Equals("file", StringComparison.OrdinalIgnoreCase)) return UriParser.FileUri;
+ if (scheme.Equals("uuid", StringComparison.OrdinalIgnoreCase)) return UriParser.UuidUri;
+ if (scheme.Equals("nntp", StringComparison.OrdinalIgnoreCase)) return UriParser.NntpUri;
+ if (scheme.Equals("ldap", StringComparison.OrdinalIgnoreCase)) return UriParser.LdapUri;
+ if (scheme.Equals("news", StringComparison.OrdinalIgnoreCase)) return UriParser.NewsUri;
break;
- case _TELNET_Mask0:
- if (nChars == 6 && (*(int*)(lptr + 1) | _INT_LOWERCASE_Mask) == _TELNET_Mask1)
- {
- syntax = UriParser.TelnetUri;
- return true;
- }
+ case 5:
+ if (scheme.Equals("https", StringComparison.OrdinalIgnoreCase)) return UriParser.HttpsUri;
break;
- case _NETXXX_Mask0:
- if (nChars == 8 && (*(lptr + 1) | _LOWERCASE_Mask) == _NETPIPE_Mask1)
- {
- syntax = UriParser.NetPipeUri;
- return true;
- }
- else if (nChars == 7 && (*(lptr + 1) | _LOWERCASE_Mask) == _NETTCP_Mask1)
- {
- syntax = UriParser.NetTcpUri;
- return true;
- }
+ case 6:
+ if (scheme.Equals("mailto", StringComparison.OrdinalIgnoreCase)) return UriParser.MailToUri;
+ if (scheme.Equals("gopher", StringComparison.OrdinalIgnoreCase)) return UriParser.GopherUri;
+ if (scheme.Equals("telnet", StringComparison.OrdinalIgnoreCase)) return UriParser.TelnetUri;
break;
- case _LDAP_Mask0:
- if (nChars == 4)
- {
- syntax = UriParser.LdapUri;
- return true;
- }
+ case 7:
+ if (scheme.Equals("net.tcp", StringComparison.OrdinalIgnoreCase)) return UriParser.NetTcpUri;
break;
- default: break;
- }
- return false;
- }
-
- //
- // This will check whether a scheme string follows the rules
- //
- private static unsafe ParsingError CheckSchemeSyntax(ReadOnlySpan<char> span, ref UriParser? syntax)
- {
- static char ToLowerCaseAscii(char c) => char.IsAsciiLetterUpper(c) ? (char)(c | 0x20) : c;
-
- if (span.Length == 0)
- {
- return ParsingError.BadScheme;
- }
- // The first character must be an alpha. Validate that and store it as lower-case, as
- // all of the fast-path checks need that value.
- char firstLower = span[0];
- if (char.IsAsciiLetterUpper(firstLower))
- {
- firstLower = (char)(firstLower | 0x20);
- }
- else if (!char.IsAsciiLetterLower(firstLower))
- {
- return ParsingError.BadScheme;
- }
-
- // Special-case common and known schemes to avoid allocations and dictionary lookups in these cases.
- const int wsMask = 'w' << 8 | 's';
- const int ftpMask = 'f' << 16 | 't' << 8 | 'p';
- const int wssMask = 'w' << 16 | 's' << 8 | 's';
- const int fileMask = 'f' << 24 | 'i' << 16 | 'l' << 8 | 'e';
- const int httpMask = 'h' << 24 | 't' << 16 | 't' << 8 | 'p';
- const int mailMask = 'm' << 24 | 'a' << 16 | 'i' << 8 | 'l';
- switch (span.Length)
- {
- case 2:
- if (wsMask == (firstLower << 8 | ToLowerCaseAscii(span[1])))
- {
- syntax = UriParser.WsUri;
- return ParsingError.None;
- }
- break;
- case 3:
- switch (firstLower << 16 | ToLowerCaseAscii(span[1]) << 8 | ToLowerCaseAscii(span[2]))
- {
- case ftpMask:
- syntax = UriParser.FtpUri;
- return ParsingError.None;
- case wssMask:
- syntax = UriParser.WssUri;
- return ParsingError.None;
- }
- break;
- case 4:
- switch (firstLower << 24 | ToLowerCaseAscii(span[1]) << 16 | ToLowerCaseAscii(span[2]) << 8 | ToLowerCaseAscii(span[3]))
- {
- case httpMask:
- syntax = UriParser.HttpUri;
- return ParsingError.None;
- case fileMask:
- syntax = UriParser.FileUri;
- return ParsingError.None;
- }
- break;
- case 5:
- if (httpMask == (firstLower << 24 | ToLowerCaseAscii(span[1]) << 16 | ToLowerCaseAscii(span[2]) << 8 | ToLowerCaseAscii(span[3])) &&
- ToLowerCaseAscii(span[4]) == 's')
- {
- syntax = UriParser.HttpsUri;
- return ParsingError.None;
- }
- break;
- case 6:
- if (mailMask == (firstLower << 24 | ToLowerCaseAscii(span[1]) << 16 | ToLowerCaseAscii(span[2]) << 8 | ToLowerCaseAscii(span[3])) &&
- ToLowerCaseAscii(span[4]) == 't' && ToLowerCaseAscii(span[5]) == 'o')
- {
- syntax = UriParser.MailToUri;
- return ParsingError.None;
- }
+ case 8:
+ if (scheme.Equals("net.pipe", StringComparison.OrdinalIgnoreCase)) return UriParser.NetPipeUri;
break;
}
- // The scheme is not known. Validate all of the characters in the input.
- for (int i = 1; i < span.Length; i++)
+ // scheme = alpha *(alpha | digit | '+' | '-' | '.')
+ if (scheme.Length == 0 ||
+ !char.IsAsciiLetter(scheme[0]) ||
+ scheme.ContainsAnyExcept(s_schemeChars))
{
- char c = span[i];
- if (!char.IsAsciiLetterOrDigit(c) && c != '+' && c != '-' && c != '.')
- {
- return ParsingError.BadScheme;
- }
+ error = ParsingError.BadScheme;
+ return null;
}
- if (span.Length > c_MaxUriSchemeName)
+ if (scheme.Length > c_MaxUriSchemeName)
{
- return ParsingError.SchemeLimit;
+ error = ParsingError.SchemeLimit;
+ return null;
}
// Then look up the syntax in a string-based table.
-#pragma warning disable CS8500 // takes address of managed type
- ReadOnlySpan<char> tmpSpan = span; // avoid address exposing the span and impacting the other code in the method that uses it
- string str = string.Create(tmpSpan.Length, (IntPtr)(&tmpSpan), (buffer, spanPtr) =>
- {
- int charsWritten = (*(ReadOnlySpan<char>*)spanPtr).ToLowerInvariant(buffer);
- Debug.Assert(charsWritten == buffer.Length);
- });
-#pragma warning restore CS8500
- syntax = UriParser.FindOrFetchAsUnknownV1Syntax(str);
- return ParsingError.None;
+ return UriParser.FindOrFetchAsUnknownV1Syntax(UriHelper.SpanToLowerInvariantString(scheme));
}
//