From: Stephen Toub Date: Mon, 24 Jul 2023 16:23:56 +0000 (-0400) Subject: Use JIT intrinsic for UTF8 encoding in Utf8.TryWrite's AppendLiteral (#89376) X-Git-Tag: accepted/tizen/unified/riscv/20231226.055536~819 X-Git-Url: http://review.tizen.org/git/?a=commitdiff_plain;h=deeeb55ebe596f50cbc42263b9e68f4a9697db46;p=platform%2Fupstream%2Fdotnet%2Fruntime.git Use JIT intrinsic for UTF8 encoding in Utf8.TryWrite's AppendLiteral (#89376) * Use JIT intrinsic for UTF8 encoding in Utf8.TryWrite's AppendLiteral * Remove IsKnownConstant --- diff --git a/src/libraries/System.Private.CoreLib/src/System/Text/UTF8Encoding.Sealed.cs b/src/libraries/System.Private.CoreLib/src/System/Text/UTF8Encoding.Sealed.cs index 31f61b1..a0358cc 100644 --- a/src/libraries/System.Private.CoreLib/src/System/Text/UTF8Encoding.Sealed.cs +++ b/src/libraries/System.Private.CoreLib/src/System/Text/UTF8Encoding.Sealed.cs @@ -155,6 +155,7 @@ namespace System.Text return base.TryGetBytes(chars, bytes, out bytesWritten); } + /// Same as Encoding.UTF8.TryGetBytes, except with refs, returning the number of bytes written (or -1 if the operation fails), and optimized for a constant input. [MethodImpl(MethodImplOptions.NoInlining)] [Intrinsic] // Can be unrolled by JIT internal static unsafe int ReadUtf8(ref char input, int inputLength, ref byte output, int outputLength) diff --git a/src/libraries/System.Private.CoreLib/src/System/Text/Unicode/Utf8.cs b/src/libraries/System.Private.CoreLib/src/System/Text/Unicode/Utf8.cs index 53a3916..2301844 100644 --- a/src/libraries/System.Private.CoreLib/src/System/Text/Unicode/Utf8.cs +++ b/src/libraries/System.Private.CoreLib/src/System/Text/Unicode/Utf8.cs @@ -410,15 +410,28 @@ namespace System.Text.Unicode /// Writes the specified string to the handler. /// The string to write. /// true if the value could be formatted to the span; otherwise, false. 
- public bool AppendLiteral(string value) => AppendFormatted(value.AsSpan()); - - // TODO https://github.com/dotnet/csharplang/issues/7072: - // Add this if/when C# supports u8 literals with string interpolation. - // If that happens prior to this type being released, the above AppendLiteral(string) - // should also be removed. If that doesn't happen, we should look into ways to optimize - // the above AppendLiteral, such as by making the underlying encoding operation a JIT - // intrinsic that can emit substitute a "abc"u8 equivalent for an "abc" string literal. - //public bool AppendLiteral(scoped ReadOnlySpan<byte> value) => AppendFormatted(value); + [MethodImpl(MethodImplOptions.AggressiveInlining)] // we want 'value' exposed to the JIT as a constant + public bool AppendLiteral(string value) + { + if (value is not null) + { + Span<byte> dest = _destination.Slice(_pos); + + // The 99.999% case for AppendLiteral is to be called with a const string. + // ReadUtf8 is a JIT intrinsic that can do the UTF8 encoding at JIT time. + int bytesWritten = UTF8Encoding.UTF8EncodingSealed.ReadUtf8( + ref value.GetRawStringData(), value.Length, + ref MemoryMarshal.GetReference(dest), dest.Length); + if (bytesWritten < 0) + { + return Fail(); + } + + _pos += bytesWritten; + } + + return true; + } /// Writes the specified value to the handler. /// The value to write.