--- /dev/null
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+// See the LICENSE file in the project root for more information.
+
+using System.Diagnostics;
+using System.Runtime.CompilerServices;
+using System.Text.Internal;
+using System.Text.Unicode;
+
+namespace System.Text.Encodings.Web
+{
+ internal sealed class UnsafeRelaxedJavaScriptEncoder : JavaScriptEncoder
+ {
+ // Allow-list consulted when deciding whether a character may be written without escaping.
+ private readonly AllowedCharactersBitmap _allowedCharacters;
+
+ // Shared instance. Its settings start from every Unicode range; the constructor then
+ // removes the characters that must always be escaped.
+ internal static readonly UnsafeRelaxedJavaScriptEncoder s_singleton = new UnsafeRelaxedJavaScriptEncoder(new TextEncoderSettings(UnicodeRanges.All));
+
+ // Builds the allow-list from the supplied settings.
+ // Throws ArgumentNullException when filter is null.
+ private UnsafeRelaxedJavaScriptEncoder(TextEncoderSettings filter)
+ {
+     if (filter is null)
+     {
+         throw new ArgumentNullException(nameof(filter));
+     }
+
+     AllowedCharactersBitmap allowed = filter.GetAllowedCharacters();
+
+     // Drop code points which aren't mapped to characters or which are otherwise always disallowed
+     // (includes categories Cc, Cs, Co, Cn, Zs [except U+0020 SPACE], Zl, Zp).
+     allowed.ForbidUndefinedCharacters();
+
+     // U+0022 QUOTATION MARK and U+005C REVERSE SOLIDUS must always be escaped in
+     // JavaScript / ECMAScript / JSON. U+002F SOLIDUS is not sensitive in any of them,
+     // so it stays on the allow-list.
+     allowed.ForbidCharacter('"');
+     allowed.ForbidCharacter('\\');
+
+     _allowedCharacters = allowed;
+ }
+
+ // Reports whether this encoder writes the given scalar value in escaped form.
+ // Supplementary (non-BMP) code points are always reported as encoded; any other
+ // value is encoded unless the allow-list permits it.
+ [MethodImpl(MethodImplOptions.AggressiveInlining)]
+ public override bool WillEncode(int unicodeScalar)
+ {
+     return UnicodeHelpers.IsSupplementaryCodePoint(unicodeScalar)
+         || !_allowedCharacters.IsUnicodeScalarAllowed(unicodeScalar);
+ }
+
+ // Hands the search for the first character needing encoding over to the allow-list
+ // bitmap and returns its result unchanged.
+ // Throws ArgumentNullException when text is null.
+ [MethodImpl(MethodImplOptions.AggressiveInlining)]
+ public unsafe override int FindFirstCharacterToEncode(char* text, int textLength)
+ {
+     return text != null
+         ? _allowedCharacters.FindFirstCharacterToEncode(text, textLength)
+         : throw new ArgumentNullException(nameof(text));
+ }
+
+ // Upper bound reported to callers for the number of output chars per input char.
+ // A single BMP char is escaped as the 6-char form "\uFFFF". A supplementary code point
+ // is written as two such escapes, one per surrogate, giving the 12-char form
+ // "\uFFFF\uFFFF"; that longest encoded form is the value returned here.
+ public override int MaxOutputCharactersPerInputCharacter
+ {
+     get { return 12; }
+ }
+
+ // Two-character escape sequences, each a reverse solidus followed by one character.
+ private static readonly char[] s_b = new[] { '\\', 'b' };
+ private static readonly char[] s_t = new[] { '\\', 't' };
+ private static readonly char[] s_n = new[] { '\\', 'n' };
+ private static readonly char[] s_f = new[] { '\\', 'f' };
+ private static readonly char[] s_r = new[] { '\\', 'r' };
+ private static readonly char[] s_back = new[] { '\\', '\\' };
+ private static readonly char[] s_doubleQuote = new[] { '\\', '"' };
+
+ // Writes a scalar value into the buffer, either verbatim (when it needs no encoding)
+ // or as a JavaScript-escaped character / sequence of characters.
+ // See ECMA-262, Sec. 7.8.4, and ECMA-404, Sec. 9
+ // http://www.ecma-international.org/ecma-262/5.1/#sec-7.8.4
+ // http://www.ecma-international.org/publications/files/ECMA-ST/ECMA-404.pdf
+ //
+ // Throws ArgumentNullException when buffer is null. The return value and
+ // numberOfCharactersWritten are whatever the helper that performs the write reports.
+ public unsafe override bool TryEncodeUnicodeScalar(int unicodeScalar, char* buffer, int bufferLength, out int numberOfCharactersWritten)
+ {
+     if (buffer == null)
+     {
+         throw new ArgumentNullException(nameof(buffer));
+     }
+
+     // Values that need no encoding are copied through unchanged.
+     if (!WillEncode(unicodeScalar))
+     {
+         return TryWriteScalarAsChar(unicodeScalar, buffer, bufferLength, out numberOfCharactersWritten);
+     }
+
+     // Short two-character escapes are used where one is listed below. Notes on what is absent:
+     //  - ECMA-262 allows encoding U+000B as "\v", but ECMA-404 does not, so it has no short form.
+     //  - Both specs allow encoding U+002F SOLIDUS as "\/", but it has no short form here either.
+     //    It only reaches this point when the allow-list excludes it, and it is then written
+     //    as "\u002F" like every other value without a short form.
+     switch (unicodeScalar)
+     {
+         case '"':
+             return TryCopyCharacters(s_doubleQuote, buffer, bufferLength, out numberOfCharactersWritten);
+         case '\b':
+             return TryCopyCharacters(s_b, buffer, bufferLength, out numberOfCharactersWritten);
+         case '\t':
+             return TryCopyCharacters(s_t, buffer, bufferLength, out numberOfCharactersWritten);
+         case '\n':
+             return TryCopyCharacters(s_n, buffer, bufferLength, out numberOfCharactersWritten);
+         case '\f':
+             return TryCopyCharacters(s_f, buffer, bufferLength, out numberOfCharactersWritten);
+         case '\r':
+             return TryCopyCharacters(s_r, buffer, bufferLength, out numberOfCharactersWritten);
+         case '\\':
+             return TryCopyCharacters(s_back, buffer, bufferLength, out numberOfCharactersWritten);
+         default:
+             return TryWriteEncodedScalarAsNumericEntity(unicodeScalar, buffer, bufferLength, out numberOfCharactersWritten);
+     }
+ }
+
+ // Writes a scalar value using "\uXXXX" escapes: one escape for a BMP value, or two
+ // (leading surrogate, then trailing surrogate) for a supplementary code point.
+ // Returns false and reports zero characters written when either escape does not fit.
+ private static unsafe bool TryWriteEncodedScalarAsNumericEntity(int unicodeScalar, char* buffer, int length, out int numberOfCharactersWritten)
+ {
+     Debug.Assert(buffer != null && length >= 0);
+
+     if (!UnicodeHelpers.IsSupplementaryCodePoint(unicodeScalar))
+     {
+         // BMP value: a single escape covers it.
+         return TryWriteEncodedSingleCharacter(unicodeScalar, buffer, length, out numberOfCharactersWritten);
+     }
+
+     // Split the value back into its UTF-16 surrogate pair and escape each half.
+     UnicodeHelpers.GetUtf16SurrogatePairFromAstralScalarValue(unicodeScalar, out char highSurrogate, out char lowSurrogate);
+
+     if (!TryWriteEncodedSingleCharacter(highSurrogate, buffer, length, out int highWritten))
+     {
+         numberOfCharactersWritten = 0;
+         return false;
+     }
+
+     if (!TryWriteEncodedSingleCharacter(lowSurrogate, buffer + highWritten, length - highWritten, out int lowWritten))
+     {
+         numberOfCharactersWritten = 0;
+         return false;
+     }
+
+     numberOfCharactersWritten = highWritten + lowWritten;
+     return true;
+ }
+
+ // Writes a BMP scalar value as the six-character JavaScript escape "\uXXXX".
+ // Returns false and reports zero characters written when fewer than six chars of space remain.
+ private static unsafe bool TryWriteEncodedSingleCharacter(int unicodeScalar, char* buffer, int length, out int numberOfCharactersWritten)
+ {
+     const int EscapedLength = 6; // '\\', 'u' and four digits
+
+     Debug.Assert(buffer != null && length >= 0);
+     Debug.Assert(!UnicodeHelpers.IsSupplementaryCodePoint(unicodeScalar), "The incoming value should've been in the BMP.");
+
+     if (length < EscapedLength)
+     {
+         numberOfCharactersWritten = 0;
+         return false;
+     }
+
+     // Prefix, then the four nibbles from most to least significant.
+     buffer[0] = '\\';
+     buffer[1] = 'u';
+     buffer[2] = HexUtil.Int32LsbToHexDigit(unicodeScalar >> 12);
+     buffer[3] = HexUtil.Int32LsbToHexDigit((unicodeScalar >> 8) & 0xF);
+     buffer[4] = HexUtil.Int32LsbToHexDigit((unicodeScalar >> 4) & 0xF);
+     buffer[5] = HexUtil.Int32LsbToHexDigit(unicodeScalar & 0xF);
+
+     numberOfCharactersWritten = EscapedLength;
+     return true;
+ }
+ }
+}
--- /dev/null
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+// See the LICENSE file in the project root for more information.
+
+using System;
+using System.Globalization;
+using System.IO;
+using System.Text.Encodings.Web;
+using System.Text.Unicode;
+using Xunit;
+
+namespace Microsoft.Framework.WebEncoders
+{
+ public partial class JavaScriptStringEncoderTests
+ {
+ // A supplementary code point must come out as two escaped surrogates from both the
+ // string-returning overload and the TextWriter overload.
+ [Fact]
+ public void TestSurrogate_Relaxed()
+ {
+     const string input = "\U0001f4a9";
+     const string expected = "\\uD83D\\uDCA9";
+
+     // String overload
+     Assert.Equal(expected, JavaScriptEncoder.UnsafeRelaxedJsonEscaping.Encode(input));
+
+     // TextWriter overload
+     using var writer = new StringWriter();
+     JavaScriptEncoder.UnsafeRelaxedJsonEscaping.Encode(writer, input);
+
+     Assert.Equal(expected, writer.GetStringBuilder().ToString());
+ }
+
+ // Compares the relaxed encoder with an encoder built over UnicodeRanges.All for every
+ // non-surrogate BMP char: a fixed set of characters must encode differently, all
+ // others must encode identically.
+ [Fact]
+ public void Relaxed_EquivalentToAll_WithExceptions()
+ {
+     // Arrange
+     JavaScriptStringEncoder controlEncoder = new JavaScriptStringEncoder(UnicodeRanges.All);
+     JavaScriptStringEncoder testEncoder = JavaScriptStringEncoder.UnsafeRelaxedJsonEscaping;
+
+     // Act & assert
+     for (int codePoint = 0; codePoint <= char.MaxValue; codePoint++)
+     {
+         string input = new string((char)codePoint, 1);
+
+         switch (codePoint)
+         {
+             // Characters on which the two encoders are expected to disagree.
+             case '"':
+             case '&':
+             case '<':
+             case '>':
+             case '+':
+             case '\'':
+             case '`':
+                 Assert.NotEqual(controlEncoder.JavaScriptStringEncode(input), testEncoder.JavaScriptStringEncode(input));
+                 break;
+
+             default:
+                 if (!IsSurrogateCodePoint(codePoint))
+                 {
+                     Assert.Equal(controlEncoder.JavaScriptStringEncode(input), testEncoder.JavaScriptStringEncode(input));
+                 }
+                 break;
+         }
+     }
+ }
+
+ // Runs the backspace and form-feed cases of the "Simple" theory as direct calls.
+ [Fact]
+ public void JavaScriptStringEncode_Relaxed_StillEncodesForbiddenChars_Simple_Escaping()
+ {
+     // These two cases would normally just be InlineData rows on the Theory below.
+     // However, the xUnit logger fails to escape the inputs when it logs the test results,
+     // so the suite fails even though every test passes.
+     // TODO: Move these back into InlineData once the logger is fixed in xUnit;
+     // until then, calling the theory method directly is the workaround.
+     JavaScriptStringEncode_Relaxed_StillEncodesForbiddenChars_Simple(input: "\b", expected: @"\b");
+     JavaScriptStringEncode_Relaxed_StillEncodesForbiddenChars_Simple(input: "\f", expected: @"\f");
+ }
+
+ // Each input must be written using its two-character escape sequence.
+ [Theory]
+ [InlineData("\"", "\\\"")]
+ [InlineData("\\", @"\\")]
+ [InlineData("\n", @"\n")]
+ [InlineData("\t", @"\t")]
+ [InlineData("\r", @"\r")]
+ public void JavaScriptStringEncode_Relaxed_StillEncodesForbiddenChars_Simple(string input, string expected)
+ {
+     // Arrange
+     JavaScriptStringEncoder relaxed = JavaScriptStringEncoder.UnsafeRelaxedJsonEscaping;
+
+     // Act & assert
+     Assert.Equal(expected, relaxed.JavaScriptStringEncode(input));
+ }
+
+ // Exhaustively checks the relaxed encoder's output for every BMP char and every
+ // supplementary code point against an independently computed expectation.
+ [Fact]
+ public void JavaScriptStringEncode_Relaxed_StillEncodesForbiddenChars_Extended()
+ {
+     // Arrange
+     JavaScriptStringEncoder encoder = JavaScriptStringEncoder.UnsafeRelaxedJsonEscaping;
+
+     // Act & assert - BMP chars
+     for (int i = 0; i <= 0xFFFF; i++)
+     {
+         string input = new string((char)i, 1);
+         string expected;
+
+         if (IsSurrogateCodePoint(i))
+         {
+             // A lone surrogate is replaced by the Unicode replacement char.
+             expected = "\uFFFD";
+         }
+         else
+         {
+             switch (i)
+             {
+                 // Characters with a dedicated two-character escape.
+                 case '\b':
+                     expected = @"\b";
+                     break;
+                 case '\t':
+                     expected = @"\t";
+                     break;
+                 case '\n':
+                     expected = @"\n";
+                     break;
+                 case '\f':
+                     expected = @"\f";
+                     break;
+                 case '\r':
+                     expected = @"\r";
+                     break;
+                 case '\\':
+                     expected = @"\\";
+                     break;
+                 case '"':
+                     expected = "\\\"";
+                     break;
+
+                 default:
+                     // Control chars and undefined (or otherwise disallowed) chars are
+                     // written as "\uXXXX"; everything else passes through untouched.
+                     bool isControl = i <= 0x001F || (0x007F <= i && i <= 0x9F);
+                     bool mustEncode = isControl || !UnicodeHelpers.IsCharacterDefined((char)i);
+
+                     expected = mustEncode
+                         ? string.Format(CultureInfo.InvariantCulture, @"\u{0:X4}", i)
+                         : input;
+                     break;
+             }
+         }
+
+         Assert.Equal(expected, encoder.JavaScriptStringEncode(input));
+     }
+
+     // Act & assert - astral chars: always written as two escaped surrogates
+     for (int scalar = 0x10000; scalar <= 0x10FFFF; scalar++)
+     {
+         string input = char.ConvertFromUtf32(scalar);
+         string expected = string.Format(CultureInfo.InvariantCulture, @"\u{0:X4}\u{1:X4}", (uint)input[0], (uint)input[1]);
+
+         Assert.Equal(expected, encoder.JavaScriptStringEncode(input));
+     }
+ }
+
+ // Unpaired surrogates must each become U+FFFD while a well-formed pair is escaped.
+ [Fact]
+ public void JavaScriptStringEncode_BadSurrogates_ReturnsUnicodeReplacementChar_Relaxed()
+ {
+     // Arrange
+     JavaScriptStringEncoder relaxed = JavaScriptStringEncoder.UnsafeRelaxedJsonEscaping;
+
+     // Layout of the input:
+     // "a<unpaired leading>b<unpaired trailing>c<trailing before leading>d<unpaired trailing><valid>e<high at end of string>"
+     const string input = "a\uD800b\uDFFFc\uDFFF\uD800d\uDFFF\uD800\uDFFFe\uD800";
+
+     // The valid 'D800' 'DFFF' pair is kept (in escaped form); every stray surrogate is replaced.
+     const string expected = "a\uFFFDb\uFFFDc\uFFFD\uFFFDd\uFFFD\\uD800\\uDFFFe\uFFFD";
+
+     // Act & assert
+     Assert.Equal(expected, relaxed.JavaScriptStringEncode(input));
+ }
+
+ // Encoding the empty string must yield the empty string.
+ [Fact]
+ public void JavaScriptStringEncode_EmptyStringInput_ReturnsEmptyString_Relaxed()
+ {
+     // Arrange
+     JavaScriptStringEncoder relaxed = JavaScriptStringEncoder.UnsafeRelaxedJsonEscaping;
+
+     // Act
+     string result = relaxed.JavaScriptStringEncode("");
+
+     // Assert
+     Assert.Equal("", result);
+ }
+
+ // When nothing needs encoding, the very same string instance must be returned.
+ [Fact]
+ public void JavaScriptStringEncode_InputDoesNotRequireEncoding_ReturnsOriginalStringInstance_Relaxed()
+ {
+     // Arrange
+     JavaScriptStringEncoder relaxed = JavaScriptStringEncoder.UnsafeRelaxedJsonEscaping;
+     string input = "Hello, there!";
+
+     // Act
+     string result = relaxed.JavaScriptStringEncode(input);
+
+     // Assert
+     Assert.Same(input, result);
+ }
+
+ // A null input must be rejected with ArgumentNullException.
+ [Fact]
+ public void JavaScriptStringEncode_NullInput_Throws_Relaxed()
+ {
+     // Arrange
+     JavaScriptStringEncoder relaxed = JavaScriptStringEncoder.UnsafeRelaxedJsonEscaping;
+     Action encodeNull = () => { relaxed.JavaScriptStringEncode(null); };
+
+     // Act & assert
+     Assert.Throws<ArgumentNullException>(encodeNull);
+ }
+
+ // A reverse solidus at the start of the input is escaped; the rest is untouched.
+ [Fact]
+ public void JavaScriptStringEncode_WithCharsRequiringEncodingAtBeginning_Relaxed()
+ {
+     const string input = "\\Hello, there!";
+     const string expected = @"\\Hello, there!";
+
+     string result = JavaScriptStringEncoder.UnsafeRelaxedJsonEscaping.JavaScriptStringEncode(input);
+
+     Assert.Equal(expected, result);
+ }
+
+ // A reverse solidus at the end of the input is escaped; the rest is untouched.
+ [Fact]
+ public void JavaScriptStringEncode_WithCharsRequiringEncodingAtEnd_Relaxed()
+ {
+     const string input = "Hello, there!\\";
+     const string expected = @"Hello, there!\\";
+
+     string result = JavaScriptStringEncoder.UnsafeRelaxedJsonEscaping.JavaScriptStringEncode(input);
+
+     Assert.Equal(expected, result);
+ }
+
+ // A reverse solidus in the middle of the input is escaped; the rest is untouched.
+ [Fact]
+ public void JavaScriptStringEncode_WithCharsRequiringEncodingInMiddle_Relaxed()
+ {
+     const string input = "Hello, \\there!";
+     const string expected = @"Hello, \\there!";
+
+     string result = JavaScriptStringEncoder.UnsafeRelaxedJsonEscaping.JavaScriptStringEncode(input);
+
+     Assert.Equal(expected, result);
+ }
+
+ // Several characters needing escapes, separated by plain text, are each escaped in place.
+ [Fact]
+ public void JavaScriptStringEncode_WithCharsRequiringEncodingInterspersed_Relaxed()
+ {
+     const string input = "Hello, \\there\"!";
+     const string expected = "Hello, \\\\there\\\"!";
+
+     string result = JavaScriptStringEncoder.UnsafeRelaxedJsonEscaping.JavaScriptStringEncode(input);
+
+     Assert.Equal(expected, result);
+ }
+
+ // Encodes a slice (offset 3, length 5) of a char array into a TextWriter.
+ [Fact]
+ public void JavaScriptStringEncode_CharArray_Relaxed()
+ {
+     // Arrange
+     JavaScriptStringEncoder relaxed = JavaScriptStringEncoder.UnsafeRelaxedJsonEscaping;
+     char[] source = "Hello\\world!".ToCharArray();
+
+     using var sink = new StringWriter();
+
+     // Act
+     relaxed.JavaScriptStringEncode(source, 3, 5, sink);
+
+     // Assert
+     Assert.Equal(@"lo\\wo", sink.ToString());
+ }
+
+ // Encodes a slice (offset 3, length 5) of a string into a TextWriter.
+ [Fact]
+ public void JavaScriptStringEncode_StringSubstring_Relaxed()
+ {
+     // Arrange
+     JavaScriptStringEncoder relaxed = JavaScriptStringEncoder.UnsafeRelaxedJsonEscaping;
+     const string source = "Hello\\world!";
+
+     using var sink = new StringWriter();
+
+     // Act
+     relaxed.JavaScriptStringEncode(source, 3, 5, sink);
+
+     // Assert
+     Assert.Equal(@"lo\\wo", sink.ToString());
+ }
+
+ // The double quote is escaped; the apostrophe is left as-is.
+ [Theory]
+ [InlineData("\"", "\\\"")]
+ [InlineData("'", "'")]
+ public void JavaScriptStringEncode_Quotes_Relaxed(string input, string expected)
+ {
+     // Arrange
+     JavaScriptStringEncoder relaxed = JavaScriptStringEncoder.UnsafeRelaxedJsonEscaping;
+
+     // Act & assert
+     Assert.Equal(expected, relaxed.JavaScriptStringEncode(input));
+ }
+
+ // '+', '<', '>' and '&' are expected in the output unchanged.
+ [Theory]
+ [InlineData("hello+world", "hello+world")]
+ [InlineData("hello<world>", "hello<world>")]
+ [InlineData("hello&world", "hello&world")]
+ public void JavaScriptStringEncode_DoesOutputHtmlSensitiveCharacters_Relaxed(string input, string expected)
+ {
+     // Arrange
+     JavaScriptStringEncoder relaxed = JavaScriptStringEncoder.UnsafeRelaxedJsonEscaping;
+
+     // Act & assert
+     Assert.Equal(expected, relaxed.JavaScriptStringEncode(input));
+ }
+
+ // Checks that non-ASCII BMP characters in the NonSpacingMark category are left
+ // unencoded by the relaxed encoder.
+ [Fact]
+ public void JavaScriptStringEncode_AboveAscii_Relaxed()
+ {
+     // Arrange
+     JavaScriptStringEncoder encoder = JavaScriptStringEncoder.UnsafeRelaxedJsonEscaping;
+
+     // Act & assert
+     // ASCII ends at U+007F, so the scan starts at U+0080 (128). The previous lower bound
+     // was written as 0x128, i.e. 296, which silently skipped U+0080..U+0127.
+     for (int i = 0x80; i <= 0xFFFF; i++)
+     {
+         if (IsSurrogateCodePoint(i))
+         {
+             continue; // surrogates don't matter here
+         }
+
+         // Only nonspacing marks are asserted on. Everything else is skipped, which also
+         // leaves out undefined characters like U+0378 and separators like U+2028.
+         UnicodeCategory category = char.GetUnicodeCategory((char)i);
+         if (category != UnicodeCategory.NonSpacingMark)
+         {
+             continue;
+         }
+
+         string input = char.ConvertFromUtf32(i);
+         string javaScriptStringEncoded = encoder.JavaScriptStringEncode(input);
+
+         // The code point is passed as the failure message so a failing value is easy to find.
+         Assert.True(input == javaScriptStringEncoded, i.ToString(CultureInfo.InvariantCulture));
+     }
+ }
+
+ // Checks that each control character in U+0000..U+001F without a two-character escape
+ // is written as "\u00XX".
+ [Fact]
+ public void JavaScriptStringEncode_ControlCharacters_Relaxed()
+ {
+     // Arrange
+     JavaScriptStringEncoder encoder = JavaScriptStringEncoder.UnsafeRelaxedJsonEscaping;
+
+     // Act & assert
+     for (int i = 0; i <= 0x1F; i++)
+     {
+         // Skip characters that are escaped using '\\' since they are covered in other tests.
+         if (i == '\b' || i == '\f' || i == '\n' || i == '\r' || i == '\t')
+         {
+             continue;
+         }
+
+         string javaScriptStringEncoded = encoder.JavaScriptStringEncode(char.ConvertFromUtf32(i));
+
+         // Format with the invariant culture, as the other expectations in this file do,
+         // so the expected text never depends on the machine's current culture.
+         string expected = string.Format(CultureInfo.InvariantCulture, "\\u00{0:X2}", i);
+
+         Assert.Equal(expected, javaScriptStringEncoded);
+     }
+ }
+ }
+}