JavaScriptEncoder should allow U+002F SOLIDUS by default (dotnet/corefx#39494)

author Levi Broderick <GrabYourPitchforks@users.noreply.github.com>

Tue, 16 Jul 2019 14:35:32 +0000 (07:35 -0700)

committer GitHub <noreply@github.com>

Tue, 16 Jul 2019 14:35:32 +0000 (07:35 -0700)
author Levi Broderick <GrabYourPitchforks@users.noreply.github.com>
Tue, 16 Jul 2019 14:35:32 +0000 (07:35 -0700)
committer GitHub <noreply@github.com>
Tue, 16 Jul 2019 14:35:32 +0000 (07:35 -0700)
diff --git a/src/libraries/System.Text.Encodings.Web/src/System/Text/Encodings/Web/JavaScriptEncoder.cs b/src/libraries/System.Text.Encodings.Web/src/System/Text/Encodings/Web/JavaScriptEncoder.cs

index cfa099d..104e02d 100644 (file)
--- a/src/libraries/System.Text.Encodings.Web/src/System/Text/Encodings/Web/JavaScriptEncoder.cs
+++ b/src/libraries/System.Text.Encodings.Web/src/System/Text/Encodings/Web/JavaScriptEncoder.cs
@@ -71,10 +71,11 @@ namespace System.Text.Encodings.Web
              // so this offers extra protection.
              DefaultHtmlEncoder.ForbidHtmlCharacters(_allowedCharacters);
  
+            // '\' (U+005C REVERSE SOLIDUS) must always be escaped in JavaScript / ECMAScript / JSON.
+            // '/' (U+002F SOLIDUS) is not JavaScript / ECMAScript / JSON-sensitive so doesn't need to be escaped.
              _allowedCharacters.ForbidCharacter('\\');
-            _allowedCharacters.ForbidCharacter('/');
              
-            // Forbid GRAVE ACCENT \u0060 character.
+            // '`' (U+0060 GRAVE ACCENT) is ECMAScript-sensitive (see ECMA-262).
              _allowedCharacters.ForbidCharacter('`'); 
          }
  
@@ -111,7 +112,6 @@ namespace System.Text.Encodings.Web
          static readonly char[] s_n = new char[] { '\\', 'n' };
          static readonly char[] s_f = new char[] { '\\', 'f' };
          static readonly char[] s_r = new char[] { '\\', 'r' };
-        static readonly char[] s_forward = new char[] { '\\', '/' };
          static readonly char[] s_back = new char[] { '\\', '\\' };
  
          // Writes a scalar value as a JavaScript-escaped character (or sequence of characters).
@@ -125,8 +125,12 @@ namespace System.Text.Encodings.Web
                  throw new ArgumentNullException(nameof(buffer));
              }
              // ECMA-262 allows encoding U+000B as "\v", but ECMA-404 does not.
-            // Both ECMA-262 and ECMA-404 allow encoding U+002F SOLIDUS as "\/".
-            // (In ECMA-262 this character is a NonEscape character.)
+            // Both ECMA-262 and ECMA-404 allow encoding U+002F SOLIDUS as "\/"
+            // (in ECMA-262 this character is a NonEscape character); however, we
+            // don't encode SOLIDUS by default unless the caller has provided an
+            // explicit bitmap which does not contain it. In this case we'll assume
+            // that the caller didn't want a SOLIDUS written to the output at all,
+            // so it should be written using "\u002F" encoding.
              // HTML-specific characters (including apostrophe and quotes) will
              // be written out as numeric entities for defense-in-depth.
              // See UnicodeEncoderBase ctor comments for more info.
@@ -141,7 +145,6 @@ namespace System.Text.Encodings.Web
                  case '\n': toCopy = s_n; break;
                  case '\f': toCopy = s_f; break;
                  case '\r': toCopy = s_r; break;
-                case '/': toCopy = s_forward; break;
                  case '\\': toCopy = s_back; break;
                  default: return TryWriteEncodedScalarAsNumericEntity(unicodeScalar, buffer, bufferLength, out numberOfCharactersWritten); 
              }
diff --git a/src/libraries/System.Text.Encodings.Web/tests/JavaScriptStringEncoderTests.cs b/src/libraries/System.Text.Encodings.Web/tests/JavaScriptStringEncoderTests.cs

index fd24ee3..91e9ac0 100644 (file)
--- a/src/libraries/System.Text.Encodings.Web/tests/JavaScriptStringEncoderTests.cs
+++ b/src/libraries/System.Text.Encodings.Web/tests/JavaScriptStringEncoderTests.cs
@@ -5,6 +5,7 @@
  using System;
  using System.Globalization;
  using System.IO;
+using System.Linq;
  using System.Text.Encodings.Web;
  using System.Text.Unicode;
  using Xunit;
@@ -104,7 +105,6 @@ namespace Microsoft.Framework.WebEncoders
          [InlineData("\"", @"\u0022")]
          [InlineData("+", @"\u002B")]
          [InlineData("\\", @"\\")]
-        [InlineData("/", @"\/")]
          [InlineData("\n", @"\n")]
          [InlineData("\t", @"\t")]
          [InlineData("\r", @"\r")]
@@ -143,7 +143,6 @@ namespace Microsoft.Framework.WebEncoders
                      else if (input == "\f") { expected = @"\f"; }
                      else if (input == "\r") { expected = @"\r"; }
                      else if (input == "\\") { expected = @"\\"; }
-                    else if (input == "/") { expected = @"\/"; }
                      else if (input == "`") { expected = @"\u0060"; }
                      else
                      {
@@ -195,6 +194,35 @@ namespace Microsoft.Framework.WebEncoders
          }
  
          [Fact]
+        public void JavaScriptStringEncode_NoRangesAllowed_EmitsShortFormForCertainCodePoints()
+        {
+            // This test ensures that when we're encoding, we always emit the "\uXXXX" form of the
+            // code point except for very specific code points where we allow a shorter representation.
+
+            // Arrange
+            JavaScriptStringEncoder encoder = new JavaScriptStringEncoder(UnicodeRanges.None); // allow no codepoints
+
+            // "[U+0000][U+0001]...[U+007F]"
+            string input = new string(Enumerable.Range(0, 128).Select(i => (char)i).ToArray());
+
+            // @"\u0000\u0001..\u007F", then replace certain specific code points
+            string expected = string.Concat(Enumerable.Range(0, 128).Select(i => FormattableString.Invariant($@"\u{i:X4}")));
+
+            expected = expected.Replace(@"\u0008", @"\b"); // U+0008 BACKSPACE -> "\b"
+            expected = expected.Replace(@"\u0009", @"\t"); // U+0009 CHARACTER TABULATION -> "\t"
+            expected = expected.Replace(@"\u000A", @"\n"); // U+000A LINE FEED -> "\n"
+            expected = expected.Replace(@"\u000C", @"\f"); // U+000C FORM FEED -> "\f"
+            expected = expected.Replace(@"\u000D", @"\r"); // U+000D CARRIAGE RETURN -> "\r"
+            expected = expected.Replace(@"\u005C", @"\\"); // U+005C REVERSE SOLIDUS -> "\\"
+
+            // Act
+            string retVal = encoder.JavaScriptStringEncode(input);
+
+            // Assert
+            Assert.Equal(expected, retVal);
+        }
+
+        [Fact]
          public void JavaScriptStringEncode_BadSurrogates_ReturnsUnicodeReplacementChar()
          {
              // Arrange
diff --git a/src/libraries/System.Text.Json/src/System/Text/Json/Writer/JsonWriterHelper.Escaping.cs b/src/libraries/System.Text.Json/src/System/Text/Json/Writer/JsonWriterHelper.Escaping.cs

index abddc6c..891fcf6 100644 (file)
--- a/src/libraries/System.Text.Json/src/System/Text/Json/Writer/JsonWriterHelper.Escaping.cs
+++ b/src/libraries/System.Text.Json/src/System/Text/Json/Writer/JsonWriterHelper.Escaping.cs
@@ -15,19 +15,19 @@ namespace System.Text.Json
      {
          // Only allow ASCII characters between ' ' (0x20) and '~' (0x7E), inclusively,
          // but exclude characters that need to be escaped as hex: '"', '\'', '&', '+', '<', '>', '`'
-        // and exclude characters that need to be escaped by adding a backslash: '\n', '\r', '\t', '\\', '/', '\b', '\f'
+        // and exclude characters that need to be escaped by adding a backslash: '\n', '\r', '\t', '\\', '\b', '\f'
          //
          // non-zero = allowed, 0 = disallowed
          public const int LastAsciiCharacter = 0x7F;
          private static ReadOnlySpan<byte> AllowList => new byte[LastAsciiCharacter + 1] {
-            0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
-            0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
-            1, 1, 0, 1, 1, 1, 0, 0, 1, 1, 1, 0, 1, 1, 1, 0,
-            1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 1,
-            1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
-            1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1,
-            0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
-            1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0,
+            0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // U+0000..U+000F
+            0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // U+0010..U+001F
+            1, 1, 0, 1, 1, 1, 0, 0, 1, 1, 1, 0, 1, 1, 1, 1, // U+0020..U+002F
+            1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 1, // U+0030..U+003F
+            1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // U+0040..U+004F
+            1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, // U+0050..U+005F
+            0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // U+0060..U+006F
+            1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, // U+0070..U+007F
          };
  
          private const string HexFormatString = "X4";
@@ -184,9 +184,6 @@ namespace System.Text.Json
                  case JsonConstants.BackSlash:
                      destination[written++] = (byte)'\\';
                      break;
-                case JsonConstants.Slash:
-                    destination[written++] = (byte)'/';
-                    break;
                  case JsonConstants.BackSpace:
                      destination[written++] = (byte)'b';
                      break;
@@ -477,9 +474,6 @@ namespace System.Text.Json
                  case JsonConstants.BackSlash:
                      destination[written++] = '\\';
                      break;
-                case JsonConstants.Slash:
-                    destination[written++] = '/';
-                    break;
                  case JsonConstants.BackSpace:
                      destination[written++] = 'b';
                      break;
diff --git a/src/libraries/System.Text.Json/tests/Serialization/TestClasses.SimpleTestClass.cs b/src/libraries/System.Text.Json/tests/Serialization/TestClasses.SimpleTestClass.cs

index 965232e..39e93bd 100644 (file)
--- a/src/libraries/System.Text.Json/tests/Serialization/TestClasses.SimpleTestClass.cs
+++ b/src/libraries/System.Text.Json/tests/Serialization/TestClasses.SimpleTestClass.cs
@@ -122,7 +122,7 @@ namespace System.Text.Json.Serialization.Tests
                  @"""MyDateTime"" : ""2019-01-30T12:01:02.0000000Z""," +
                  @"""MyDateTimeOffset"" : ""2019-01-30T12:01:02.0000000+01:00""," +
                  @"""MyGuid"" : ""1B33498A-7B7D-4DDA-9C13-F6AA4AB449A6""," +
-                @"""MyUri"" : ""https:\/\/github.com\/dotnet\/corefx""," +
+                @"""MyUri"" : ""https://github.com/dotnet/corefx""," +
                  @"""MyEnum"" : 2," + // int by default
                  @"""MyInt64Enum"" : -9223372036854775808," +
                  @"""MyUInt64Enum"" : 18446744073709551615," +
@@ -156,7 +156,7 @@ namespace System.Text.Json.Serialization.Tests
                  @"""MyDateTimeArray"" : [""2019-01-30T12:01:02.0000000Z""]," +
                  @"""MyDateTimeOffsetArray"" : [""2019-01-30T12:01:02.0000000+01:00""]," +
                  @"""MyGuidArray"" : [""1B33498A-7B7D-4DDA-9C13-F6AA4AB449A6""]," +
-                @"""MyUriArray"" : [""https:\/\/github.com\/dotnet\/corefx""]," +
+                @"""MyUriArray"" : [""https://github.com/dotnet/corefx""]," +
                  @"""MyEnumArray"" : [2]," + // int by default
                  @"""MyInt16TwoDimensionArray"" : [[10, 11],[20, 21]]," +
                  @"""MyInt16TwoDimensionList"" : [[10, 11],[20, 21]]," +
diff --git a/src/libraries/System.Text.Json/tests/Serialization/TestClasses.SimpleTestClassWithObjectArrays.cs b/src/libraries/System.Text.Json/tests/Serialization/TestClasses.SimpleTestClassWithObjectArrays.cs

index ae28857..a8e7b2c 100644 (file)
--- a/src/libraries/System.Text.Json/tests/Serialization/TestClasses.SimpleTestClassWithObjectArrays.cs
+++ b/src/libraries/System.Text.Json/tests/Serialization/TestClasses.SimpleTestClassWithObjectArrays.cs
@@ -47,7 +47,7 @@ namespace System.Text.Json.Serialization.Tests
                  @"""MyDecimal"" : [3.3]," +
                  @"""MyDateTime"" : [""2019-01-30T12:01:02.0000000Z""]," +
                  @"""MyGuid"" : [""97E9F02C-337E-4615-B26C-0020F5DC28C9""]," +
-                @"""MyUri"" : [""https:\/\/github.com\/dotnet\/corefx""]," +
+                @"""MyUri"" : [""https://github.com/dotnet/corefx""]," +
                  @"""MyEnum"" : [2]" + // int by default
              @"}";
  
diff --git a/src/libraries/System.Text.Json/tests/Serialization/Value.WriteTests.cs b/src/libraries/System.Text.Json/tests/Serialization/Value.WriteTests.cs

index 9bf00ed..297e7bb 100644 (file)
--- a/src/libraries/System.Text.Json/tests/Serialization/Value.WriteTests.cs
+++ b/src/libraries/System.Text.Json/tests/Serialization/Value.WriteTests.cs
@@ -60,12 +60,12 @@ namespace System.Text.Json.Serialization.Tests
  
              {
                  Uri uri = new Uri("https://domain/path");
-                Assert.Equal(@"""https:\/\/domain\/path""", JsonSerializer.Serialize(uri));
+                Assert.Equal(@"""https://domain/path""", JsonSerializer.Serialize(uri));
              }
  
              {
                  Uri.TryCreate("~/path", UriKind.RelativeOrAbsolute, out Uri uri);
-                Assert.Equal(@"""~\/path""", JsonSerializer.Serialize(uri));
+                Assert.Equal(@"""~/path""", JsonSerializer.Serialize(uri));
              }
  
              // The next two scenarios validate that we're NOT using Uri.ToString() for serializing Uri. The serializer
@@ -74,14 +74,14 @@ namespace System.Text.Json.Serialization.Tests
              {
                  // ToString would collapse the relative segment
                  Uri uri = new Uri("http://a/b/../c");
-                Assert.Equal(@"""http:\/\/a\/b\/..\/c""", JsonSerializer.Serialize(uri));
+                Assert.Equal(@"""http://a/b/../c""", JsonSerializer.Serialize(uri));
              }
  
              {
                  // "%20" gets turned into a space by Uri.ToString()
                  // https://coding.abel.nu/2014/10/beware-of-uri-tostring/
                  Uri uri = new Uri("http://localhost?p1=Value&p2=A%20B%26p3%3DFooled!");
-                Assert.Equal(@"""http:\/\/localhost?p1=Value\u0026p2=A%20B%26p3%3DFooled!""", JsonSerializer.Serialize(uri));
+                Assert.Equal(@"""http://localhost?p1=Value\u0026p2=A%20B%26p3%3DFooled!""", JsonSerializer.Serialize(uri));
              }
          }
      }
diff --git a/src/libraries/System.Text.Json/tests/Utf8JsonWriterTests.cs b/src/libraries/System.Text.Json/tests/Utf8JsonWriterTests.cs

index be9e74a..e3f5415 100644 (file)
--- a/src/libraries/System.Text.Json/tests/Utf8JsonWriterTests.cs
+++ b/src/libraries/System.Text.Json/tests/Utf8JsonWriterTests.cs
@@ -2487,7 +2487,7 @@ namespace System.Text.Json.Tests
              jsonUtf8.Flush();
  
              var builder = new StringBuilder();
-            builder.Append("\"ZGRkZPvvvmRkZGRkZGRkABC\\/");
+            builder.Append("\"ZGRkZPvvvmRkZGRkZGRkABC/");
              for (int i = 0; i < 60; i++)
              {
                  builder.Append("ZGRk");
author	Levi Broderick <GrabYourPitchforks@users.noreply.github.com>
	Tue, 16 Jul 2019 14:35:32 +0000 (07:35 -0700)
committer	GitHub <noreply@github.com>
	Tue, 16 Jul 2019 14:35:32 +0000 (07:35 -0700)
src/libraries/System.Text.Encodings.Web/src/System/Text/Encodings/Web/JavaScriptEncoder.cs		patch \| blob \| history
src/libraries/System.Text.Encodings.Web/tests/JavaScriptStringEncoderTests.cs		patch \| blob \| history
src/libraries/System.Text.Json/src/System/Text/Json/Writer/JsonWriterHelper.Escaping.cs		patch \| blob \| history
src/libraries/System.Text.Json/tests/Serialization/TestClasses.SimpleTestClass.cs		patch \| blob \| history
src/libraries/System.Text.Json/tests/Serialization/TestClasses.SimpleTestClassWithObjectArrays.cs		patch \| blob \| history
src/libraries/System.Text.Json/tests/Serialization/Value.WriteTests.cs		patch \| blob \| history
src/libraries/System.Text.Json/tests/Utf8JsonWriterTests.cs		patch \| blob \| history