// so this offers extra protection.
DefaultHtmlEncoder.ForbidHtmlCharacters(_allowedCharacters);
+ // '\' (U+005C REVERSE SOLIDUS) must always be escaped in Javascript / ECMAScript / JSON.
+ // '/' (U+002F SOLIDUS) is not Javascript / ECMAScript / JSON-sensitive so doesn't need to be escaped.
_allowedCharacters.ForbidCharacter('\\');
- _allowedCharacters.ForbidCharacter('/');
- // Forbid GRAVE ACCENT \u0060 character.
+ // '`' (U+0060 GRAVE ACCENT) is ECMAScript-sensitive (see ECMA-262).
_allowedCharacters.ForbidCharacter('`');
}
static readonly char[] s_n = new char[] { '\\', 'n' };
static readonly char[] s_f = new char[] { '\\', 'f' };
static readonly char[] s_r = new char[] { '\\', 'r' };
- static readonly char[] s_forward = new char[] { '\\', '/' };
static readonly char[] s_back = new char[] { '\\', '\\' };
// Writes a scalar value as a JavaScript-escaped character (or sequence of characters).
throw new ArgumentNullException(nameof(buffer));
}
// ECMA-262 allows encoding U+000B as "\v", but ECMA-404 does not.
- // Both ECMA-262 and ECMA-404 allow encoding U+002F SOLIDUS as "\/".
- // (In ECMA-262 this character is a NonEscape character.)
+ // Both ECMA-262 and ECMA-404 allow encoding U+002F SOLIDUS as "\/"
+ // (in ECMA-262 this character is a NonEscape character); however, we
+ // don't encode SOLIDUS by default unless the caller has provided an
+ // explicit bitmap which does not contain it. In this case we'll assume
+ // that the caller didn't want a SOLIDUS written to the output at all,
+ // so it should be written using "\u002F" encoding.
// HTML-specific characters (including apostrophe and quotes) will
// be written out as numeric entities for defense-in-depth.
// See UnicodeEncoderBase ctor comments for more info.
case '\n': toCopy = s_n; break;
case '\f': toCopy = s_f; break;
case '\r': toCopy = s_r; break;
- case '/': toCopy = s_forward; break;
case '\\': toCopy = s_back; break;
default: return TryWriteEncodedScalarAsNumericEntity(unicodeScalar, buffer, bufferLength, out numberOfCharactersWritten);
}
using System;
using System.Globalization;
using System.IO;
+using System.Linq;
using System.Text.Encodings.Web;
using System.Text.Unicode;
using Xunit;
[InlineData("\"", @"\u0022")]
[InlineData("+", @"\u002B")]
[InlineData("\\", @"\\")]
- [InlineData("/", @"\/")]
[InlineData("\n", @"\n")]
[InlineData("\t", @"\t")]
[InlineData("\r", @"\r")]
else if (input == "\f") { expected = @"\f"; }
else if (input == "\r") { expected = @"\r"; }
else if (input == "\\") { expected = @"\\"; }
- else if (input == "/") { expected = @"\/"; }
else if (input == "`") { expected = @"\u0060"; }
else
{
}
[Fact]
+ public void JavaScriptStringEncode_NoRangesAllowed_EmitsShortFormForCertainCodePoints()
+ {
+ // This test ensures that when we're encoding, we always emit the "\uXXXX" form of the
+ // code point except for very specific code points where we allow a shorter representation.
+
+ // Arrange
+ JavaScriptStringEncoder encoder = new JavaScriptStringEncoder(UnicodeRanges.None); // allow no codepoints
+
+ // "[U+0000][U+0001]...[U+007F]"
+ string input = new string(Enumerable.Range(0, 128).Select(i => (char)i).ToArray());
+
+ // @"\u0000\u0001..\u007F", then replace certain specific code points
+ string expected = string.Concat(Enumerable.Range(0, 128).Select(i => FormattableString.Invariant($@"\u{i:X4}")));
+
+ expected = expected.Replace(@"\u0008", @"\b"); // U+0008 BACKSPACE -> "\b"
+ expected = expected.Replace(@"\u0009", @"\t"); // U+0009 CHARACTER TABULATION -> "\t"
+ expected = expected.Replace(@"\u000A", @"\n"); // U+000A LINE FEED -> "\n"
+ expected = expected.Replace(@"\u000C", @"\f"); // U+000C FORM FEED -> "\f"
+ expected = expected.Replace(@"\u000D", @"\r"); // U+000D CARRIAGE RETURN -> "\r"
+ expected = expected.Replace(@"\u005C", @"\\"); // U+005C REVERSE SOLIDUS -> "\\"
+
+ // Act
+ string retVal = encoder.JavaScriptStringEncode(input);
+
+ // Assert
+ Assert.Equal(expected, retVal);
+ }
+
+ [Fact]
public void JavaScriptStringEncode_BadSurrogates_ReturnsUnicodeReplacementChar()
{
// Arrange
{
// Only allow ASCII characters between ' ' (0x20) and '~' (0x7E), inclusively,
// but exclude characters that need to be escaped as hex: '"', '\'', '&', '+', '<', '>', '`'
- // and exclude characters that need to be escaped by adding a backslash: '\n', '\r', '\t', '\\', '/', '\b', '\f'
+ // and exclude characters that need to be escaped by adding a backslash: '\n', '\r', '\t', '\\', '\b', '\f'
//
// non-zero = allowed, 0 = disallowed
public const int LastAsciiCharacter = 0x7F;
private static ReadOnlySpan<byte> AllowList => new byte[LastAsciiCharacter + 1] {
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
- 1, 1, 0, 1, 1, 1, 0, 0, 1, 1, 1, 0, 1, 1, 1, 0,
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 1,
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1,
- 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // U+0000..U+000F
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // U+0010..U+001F
+ 1, 1, 0, 1, 1, 1, 0, 0, 1, 1, 1, 0, 1, 1, 1, 1, // U+0020..U+002F
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 1, // U+0030..U+003F
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // U+0040..U+004F
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, // U+0050..U+005F
+ 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // U+0060..U+006F
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, // U+0070..U+007F
};
private const string HexFormatString = "X4";
case JsonConstants.BackSlash:
destination[written++] = (byte)'\\';
break;
- case JsonConstants.Slash:
- destination[written++] = (byte)'/';
- break;
case JsonConstants.BackSpace:
destination[written++] = (byte)'b';
break;
case JsonConstants.BackSlash:
destination[written++] = '\\';
break;
- case JsonConstants.Slash:
- destination[written++] = '/';
- break;
case JsonConstants.BackSpace:
destination[written++] = 'b';
break;