PR feedback: Clarify charIsNonAscii vector usage

author Levi Broderick <levib@microsoft.com>

Thu, 11 Apr 2019 18:37:00 +0000 (11:37 -0700)

committer Levi Broderick <levib@microsoft.com>

Thu, 11 Apr 2019 18:37:00 +0000 (11:37 -0700)
author Levi Broderick <levib@microsoft.com>
Thu, 11 Apr 2019 18:37:00 +0000 (11:37 -0700)
committer Levi Broderick <levib@microsoft.com>
Thu, 11 Apr 2019 18:37:00 +0000 (11:37 -0700)
diff --git a/src/System.Private.CoreLib/shared/System/Text/Unicode/Utf16Utility.Validation.cs b/src/System.Private.CoreLib/shared/System/Text/Unicode/Utf16Utility.Validation.cs

index 5f044b1..46c068d 100644 (file)
--- a/src/System.Private.CoreLib/shared/System/Text/Unicode/Utf16Utility.Validation.cs
+++ b/src/System.Private.CoreLib/shared/System/Text/Unicode/Utf16Utility.Validation.cs
@@ -87,6 +87,11 @@ namespace System.Text.Unicode
                          Vector128<ushort> utf16Data = Sse2.LoadVector128((ushort*)pInputBuffer); // unaligned
                          uint mask;
  
+                        // The 'charIsNonAscii' vector we're about to build will have the 0x8000 or the 0x0080
+                        // bit set (but not both!) only if the corresponding input char is non-ASCII. Which of
+                        // the two bits is set doesn't matter, as will be explained in the diagram a few lines
+                        // below.
+
                          Vector128<ushort> charIsNonAscii;
                          if (Sse41.IsSupported)
                          {
@@ -99,6 +104,12 @@ namespace System.Text.Unicode
                              charIsNonAscii = Sse2.AndNot(vector0080, Sse2.Subtract(vectorZero, Sse2.ShiftRightLogical(utf16Data, 7)));
                          }
  
+#if DEBUG
+                        // Quick check to ensure we didn't accidentally set both the 0x8000 and 0x0080 bits in any element.
+                        uint debugMask = (uint)Sse2.MoveMask(charIsNonAscii.AsByte());
+                        Debug.Assert((debugMask & (debugMask << 1)) == 0, "Two set bits shouldn't occur adjacent to each other in this mask.");
+#endif // DEBUG
+
                          // sets 0x8080 bits if corresponding char element is >= 0x0800
                          Vector128<ushort> charIsThreeByteUtf8Encoded = Sse2.Subtract(vectorZero, Sse2.ShiftRightLogical(utf16Data, 11));
author	Levi Broderick <levib@microsoft.com>
	Thu, 11 Apr 2019 18:37:00 +0000 (11:37 -0700)
committer	Levi Broderick <levib@microsoft.com>
	Thu, 11 Apr 2019 18:37:00 +0000 (11:37 -0700)