PR feedback: Clarify charIsNonAscii vector usage
author: Levi Broderick <levib@microsoft.com>
Thu, 11 Apr 2019 18:37:00 +0000 (11:37 -0700)
committer: Levi Broderick <levib@microsoft.com>
Thu, 11 Apr 2019 18:37:00 +0000 (11:37 -0700)
Commit migrated from https://github.com/dotnet/coreclr/commit/c3aa431c8e1f23dcf17c2369657ab0d3bcc195da

src/libraries/System.Private.CoreLib/src/System/Text/Unicode/Utf16Utility.Validation.cs

index 5f044b1..46c068d 100644 (file)
@@ -87,6 +87,11 @@ namespace System.Text.Unicode
                         Vector128<ushort> utf16Data = Sse2.LoadVector128((ushort*)pInputBuffer); // unaligned
                         uint mask;
 
+                        // The 'charIsNonAscii' vector we're about to build will have the 0x8000 or the 0x0080
+                        // bit set (but not both!) only if the corresponding input char is non-ASCII. Which of
+                        // the two bits is set doesn't matter, as will be explained in the diagram a few lines
+                        // below.
+
                         Vector128<ushort> charIsNonAscii;
                         if (Sse41.IsSupported)
                         {
@@ -99,6 +104,12 @@ namespace System.Text.Unicode
                             charIsNonAscii = Sse2.AndNot(vector0080, Sse2.Subtract(vectorZero, Sse2.ShiftRightLogical(utf16Data, 7)));
                         }
 
+#if DEBUG
+                        // Quick check to ensure we didn't accidentally set both the 0x8000 and the 0x0080 bit in any single element.
+                        uint debugMask = (uint)Sse2.MoveMask(charIsNonAscii.AsByte());
+                        Debug.Assert((debugMask & (debugMask << 1)) == 0, "Two set bits shouldn't occur adjacent to each other in this mask.");
+#endif // DEBUG
+
                         // sets 0x8080 bits if corresponding char element is >= 0x0800
                         Vector128<ushort> charIsThreeByteUtf8Encoded = Sse2.Subtract(vectorZero, Sse2.ShiftRightLogical(utf16Data, 11));