From: Levi Broderick Date: Tue, 16 Apr 2019 01:28:39 +0000 (-0700) Subject: Fix incorrect bit match pattern in UTF-16 validation (dotnet/coreclr#24015) X-Git-Tag: submit/tizen/20210909.063632~11030^2~1832 X-Git-Url: http://review.tizen.org/git/?a=commitdiff_plain;h=c2e5f9e18cf0a09068327d018bfb316e339ed5ef;p=platform%2Fupstream%2Fdotnet%2Fruntime.git Fix incorrect bit match pattern in UTF-16 validation (dotnet/coreclr#24015) Commit migrated from https://github.com/dotnet/coreclr/commit/fcc4beb884b7e38a9886b7a354ec8b6cdb8aad83 --- diff --git a/src/libraries/System.Private.CoreLib/src/System/Text/Unicode/Utf16Utility.Validation.cs b/src/libraries/System.Private.CoreLib/src/System/Text/Unicode/Utf16Utility.Validation.cs index 40e818e..bdf7972 100644 --- a/src/libraries/System.Private.CoreLib/src/System/Text/Unicode/Utf16Utility.Validation.cs +++ b/src/libraries/System.Private.CoreLib/src/System/Text/Unicode/Utf16Utility.Validation.cs @@ -171,11 +171,28 @@ namespace System.Text.Unicode // - 00 if the corresponding UTF-16 char was a high surrogate code unit; // - 01 if the corresponding UTF-16 char was a low surrogate code unit; // - ## (garbage) if the corresponding UTF-16 char was not a surrogate code unit. + // Since 'mask' already has 00 in these positions (since the corresponding char + // wasn't a surrogate), "mask AND mask2 == 00" holds for these positions. uint mask2 = (uint)Sse2.MoveMask(Sse2.ShiftRightLogical(utf16Data, 3).AsByte()); - uint lowSurrogatesMask = mask2 & mask; // 01 only if was a low surrogate char, else 00 - uint highSurrogatesMask = (mask2 ^ mask) & 0x5555u; // 01 only if was a high surrogate char, else 00 + // 'lowSurrogatesMask' has its bits occur in pairs: + // - 01 if the corresponding char was a low surrogate char, + // - 00 if the corresponding char was a high surrogate char or not a surrogate at all. + + uint lowSurrogatesMask = mask2 & mask; + + // 'highSurrogatesMask' has its bits occur in pairs: + // - 01 if the corresponding char was a high surrogate char, + // - 00 if the corresponding char was a low surrogate char or not a surrogate at all. + + uint highSurrogatesMask = (mask2 ^ 0b_0101_0101_0101_0101u /* flip all even-numbered bits 00 <-> 01 */) & mask; + + Debug.Assert((highSurrogatesMask & lowSurrogatesMask) == 0, + "A char cannot simultaneously be both a high and a low surrogate char."); + + Debug.Assert(((highSurrogatesMask | lowSurrogatesMask) & 0b_1010_1010_1010_1010u) == 0, + "Only even bits (no odd bits) of the masks should be set."); // Now check that each high surrogate is followed by a low surrogate and that each // low surrogate follows a high surrogate. We make an exception for the case where