Add comments tracking JIT workarounds.

author Levi Broderick <levib@microsoft.com>

Wed, 10 Apr 2019 23:47:11 +0000 (16:47 -0700)

committer Levi Broderick <levib@microsoft.com>

Wed, 10 Apr 2019 23:47:11 +0000 (16:47 -0700)
author Levi Broderick <levib@microsoft.com>
Wed, 10 Apr 2019 23:47:11 +0000 (16:47 -0700)
committer Levi Broderick <levib@microsoft.com>
Wed, 10 Apr 2019 23:47:11 +0000 (16:47 -0700)
diff --git a/src/libraries/System.Private.CoreLib/src/System/Text/Unicode/Utf8Utility.Helpers.cs b/src/libraries/System.Private.CoreLib/src/System/Text/Unicode/Utf8Utility.Helpers.cs

index c5ed461..33aa664 100644 (file)
--- a/src/libraries/System.Private.CoreLib/src/System/Text/Unicode/Utf8Utility.Helpers.cs
+++ b/src/libraries/System.Private.CoreLib/src/System/Text/Unicode/Utf8Utility.Helpers.cs
@@ -30,6 +30,8 @@ namespace System.Text.Unicode
              // Since we already validated it's 80 <= ?? <= DF (per mask check earlier), now only need
              // to check that it's < C2.
  
+            // Return statement is written this way to work around https://github.com/dotnet/coreclr/issues/914.
+
              return (BitConverter.IsLittleEndian && ((byte)value < 0xC2u))
                  || (!BitConverter.IsLittleEndian && (value < 0xC200_0000u));
          }
@@ -45,8 +47,7 @@ namespace System.Text.Unicode
          private static bool DWordBeginsWithUtf8FourByteMask(uint value)
          {
              // The code in this method is equivalent to the code
-            // below, but the JIT is able to inline + optimize it
-            // better in release builds.
+            // below but is slightly more optimized.
              //
              // if (BitConverter.IsLittleEndian)
              // {
@@ -61,6 +62,8 @@ namespace System.Text.Unicode
              //     return ((value & mask) == comparand);
              // }
  
+            // Return statement is written this way to work around https://github.com/dotnet/coreclr/issues/914.
+
              return (BitConverter.IsLittleEndian && (((value - 0x8080_80F0u) & 0xC0C0_C0F8u) == 0))
                  || (!BitConverter.IsLittleEndian && (((value - 0xF080_8000u) & 0xF8C0_C0C0u) == 0));
          }
@@ -76,8 +79,7 @@ namespace System.Text.Unicode
          private static bool DWordBeginsWithUtf8ThreeByteMask(uint value)
          {
              // The code in this method is equivalent to the code
-            // below, but the JIT is able to inline + optimize it
-            // better in release builds.
+            // below but is slightly more optimized.
              //
              // if (BitConverter.IsLittleEndian)
              // {
@@ -92,6 +94,8 @@ namespace System.Text.Unicode
              //     return ((value & mask) == comparand);
              // }
  
+            // Return statement is written this way to work around https://github.com/dotnet/coreclr/issues/914.
+
              return (BitConverter.IsLittleEndian && (((value - 0x0080_80E0u) & 0x00C0_C0F0u) == 0))
                  || (!BitConverter.IsLittleEndian && (((value - 0xE080_8000u) & 0xF0C0_C000u) == 0));
          }
@@ -107,8 +111,7 @@ namespace System.Text.Unicode
          private static bool DWordBeginsWithUtf8TwoByteMask(uint value)
          {
              // The code in this method is equivalent to the code
-            // below, but the JIT is able to inline + optimize it
-            // better in release builds.
+            // below but is slightly more optimized.
              //
              // if (BitConverter.IsLittleEndian)
              // {
@@ -123,6 +126,8 @@ namespace System.Text.Unicode
              //     return ((value & mask) == comparand);
              // }
  
+            // Return statement is written this way to work around https://github.com/dotnet/coreclr/issues/914.
+
              return (BitConverter.IsLittleEndian && (((value - 0x0000_80C0u) & 0x0000_C0E0u) == 0))
                  || (!BitConverter.IsLittleEndian && (((value - 0xC080_0000u) & 0xE0C0_0000u) == 0));
          }
@@ -147,6 +152,8 @@ namespace System.Text.Unicode
              // This means that we can AND the leading byte with the mask 0001 1110 (1E),
              // and if the result is zero the sequence is overlong.
  
+            // Return statement is written this way to work around https://github.com/dotnet/coreclr/issues/914.
+
              return (BitConverter.IsLittleEndian && ((value & 0x001E_0000u) == 0))
                  || (!BitConverter.IsLittleEndian && ((value & 0x1E00u) == 0));
          }
@@ -162,8 +169,7 @@ namespace System.Text.Unicode
          private static bool DWordEndsWithUtf8TwoByteMask(uint value)
          {
              // The code in this method is equivalent to the code
-            // below, but the JIT is able to inline + optimize it
-            // better in release builds.
+            // below but is slightly more optimized.
              //
              // if (BitConverter.IsLittleEndian)
              // {
@@ -178,6 +184,8 @@ namespace System.Text.Unicode
              //     return ((value & mask) == comparand);
              // }
  
+            // Return statement is written this way to work around https://github.com/dotnet/coreclr/issues/914.
+
              return (BitConverter.IsLittleEndian && (((value - 0x80C0_0000u) & 0xC0E0_0000u) == 0))
                  || (!BitConverter.IsLittleEndian && (((value - 0x0000_C080u) & 0x0000_E0C0u) == 0));
          }
@@ -201,8 +209,10 @@ namespace System.Text.Unicode
  
              Debug.Assert(BitConverter.IsLittleEndian);
  
+            // Return statement is written this way to work around https://github.com/dotnet/coreclr/issues/914.
+
              return (BitConverter.IsLittleEndian && UnicodeUtility.IsInRangeInclusive(value & 0xC0FFu, 0x80C2u, 0x80DFu))
-                || (!BitConverter.IsLittleEndian && false); // this line - while weird - helps the JIT produce optimal code
+                || (!BitConverter.IsLittleEndian && false);
          }
  
          /// <summary>
@@ -218,8 +228,10 @@ namespace System.Text.Unicode
  
              Debug.Assert(BitConverter.IsLittleEndian);
  
+            // Return statement is written this way to work around https://github.com/dotnet/coreclr/issues/914.
+
              return (BitConverter.IsLittleEndian && UnicodeUtility.IsInRangeInclusive(value & 0xC0FF_0000u, 0x80C2_0000u, 0x80DF_0000u))
-                || (!BitConverter.IsLittleEndian && false); // this line - while weird - helps the JIT produce optimal code
+                || (!BitConverter.IsLittleEndian && false);
          }
  
          /// <summary>
@@ -229,18 +241,7 @@ namespace System.Text.Unicode
          [MethodImpl(MethodImplOptions.AggressiveInlining)]
          private static bool DWordFirstByteIsAscii(uint value)
          {
-            // The code in this method is equivalent to the code
-            // below, but the JIT is able to inline + optimize it
-            // better in release builds.
-            //
-            // if (BitConverter.IsLittleEndian)
-            // {
-            //     return ((value & 0x80U) == 0U);
-            // }
-            // else
-            // {
-            //     return ((int)value >= 0);
-            // }
+            // Return statement is written this way to work around https://github.com/dotnet/coreclr/issues/914.
  
              return (BitConverter.IsLittleEndian && ((value & 0x80u) == 0))
                  || (!BitConverter.IsLittleEndian && ((int)value >= 0));
@@ -253,18 +254,7 @@ namespace System.Text.Unicode
          [MethodImpl(MethodImplOptions.AggressiveInlining)]
          private static bool DWordFourthByteIsAscii(uint value)
          {
-            // The code in this method is equivalent to the code
-            // below, but the JIT is able to inline + optimize it
-            // better in release builds.
-            //
-            // if (BitConverter.IsLittleEndian)
-            // {
-            //     return ((int)value >= 0);
-            // }
-            // else
-            // {
-            //     return ((value & 0x80U) == 0U);
-            // }
+            // Return statement is written this way to work around https://github.com/dotnet/coreclr/issues/914.
  
              return (BitConverter.IsLittleEndian && ((int)value >= 0))
                  || (!BitConverter.IsLittleEndian && ((value & 0x80u) == 0));
@@ -277,18 +267,7 @@ namespace System.Text.Unicode
          [MethodImpl(MethodImplOptions.AggressiveInlining)]
          private static bool DWordSecondByteIsAscii(uint value)
          {
-            // The code in this method is equivalent to the code
-            // below, but the JIT is able to inline + optimize it
-            // better in release builds.
-            //
-            // if (BitConverter.IsLittleEndian)
-            // {
-            //     return ((value & 0x8000U) == 0U);
-            // }
-            // else
-            // {
-            //     return ((value & 0x800000U) == 0U);
-            // }
+            // Return statement is written this way to work around https://github.com/dotnet/coreclr/issues/914.
  
              return (BitConverter.IsLittleEndian && ((value & 0x8000u) == 0))
                  || (!BitConverter.IsLittleEndian && ((value & 0x0080_0000u) == 0));
@@ -301,18 +280,7 @@ namespace System.Text.Unicode
          [MethodImpl(MethodImplOptions.AggressiveInlining)]
          private static bool DWordThirdByteIsAscii(uint value)
          {
-            // The code in this method is equivalent to the code
-            // below, but the JIT is able to inline + optimize it
-            // better in release builds.
-            //
-            // if (BitConverter.IsLittleEndian)
-            // {
-            //     return ((value & 0x800000U) == 0U);
-            // }
-            // else
-            // {
-            //     return ((value & 0x8000U) == 0U);
-            // }
+            // Return statement is written this way to work around https://github.com/dotnet/coreclr/issues/914.
  
              return (BitConverter.IsLittleEndian && ((value & 0x0080_0000u) == 0))
                  || (!BitConverter.IsLittleEndian && ((value & 0x8000u) == 0));
@@ -587,6 +555,8 @@ namespace System.Text.Unicode
              // Little-endian: Given [ #### AAAA ], return whether AAAA is in range [ 0000..007F ].
              // Big-endian: Given [ AAAA #### ], return whether AAAA is in range [ 0000..007F ].
  
+            // Return statement is written this way to work around https://github.com/dotnet/coreclr/issues/914.
+
              return (BitConverter.IsLittleEndian && (value & 0xFF80u) == 0)
                  || (!BitConverter.IsLittleEndian && value < 0x0080_0000u);
          }
@@ -602,6 +572,8 @@ namespace System.Text.Unicode
              // Little-endian: Given [ #### AAAA ], return whether AAAA is in range [ 0800..FFFF ].
              // Big-endian: Given [ AAAA #### ], return whether AAAA is in range [ 0800..FFFF ].
  
+            // Return statement is written this way to work around https://github.com/dotnet/coreclr/issues/914.
+
              return (BitConverter.IsLittleEndian && (value & 0xF800u) != 0)
                  || (!BitConverter.IsLittleEndian && value >= 0x0800_0000u);
          }
@@ -616,6 +588,8 @@ namespace System.Text.Unicode
              // Little-endian: Given [ #### AAAA ], return whether AAAA is in range [ D800..DFFF ].
              // Big-endian: Given [ AAAA #### ], return whether AAAA is in range [ D800..DFFF ].
  
+            // Return statement is written this way to work around https://github.com/dotnet/coreclr/issues/914.
+
              return (BitConverter.IsLittleEndian && ((value - 0xD800u) & 0xF800u) == 0)
                  || (!BitConverter.IsLittleEndian && (value - 0xD800_0000u) < 0x0800_0000u);
          }
@@ -632,6 +606,9 @@ namespace System.Text.Unicode
  
              // TODO: I'd like to be able to write "(ushort)(value - 0x0080u) < 0x0780u" for the little-endian
              // case, but the JIT doesn't currently emit 16-bit comparisons efficiently.
+            // Tracked as https://github.com/dotnet/coreclr/issues/18022.
+
+            // Return statement is written this way to work around https://github.com/dotnet/coreclr/issues/914.
  
              return (BitConverter.IsLittleEndian && ((value - 0x0080u) & 0xFFFFu) < 0x0780u)
                  || (!BitConverter.IsLittleEndian && UnicodeUtility.IsInRangeInclusive(value, 0x0080_0000u, 0x07FF_FFFFu));
@@ -646,6 +623,7 @@ namespace System.Text.Unicode
          {
              // The JIT won't emit a single 8-bit signed cmp instruction (see IsUtf8ContinuationByte),
              // so the best we can do for now is the lea / cmp pair.
+            // Tracked as https://github.com/dotnet/coreclr/issues/18022.
  
              return (byte)(value - 0x80u) <= 0x3Fu;
          }
@@ -660,6 +638,8 @@ namespace System.Text.Unicode
              // Little-endian: Given [ BBBB #### ], return whether BBBB is in range [ 0000..007F ].
              // Big-endian: Given [ #### BBBB ], return whether BBBB is in range [ 0000..007F ].
  
+            // Return statement is written this way to work around https://github.com/dotnet/coreclr/issues/914.
+
              return (BitConverter.IsLittleEndian && value < 0x0080_0000u)
                  || (!BitConverter.IsLittleEndian && (value & 0xFF80u) == 0);
          }
@@ -675,6 +655,8 @@ namespace System.Text.Unicode
              // Little-endian: Given [ BBBB #### ], return whether BBBB is in range [ 0800..FFFF ].
              // Big-endian: Given [ #### BBBB ], return whether BBBB is in range [ 0800..FFFF ].
  
+            // Return statement is written this way to work around https://github.com/dotnet/coreclr/issues/914.
+
              return (BitConverter.IsLittleEndian && (value & 0xF800_0000u) != 0)
                  || (!BitConverter.IsLittleEndian && (value & 0xF800u) != 0);
          }
@@ -689,6 +671,8 @@ namespace System.Text.Unicode
              // Little-endian: Given [ BBBB #### ], return whether BBBB is in range [ D800..DFFF ].
              // Big-endian: Given [ #### BBBB ], return whether BBBB is in range [ D800..DFFF ].
  
+            // Return statement is written this way to work around https://github.com/dotnet/coreclr/issues/914.
+
              return (BitConverter.IsLittleEndian && (value - 0xD800_0000u) < 0x0800_0000u)
                  || (!BitConverter.IsLittleEndian && ((value - 0xD800u) & 0xF800u) == 0);
          }
@@ -705,6 +689,9 @@ namespace System.Text.Unicode
  
              // TODO: I'd like to be able to write "(ushort)(value - 0x0080u) < 0x0780u" for the big-endian
              // case, but the JIT doesn't currently emit 16-bit comparisons efficiently.
+            // Tracked as https://github.com/dotnet/coreclr/issues/18022.
+
+            // Return statement is written this way to work around https://github.com/dotnet/coreclr/issues/914.
  
              return (BitConverter.IsLittleEndian && UnicodeUtility.IsInRangeInclusive(value, 0x0080_0000u, 0x07FF_FFFFu))
                  || (!BitConverter.IsLittleEndian && ((value - 0x0080u) & 0xFFFFu) < 0x0780u);
@@ -745,6 +732,8 @@ namespace System.Text.Unicode
              // is a whole power of 2, which means that a CMP is really just a glorified TEST operation. Two TESTs *can*
              // be performed in parallel. The logic below then becomes 3 operations: "add/lea; test; jcc".
  
+            // Return statement is written this way to work around https://github.com/dotnet/coreclr/issues/914.
+
              return (BitConverter.IsLittleEndian && ((value - 0xDC00_D800u) & 0xFC00_FC00u) == 0)
                  || (!BitConverter.IsLittleEndian && ((value - 0xD800_DC00u) & 0xFC00_FC00u) == 0);
          }
author	Levi Broderick <levib@microsoft.com>
	Wed, 10 Apr 2019 23:47:11 +0000 (16:47 -0700)
committer	Levi Broderick <levib@microsoft.com>
	Wed, 10 Apr 2019 23:47:11 +0000 (16:47 -0700)