Perf: Consolidate implementation of LeadingZeroCount (dotnet/coreclr#22497)
author Grant <grant@jesanna.com>
Wed, 13 Feb 2019 22:17:01 +0000 (14:17 -0800)
committer Tanner Gooding <tagoo@outlook.com>
Wed, 13 Feb 2019 22:17:01 +0000 (14:17 -0800)
* Perf: BitOps.LeadingZeroCount

* CR fix

* CR fixes

* Optimization

* Revert

* Confirmed fix

* Simplify

* Return int

* Fixes

* Simplify

* CR fixes

* CR fixes

* Fix unit failure on Linux-musl x64 Debug

* CR fixes

* CR fixes

* Cleanup

Commit migrated from https://github.com/dotnet/coreclr/commit/eee8c24c0351605909f93481b54d5525c76980f4

src/libraries/System.Private.CoreLib/src/System/BitOps.cs
src/libraries/System.Private.CoreLib/src/System/Buffers/Text/FormattingHelpers.CountDigits.cs
src/libraries/System.Private.CoreLib/src/System/Buffers/Utilities.cs
src/libraries/System.Private.CoreLib/src/System/Decimal.DecCalc.cs
src/libraries/System.Private.CoreLib/src/System/Number.BigInteger.cs
src/libraries/System.Private.CoreLib/src/System/Number.DiyFp.cs
src/libraries/System.Private.CoreLib/src/System/Number.Dragon4.cs
src/libraries/System.Private.CoreLib/src/System/SpanHelpers.Byte.cs
src/libraries/System.Private.CoreLib/src/System/SpanHelpers.Char.cs

index 99cb199..cb54cf4 100644 (file)
@@ -26,6 +26,14 @@ namespace System
             26, 12, 18, 06, 11, 05, 10, 09
         };
 
+        private static ReadOnlySpan<byte> s_Log2DeBruijn => new byte[32]
+        {
+            00, 09, 01, 10, 13, 21, 02, 29,
+            11, 14, 16, 18, 22, 25, 03, 30,
+            08, 12, 20, 28, 15, 17, 24, 07,
+            19, 27, 23, 06, 26, 05, 04, 31
+        };
+
         /// <summary>
         /// Count the number of trailing zero bits in an integer value.
         /// Similar in behavior to the x86 instruction TZCNT.
@@ -49,15 +57,183 @@ namespace System
                 return (int)Bmi1.TrailingZeroCount(value);
             }
 
-            // Main code has behavior 0->0, so special-case in order to match intrinsic path 0->32
-            if (value == 0u)
+            // Software fallback has behavior 0->0, so special-case to match intrinsic path 0->32
+            if (value == 0)
+            {
                 return 32;
+            }
 
             // uint.MaxValue >> 27 is always in range [0 - 31] so we use Unsafe.AddByteOffset to avoid bounds check
             return Unsafe.AddByteOffset(
-                ref MemoryMarshal.GetReference(s_TrailingZeroCountDeBruijn), 
                 // Using deBruijn sequence, k=2, n=5 (2^5=32) : 0b_0000_0111_0111_1100_1011_0101_0011_0001u
-                ((uint)((value & -value) * 0x077CB531u)) >> 27);
+                ref MemoryMarshal.GetReference(s_TrailingZeroCountDeBruijn),
+                // long -> IntPtr cast on 32-bit platforms is expensive - it does overflow checks not needed here
+                (IntPtr)(int)(((uint)((value & -value) * 0x077CB531u)) >> 27)); // shift over long also expensive on 32-bit
+        }
+
+        /// <summary>
+        /// Count the number of trailing zero bits in a mask.
+        /// Similar in behavior to the x86 instruction TZCNT.
+        /// </summary>
+        /// <param name="value">The value.</param>
+        [MethodImpl(MethodImplOptions.AggressiveInlining)]
+        public static int TrailingZeroCount(long value)
+            => TrailingZeroCount((ulong)value);
+
+        /// <summary>
+        /// Count the number of trailing zero bits in a mask.
+        /// Similar in behavior to the x86 instruction TZCNT.
+        /// </summary>
+        /// <param name="value">The value.</param>
+        [MethodImpl(MethodImplOptions.AggressiveInlining)]
+        public static int TrailingZeroCount(ulong value)
+        {
+            if (Bmi1.X64.IsSupported)
+            {
+                // Note that TZCNT contract specifies 0->64
+                return (int)Bmi1.X64.TrailingZeroCount(value);
+            }
+
+            uint lo = (uint)value;
+
+            if (lo == 0)
+            {
+                return 32 + TrailingZeroCount((uint)(value >> 32));
+            }
+
+            return TrailingZeroCount(lo);
+        }
+
+        /// <summary>
+        /// Count the number of leading zero bits in a mask.
+        /// Similar in behavior to the x86 instruction LZCNT.
+        /// </summary>
+        /// <param name="value">The value.</param>
+        [MethodImpl(MethodImplOptions.AggressiveInlining)]
+        public static int LeadingZeroCount(uint value)
+        {
+            if (Lzcnt.IsSupported)
+            {
+                // Note that LZCNT contract specifies 0->32
+                return (int)Lzcnt.LeadingZeroCount(value);
+            }
+
+            // Software fallback has behavior 0->0, so special-case to match intrinsic path 0->32
+            if (value == 0)
+            {
+                return 32;
+            }
+
+            return 31 - Log2(value);
+        }
+
+        /// <summary>
+        /// Count the number of leading zero bits in a mask.
+        /// Similar in behavior to the x86 instruction LZCNT.
+        /// </summary>
+        /// <param name="value">The value.</param>
+        [MethodImpl(MethodImplOptions.AggressiveInlining)]
+        public static int LeadingZeroCount(ulong value)
+        {
+            if (Lzcnt.X64.IsSupported)
+            {
+                // Note that LZCNT contract specifies 0->64
+                return (int)Lzcnt.X64.LeadingZeroCount(value);
+            }
+
+            uint hi = (uint)(value >> 32);
+
+            if (hi == 0)
+            {
+                return 32 + LeadingZeroCount((uint)value);
+            }
+
+            return LeadingZeroCount(hi);
+        }
+
+        /// <summary>
+        /// Returns the integer (floor) log of the specified value, base 2.
+        /// Note that by convention, input value 0 returns 0 since Log(0) is undefined.
+        /// </summary>
+        /// <param name="value">The value.</param>
+        [MethodImpl(MethodImplOptions.AggressiveInlining)]
+        public static int Log2(uint value)
+        {
+            // value    lzcnt   actual  expected
+            // ..0000   32      0        0 (by convention, guard clause)
+            // ..0001   31      31-31    0
+            // ..0010   30      31-30    1
+            // 0010..    2      31-2    29
+            // 0100..    1      31-1    30
+            // 1000..    0      31-0    31
+            if (Lzcnt.IsSupported)
+            {
+                // Enforce conventional contract 0->0 (since Log(0) is undefined)
+                if (value == 0)
+                {
+                    return 0;
+                }
+
+                // Note that LZCNT contract specifies 0->32
+                return 31 - (int)Lzcnt.LeadingZeroCount(value);
+            }
+
+            // Already has contract 0->0, without branching
+            return Log2SoftwareFallback(value);
+        }
+
+        /// <summary>
+        /// Returns the integer (floor) log of the specified value, base 2.
+        /// Note that by convention, input value 0 returns 0 since Log(0) is undefined.
+        /// Does not incur branching.
+        /// </summary>
+        /// <param name="value">The value.</param>
+        private static int Log2SoftwareFallback(uint value)
+        {
+            // No AggressiveInlining due to large method size
+
+            value |= value >> 01;
+            value |= value >> 02;
+            value |= value >> 04;
+            value |= value >> 08;
+            value |= value >> 16;
+
+            // uint.MaxValue >> 27 is always in range [0 - 31] so we use Unsafe.AddByteOffset to avoid bounds check
+            return Unsafe.AddByteOffset(
+                // Using deBruijn sequence, k=2, n=5 (2^5=32) : 0b_0000_0111_1100_0100_1010_1100_1101_1101u
+                ref MemoryMarshal.GetReference(s_Log2DeBruijn),
+                // long -> IntPtr cast on 32-bit platforms is expensive - it does overflow checks not needed here
+                (IntPtr)(int)((value * 0x07C4ACDDu) >> 27));
+        }
+
+        /// <summary>
+        /// Returns the integer (floor) log of the specified value, base 2.
+        /// Note that by convention, input value 0 returns 0 since Log(0) is undefined.
+        /// </summary>
+        /// <param name="value">The value.</param>
+        [MethodImpl(MethodImplOptions.AggressiveInlining)]
+        public static int Log2(ulong value)
+        {
+            if (Lzcnt.X64.IsSupported)
+            {
+                // Enforce conventional contract 0->0 (since Log(0) is undefined)
+                if (value == 0)
+                {
+                    return 0;
+                }
+
+                // Note that LZCNT contract specifies 0->64
+                return 63 - (int)Lzcnt.X64.LeadingZeroCount(value);
+            }
+
+            uint hi = (uint)(value >> 32);
+
+            if (hi == 0)
+            {
+                return Log2((uint)value);
+            }
+
+            return 32 + Log2(hi);
         }
     }
 }
index 35545c4..387e08f 100644 (file)
@@ -4,7 +4,6 @@
 
 using System.Diagnostics;
 using System.Runtime.CompilerServices;
-using System.Runtime.Intrinsics.X86;
 
 namespace System.Buffers.Text
 {
@@ -104,36 +103,11 @@ namespace System.Buffers.Text
         [MethodImpl(MethodImplOptions.AggressiveInlining)]
         public static int CountHexDigits(ulong value)
         {
-            if (Lzcnt.X64.IsSupported)
-            {
-                int right = 64 - (int)Lzcnt.X64.LeadingZeroCount(value | 1);
-                return (right + 3) >> 2;
-            }
-
-            int digits = 1;
-
-            if (value > 0xFFFFFFFF)
-            {
-                digits += 8;
-                value >>= 0x20;
-            }
-            if (value > 0xFFFF)
-            {
-                digits += 4;
-                value >>= 0x10;
-            }
-            if (value > 0xFF)
-            {
-                digits += 2;
-                value >>= 0x8;
-            }
-            if (value > 0xF)
-                digits++;
-
-            return digits;
+            int right = 64 - BitOps.LeadingZeroCount(value | 1);
+            return (right + 3) >> 2;
         }
 
-        
+
         // Counts the number of trailing '0' digits in a decimal number.
         // e.g., value =      0 => retVal = 0, valueWithoutTrailingZeros = 0
         //       value =   1234 => retVal = 0, valueWithoutTrailingZeros = 1234
index 628449c..c9cc8f5 100644 (file)
@@ -4,7 +4,6 @@
 
 using System.Diagnostics;
 using System.Runtime.CompilerServices;
-using System.Runtime.Intrinsics.X86;
 
 namespace System.Buffers
 {
@@ -14,25 +13,8 @@ namespace System.Buffers
         internal static int SelectBucketIndex(int bufferSize)
         {
             Debug.Assert(bufferSize >= 0);
-            if (Lzcnt.IsSupported)
-            {
-                uint bits = ((uint)bufferSize - 1) >> 4;
-                return 32 - (int)Lzcnt.LeadingZeroCount(bits);
-            }
-
-            // bufferSize of 0 will underflow here, causing a huge
-            // index which the caller will discard because it is not
-            // within the bounds of the bucket array.
-            uint bitsRemaining = ((uint)bufferSize - 1) >> 4;
-
-            int poolIndex = 0;
-            if (bitsRemaining > 0xFFFF) { bitsRemaining >>= 16; poolIndex = 16; }
-            if (bitsRemaining > 0xFF)   { bitsRemaining >>= 8;  poolIndex += 8; }
-            if (bitsRemaining > 0xF)    { bitsRemaining >>= 4;  poolIndex += 4; }
-            if (bitsRemaining > 0x3)    { bitsRemaining >>= 2;  poolIndex += 2; }
-            if (bitsRemaining > 0x1)    { bitsRemaining >>= 1;  poolIndex += 1; }
-
-            return poolIndex + (int)bitsRemaining;
+            uint bits = ((uint)bufferSize - 1) >> 4;
+            return 32 - BitOps.LeadingZeroCount(bits);
         }
 
         [MethodImpl(MethodImplOptions.AggressiveInlining)]
index 51f588e..a2bd04b 100644 (file)
@@ -547,7 +547,7 @@ PosRem:
                 if (hiRes > 2)
                 {
                     newScale = (int)hiRes * 32 - 64 - 1;
-                    newScale -= X86.Lzcnt.IsSupported ? (int)X86.Lzcnt.LeadingZeroCount(result[hiRes]) : LeadingZeroCount(result[hiRes]);
+                    newScale -= BitOps.LeadingZeroCount(result[hiRes]);
 
                     // Multiply bit position by log10(2) to figure it's power of 10.
                     // We scale the log by 256.  log(2) = .30103, * 256 = 77.  Doing this
@@ -724,34 +724,6 @@ ThrowOverflow:
                 return power;
             }
 
-            [MethodImpl(MethodImplOptions.AggressiveInlining)]
-            private static int LeadingZeroCount(uint value)
-            {
-                Debug.Assert(value > 0);
-                int c = 1;
-                if ((value & 0xFFFF0000) == 0)
-                {
-                    value <<= 16;
-                    c += 16;
-                }
-                if ((value & 0xFF000000) == 0)
-                {
-                    value <<= 8;
-                    c += 8;
-                }
-                if ((value & 0xF0000000) == 0)
-                {
-                    value <<= 4;
-                    c += 4;
-                }
-                if ((value & 0xC0000000) == 0)
-                {
-                    value <<= 2;
-                    c += 2;
-                }
-                return c + ((int)value >> 31);
-            }
-
             /// <summary>
             /// Adjust the quotient to deal with an overflow.
             /// We need to divide by 10, feed in the high bit to undo the overflow and then round as required.
@@ -2047,7 +2019,7 @@ ReturnZero:
                     if (tmp == 0)
                         tmp = d2.Mid;
 
-                    curScale = X86.Lzcnt.IsSupported ? (int)X86.Lzcnt.LeadingZeroCount(tmp) : LeadingZeroCount(tmp);
+                    curScale = BitOps.LeadingZeroCount(tmp);
 
                     // Shift both dividend and divisor left by curScale.
                     //
@@ -2328,7 +2300,7 @@ ThrowOverflow:
                 uint tmp = d2.High;
                 if (tmp == 0)
                     tmp = d2.Mid;
-                int shift = X86.Lzcnt.IsSupported ? (int)X86.Lzcnt.LeadingZeroCount(tmp) : LeadingZeroCount(tmp);
+                int shift = BitOps.LeadingZeroCount(tmp);
 
                 Buf28 b;
                 _ = &b; // workaround for CS0165
index 0521e2c..607b450 100644 (file)
@@ -311,12 +311,6 @@ namespace System
                 0x00000000,
             };
 
-            private static readonly uint[] s_MultiplyDeBruijnBitPosition = new uint[]
-            {
-                0, 9, 1, 10, 13, 21, 2, 29, 11, 14, 16, 18, 22, 25, 3, 30,
-                8, 12, 20, 28, 15, 17, 24, 7, 19, 27, 23, 6, 26, 5, 4, 31
-            };
-
             private int _length;
             private fixed uint _blocks[MaxBlockCount];
 
@@ -461,7 +455,7 @@ namespace System
 
             public static uint CountSignificantBits(uint value)
             {
-                return (value != 0) ? (1 + LogBase2(value)) : 0;
+                return 32 - (uint)BitOps.LeadingZeroCount(value);
             }
 
             public static uint CountSignificantBits(ulong value)
@@ -473,7 +467,7 @@ namespace System
                     return 32 + CountSignificantBits(upper);
                 }
 
-                return CountSignificantBits((uint)(value));
+                return CountSignificantBits((uint)value);
             }
 
             public static uint CountSignificantBits(ref BigInteger value)
@@ -566,7 +560,7 @@ namespace System
                     uint divLo = rhs._blocks[rhsLength - 2];
 
                     // We measure the leading zeros of the divisor
-                    int shiftLeft = (int)(LeadingZeroCount(divHi));
+                    int shiftLeft = BitOps.LeadingZeroCount(divHi);
                     int shiftRight = 32 - shiftLeft;
 
                     // And, we make sure the most significant bit is set
@@ -750,47 +744,6 @@ namespace System
                 return quotient;
             }
 
-            public static uint LeadingZeroCount(uint value)
-            {
-                return 32 - CountSignificantBits(value);
-            }
-
-            public static uint LeadingZeroCount(ulong value)
-            {
-                return 64 - CountSignificantBits(value);
-            }
-
-            public static uint LogBase2(uint value)
-            {
-                Debug.Assert(value != 0);
-
-                // This comes from the Stanford Bit Widdling Hacks by Sean Eron Anderson:
-                // http://graphics.stanford.edu/~seander/bithacks.html#IntegerLogDeBruijn
-
-                value |= (value >> 1); // first round down to one less than a power of 2 
-                value |= (value >> 2);
-                value |= (value >> 4);
-                value |= (value >> 8);
-                value |= (value >> 16);
-
-                uint index = (value * 0x07C4ACDD) >> 27;
-                return s_MultiplyDeBruijnBitPosition[(int)(index)];
-            }
-
-            public static uint LogBase2(ulong value)
-            {
-                Debug.Assert(value != 0);
-
-                uint upper = (uint)(value >> 32);
-
-                if (upper != 0)
-                {
-                    return 32 + LogBase2(upper);
-                }
-
-                return LogBase2((uint)(value));
-            }
-
             public static void Multiply(ref BigInteger lhs, uint value, ref BigInteger result)
             {
                 if (lhs.IsZero() || (value == 1))
index 5e20b8e..b8f2fdc 100644 (file)
@@ -111,7 +111,7 @@ namespace System
                 // and subtract once.
 
                 Debug.Assert(f != 0);
-                int lzcnt = (int)(BigInteger.LeadingZeroCount(f));
+                int lzcnt = BitOps.LeadingZeroCount(f);
                 return new DiyFp((f << lzcnt), (e - lzcnt));
             }
 
index 5fa812c..7682506 100644 (file)
@@ -30,7 +30,8 @@ namespace System
             }
             else
             {
-                mantissaHighBitIdx = BigInteger.LogBase2(mantissa);
+                Debug.Assert(mantissa != 0);
+                mantissaHighBitIdx = (uint)BitOps.Log2(mantissa);
             }
 
             int length = (int)(Dragon4(mantissa, exponent, mantissaHighBitIdx, hasUnequalMargins, cutoffNumber, isSignificantDigits, number.Digits, out int decimalExponent));
@@ -59,7 +60,8 @@ namespace System
             }
             else
             {
-                mantissaHighBitIdx = BigInteger.LogBase2(mantissa);
+                Debug.Assert(mantissa != 0);
+                mantissaHighBitIdx = (uint)BitOps.Log2(mantissa);
             }
 
             int length = (int)(Dragon4(mantissa, exponent, mantissaHighBitIdx, hasUnequalMargins, cutoffNumber, isSignificantDigits, number.Digits, out int decimalExponent));
@@ -289,7 +291,8 @@ namespace System
                 // We are more likely to make accurate quotient estimations in BigInteger.HeuristicDivide() with higher denominator values so we shift the denominator to place the highest bit at index 27 of the highest block.
                 // This is safe because (2^28 - 1) = 268435455 which is less than 429496729.
                 // This means that all values with a highest bit at index 27 are within range.
-                uint hiBlockLog2 = BigInteger.LogBase2(hiBlock);
+                Debug.Assert(hiBlock != 0);
+                uint hiBlockLog2 = (uint)BitOps.Log2(hiBlock);
                 Debug.Assert((hiBlockLog2 < 3) || (hiBlockLog2 > 27));
                 uint shift = (32 + 27 - hiBlockLog2) % 32;
 
index 5afea7b..bac4ef4 100644 (file)
@@ -1589,7 +1589,6 @@ namespace System
         [MethodImpl(MethodImplOptions.AggressiveInlining)]
         private static int LocateFirstFoundByte(ulong match)
         {
-            // TODO: Arm variants
             if (Bmi1.X64.IsSupported)
             {
                 return (int)(Bmi1.X64.TrailingZeroCount(match) >> 3);
@@ -1606,22 +1605,7 @@ namespace System
         [MethodImpl(MethodImplOptions.AggressiveInlining)]
         private static int LocateLastFoundByte(ulong match)
         {
-            // TODO: Arm variants
-            if (Lzcnt.X64.IsSupported)
-            {
-                return 7 - (int)(Lzcnt.X64.LeadingZeroCount(match) >> 3);
-            }
-            else
-            {
-                // Find the most significant byte that has its highest bit set
-                int index = 7;
-                while ((long)match > 0)
-                {
-                    match = match << 8;
-                    index--;
-                }
-                return index;
-            }
+            return 7 - (BitOps.LeadingZeroCount(match) >> 3);
         }
 
         private const ulong XorPowerOfTwoToHighByte = (0x07ul |
index b591152..6a2af73 100644 (file)
@@ -874,22 +874,7 @@ namespace System
         [MethodImpl(MethodImplOptions.AggressiveInlining)]
         private static int LocateLastFoundChar(ulong match)
         {
-            // TODO: Arm variants
-            if (Lzcnt.X64.IsSupported)
-            {
-                return 3 - (int)(Lzcnt.X64.LeadingZeroCount(match) >> 4);
-            }
-            else
-            {
-                // Find the most significant char that has its highest bit set
-                int index = 3;
-                while ((long)match > 0)
-                {
-                    match = match << 16;
-                    index--;
-                }
-                return index;
-            }
+            return 3 - (BitOps.LeadingZeroCount(match) >> 4);
         }
     }
 }