Fix xxHash64 handling of large (> 4GB) inputs (#73093)
author Levi Broderick <GrabYourPitchforks@users.noreply.github.com>
Tue, 2 Aug 2022 01:17:56 +0000 (18:17 -0700)
committer GitHub <noreply@github.com>
Tue, 2 Aug 2022 01:17:56 +0000 (18:17 -0700)
src/libraries/System.IO.Hashing/src/System/IO/Hashing/XxHash64.State.cs
src/libraries/System.IO.Hashing/src/System/IO/Hashing/XxHash64.cs
src/libraries/System.IO.Hashing/tests/NonCryptoHashTestDriver.cs
src/libraries/System.IO.Hashing/tests/XxHash32Tests.007.cs
src/libraries/System.IO.Hashing/tests/XxHash32Tests.cs
src/libraries/System.IO.Hashing/tests/XxHash32Tests.f00d.cs
src/libraries/System.IO.Hashing/tests/XxHash64Tests.007.cs
src/libraries/System.IO.Hashing/tests/XxHash64Tests.cs
src/libraries/System.IO.Hashing/tests/XxHash64Tests.f00d.cs

index ecb289a..9c1493c 100644 (file)
@@ -90,7 +90,7 @@ namespace System.IO.Hashing
                 return acc;
             }
 
-            internal readonly ulong Complete(int length, ReadOnlySpan<byte> remaining)
+            internal readonly ulong Complete(long length, ReadOnlySpan<byte> remaining)
             {
                 ulong acc = _hadFullStripe ? Converge() : _smallAcc;
 
index ab20bdd..32cc5ae 100644 (file)
@@ -19,7 +19,7 @@ namespace System.IO.Hashing
         private readonly ulong _seed;
         private State _state;
         private byte[]? _holdback;
-        private int _length;
+        private long _length;
 
         /// <summary>
         ///   Initializes a new instance of the <see cref="XxHash64"/> class.
@@ -67,7 +67,7 @@ namespace System.IO.Hashing
             // Data that isn't perfectly mod-32 gets stored in a holdback
             // buffer.
 
-            int held = _length & 0x1F;
+            int held = (int)_length & 0x1F;
 
             if (held != 0)
             {
@@ -110,7 +110,7 @@ namespace System.IO.Hashing
         /// </summary>
         protected override void GetCurrentHashCore(Span<byte> destination)
         {
-            int remainingLength = _length & 0x1F;
+            int remainingLength = (int)_length & 0x1F;
             ReadOnlySpan<byte> remaining = ReadOnlySpan<byte>.Empty;
 
             if (remainingLength > 0)
@@ -225,7 +225,7 @@ namespace System.IO.Hashing
                 source = source.Slice(StripeSize);
             }
 
-            ulong val = state.Complete(totalLength, source);
+            ulong val = state.Complete((uint)totalLength, source);
             BinaryPrimitives.WriteUInt64BigEndian(destination, val);
             return HashSize;
         }
index 0d4d9be..cb684c0 100644 (file)
@@ -48,7 +48,7 @@ namespace System.IO.Hashing.Tests
                         targetMethodName,
                         BindingFlags.Instance | BindingFlags.Public);
 
-                    if (info2 is null)
+                    if (info2 is null && !info.IsDefined(typeof(OverrideOptionalAttribute)))
                     {
                         missingMethods ??= new List<string>();
                         missingMethods.Add(targetMethodName);
@@ -116,6 +116,20 @@ namespace System.IO.Hashing.Tests
             testCase.VerifyResponse(answer);
         }
 
+        [OverrideOptional]
+        protected void InstanceMultiAppendLargeInputDriver(LargeTestCase testCase)
+        {
+            NonCryptographicHashAlgorithm hash = CreateInstance();
+
+            foreach (ReadOnlyMemory<byte> chunk in testCase.EnumerateDataChunks())
+            {
+                hash.Append(chunk.Span);
+            }
+
+            byte[] answer = hash.GetHashAndReset();
+            testCase.VerifyResponse(answer);
+        }
+
         protected void InstanceVerifyEmptyStateDriver(TestCase testCase)
         {
             Span<byte> buf = stackalloc byte[256];
@@ -280,46 +294,23 @@ namespace System.IO.Hashing.Tests
             }
         }
 
-        public sealed class TestCase
+        public abstract class TestCaseBase
         {
-            private byte[] _input;
-            private byte[] _output;
-
+            private readonly byte[] _output;
             public string Name { get; }
-            public ReadOnlySpan<byte> Input => new ReadOnlySpan<byte>(_input);
+            public ReadOnlySpan<byte> OutputBytes => _output;
             public string OutputHex { get; }
 
-            public TestCase(string name, byte[] input, byte[] output)
-            {
-                Name = name;
-                _input = input;
-                OutputHex = ToHexString(output);
-                _output = FromHexString(OutputHex);
-            }
-
-            public TestCase(string name, byte[] input, string outputHex)
-            {
-                Name = name;
-                _input = input;
-                OutputHex = outputHex.ToUpperInvariant();
-                _output = FromHexString(OutputHex);
-            }
-
-            public TestCase(string name, string inputHex, string outputHex)
+            protected TestCaseBase(string name, byte[] output)
             {
-                Name = name;
-                _input = FromHexString(inputHex);
-                OutputHex = outputHex.ToUpperInvariant();
-                _output = FromHexString(OutputHex);
-            }
-
-            internal void VerifyResponse(ReadOnlySpan<byte> response)
-            {
-                if (!response.SequenceEqual(_output))
+                if (output is null || output.Length == 0)
                 {
-                    // We know this will fail, but it gives a nice presentation.
-                    Assert.Equal(OutputHex, ToHexString(response));
+                    throw new ArgumentException("Argument should not be null or empty.", nameof(output));
                 }
+
+                Name = name;
+                _output = output;
+                OutputHex = ToHexString(output);
             }
 
             internal static string ToHexString(ReadOnlySpan<byte> input)
@@ -356,6 +347,80 @@ namespace System.IO.Hashing.Tests
             }
 
             public override string ToString() => Name;
+
+            internal void VerifyResponse(ReadOnlySpan<byte> response)
+            {
+                if (!response.SequenceEqual(OutputBytes))
+                {
+                    // We know this will fail, but it gives a nice presentation.
+                    Assert.Equal(OutputHex, ToHexString(response));
+                }
+            }
+        }
+
+        public sealed class TestCase : TestCaseBase
+        {
+            private readonly byte[] _input;
+            public ReadOnlySpan<byte> Input => new ReadOnlySpan<byte>(_input);
+
+            public TestCase(string name, byte[] input, byte[] output)
+                : base(name, output)
+            {
+                _input = input;
+            }
+
+            public TestCase(string name, byte[] input, string outputHex)
+                : base(name, FromHexString(outputHex))
+            {
+                _input = input;
+            }
+
+            public TestCase(string name, string inputHex, string outputHex)
+                : base(name, FromHexString(outputHex))
+            {
+                _input = FromHexString(inputHex);
+            }
+        }
+
+        public sealed class LargeTestCase : TestCaseBase
+        {
+            private readonly byte _data;
+            private readonly long _repeatCount;
+
+            public LargeTestCase(string name, byte data, long repeatCount, string outputHex)
+                : base(name, FromHexString(outputHex))
+            {
+                if (repeatCount < 0)
+                {
+                    throw new ArgumentOutOfRangeException(nameof(repeatCount));
+                }
+
+                _data = data;
+                _repeatCount = repeatCount;
+            }
+
+            public IEnumerable<ReadOnlyMemory<byte>> EnumerateDataChunks()
+            {
+#if NET5_0_OR_GREATER
+                byte[] chunk = GC.AllocateUninitializedArray<byte>(1024 * 1024);
+#else
+                byte[] chunk = new byte[1024 * 1024];
+#endif
+                chunk.AsSpan().Fill(_data);
+
+                long remaining = _repeatCount;
+                while (remaining > 0)
+                {
+                    int thisChunkLength = (int)Math.Min(remaining, chunk.Length);
+                    yield return chunk.AsMemory(0, thisChunkLength);
+                    remaining -= thisChunkLength;
+                }
+            }
+        }
+
+        [AttributeUsage(AttributeTargets.Method, AllowMultiple = false, Inherited = false)]
+        private sealed class OverrideOptionalAttribute : Attribute
+        {
         }
     }
 }
index 9d39043..28d00ce 100644 (file)
@@ -92,6 +92,31 @@ namespace System.IO.Hashing.Tests
                     "FC23CD03"),
             };
 
+        public static IEnumerable<object[]> LargeTestCases
+        {
+            get
+            {
+                object[] arr = new object[1];
+
+                foreach (LargeTestCase testCase in LargeTestCaseDefinitions)
+                {
+                    arr[0] = testCase;
+                    yield return arr;
+                }
+            }
+        }
+
+        protected static IEnumerable<LargeTestCase> LargeTestCaseDefinitions { get; } =
+            new[]
+            {
+                // Manually run against the xxHash32 reference implementation.
+                new LargeTestCase(
+                    "EEEEE... (10GB)",
+                    (byte)'E',
+                    10L * 1024 * 1024 * 1024, // 10 GB
+                    "1C44F650"),
+            };
+
         protected override NonCryptographicHashAlgorithm CreateInstance() => new XxHash32(Seed);
 
         protected override byte[] StaticOneShot(byte[] source) => XxHash32.Hash(source, Seed);
@@ -126,6 +151,14 @@ namespace System.IO.Hashing.Tests
         }
 
         [Theory]
+        [MemberData(nameof(LargeTestCases))]
+        [OuterLoop]
+        public void InstanceMultiAppendLargeInput(LargeTestCase testCase)
+        {
+            InstanceMultiAppendLargeInputDriver(testCase);
+        }
+
+        [Theory]
         [MemberData(nameof(TestCases))]
         public void InstanceVerifyEmptyState(TestCase testCase)
         {
index 9cbd565..803a10d 100644 (file)
@@ -106,6 +106,31 @@ namespace System.IO.Hashing.Tests
                     "5DF7D6C0"),
             };
 
+        public static IEnumerable<object[]> LargeTestCases
+        {
+            get
+            {
+                object[] arr = new object[1];
+
+                foreach (LargeTestCase testCase in LargeTestCaseDefinitions)
+                {
+                    arr[0] = testCase;
+                    yield return arr;
+                }
+            }
+        }
+
+        protected static IEnumerable<LargeTestCase> LargeTestCaseDefinitions { get; } =
+            new[]
+            {
+                // Manually run against the xxHash32 reference implementation.
+                new LargeTestCase(
+                    "EEEEE... (10GB)",
+                    (byte)'E',
+                    10L * 1024 * 1024 * 1024, // 10 GB
+                    "22CBC3AA"),
+            };
+
         protected override NonCryptographicHashAlgorithm CreateInstance() => new XxHash32();
 
         protected override byte[] StaticOneShot(byte[] source) => XxHash32.Hash(source);
@@ -140,6 +165,14 @@ namespace System.IO.Hashing.Tests
         }
 
         [Theory]
+        [MemberData(nameof(LargeTestCases))]
+        [OuterLoop]
+        public void InstanceMultiAppendLargeInput(LargeTestCase testCase)
+        {
+            InstanceMultiAppendLargeInputDriver(testCase);
+        }
+
+        [Theory]
         [MemberData(nameof(TestCases))]
         public void InstanceVerifyEmptyState(TestCase testCase)
         {
index 8a01bc8..ac17869 100644 (file)
@@ -92,6 +92,31 @@ namespace System.IO.Hashing.Tests
                     "C7A3D1CB"),
             };
 
+        public static IEnumerable<object[]> LargeTestCases
+        {
+            get
+            {
+                object[] arr = new object[1];
+
+                foreach (LargeTestCase testCase in LargeTestCaseDefinitions)
+                {
+                    arr[0] = testCase;
+                    yield return arr;
+                }
+            }
+        }
+
+        protected static IEnumerable<LargeTestCase> LargeTestCaseDefinitions { get; } =
+            new[]
+            {
+                // Manually run against the xxHash32 reference implementation.
+                new LargeTestCase(
+                    "EEEEE... (10GB)",
+                    (byte)'E',
+                    10L * 1024 * 1024 * 1024, // 10 GB
+                    "B19FAE15"),
+            };
+
         protected override NonCryptographicHashAlgorithm CreateInstance() => new XxHash32(Seed);
 
         protected override byte[] StaticOneShot(byte[] source) => XxHash32.Hash(source, Seed);
@@ -126,6 +151,14 @@ namespace System.IO.Hashing.Tests
         }
 
         [Theory]
+        [MemberData(nameof(LargeTestCases))]
+        [OuterLoop]
+        public void InstanceMultiAppendLargeInput(LargeTestCase testCase)
+        {
+            InstanceMultiAppendLargeInputDriver(testCase);
+        }
+
+        [Theory]
         [MemberData(nameof(TestCases))]
         public void InstanceVerifyEmptyState(TestCase testCase)
         {
index fb3c514..94c53f8 100644 (file)
@@ -114,6 +114,31 @@ namespace System.IO.Hashing.Tests
         protected override bool TryStaticOneShot(ReadOnlySpan<byte> source, Span<byte> destination, out int bytesWritten) =>
             XxHash64.TryHash(source, destination, out bytesWritten, Seed);
 
+        public static IEnumerable<object[]> LargeTestCases
+        {
+            get
+            {
+                object[] arr = new object[1];
+
+                foreach (LargeTestCase testCase in LargeTestCaseDefinitions)
+                {
+                    arr[0] = testCase;
+                    yield return arr;
+                }
+            }
+        }
+
+        protected static IEnumerable<LargeTestCase> LargeTestCaseDefinitions { get; } =
+            new[]
+            {
+                // Manually run against the xxHash64 reference implementation.
+                new LargeTestCase(
+                    "EEEEE... (10GB)",
+                    (byte)'E',
+                    10L * 1024 * 1024 * 1024, // 10 GB
+                    "DFBE10B17366232C"),
+            };
+
         [Theory]
         [MemberData(nameof(TestCases))]
         public void InstanceAppendAllocate(TestCase testCase)
@@ -136,6 +161,14 @@ namespace System.IO.Hashing.Tests
         }
 
         [Theory]
+        [MemberData(nameof(LargeTestCases))]
+        [OuterLoop]
+        public void InstanceMultiAppendLargeInput(LargeTestCase testCase)
+        {
+            InstanceMultiAppendLargeInputDriver(testCase);
+        }
+
+        [Theory]
         [MemberData(nameof(TestCases))]
         public void InstanceVerifyEmptyState(TestCase testCase)
         {
index 197446d..7e8cb82 100644 (file)
@@ -119,6 +119,31 @@ namespace System.IO.Hashing.Tests
                     "BDD40F0FAC166EAA"),
             };
 
+        public static IEnumerable<object[]> LargeTestCases
+        {
+            get
+            {
+                object[] arr = new object[1];
+
+                foreach (LargeTestCase testCase in LargeTestCaseDefinitions)
+                {
+                    arr[0] = testCase;
+                    yield return arr;
+                }
+            }
+        }
+
+        protected static IEnumerable<LargeTestCase> LargeTestCaseDefinitions { get; } =
+            new[]
+            {
+                // Manually run against the xxHash64 reference implementation.
+                new LargeTestCase(
+                    "EEEEE... (10GB)",
+                    (byte)'E',
+                    10L * 1024 * 1024 * 1024, // 10 GB
+                    "F3CB8D45A8B695EF"),
+            };
+
         protected override NonCryptographicHashAlgorithm CreateInstance() => new XxHash64();
 
         protected override byte[] StaticOneShot(byte[] source) => XxHash64.Hash(source);
@@ -153,6 +178,14 @@ namespace System.IO.Hashing.Tests
         }
 
         [Theory]
+        [MemberData(nameof(LargeTestCases))]
+        [OuterLoop]
+        public void InstanceMultiAppendLargeInput(LargeTestCase testCase)
+        {
+            InstanceMultiAppendLargeInputDriver(testCase);
+        }
+
+        [Theory]
         [MemberData(nameof(TestCases))]
         public void InstanceVerifyEmptyState(TestCase testCase)
         {
index 7145759..2b3bfdf 100644 (file)
@@ -102,6 +102,31 @@ namespace System.IO.Hashing.Tests
                     "C9B96062B49FEC42"),
             };
 
+        public static IEnumerable<object[]> LargeTestCases
+        {
+            get
+            {
+                object[] arr = new object[1];
+
+                foreach (LargeTestCase testCase in LargeTestCaseDefinitions)
+                {
+                    arr[0] = testCase;
+                    yield return arr;
+                }
+            }
+        }
+
+        protected static IEnumerable<LargeTestCase> LargeTestCaseDefinitions { get; } =
+            new[]
+            {
+                // Manually run against the xxHash64 reference implementation.
+                new LargeTestCase(
+                    "EEEEE... (10GB)",
+                    (byte)'E',
+                    10L * 1024 * 1024 * 1024, // 10 GB
+                    "CD7B3A954E199AE8"),
+            };
+
         protected override NonCryptographicHashAlgorithm CreateInstance() => new XxHash64(Seed);
 
         protected override byte[] StaticOneShot(byte[] source) => XxHash64.Hash(source, Seed);
@@ -136,6 +161,14 @@ namespace System.IO.Hashing.Tests
         }
 
         [Theory]
+        [MemberData(nameof(LargeTestCases))]
+        [OuterLoop]
+        public void InstanceMultiAppendLargeInput(LargeTestCase testCase)
+        {
+            InstanceMultiAppendLargeInputDriver(testCase);
+        }
+
+        [Theory]
         [MemberData(nameof(TestCases))]
         public void InstanceVerifyEmptyState(TestCase testCase)
         {