From bffd8cc4d990e8559605a34d08d99a257acb074e Mon Sep 17 00:00:00 2001 From: Fei Peng Date: Mon, 5 Feb 2018 23:15:12 -0800 Subject: [PATCH] Implement SSE2 Store* intrinsics --- src/jit/hwintrinsiclistxarch.h | 6 + src/jit/instrsxarch.h | 2 + tests/src/JIT/HardwareIntrinsics/X86/Sse2/Store.cs | 221 +++++++++++++++++ .../HardwareIntrinsics/X86/Sse2/StoreAligned.cs | 264 ++++++++++++++++++++ .../X86/Sse2/StoreAlignedNonTemporal.cs | 265 +++++++++++++++++++++ .../X86/Sse2/StoreAlignedNonTemporal_r.csproj | 34 +++ .../X86/Sse2/StoreAlignedNonTemporal_ro.csproj | 34 +++ .../X86/Sse2/StoreAligned_r.csproj | 34 +++ .../X86/Sse2/StoreAligned_ro.csproj | 34 +++ .../JIT/HardwareIntrinsics/X86/Sse2/StoreHigh.cs | 77 ++++++ .../HardwareIntrinsics/X86/Sse2/StoreHigh_r.csproj | 34 +++ .../X86/Sse2/StoreHigh_ro.csproj | 34 +++ .../JIT/HardwareIntrinsics/X86/Sse2/StoreLow.cs | 112 +++++++++ .../HardwareIntrinsics/X86/Sse2/StoreLow_r.csproj | 34 +++ .../HardwareIntrinsics/X86/Sse2/StoreLow_ro.csproj | 34 +++ .../JIT/HardwareIntrinsics/X86/Sse2/StoreScalar.cs | 78 ++++++ .../X86/Sse2/StoreScalar_r.csproj | 34 +++ .../X86/Sse2/StoreScalar_ro.csproj | 34 +++ .../JIT/HardwareIntrinsics/X86/Sse2/Store_r.csproj | 34 +++ .../HardwareIntrinsics/X86/Sse2/Store_ro.csproj | 34 +++ 20 files changed, 1433 insertions(+) create mode 100644 tests/src/JIT/HardwareIntrinsics/X86/Sse2/Store.cs create mode 100644 tests/src/JIT/HardwareIntrinsics/X86/Sse2/StoreAligned.cs create mode 100644 tests/src/JIT/HardwareIntrinsics/X86/Sse2/StoreAlignedNonTemporal.cs create mode 100644 tests/src/JIT/HardwareIntrinsics/X86/Sse2/StoreAlignedNonTemporal_r.csproj create mode 100644 tests/src/JIT/HardwareIntrinsics/X86/Sse2/StoreAlignedNonTemporal_ro.csproj create mode 100644 tests/src/JIT/HardwareIntrinsics/X86/Sse2/StoreAligned_r.csproj create mode 100644 tests/src/JIT/HardwareIntrinsics/X86/Sse2/StoreAligned_ro.csproj create mode 100644 tests/src/JIT/HardwareIntrinsics/X86/Sse2/StoreHigh.cs create mode 100644 
tests/src/JIT/HardwareIntrinsics/X86/Sse2/StoreHigh_r.csproj create mode 100644 tests/src/JIT/HardwareIntrinsics/X86/Sse2/StoreHigh_ro.csproj create mode 100644 tests/src/JIT/HardwareIntrinsics/X86/Sse2/StoreLow.cs create mode 100644 tests/src/JIT/HardwareIntrinsics/X86/Sse2/StoreLow_r.csproj create mode 100644 tests/src/JIT/HardwareIntrinsics/X86/Sse2/StoreLow_ro.csproj create mode 100644 tests/src/JIT/HardwareIntrinsics/X86/Sse2/StoreScalar.cs create mode 100644 tests/src/JIT/HardwareIntrinsics/X86/Sse2/StoreScalar_r.csproj create mode 100644 tests/src/JIT/HardwareIntrinsics/X86/Sse2/StoreScalar_ro.csproj create mode 100644 tests/src/JIT/HardwareIntrinsics/X86/Sse2/Store_r.csproj create mode 100644 tests/src/JIT/HardwareIntrinsics/X86/Sse2/Store_ro.csproj diff --git a/src/jit/hwintrinsiclistxarch.h b/src/jit/hwintrinsiclistxarch.h index d32aaf7..ab1f2bb5 100644 --- a/src/jit/hwintrinsiclistxarch.h +++ b/src/jit/hwintrinsiclistxarch.h @@ -157,6 +157,12 @@ HARDWARE_INTRINSIC(SSE2_PackUnsignedSaturate, "PackUnsign HARDWARE_INTRINSIC(SSE2_SetZeroVector128, "SetZeroVector128", SSE2, -1, 16, 0, {INS_pxor, INS_pxor, INS_pxor, INS_pxor, INS_pxor, INS_pxor, INS_pxor, INS_pxor, INS_invalid, INS_xorpd}, HW_Category_Helper, HW_Flag_NoFlag) HARDWARE_INTRINSIC(SSE2_SumAbsoluteDifferences, "SumAbsoluteDifferences", SSE2, -1, 16, 2, {INS_invalid, INS_psadbw, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromArg) HARDWARE_INTRINSIC(SSE2_Sqrt, "Sqrt", SSE2, -1, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sqrtpd}, HW_Category_SimpleSIMD, HW_Flag_NoFlag) +HARDWARE_INTRINSIC(SSE2_Store, "Store", SSE2, -1, 16, 2, {INS_movdqu, INS_movdqu, INS_movdqu, INS_movdqu, INS_movdqu, INS_movdqu, INS_movdqu, INS_movdqu, INS_invalid, INS_movupd}, HW_Category_MemoryStore, HW_Flag_NoFlag) 
+HARDWARE_INTRINSIC(SSE2_StoreAligned, "StoreAligned", SSE2, -1, 16, 2, {INS_movdqa, INS_movdqa, INS_movdqa, INS_movdqa, INS_movdqa, INS_movdqa, INS_movdqa, INS_movdqa, INS_invalid, INS_movapd}, HW_Category_MemoryStore, HW_Flag_NoFlag) +HARDWARE_INTRINSIC(SSE2_StoreAlignedNonTemporal, "StoreAlignedNonTemporal", SSE2, -1, 16, 2, {INS_movntdq, INS_movntdq, INS_movntdq, INS_movntdq, INS_movntdq, INS_movntdq, INS_movntdq, INS_movntdq, INS_invalid, INS_movntpd}, HW_Category_MemoryStore, HW_Flag_NoFlag) +HARDWARE_INTRINSIC(SSE2_StoreScalar, "StoreScalar", SSE2, -1, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_movsdsse2}, HW_Category_MemoryStore, HW_Flag_NoFlag) +HARDWARE_INTRINSIC(SSE2_StoreLow, "StoreLow", SSE2, -1, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_movq, INS_movq, INS_invalid, INS_movlpd}, HW_Category_MemoryStore, HW_Flag_NoFlag) +HARDWARE_INTRINSIC(SSE2_StoreHigh, "StoreHigh", SSE2, -1, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_movhpd}, HW_Category_MemoryStore, HW_Flag_NoFlag) HARDWARE_INTRINSIC(SSE2_Subtract, "Subtract", SSE2, -1, 16, 2, {INS_psubb, INS_psubb, INS_psubw, INS_psubw, INS_psubd, INS_psubd, INS_psubq, INS_psubq, INS_invalid, INS_subpd}, HW_Category_SimpleSIMD, HW_Flag_NoFlag) HARDWARE_INTRINSIC(SSE2_SubtractSaturate, "SubtractSaturate", SSE2, -1, 16, 2, {INS_psubsb, INS_psubusb, INS_psubsw, INS_psubusw, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_NoFlag) HARDWARE_INTRINSIC(SSE2_UnpackHigh, "UnpackHigh", SSE2, -1, 16, 2, {INS_punpckhbw, INS_punpckhbw, INS_punpckhwd, INS_punpckhwd, INS_punpckhdq, INS_punpckhdq, INS_punpckhqdq,INS_punpckhqdq,INS_invalid, INS_unpckhpd}, HW_Category_SimpleSIMD, HW_Flag_NoFlag) diff --git a/src/jit/instrsxarch.h b/src/jit/instrsxarch.h 
index ca61a66..7cb3842 100644 --- a/src/jit/instrsxarch.h +++ b/src/jit/instrsxarch.h @@ -194,6 +194,8 @@ INST3( xorps, "xorps" , 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, PCK INST3( cvttsd2si, "cvttsd2si" , 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, SSEDBL(0x2C)) // cvt with trunc scalar double to signed DWORDs #ifndef LEGACY_BACKEND +INST3( movntdq, "movntdq" , 0, IUM_WR, 0, 0, PCKDBL(0xE7), BAD_CODE, BAD_CODE) +INST3( movntpd, "movntpd" , 0, IUM_WR, 0, 0, PCKDBL(0x2B), BAD_CODE, BAD_CODE) INST3( movntps, "movntps" , 0, IUM_WR, 0, 0, PCKFLT(0x2B), BAD_CODE, BAD_CODE) INST3( movdqu, "movdqu" , 0, IUM_WR, 0, 0, SSEFLT(0x7F), BAD_CODE, SSEFLT(0x6F)) INST3( movdqa, "movdqa" , 0, IUM_WR, 0, 0, PCKDBL(0x7F), BAD_CODE, PCKDBL(0x6F)) diff --git a/tests/src/JIT/HardwareIntrinsics/X86/Sse2/Store.cs b/tests/src/JIT/HardwareIntrinsics/X86/Sse2/Store.cs new file mode 100644 index 0000000..ce11559 --- /dev/null +++ b/tests/src/JIT/HardwareIntrinsics/X86/Sse2/Store.cs @@ -0,0 +1,221 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. +// See the LICENSE file in the project root for more information. 
+// + +using System; +using System.Runtime.CompilerServices; +using System.Runtime.InteropServices; +using System.Runtime.Intrinsics.X86; +using System.Runtime.Intrinsics; + +namespace IntelHardwareIntrinsicTest +{ + class Program + { + const int Pass = 100; + const int Fail = 0; + + static unsafe int Main(string[] args) + { + int testResult = Pass; + + if (Sse2.IsSupported) + { + using (TestTable doubleTable = new TestTable(new double[2] { 1, -5 }, new double[2])) + { + var vf = Unsafe.Read>(doubleTable.inArrayPtr); + Sse2.Store((double*)(doubleTable.outArrayPtr), vf); + + if (!doubleTable.CheckResult((x, y) => BitConverter.DoubleToInt64Bits(x) == BitConverter.DoubleToInt64Bits(y))) + { + Console.WriteLine("Sse2 Store failed on double:"); + foreach (var item in doubleTable.outArray) + { + Console.Write(item + ", "); + } + Console.WriteLine(); + testResult = Fail; + } + } + + using (TestTable intTable = new TestTable(new long[2] { 1, -5 }, new long[2])) + { + var vf = Unsafe.Read>(intTable.inArrayPtr); + Sse2.Store((long*)(intTable.outArrayPtr), vf); + + if (!intTable.CheckResult((x, y) => x == y)) + { + Console.WriteLine("Sse2 Store failed on long:"); + foreach (var item in intTable.outArray) + { + Console.Write(item + ", "); + } + Console.WriteLine(); + testResult = Fail; + } + } + + using (TestTable intTable = new TestTable(new ulong[2] { 1, 5 }, new ulong[2])) + { + var vf = Unsafe.Read>(intTable.inArrayPtr); + Sse2.Store((ulong*)(intTable.outArrayPtr), vf); + + if (!intTable.CheckResult((x, y) => x == y)) + { + Console.WriteLine("Sse2 Store failed on ulong:"); + foreach (var item in intTable.outArray) + { + Console.Write(item + ", "); + } + Console.WriteLine(); + testResult = Fail; + } + } + + using (TestTable intTable = new TestTable(new int[4] { 1, -5, 100, 0 }, new int[4])) + { + var vf = Unsafe.Read>(intTable.inArrayPtr); + Sse2.Store((int*)(intTable.outArrayPtr), vf); + + if (!intTable.CheckResult((x, y) => x == y)) + { + Console.WriteLine("Sse2 Store 
failed on int:"); + foreach (var item in intTable.outArray) + { + Console.Write(item + ", "); + } + Console.WriteLine(); + testResult = Fail; + } + } + + using (TestTable intTable = new TestTable(new uint[4] { 1, 5, 100, 0 }, new uint[4])) + { + var vf = Unsafe.Read>(intTable.inArrayPtr); + Sse2.Store((uint*)(intTable.outArrayPtr), vf); + + if (!intTable.CheckResult((x, y) => x == y)) + { + Console.WriteLine("Sse2 Store failed on uint:"); + foreach (var item in intTable.outArray) + { + Console.Write(item + ", "); + } + Console.WriteLine(); + testResult = Fail; + } + } + + using (TestTable intTable = new TestTable(new short[8] { 1, -5, 100, 0, 1, 2, 3, 4 }, new short[8])) + { + var vf = Unsafe.Read>(intTable.inArrayPtr); + Sse2.Store((short*)(intTable.outArrayPtr), vf); + + if (!intTable.CheckResult((x, y) => x == y)) + { + Console.WriteLine("Sse2 Store failed on short:"); + foreach (var item in intTable.outArray) + { + Console.Write(item + ", "); + } + Console.WriteLine(); + testResult = Fail; + } + } + + using (TestTable intTable = new TestTable(new ushort[8] { 1, 5, 100, 0, 1, 2, 3, 4 }, new ushort[8])) + { + var vf = Unsafe.Read>(intTable.inArrayPtr); + Sse2.Store((ushort*)(intTable.outArrayPtr), vf); + + if (!intTable.CheckResult((x, y) => x == y)) + { + Console.WriteLine("Sse2 Store failed on ushort:"); + foreach (var item in intTable.outArray) + { + Console.Write(item + ", "); + } + Console.WriteLine(); + testResult = Fail; + } + } + + using (TestTable intTable = new TestTable(new sbyte[16] { 1, -5, 100, 0, 1, 2, 3, 4, 1, -5, 100, 0, 1, 2, 3, 4 }, new sbyte[16])) + { + var vf = Unsafe.Read>(intTable.inArrayPtr); + Sse2.Store((sbyte*)(intTable.outArrayPtr), vf); + + if (!intTable.CheckResult((x, y) => x == y)) + { + Console.WriteLine("Sse2 Store failed on sbyte:"); + foreach (var item in intTable.outArray) + { + Console.Write(item + ", "); + } + Console.WriteLine(); + testResult = Fail; + } + } + + using (TestTable intTable = new TestTable(new byte[16] { 1, 5, 
100, 0, 1, 2, 3, 4, 1, 5, 100, 0, 1, 2, 3, 4 }, new byte[16])) + { + var vf = Unsafe.Read>(intTable.inArrayPtr); + Sse2.Store((byte*)(intTable.outArrayPtr), vf); + + if (!intTable.CheckResult((x, y) => x == y)) + { + Console.WriteLine("Sse2 Store failed on byte:"); + foreach (var item in intTable.outArray) + { + Console.Write(item + ", "); + } + Console.WriteLine(); + testResult = Fail; + } + } + + } + + return testResult; + } + + public unsafe struct TestTable : IDisposable where T : struct + { + public T[] inArray; + public T[] outArray; + + public void* inArrayPtr => inHandle.AddrOfPinnedObject().ToPointer(); + public void* outArrayPtr => outHandle.AddrOfPinnedObject().ToPointer(); + + GCHandle inHandle; + GCHandle outHandle; + public TestTable(T[] a, T[] b) + { + this.inArray = a; + this.outArray = b; + + inHandle = GCHandle.Alloc(inArray, GCHandleType.Pinned); + outHandle = GCHandle.Alloc(outArray, GCHandleType.Pinned); + } + public bool CheckResult(Func check) + { + for (int i = 0; i < inArray.Length; i++) + { + if (!check(inArray[i], outArray[i])) + { + return false; + } + } + return true; + } + + public void Dispose() + { + inHandle.Free(); + outHandle.Free(); + } + } + + } +} diff --git a/tests/src/JIT/HardwareIntrinsics/X86/Sse2/StoreAligned.cs b/tests/src/JIT/HardwareIntrinsics/X86/Sse2/StoreAligned.cs new file mode 100644 index 0000000..c35d4fc --- /dev/null +++ b/tests/src/JIT/HardwareIntrinsics/X86/Sse2/StoreAligned.cs @@ -0,0 +1,264 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. +// See the LICENSE file in the project root for more information. 
+// + +using System; +using System.Runtime.CompilerServices; +using System.Runtime.InteropServices; +using System.Runtime.Intrinsics.X86; +using System.Runtime.Intrinsics; + +namespace IntelHardwareIntrinsicTest +{ + class Program + { + const int Pass = 100; + const int Fail = 0; + + static unsafe int Main(string[] args) + { + int testResult = Pass; + + if (Sse2.IsSupported) + { + { + double* inArray = stackalloc double[2]; + byte* outBuffer = stackalloc byte[32]; + double* outArray = (double*)Align(outBuffer, 16); + + var vf = Unsafe.Read>(inArray); + Sse2.StoreAligned(outArray, vf); + + for (var i = 0; i < 2; i++) + { + if (BitConverter.DoubleToInt64Bits(inArray[i]) != BitConverter.DoubleToInt64Bits(outArray[i])) + { + Console.WriteLine("Sse2 StoreAligned failed on double:"); + for (var n = 0; n < 2; n++) + { + Console.Write(outArray[n] + ", "); + } + Console.WriteLine(); + + testResult = Fail; + break; + } + } + } + + { + long* inArray = stackalloc long[2]; + byte* outBuffer = stackalloc byte[32]; + long* outArray = (long*)Align(outBuffer, 16); + + var vf = Unsafe.Read>(inArray); + Sse2.StoreAligned(outArray, vf); + + for (var i = 0; i < 2; i++) + { + if (inArray[i] != outArray[i]) + { + Console.WriteLine("Sse2 StoreAligned failed on long:"); + for (var n = 0; n < 2; n++) + { + Console.Write(outArray[n] + ", "); + } + Console.WriteLine(); + + testResult = Fail; + break; + } + } + } + + { + ulong* inArray = stackalloc ulong[2]; + byte* outBuffer = stackalloc byte[32]; + ulong* outArray = (ulong*)Align(outBuffer, 16); + + var vf = Unsafe.Read>(inArray); + Sse2.StoreAligned(outArray, vf); + + for (var i = 0; i < 2; i++) + { + if (inArray[i] != outArray[i]) + { + Console.WriteLine("Sse2 StoreAligned failed on ulong:"); + for (var n = 0; n < 2; n++) + { + Console.Write(outArray[n] + ", "); + } + Console.WriteLine(); + + testResult = Fail; + break; + } + } + } + + { + int* inArray = stackalloc int[4]; + byte* outBuffer = stackalloc byte[32]; + int* outArray = 
(int*)Align(outBuffer, 16); + + var vf = Unsafe.Read>(inArray); + Sse2.StoreAligned(outArray, vf); + + for (var i = 0; i < 4; i++) + { + if (inArray[i] != outArray[i]) + { + Console.WriteLine("Sse2 StoreAligned failed on int:"); + for (var n = 0; n < 4; n++) + { + Console.Write(outArray[n] + ", "); + } + Console.WriteLine(); + + testResult = Fail; + break; + } + } + } + + { + uint* inArray = stackalloc uint[4]; + byte* outBuffer = stackalloc byte[32]; + uint* outArray = (uint*)Align(outBuffer, 16); + + var vf = Unsafe.Read>(inArray); + Sse2.StoreAligned(outArray, vf); + + for (var i = 0; i < 4; i++) + { + if (inArray[i] != outArray[i]) + { + Console.WriteLine("Sse2 StoreAligned failed on uint:"); + for (var n = 0; n < 4; n++) + { + Console.Write(outArray[n] + ", "); + } + Console.WriteLine(); + + testResult = Fail; + break; + } + } + } + + { + short* inArray = stackalloc short[8]; + byte* outBuffer = stackalloc byte[32]; + short* outArray = (short*)Align(outBuffer, 16); + + var vf = Unsafe.Read>(inArray); + Sse2.StoreAligned(outArray, vf); + + for (var i = 0; i < 8; i++) + { + if (inArray[i] != outArray[i]) + { + Console.WriteLine("Sse2 StoreAligned failed on short:"); + for (var n = 0; n < 8; n++) + { + Console.Write(outArray[n] + ", "); + } + Console.WriteLine(); + + testResult = Fail; + break; + } + } + } + + { + ushort* inArray = stackalloc ushort[8]; + byte* outBuffer = stackalloc byte[32]; + ushort* outArray = (ushort*)Align(outBuffer, 16); + + var vf = Unsafe.Read>(inArray); + Sse2.StoreAligned(outArray, vf); + + for (var i = 0; i < 8; i++) + { + if (inArray[i] != outArray[i]) + { + Console.WriteLine("Sse2 StoreAligned failed on ushort:"); + for (var n = 0; n < 8; n++) + { + Console.Write(outArray[n] + ", "); + } + Console.WriteLine(); + + testResult = Fail; + break; + } + } + } + + { + byte* inArray = stackalloc byte[16]; + byte* outBuffer = stackalloc byte[32]; + byte* outArray = (byte*)Align(outBuffer, 16); + + var vf = Unsafe.Read>(inArray); + 
Sse2.StoreAligned(outArray, vf); + + for (var i = 0; i < 16; i++) + { + if (inArray[i] != outArray[i]) + { + Console.WriteLine("Sse2 StoreAligned failed on byte:"); + for (var n = 0; n < 16; n++) + { + Console.Write(outArray[n] + ", "); + } + Console.WriteLine(); + + testResult = Fail; + break; + } + } + } + + { + sbyte* inArray = stackalloc sbyte[16]; + byte* outBuffer = stackalloc byte[32]; + sbyte* outArray = (sbyte*)Align(outBuffer, 16); + + var vf = Unsafe.Read>(inArray); + Sse2.StoreAligned(outArray, vf); + + for (var i = 0; i < 16; i++) + { + if (inArray[i] != outArray[i]) + { + Console.WriteLine("Sse2 StoreAligned failed on byte:"); + for (var n = 0; n < 16; n++) + { + Console.Write(outArray[n] + ", "); + } + Console.WriteLine(); + + testResult = Fail; + break; + } + } + } + } + + return testResult; + } + + static unsafe void* Align(byte* buffer, byte expectedAlignment) + { + // Compute how bad the misalignment is, which is at most (expectedAlignment - 1). + // Then subtract that from the expectedAlignment and add it to the original address + // to compute the aligned address. + + var misalignment = expectedAlignment - ((ulong)(buffer) % expectedAlignment); + return (void*)(buffer + misalignment); + } + } +} diff --git a/tests/src/JIT/HardwareIntrinsics/X86/Sse2/StoreAlignedNonTemporal.cs b/tests/src/JIT/HardwareIntrinsics/X86/Sse2/StoreAlignedNonTemporal.cs new file mode 100644 index 0000000..3b1ff4c --- /dev/null +++ b/tests/src/JIT/HardwareIntrinsics/X86/Sse2/StoreAlignedNonTemporal.cs @@ -0,0 +1,265 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. +// See the LICENSE file in the project root for more information. 
+// + +using System; +using System.Runtime.CompilerServices; +using System.Runtime.InteropServices; +using System.Runtime.Intrinsics.X86; +using System.Runtime.Intrinsics; + +namespace IntelHardwareIntrinsicTest +{ + class Program + { + const int Pass = 100; + const int Fail = 0; + + static unsafe int Main(string[] args) + { + int testResult = Pass; + + if (Sse2.IsSupported) + { + { + double* inArray = stackalloc double[2]; + byte* outBuffer = stackalloc byte[32]; + double* outArray = (double*)Align(outBuffer, 16); + + var vf = Unsafe.Read>(inArray); + Sse2.StoreAlignedNonTemporal(outArray, vf); + + for (var i = 0; i < 2; i++) + { + if (BitConverter.DoubleToInt64Bits(inArray[i]) != BitConverter.DoubleToInt64Bits(outArray[i])) + { + Console.WriteLine("Sse2 StoreAlignedNonTemporal failed on double:"); + for (var n = 0; n < 2; n++) + { + Console.Write(outArray[n] + ", "); + } + Console.WriteLine(); + + testResult = Fail; + break; + } + } + } + + { + long* inArray = stackalloc long[2]; + byte* outBuffer = stackalloc byte[32]; + long* outArray = (long*)Align(outBuffer, 16); + + var vf = Unsafe.Read>(inArray); + Sse2.StoreAlignedNonTemporal(outArray, vf); + + for (var i = 0; i < 2; i++) + { + if (inArray[i] != outArray[i]) + { + Console.WriteLine("Sse2 StoreAlignedNonTemporal failed on long:"); + for (var n = 0; n < 2; n++) + { + Console.Write(outArray[n] + ", "); + } + Console.WriteLine(); + + testResult = Fail; + break; + } + } + } + + { + ulong* inArray = stackalloc ulong[2]; + byte* outBuffer = stackalloc byte[32]; + ulong* outArray = (ulong*)Align(outBuffer, 16); + + var vf = Unsafe.Read>(inArray); + Sse2.StoreAlignedNonTemporal(outArray, vf); + + for (var i = 0; i < 2; i++) + { + if (inArray[i] != outArray[i]) + { + Console.WriteLine("Sse2 StoreAlignedNonTemporal failed on ulong:"); + for (var n = 0; n < 2; n++) + { + Console.Write(outArray[n] + ", "); + } + Console.WriteLine(); + + testResult = Fail; + break; + } + } + } + + { + int* inArray = stackalloc int[4]; + 
byte* outBuffer = stackalloc byte[32]; + int* outArray = (int*)Align(outBuffer, 16); + + var vf = Unsafe.Read>(inArray); + Sse2.StoreAlignedNonTemporal(outArray, vf); + + for (var i = 0; i < 4; i++) + { + if (inArray[i] != outArray[i]) + { + Console.WriteLine("Sse2 StoreAlignedNonTemporal failed on int:"); + for (var n = 0; n < 4; n++) + { + Console.Write(outArray[n] + ", "); + } + Console.WriteLine(); + + testResult = Fail; + break; + } + } + } + + { + uint* inArray = stackalloc uint[4]; + byte* outBuffer = stackalloc byte[32]; + uint* outArray = (uint*)Align(outBuffer, 16); + + var vf = Unsafe.Read>(inArray); + Sse2.StoreAlignedNonTemporal(outArray, vf); + + for (var i = 0; i < 4; i++) + { + if (inArray[i] != outArray[i]) + { + Console.WriteLine("Sse2 StoreAlignedNonTemporal failed on uint:"); + for (var n = 0; n < 4; n++) + { + Console.Write(outArray[n] + ", "); + } + Console.WriteLine(); + + testResult = Fail; + break; + } + } + } + + { + short* inArray = stackalloc short[8]; + byte* outBuffer = stackalloc byte[32]; + short* outArray = (short*)Align(outBuffer, 16); + + var vf = Unsafe.Read>(inArray); + Sse2.StoreAlignedNonTemporal(outArray, vf); + + for (var i = 0; i < 8; i++) + { + if (inArray[i] != outArray[i]) + { + Console.WriteLine("Sse2 StoreAlignedNonTemporal failed on short:"); + for (var n = 0; n < 8; n++) + { + Console.Write(outArray[n] + ", "); + } + Console.WriteLine(); + + testResult = Fail; + break; + } + } + } + + { + ushort* inArray = stackalloc ushort[8]; + byte* outBuffer = stackalloc byte[32]; + ushort* outArray = (ushort*)Align(outBuffer, 16); + + var vf = Unsafe.Read>(inArray); + Sse2.StoreAlignedNonTemporal(outArray, vf); + + for (var i = 0; i < 8; i++) + { + if (inArray[i] != outArray[i]) + { + Console.WriteLine("Sse2 StoreAlignedNonTemporal failed on ushort:"); + for (var n = 0; n < 8; n++) + { + Console.Write(outArray[n] + ", "); + } + Console.WriteLine(); + + testResult = Fail; + break; + } + } + } + + { + byte* inArray = stackalloc 
byte[16]; + byte* outBuffer = stackalloc byte[32]; + byte* outArray = (byte*)Align(outBuffer, 16); + + var vf = Unsafe.Read>(inArray); + Sse2.StoreAlignedNonTemporal(outArray, vf); + + for (var i = 0; i < 16; i++) + { + if (inArray[i] != outArray[i]) + { + Console.WriteLine("Sse2 StoreAlignedNonTemporal failed on byte:"); + for (var n = 0; n < 16; n++) + { + Console.Write(outArray[n] + ", "); + } + Console.WriteLine(); + + testResult = Fail; + break; + } + } + } + + { + sbyte* inArray = stackalloc sbyte[16]; + byte* outBuffer = stackalloc byte[32]; + sbyte* outArray = (sbyte*)Align(outBuffer, 16); + + var vf = Unsafe.Read>(inArray); + Sse2.StoreAlignedNonTemporal(outArray, vf); + + for (var i = 0; i < 16; i++) + { + if (inArray[i] != outArray[i]) + { + Console.WriteLine("Sse2 StoreAlignedNonTemporal failed on byte:"); + for (var n = 0; n < 16; n++) + { + Console.Write(outArray[n] + ", "); + } + Console.WriteLine(); + + testResult = Fail; + break; + } + } + } + } + + return testResult; + } + + static unsafe void* Align(byte* buffer, byte expectedAlignment) + { + // Compute how bad the misalignment is, which is at most (expectedAlignment - 1). + // Then subtract that from the expectedAlignment and add it to the original address + // to compute the aligned address. 
+ + var misalignment = expectedAlignment - ((ulong)(buffer) % expectedAlignment); + return (void*)(buffer + misalignment); + } + } +} + diff --git a/tests/src/JIT/HardwareIntrinsics/X86/Sse2/StoreAlignedNonTemporal_r.csproj b/tests/src/JIT/HardwareIntrinsics/X86/Sse2/StoreAlignedNonTemporal_r.csproj new file mode 100644 index 0000000..81ca111 --- /dev/null +++ b/tests/src/JIT/HardwareIntrinsics/X86/Sse2/StoreAlignedNonTemporal_r.csproj @@ -0,0 +1,34 @@ + + + + + Debug + AnyCPU + 2.0 + {95DFC527-4DC1-495E-97D7-E94EE1F7140D} + Exe + {786C830F-07A1-408B-BD7F-6EE04809D6DB};{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC} + ..\..\ + true + + + + + + + False + + + + None + + + + + + + + + + + diff --git a/tests/src/JIT/HardwareIntrinsics/X86/Sse2/StoreAlignedNonTemporal_ro.csproj b/tests/src/JIT/HardwareIntrinsics/X86/Sse2/StoreAlignedNonTemporal_ro.csproj new file mode 100644 index 0000000..1170f4b --- /dev/null +++ b/tests/src/JIT/HardwareIntrinsics/X86/Sse2/StoreAlignedNonTemporal_ro.csproj @@ -0,0 +1,34 @@ + + + + + Debug + AnyCPU + 2.0 + {95DFC527-4DC1-495E-97D7-E94EE1F7140D} + Exe + {786C830F-07A1-408B-BD7F-6EE04809D6DB};{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC} + ..\..\ + true + + + + + + + False + + + + None + True + + + + + + + + + + diff --git a/tests/src/JIT/HardwareIntrinsics/X86/Sse2/StoreAligned_r.csproj b/tests/src/JIT/HardwareIntrinsics/X86/Sse2/StoreAligned_r.csproj new file mode 100644 index 0000000..41a34bb --- /dev/null +++ b/tests/src/JIT/HardwareIntrinsics/X86/Sse2/StoreAligned_r.csproj @@ -0,0 +1,34 @@ + + + + + Debug + AnyCPU + 2.0 + {95DFC527-4DC1-495E-97D7-E94EE1F7140D} + Exe + {786C830F-07A1-408B-BD7F-6EE04809D6DB};{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC} + ..\..\ + true + + + + + + + False + + + + None + + + + + + + + + + + diff --git a/tests/src/JIT/HardwareIntrinsics/X86/Sse2/StoreAligned_ro.csproj b/tests/src/JIT/HardwareIntrinsics/X86/Sse2/StoreAligned_ro.csproj new file mode 100644 index 0000000..eaa180f --- /dev/null +++ 
b/tests/src/JIT/HardwareIntrinsics/X86/Sse2/StoreAligned_ro.csproj @@ -0,0 +1,34 @@ + + + + + Debug + AnyCPU + 2.0 + {95DFC527-4DC1-495E-97D7-E94EE1F7140D} + Exe + {786C830F-07A1-408B-BD7F-6EE04809D6DB};{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC} + ..\..\ + true + + + + + + + False + + + + None + True + + + + + + + + + + diff --git a/tests/src/JIT/HardwareIntrinsics/X86/Sse2/StoreHigh.cs b/tests/src/JIT/HardwareIntrinsics/X86/Sse2/StoreHigh.cs new file mode 100644 index 0000000..72c471d --- /dev/null +++ b/tests/src/JIT/HardwareIntrinsics/X86/Sse2/StoreHigh.cs @@ -0,0 +1,77 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. +// See the LICENSE file in the project root for more information. +// + +using System; +using System.Runtime.CompilerServices; +using System.Runtime.InteropServices; +using System.Runtime.Intrinsics.X86; +using System.Runtime.Intrinsics; + +namespace IntelHardwareIntrinsicTest +{ + class Program + { + const int Pass = 100; + const int Fail = 0; + + static unsafe int Main(string[] args) + { + int testResult = Pass; + + if (Sse2.IsSupported) + { + using (TestTable doubleTable = new TestTable(new double[2] { 1, -5 }, new double[2])) + { + var vf = Unsafe.Read>(doubleTable.inArrayPtr); + Sse2.StoreHigh((double*)(doubleTable.outArrayPtr), vf); + + if (!doubleTable.CheckResult((x, y) => y[0] == x[1] && y[1] == 0)) + { + Console.WriteLine("SSE2 StoreHigh failed on double:"); + foreach (var item in doubleTable.outArray) + { + Console.Write(item + ", "); + } + Console.WriteLine(); + testResult = Fail; + } + } + } + + return testResult; + } + + public unsafe struct TestTable : IDisposable where T : struct + { + public T[] inArray; + public T[] outArray; + + public void* inArrayPtr => inHandle.AddrOfPinnedObject().ToPointer(); + public void* outArrayPtr => outHandle.AddrOfPinnedObject().ToPointer(); + + GCHandle inHandle; + GCHandle outHandle; + public TestTable(T[] a, 
T[] b) + { + this.inArray = a; + this.outArray = b; + + inHandle = GCHandle.Alloc(inArray, GCHandleType.Pinned); + outHandle = GCHandle.Alloc(outArray, GCHandleType.Pinned); + } + public bool CheckResult(Func check) + { + return check(inArray, outArray); + } + + public void Dispose() + { + inHandle.Free(); + outHandle.Free(); + } + } + + } +} diff --git a/tests/src/JIT/HardwareIntrinsics/X86/Sse2/StoreHigh_r.csproj b/tests/src/JIT/HardwareIntrinsics/X86/Sse2/StoreHigh_r.csproj new file mode 100644 index 0000000..ca533fe --- /dev/null +++ b/tests/src/JIT/HardwareIntrinsics/X86/Sse2/StoreHigh_r.csproj @@ -0,0 +1,34 @@ + + + + + Debug + AnyCPU + 2.0 + {95DFC527-4DC1-495E-97D7-E94EE1F7140D} + Exe + {786C830F-07A1-408B-BD7F-6EE04809D6DB};{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC} + ..\..\ + true + + + + + + + False + + + + None + + + + + + + + + + + diff --git a/tests/src/JIT/HardwareIntrinsics/X86/Sse2/StoreHigh_ro.csproj b/tests/src/JIT/HardwareIntrinsics/X86/Sse2/StoreHigh_ro.csproj new file mode 100644 index 0000000..10acc69 --- /dev/null +++ b/tests/src/JIT/HardwareIntrinsics/X86/Sse2/StoreHigh_ro.csproj @@ -0,0 +1,34 @@ + + + + + Debug + AnyCPU + 2.0 + {95DFC527-4DC1-495E-97D7-E94EE1F7140D} + Exe + {786C830F-07A1-408B-BD7F-6EE04809D6DB};{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC} + ..\..\ + true + + + + + + + False + + + + None + True + + + + + + + + + + diff --git a/tests/src/JIT/HardwareIntrinsics/X86/Sse2/StoreLow.cs b/tests/src/JIT/HardwareIntrinsics/X86/Sse2/StoreLow.cs new file mode 100644 index 0000000..df156d7 --- /dev/null +++ b/tests/src/JIT/HardwareIntrinsics/X86/Sse2/StoreLow.cs @@ -0,0 +1,112 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. +// See the LICENSE file in the project root for more information. 
+// + +using System; +using System.Runtime.CompilerServices; +using System.Runtime.InteropServices; +using System.Runtime.Intrinsics.X86; +using System.Runtime.Intrinsics; + +namespace IntelHardwareIntrinsicTest +{ + class Program + { + const int Pass = 100; + const int Fail = 0; + + static unsafe int Main(string[] args) + { + int testResult = Pass; + + if (Sse2.IsSupported) + { + using (TestTable doubleTable = new TestTable(new double[2] { 1, -5 }, new double[2])) + { + var vf = Unsafe.Read>(doubleTable.inArrayPtr); + Sse2.StoreLow((double*)(doubleTable.outArrayPtr), vf); + + if (!doubleTable.CheckResult((x, y) => BitConverter.DoubleToInt64Bits(x[0]) == BitConverter.DoubleToInt64Bits(y[0]) + && BitConverter.DoubleToInt64Bits(y[1]) == 0)) + { + Console.WriteLine("Sse2 StoreLow failed on double:"); + foreach (var item in doubleTable.outArray) + { + Console.Write(item + ", "); + } + Console.WriteLine(); + testResult = Fail; + } + } + + using (TestTable intTable = new TestTable(new long[2] { 1, -5 }, new long[2])) + { + var vf = Unsafe.Read>(intTable.inArrayPtr); + Sse2.StoreLow((long*)(intTable.outArrayPtr), vf); + + if (!intTable.CheckResult((x, y) => y[0] == x[0] && y[1] == 0)) + { + Console.WriteLine("Sse2 StoreLow failed on long:"); + foreach (var item in intTable.outArray) + { + Console.Write(item + ", "); + } + Console.WriteLine(); + testResult = Fail; + } + } + + using (TestTable intTable = new TestTable(new ulong[2] { 1, 5 }, new ulong[2])) + { + var vf = Unsafe.Read>(intTable.inArrayPtr); + Sse2.StoreLow((ulong*)(intTable.outArrayPtr), vf); + + if (!intTable.CheckResult((x, y) => y[0] == x[0] && y[1] == 0)) + { + Console.WriteLine("Sse2 StoreLow failed on ulong:"); + foreach (var item in intTable.outArray) + { + Console.Write(item + ", "); + } + Console.WriteLine(); + testResult = Fail; + } + } + } + + return testResult; + } + + public unsafe struct TestTable : IDisposable where T : struct + { + public T[] inArray; + public T[] outArray; + + public void* 
inArrayPtr => inHandle.AddrOfPinnedObject().ToPointer(); + public void* outArrayPtr => outHandle.AddrOfPinnedObject().ToPointer(); + + GCHandle inHandle; + GCHandle outHandle; + public TestTable(T[] a, T[] b) + { + this.inArray = a; + this.outArray = b; + + inHandle = GCHandle.Alloc(inArray, GCHandleType.Pinned); + outHandle = GCHandle.Alloc(outArray, GCHandleType.Pinned); + } + public bool CheckResult(Func check) + { + return check(inArray, outArray); + } + + public void Dispose() + { + inHandle.Free(); + outHandle.Free(); + } + } + + } +} diff --git a/tests/src/JIT/HardwareIntrinsics/X86/Sse2/StoreLow_r.csproj b/tests/src/JIT/HardwareIntrinsics/X86/Sse2/StoreLow_r.csproj new file mode 100644 index 0000000..6a53cfe --- /dev/null +++ b/tests/src/JIT/HardwareIntrinsics/X86/Sse2/StoreLow_r.csproj @@ -0,0 +1,34 @@ + + + + + Debug + AnyCPU + 2.0 + {95DFC527-4DC1-495E-97D7-E94EE1F7140D} + Exe + {786C830F-07A1-408B-BD7F-6EE04809D6DB};{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC} + ..\..\ + true + + + + + + + False + + + + None + + + + + + + + + + + diff --git a/tests/src/JIT/HardwareIntrinsics/X86/Sse2/StoreLow_ro.csproj b/tests/src/JIT/HardwareIntrinsics/X86/Sse2/StoreLow_ro.csproj new file mode 100644 index 0000000..5c7de6d --- /dev/null +++ b/tests/src/JIT/HardwareIntrinsics/X86/Sse2/StoreLow_ro.csproj @@ -0,0 +1,34 @@ + + + + + Debug + AnyCPU + 2.0 + {95DFC527-4DC1-495E-97D7-E94EE1F7140D} + Exe + {786C830F-07A1-408B-BD7F-6EE04809D6DB};{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC} + ..\..\ + true + + + + + + + False + + + + None + True + + + + + + + + + + diff --git a/tests/src/JIT/HardwareIntrinsics/X86/Sse2/StoreScalar.cs b/tests/src/JIT/HardwareIntrinsics/X86/Sse2/StoreScalar.cs new file mode 100644 index 0000000..51d29a1 --- /dev/null +++ b/tests/src/JIT/HardwareIntrinsics/X86/Sse2/StoreScalar.cs @@ -0,0 +1,78 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. 
+// See the LICENSE file in the project root for more information.
+//
+
+using System;
+using System.Runtime.CompilerServices;
+using System.Runtime.InteropServices;
+using System.Runtime.Intrinsics.X86;
+using System.Runtime.Intrinsics;
+
+namespace IntelHardwareIntrinsicTest
+{
+    class Program // Exercises Sse2.StoreScalar on double; Main's return value is the verdict.
+    {
+        const int Pass = 100; // returned when no check failed (also when SSE2 is not supported)
+        const int Fail = 0;   // returned when the check has failed
+
+        static unsafe int Main(string[] args)
+        {
+            int testResult = Pass;
+
+            if (Sse2.IsSupported) // the store is only attempted when SSE2 is reported as supported
+            {
+                using (TestTable<double> doubleTable = new TestTable<double>(new double[2] { 1, -5 }, new double[2]))
+                {
+                    var vf = Unsafe.Read<Vector128<double>>(doubleTable.inArrayPtr);
+                    Sse2.StoreScalar((double*)(doubleTable.outArrayPtr), vf);
+
+                    if (!doubleTable.CheckResult((x, y) => BitConverter.DoubleToInt64Bits(x[0]) == BitConverter.DoubleToInt64Bits(y[0])
+                                                        && BitConverter.DoubleToInt64Bits(y[1]) == 0)) // out[1] must still hold its initial zero bits
+                    {
+                        Console.WriteLine("Sse2 StoreScalar failed on double:");
+                        foreach (var item in doubleTable.outArray)
+                        {
+                            Console.Write(item + ", ");
+                        }
+                        Console.WriteLine();
+                        testResult = Fail;
+                    }
+                }
+            }
+
+            return testResult;
+        }
+
+        public unsafe struct TestTable<T> : IDisposable where T : struct // pins an input and an output array until Dispose
+        {
+            public T[] inArray;
+            public T[] outArray;
+
+            public void* inArrayPtr => inHandle.AddrOfPinnedObject().ToPointer();   // raw address of the pinned input array
+            public void* outArrayPtr => outHandle.AddrOfPinnedObject().ToPointer(); // raw address of the pinned output array
+
+            GCHandle inHandle;
+            GCHandle outHandle;
+            public TestTable(T[] a, T[] b)
+            {
+                this.inArray = a;
+                this.outArray = b;
+
+                inHandle = GCHandle.Alloc(inArray, GCHandleType.Pinned);
+                outHandle = GCHandle.Alloc(outArray, GCHandleType.Pinned);
+            }
+            public bool CheckResult(Func<T[], T[], bool> check) // check receives (inArray, outArray)
+            {
+                return check(inArray, outArray);
+            }
+
+            public void Dispose() // releases both pinning handles
+            {
+                inHandle.Free();
+                outHandle.Free();
+            }
+        }
+
+    }
+}
diff --git a/tests/src/JIT/HardwareIntrinsics/X86/Sse2/StoreScalar_r.csproj b/tests/src/JIT/HardwareIntrinsics/X86/Sse2/StoreScalar_r.csproj new file mode 100644 index 0000000..977ef94 --- /dev/null +++ b/tests/src/JIT/HardwareIntrinsics/X86/Sse2/StoreScalar_r.csproj @@
-0,0 +1,34 @@ + + + + + Debug + AnyCPU + 2.0 + {95DFC527-4DC1-495E-97D7-E94EE1F7140D} + Exe + {786C830F-07A1-408B-BD7F-6EE04809D6DB};{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC} + ..\..\ + true + + + + + + + False + + + + None + + + + + + + + + + + diff --git a/tests/src/JIT/HardwareIntrinsics/X86/Sse2/StoreScalar_ro.csproj b/tests/src/JIT/HardwareIntrinsics/X86/Sse2/StoreScalar_ro.csproj new file mode 100644 index 0000000..2e6f570 --- /dev/null +++ b/tests/src/JIT/HardwareIntrinsics/X86/Sse2/StoreScalar_ro.csproj @@ -0,0 +1,34 @@ + + + + + Debug + AnyCPU + 2.0 + {95DFC527-4DC1-495E-97D7-E94EE1F7140D} + Exe + {786C830F-07A1-408B-BD7F-6EE04809D6DB};{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC} + ..\..\ + true + + + + + + + False + + + + None + True + + + + + + + + + + diff --git a/tests/src/JIT/HardwareIntrinsics/X86/Sse2/Store_r.csproj b/tests/src/JIT/HardwareIntrinsics/X86/Sse2/Store_r.csproj new file mode 100644 index 0000000..a803854 --- /dev/null +++ b/tests/src/JIT/HardwareIntrinsics/X86/Sse2/Store_r.csproj @@ -0,0 +1,34 @@ + + + + + Debug + AnyCPU + 2.0 + {95DFC527-4DC1-495E-97D7-E94EE1F7140D} + Exe + {786C830F-07A1-408B-BD7F-6EE04809D6DB};{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC} + ..\..\ + true + + + + + + + False + + + + None + + + + + + + + + + + diff --git a/tests/src/JIT/HardwareIntrinsics/X86/Sse2/Store_ro.csproj b/tests/src/JIT/HardwareIntrinsics/X86/Sse2/Store_ro.csproj new file mode 100644 index 0000000..f0e1409 --- /dev/null +++ b/tests/src/JIT/HardwareIntrinsics/X86/Sse2/Store_ro.csproj @@ -0,0 +1,34 @@ + + + + + Debug + AnyCPU + 2.0 + {95DFC527-4DC1-495E-97D7-E94EE1F7140D} + Exe + {786C830F-07A1-408B-BD7F-6EE04809D6DB};{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC} + ..\..\ + true + + + + + + + False + + + + None + True + + + + + + + + + + -- 2.7.4