From 89852773751f55a3d017657e1205d4797ba56ab7 Mon Sep 17 00:00:00 2001 From: Fei Peng Date: Mon, 5 Feb 2018 22:25:07 -0800 Subject: [PATCH] Implement AVX Store* intrinsics --- src/jit/hwintrinsiccodegenxarch.cpp | 4 +- src/jit/hwintrinsiclistxarch.h | 3 + tests/src/JIT/HardwareIntrinsics/X86/Avx/Store.cs | 238 +++++++++++++++++ .../JIT/HardwareIntrinsics/X86/Avx/StoreAligned.cs | 289 +++++++++++++++++++++ .../X86/Avx/StoreAlignedNonTemporal.cs | 289 +++++++++++++++++++++ .../X86/Avx/StoreAlignedNonTemporal_r.csproj | 34 +++ .../X86/Avx/StoreAlignedNonTemporal_ro.csproj | 34 +++ .../X86/Avx/StoreAligned_r.csproj | 34 +++ .../X86/Avx/StoreAligned_ro.csproj | 34 +++ .../JIT/HardwareIntrinsics/X86/Avx/Store_r.csproj | 34 +++ .../JIT/HardwareIntrinsics/X86/Avx/Store_ro.csproj | 34 +++ 11 files changed, 1025 insertions(+), 2 deletions(-) create mode 100644 tests/src/JIT/HardwareIntrinsics/X86/Avx/Store.cs create mode 100644 tests/src/JIT/HardwareIntrinsics/X86/Avx/StoreAligned.cs create mode 100644 tests/src/JIT/HardwareIntrinsics/X86/Avx/StoreAlignedNonTemporal.cs create mode 100644 tests/src/JIT/HardwareIntrinsics/X86/Avx/StoreAlignedNonTemporal_r.csproj create mode 100644 tests/src/JIT/HardwareIntrinsics/X86/Avx/StoreAlignedNonTemporal_ro.csproj create mode 100644 tests/src/JIT/HardwareIntrinsics/X86/Avx/StoreAligned_r.csproj create mode 100644 tests/src/JIT/HardwareIntrinsics/X86/Avx/StoreAligned_ro.csproj create mode 100644 tests/src/JIT/HardwareIntrinsics/X86/Avx/Store_r.csproj create mode 100644 tests/src/JIT/HardwareIntrinsics/X86/Avx/Store_ro.csproj diff --git a/src/jit/hwintrinsiccodegenxarch.cpp b/src/jit/hwintrinsiccodegenxarch.cpp index 1aea1f1..7ffc3e8 100644 --- a/src/jit/hwintrinsiccodegenxarch.cpp +++ b/src/jit/hwintrinsiccodegenxarch.cpp @@ -95,7 +95,7 @@ void CodeGen::genHWIntrinsic(GenTreeHWIntrinsic* node) genConsumeOperands(node); if (category == HW_Category_MemoryStore) { - emit->emitIns_AR_R(ins, emitTypeSize(TYP_SIMD16), op2->gtRegNum, 
op1->gtRegNum, 0); + emit->emitIns_AR_R(ins, simdSize, op2->gtRegNum, op1->gtRegNum, 0); } else if ((ival != -1) && varTypeIsFloating(baseType)) { @@ -103,7 +103,7 @@ void CodeGen::genHWIntrinsic(GenTreeHWIntrinsic* node) } else if (category == HW_Category_MemoryLoad) { - emit->emitIns_SIMD_R_R_AR(ins, emitTypeSize(TYP_SIMD16), targetReg, op1->gtRegNum, op2->gtRegNum); + emit->emitIns_SIMD_R_R_AR(ins, simdSize, targetReg, op1->gtRegNum, op2->gtRegNum); } else { diff --git a/src/jit/hwintrinsiclistxarch.h b/src/jit/hwintrinsiclistxarch.h index 18b0bc9..d32aaf7 100644 --- a/src/jit/hwintrinsiclistxarch.h +++ b/src/jit/hwintrinsiclistxarch.h @@ -186,6 +186,9 @@ HARDWARE_INTRINSIC(AVX_Add, "Add", HARDWARE_INTRINSIC(AVX_Multiply, "Multiply", AVX, -1, 32, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_mulps, INS_mulpd}, HW_Category_SimpleSIMD, HW_Flag_Commutative) HARDWARE_INTRINSIC(AVX_Reciprocal, "Reciprocal", AVX, -1, 32, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_rcpps, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_NoFlag) HARDWARE_INTRINSIC(AVX_BlendVariable, "BlendVariable", AVX, -1, 32, 3, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vblendvps, INS_vblendvpd}, HW_Category_SimpleSIMD, HW_Flag_NoFlag) +HARDWARE_INTRINSIC(AVX_Store, "Store", AVX, -1, 32, 2, {INS_movdqu, INS_movdqu, INS_movdqu, INS_movdqu, INS_movdqu, INS_movdqu, INS_movdqu, INS_movdqu, INS_movups, INS_movupd}, HW_Category_MemoryStore, HW_Flag_NoFlag) +HARDWARE_INTRINSIC(AVX_StoreAligned, "StoreAligned", AVX, -1, 32, 2, {INS_movdqa, INS_movdqa, INS_movdqa, INS_movdqa, INS_movdqa, INS_movdqa, INS_movdqa, INS_movdqa, INS_movaps, INS_movapd}, HW_Category_MemoryStore, HW_Flag_NoFlag) +HARDWARE_INTRINSIC(AVX_StoreAlignedNonTemporal, "StoreAlignedNonTemporal", AVX, - 1, 32, 2, {INS_movntdq, INS_movntdq, 
INS_movntdq, INS_movntdq, INS_movntdq, INS_movntdq, INS_movntdq, INS_movntdq, INS_movntps, INS_movntpd}, HW_Category_MemoryStore, HW_Flag_NoFlag) // AVX2 Intrinsics HARDWARE_INTRINSIC(AVX2_IsSupported, "get_IsSupported", AVX2, -1, 0, 0, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_IsSupportedProperty, HW_Flag_NoFlag) diff --git a/tests/src/JIT/HardwareIntrinsics/X86/Avx/Store.cs b/tests/src/JIT/HardwareIntrinsics/X86/Avx/Store.cs new file mode 100644 index 0000000..3b01058 --- /dev/null +++ b/tests/src/JIT/HardwareIntrinsics/X86/Avx/Store.cs @@ -0,0 +1,238 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. +// See the LICENSE file in the project root for more information. +// + +using System; +using System.Runtime.CompilerServices; +using System.Runtime.InteropServices; +using System.Runtime.Intrinsics.X86; +using System.Runtime.Intrinsics; + +namespace IntelHardwareIntrinsicTest +{ + class Program + { + const int Pass = 100; + const int Fail = 0; + + static unsafe int Main(string[] args) + { + int testResult = Pass; + + if (Avx.IsSupported) + { + using (TestTable<double> doubleTable = new TestTable<double>(new double[4] { 1, -5, 100, 0 }, new double[4])) + { + var vf = Unsafe.Read<Vector256<double>>(doubleTable.inArrayPtr); + Avx.Store((double*)(doubleTable.outArrayPtr), vf); + + if (!doubleTable.CheckResult((x, y) => BitConverter.DoubleToInt64Bits(x) == BitConverter.DoubleToInt64Bits(y))) + { + Console.WriteLine("Avx Store failed on double:"); + foreach (var item in doubleTable.outArray) + { + Console.Write(item + ", "); + } + Console.WriteLine(); + testResult = Fail; + } + } + + using (TestTable<float> floatTable = new TestTable<float>(new float[8] { 1, -5, 100, 0, 1, 2, 3, 4 }, new float[8])) + { + var vf = Unsafe.Read<Vector256<float>>(floatTable.inArrayPtr); + Avx.Store((float*)(floatTable.outArrayPtr), vf); + + if 
(!floatTable.CheckResult((x, y) => BitConverter.SingleToInt32Bits(x) == BitConverter.SingleToInt32Bits(y))) + { + Console.WriteLine("Avx Store failed on float:"); + foreach (var item in floatTable.outArray) + { + Console.Write(item + ", "); + } + Console.WriteLine(); + testResult = Fail; + } + } + + using (TestTable<long> intTable = new TestTable<long>(new long[4] { 1, -5, 100, 0 }, new long[4])) + { + var vf = Unsafe.Read<Vector256<long>>(intTable.inArrayPtr); + Avx.Store((long*)(intTable.outArrayPtr), vf); + + if (!intTable.CheckResult((x, y) => x == y)) + { + Console.WriteLine("Avx Store failed on long:"); + foreach (var item in intTable.outArray) + { + Console.Write(item + ", "); + } + Console.WriteLine(); + testResult = Fail; + } + } + + using (TestTable<ulong> intTable = new TestTable<ulong>(new ulong[4] { 1, 5, 100, 0 }, new ulong[4])) + { + var vf = Unsafe.Read<Vector256<ulong>>(intTable.inArrayPtr); + Avx.Store((ulong*)(intTable.outArrayPtr), vf); + + if (!intTable.CheckResult((x, y) => x == y)) + { + Console.WriteLine("Avx Store failed on ulong:"); + foreach (var item in intTable.outArray) + { + Console.Write(item + ", "); + } + Console.WriteLine(); + testResult = Fail; + } + } + + using (TestTable<int> intTable = new TestTable<int>(new int[8] { 1, -5, 100, 0, 1, 2, 3, 4 }, new int[8])) + { + var vf = Unsafe.Read<Vector256<int>>(intTable.inArrayPtr); + Avx.Store((int*)(intTable.outArrayPtr), vf); + + if (!intTable.CheckResult((x, y) => x == y)) + { + Console.WriteLine("Avx Store failed on int:"); + foreach (var item in intTable.outArray) + { + Console.Write(item + ", "); + } + Console.WriteLine(); + testResult = Fail; + } + } + + using (TestTable<uint> intTable = new TestTable<uint>(new uint[8] { 1, 5, 100, 0, 1, 2, 3, 4 }, new uint[8])) + { + var vf = Unsafe.Read<Vector256<uint>>(intTable.inArrayPtr); + Avx.Store((uint*)(intTable.outArrayPtr), vf); + + if (!intTable.CheckResult((x, y) => x == y)) + { + Console.WriteLine("Avx Store failed on uint:"); + foreach (var item in intTable.outArray) + { + Console.Write(item + ", "); + } + Console.WriteLine(); + testResult 
= Fail; + } + } + + using (TestTable<short> intTable = new TestTable<short>(new short[16] { 1, -5, 100, 0, 1, 2, 3, 4, 1, -5, 100, 0, 1, 2, 3, 4 }, new short[16])) + { + var vf = Unsafe.Read<Vector256<short>>(intTable.inArrayPtr); + Avx.Store((short*)(intTable.outArrayPtr), vf); + + if (!intTable.CheckResult((x, y) => x == y)) + { + Console.WriteLine("Avx Store failed on short:"); + foreach (var item in intTable.outArray) + { + Console.Write(item + ", "); + } + Console.WriteLine(); + testResult = Fail; + } + } + + using (TestTable<ushort> intTable = new TestTable<ushort>(new ushort[16] { 1, 5, 100, 0, 1, 2, 3, 4, 1, 5, 100, 0, 1, 2, 3, 4 }, new ushort[16])) + { + var vf = Unsafe.Read<Vector256<ushort>>(intTable.inArrayPtr); + Avx.Store((ushort*)(intTable.outArrayPtr), vf); + + if (!intTable.CheckResult((x, y) => x == y)) + { + Console.WriteLine("Avx Store failed on ushort:"); + foreach (var item in intTable.outArray) + { + Console.Write(item + ", "); + } + Console.WriteLine(); + testResult = Fail; + } + } + + using (TestTable<sbyte> intTable = new TestTable<sbyte>(new sbyte[32] { 1, -5, 100, 0, 1, 2, 3, 4, 1, -5, 100, 0, 1, 2, 3, 4, 1, -5, 100, 0, 1, 2, 3, 4, 1, -5, 100, 0, 1, 2, 3, 4 }, new sbyte[32])) + { + var vf = Unsafe.Read<Vector256<sbyte>>(intTable.inArrayPtr); + Avx.Store((sbyte*)(intTable.outArrayPtr), vf); + + if (!intTable.CheckResult((x, y) => x == y)) + { + Console.WriteLine("Avx Store failed on sbyte:"); + foreach (var item in intTable.outArray) + { + Console.Write(item + ", "); + } + Console.WriteLine(); + testResult = Fail; + } + } + + using (TestTable<byte> intTable = new TestTable<byte>(new byte[32] { 1, 5, 100, 0, 1, 2, 3, 4, 1, 5, 100, 0, 1, 2, 3, 4, 1, 5, 100, 0, 1, 2, 3, 4, 1, 5, 100, 0, 1, 2, 3, 4 }, new byte[32])) + { + var vf = Unsafe.Read<Vector256<byte>>(intTable.inArrayPtr); + Avx.Store((byte*)(intTable.outArrayPtr), vf); + + if (!intTable.CheckResult((x, y) => x == y)) + { + Console.WriteLine("Avx Store failed on byte:"); + foreach (var item in intTable.outArray) + { + Console.Write(item + ", "); + } + Console.WriteLine(); + testResult = Fail; + } + } + + } 
+ + return testResult; + } + + public unsafe struct TestTable<T> : IDisposable where T : struct + { + public T[] inArray; + public T[] outArray; + + public void* inArrayPtr => inHandle.AddrOfPinnedObject().ToPointer(); + public void* outArrayPtr => outHandle.AddrOfPinnedObject().ToPointer(); + + GCHandle inHandle; + GCHandle outHandle; + public TestTable(T[] a, T[] b) + { + this.inArray = a; + this.outArray = b; + + inHandle = GCHandle.Alloc(inArray, GCHandleType.Pinned); + outHandle = GCHandle.Alloc(outArray, GCHandleType.Pinned); + } + public bool CheckResult(Func<T, T, bool> check) + { + for (int i = 0; i < inArray.Length; i++) + { + if (!check(inArray[i], outArray[i])) + { + return false; + } + } + return true; + } + + public void Dispose() + { + inHandle.Free(); + outHandle.Free(); + } + } + + } +} diff --git a/tests/src/JIT/HardwareIntrinsics/X86/Avx/StoreAligned.cs b/tests/src/JIT/HardwareIntrinsics/X86/Avx/StoreAligned.cs new file mode 100644 index 0000000..07a694d --- /dev/null +++ b/tests/src/JIT/HardwareIntrinsics/X86/Avx/StoreAligned.cs @@ -0,0 +1,289 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. +// See the LICENSE file in the project root for more information. 
+// + +using System; +using System.Runtime.CompilerServices; +using System.Runtime.InteropServices; +using System.Runtime.Intrinsics.X86; +using System.Runtime.Intrinsics; + +namespace IntelHardwareIntrinsicTest +{ + class Program + { + const int Pass = 100; + const int Fail = 0; + + static unsafe int Main(string[] args) + { + int testResult = Pass; + + if (Avx.IsSupported) + { + { + double* inArray = stackalloc double[4]; + byte* outBuffer = stackalloc byte[64]; + double* outArray = (double*)Align(outBuffer, 32); + + var vf = Unsafe.Read<Vector256<double>>(inArray); + Avx.StoreAligned(outArray, vf); + + for (var i = 0; i < 4; i++) + { + if (BitConverter.DoubleToInt64Bits(inArray[i]) != BitConverter.DoubleToInt64Bits(outArray[i])) + { + Console.WriteLine("Avx StoreAligned failed on double:"); + for (var n = 0; n < 4; n++) + { + Console.Write(outArray[n] + ", "); + } + Console.WriteLine(); + + testResult = Fail; + break; + } + } + } + + { + float* inArray = stackalloc float[8]; + byte* outBuffer = stackalloc byte[64]; + float* outArray = (float*)Align(outBuffer, 32); + + var vf = Unsafe.Read<Vector256<float>>(inArray); + Avx.StoreAligned(outArray, vf); + + for (var i = 0; i < 8; i++) + { + if (BitConverter.SingleToInt32Bits(inArray[i]) != BitConverter.SingleToInt32Bits(outArray[i])) + { + Console.WriteLine("Avx StoreAligned failed on float:"); + for (var n = 0; n < 8; n++) + { + Console.Write(outArray[n] + ", "); + } + Console.WriteLine(); + + testResult = Fail; + break; + } + } + } + + { + long* inArray = stackalloc long[4]; + byte* outBuffer = stackalloc byte[64]; + long* outArray = (long*)Align(outBuffer, 32); + + var vf = Unsafe.Read<Vector256<long>>(inArray); + Avx.StoreAligned(outArray, vf); + + for (var i = 0; i < 4; i++) + { + if (inArray[i] != outArray[i]) + { + Console.WriteLine("Avx StoreAligned failed on long:"); + for (var n = 0; n < 4; n++) + { + Console.Write(outArray[n] + ", "); + } + Console.WriteLine(); + + testResult = Fail; + break; + } + } + } + + { + ulong* inArray = stackalloc ulong[4]; + 
byte* outBuffer = stackalloc byte[64]; + ulong* outArray = (ulong*)Align(outBuffer, 32); + + var vf = Unsafe.Read<Vector256<ulong>>(inArray); + Avx.StoreAligned(outArray, vf); + + for (var i = 0; i < 4; i++) + { + if (inArray[i] != outArray[i]) + { + Console.WriteLine("Avx StoreAligned failed on ulong:"); + for (var n = 0; n < 4; n++) + { + Console.Write(outArray[n] + ", "); + } + Console.WriteLine(); + + testResult = Fail; + break; + } + } + } + + { + int* inArray = stackalloc int[8]; + byte* outBuffer = stackalloc byte[64]; + int* outArray = (int*)Align(outBuffer, 32); + + var vf = Unsafe.Read<Vector256<int>>(inArray); + Avx.StoreAligned(outArray, vf); + + for (var i = 0; i < 8; i++) + { + if (inArray[i] != outArray[i]) + { + Console.WriteLine("Avx StoreAligned failed on int:"); + for (var n = 0; n < 8; n++) + { + Console.Write(outArray[n] + ", "); + } + Console.WriteLine(); + + testResult = Fail; + break; + } + } + } + + { + uint* inArray = stackalloc uint[8]; + byte* outBuffer = stackalloc byte[64]; + uint* outArray = (uint*)Align(outBuffer, 32); + + var vf = Unsafe.Read<Vector256<uint>>(inArray); + Avx.StoreAligned(outArray, vf); + + for (var i = 0; i < 8; i++) + { + if (inArray[i] != outArray[i]) + { + Console.WriteLine("Avx StoreAligned failed on uint:"); + for (var n = 0; n < 8; n++) + { + Console.Write(outArray[n] + ", "); + } + Console.WriteLine(); + + testResult = Fail; + break; + } + } + } + + { + short* inArray = stackalloc short[16]; + byte* outBuffer = stackalloc byte[64]; + short* outArray = (short*)Align(outBuffer, 32); + + var vf = Unsafe.Read<Vector256<short>>(inArray); + Avx.StoreAligned(outArray, vf); + + for (var i = 0; i < 16; i++) + { + if (inArray[i] != outArray[i]) + { + Console.WriteLine("Avx StoreAligned failed on short:"); + for (var n = 0; n < 16; n++) + { + Console.Write(outArray[n] + ", "); + } + Console.WriteLine(); + + testResult = Fail; + break; + } + } + } + + { + ushort* inArray = stackalloc ushort[16]; + byte* outBuffer = stackalloc byte[64]; + ushort* outArray = (ushort*)Align(outBuffer, 
32); + + var vf = Unsafe.Read<Vector256<ushort>>(inArray); + Avx.StoreAligned(outArray, vf); + + for (var i = 0; i < 16; i++) + { + if (inArray[i] != outArray[i]) + { + Console.WriteLine("Avx StoreAligned failed on ushort:"); + for (var n = 0; n < 16; n++) + { + Console.Write(outArray[n] + ", "); + } + Console.WriteLine(); + + testResult = Fail; + break; + } + } + } + + { + byte* inArray = stackalloc byte[32]; + byte* outBuffer = stackalloc byte[64]; + byte* outArray = (byte*)Align(outBuffer, 32); + + var vf = Unsafe.Read<Vector256<byte>>(inArray); + Avx.StoreAligned(outArray, vf); + + for (var i = 0; i < 32; i++) + { + if (inArray[i] != outArray[i]) + { + Console.WriteLine("Avx StoreAligned failed on byte:"); + for (var n = 0; n < 32; n++) + { + Console.Write(outArray[n] + ", "); + } + Console.WriteLine(); + + testResult = Fail; + break; + } + } + } + + { + sbyte* inArray = stackalloc sbyte[32]; + byte* outBuffer = stackalloc byte[64]; + sbyte* outArray = (sbyte*)Align(outBuffer, 32); + + var vf = Unsafe.Read<Vector256<sbyte>>(inArray); + Avx.StoreAligned(outArray, vf); + + for (var i = 0; i < 32; i++) + { + if (inArray[i] != outArray[i]) + { + Console.WriteLine("Avx StoreAligned failed on sbyte:"); + for (var n = 0; n < 32; n++) + { + Console.Write(outArray[n] + ", "); + } + Console.WriteLine(); + + testResult = Fail; + break; + } + } + } + } + + return testResult; + } + + static unsafe void* Align(byte* buffer, byte expectedAlignment) + { + // Compute how bad the misalignment is, which is at most (expectedAlignment - 1). + // Then subtract that from the expectedAlignment and add it to the original address + // to compute the aligned address. 
+ + var misalignment = expectedAlignment - ((ulong)(buffer) % expectedAlignment); + return (void*)(buffer + misalignment); + } + } +} diff --git a/tests/src/JIT/HardwareIntrinsics/X86/Avx/StoreAlignedNonTemporal.cs b/tests/src/JIT/HardwareIntrinsics/X86/Avx/StoreAlignedNonTemporal.cs new file mode 100644 index 0000000..81f4ce9 --- /dev/null +++ b/tests/src/JIT/HardwareIntrinsics/X86/Avx/StoreAlignedNonTemporal.cs @@ -0,0 +1,289 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. +// See the LICENSE file in the project root for more information. +// + +using System; +using System.Runtime.CompilerServices; +using System.Runtime.InteropServices; +using System.Runtime.Intrinsics.X86; +using System.Runtime.Intrinsics; + +namespace IntelHardwareIntrinsicTest +{ + class Program + { + const int Pass = 100; + const int Fail = 0; + + static unsafe int Main(string[] args) + { + int testResult = Pass; + + if (Avx.IsSupported) + { + { + double* inArray = stackalloc double[4]; + byte* outBuffer = stackalloc byte[64]; + double* outArray = (double*)Align(outBuffer, 32); + + var vf = Unsafe.Read<Vector256<double>>(inArray); + Avx.StoreAlignedNonTemporal(outArray, vf); + + for (var i = 0; i < 4; i++) + { + if (BitConverter.DoubleToInt64Bits(inArray[i]) != BitConverter.DoubleToInt64Bits(outArray[i])) + { + Console.WriteLine("Avx StoreAlignedNonTemporal failed on double:"); + for (var n = 0; n < 4; n++) + { + Console.Write(outArray[n] + ", "); + } + Console.WriteLine(); + + testResult = Fail; + break; + } + } + } + + { + float* inArray = stackalloc float[8]; + byte* outBuffer = stackalloc byte[64]; + float* outArray = (float*)Align(outBuffer, 32); + + var vf = Unsafe.Read<Vector256<float>>(inArray); + Avx.StoreAlignedNonTemporal(outArray, vf); + + for (var i = 0; i < 8; i++) + { + if (BitConverter.SingleToInt32Bits(inArray[i]) != BitConverter.SingleToInt32Bits(outArray[i])) + { + Console.WriteLine("Avx StoreAlignedNonTemporal 
failed on float:"); + for (var n = 0; n < 8; n++) + { + Console.Write(outArray[n] + ", "); + } + Console.WriteLine(); + + testResult = Fail; + break; + } + } + } + + { + long* inArray = stackalloc long[4]; + byte* outBuffer = stackalloc byte[64]; + long* outArray = (long*)Align(outBuffer, 32); + + var vf = Unsafe.Read<Vector256<long>>(inArray); + Avx.StoreAlignedNonTemporal(outArray, vf); + + for (var i = 0; i < 4; i++) + { + if (inArray[i] != outArray[i]) + { + Console.WriteLine("Avx StoreAlignedNonTemporal failed on long:"); + for (var n = 0; n < 4; n++) + { + Console.Write(outArray[n] + ", "); + } + Console.WriteLine(); + + testResult = Fail; + break; + } + } + } + + { + ulong* inArray = stackalloc ulong[4]; + byte* outBuffer = stackalloc byte[64]; + ulong* outArray = (ulong*)Align(outBuffer, 32); + + var vf = Unsafe.Read<Vector256<ulong>>(inArray); + Avx.StoreAlignedNonTemporal(outArray, vf); + + for (var i = 0; i < 4; i++) + { + if (inArray[i] != outArray[i]) + { + Console.WriteLine("Avx StoreAlignedNonTemporal failed on ulong:"); + for (var n = 0; n < 4; n++) + { + Console.Write(outArray[n] + ", "); + } + Console.WriteLine(); + + testResult = Fail; + break; + } + } + } + + { + int* inArray = stackalloc int[8]; + byte* outBuffer = stackalloc byte[64]; + int* outArray = (int*)Align(outBuffer, 32); + + var vf = Unsafe.Read<Vector256<int>>(inArray); + Avx.StoreAlignedNonTemporal(outArray, vf); + + for (var i = 0; i < 8; i++) + { + if (inArray[i] != outArray[i]) + { + Console.WriteLine("Avx StoreAlignedNonTemporal failed on int:"); + for (var n = 0; n < 8; n++) + { + Console.Write(outArray[n] + ", "); + } + Console.WriteLine(); + + testResult = Fail; + break; + } + } + } + + { + uint* inArray = stackalloc uint[8]; + byte* outBuffer = stackalloc byte[64]; + uint* outArray = (uint*)Align(outBuffer, 32); + + var vf = Unsafe.Read<Vector256<uint>>(inArray); + Avx.StoreAlignedNonTemporal(outArray, vf); + + for (var i = 0; i < 8; i++) + { + if (inArray[i] != outArray[i]) + { + Console.WriteLine("Avx StoreAlignedNonTemporal failed on 
uint:"); + for (var n = 0; n < 8; n++) + { + Console.Write(outArray[n] + ", "); + } + Console.WriteLine(); + + testResult = Fail; + break; + } + } + } + + { + short* inArray = stackalloc short[16]; + byte* outBuffer = stackalloc byte[64]; + short* outArray = (short*)Align(outBuffer, 32); + + var vf = Unsafe.Read<Vector256<short>>(inArray); + Avx.StoreAlignedNonTemporal(outArray, vf); + + for (var i = 0; i < 16; i++) + { + if (inArray[i] != outArray[i]) + { + Console.WriteLine("Avx StoreAlignedNonTemporal failed on short:"); + for (var n = 0; n < 16; n++) + { + Console.Write(outArray[n] + ", "); + } + Console.WriteLine(); + + testResult = Fail; + break; + } + } + } + + { + ushort* inArray = stackalloc ushort[16]; + byte* outBuffer = stackalloc byte[64]; + ushort* outArray = (ushort*)Align(outBuffer, 32); + + var vf = Unsafe.Read<Vector256<ushort>>(inArray); + Avx.StoreAlignedNonTemporal(outArray, vf); + + for (var i = 0; i < 16; i++) + { + if (inArray[i] != outArray[i]) + { + Console.WriteLine("Avx StoreAlignedNonTemporal failed on ushort:"); + for (var n = 0; n < 16; n++) + { + Console.Write(outArray[n] + ", "); + } + Console.WriteLine(); + + testResult = Fail; + break; + } + } + } + + { + byte* inArray = stackalloc byte[32]; + byte* outBuffer = stackalloc byte[64]; + byte* outArray = (byte*)Align(outBuffer, 32); + + var vf = Unsafe.Read<Vector256<byte>>(inArray); + Avx.StoreAlignedNonTemporal(outArray, vf); + + for (var i = 0; i < 32; i++) + { + if (inArray[i] != outArray[i]) + { + Console.WriteLine("Avx StoreAlignedNonTemporal failed on byte:"); + for (var n = 0; n < 32; n++) + { + Console.Write(outArray[n] + ", "); + } + Console.WriteLine(); + + testResult = Fail; + break; + } + } + } + + { + sbyte* inArray = stackalloc sbyte[32]; + byte* outBuffer = stackalloc byte[64]; + sbyte* outArray = (sbyte*)Align(outBuffer, 32); + + var vf = Unsafe.Read<Vector256<sbyte>>(inArray); + Avx.StoreAlignedNonTemporal(outArray, vf); + + for (var i = 0; i < 32; i++) + { + if (inArray[i] != outArray[i]) + { + Console.WriteLine("Avx 
StoreAlignedNonTemporal failed on sbyte:"); + for (var n = 0; n < 32; n++) + { + Console.Write(outArray[n] + ", "); + } + Console.WriteLine(); + + testResult = Fail; + break; + } + } + } + } + + return testResult; + } + + static unsafe void* Align(byte* buffer, byte expectedAlignment) + { + // Compute how bad the misalignment is, which is at most (expectedAlignment - 1). + // Then subtract that from the expectedAlignment and add it to the original address + // to compute the aligned address. + + var misalignment = expectedAlignment - ((ulong)(buffer) % expectedAlignment); + return (void*)(buffer + misalignment); + } + } +} diff --git a/tests/src/JIT/HardwareIntrinsics/X86/Avx/StoreAlignedNonTemporal_r.csproj b/tests/src/JIT/HardwareIntrinsics/X86/Avx/StoreAlignedNonTemporal_r.csproj new file mode 100644 index 0000000..81ca111 --- /dev/null +++ b/tests/src/JIT/HardwareIntrinsics/X86/Avx/StoreAlignedNonTemporal_r.csproj @@ -0,0 +1,34 @@ + + + + + Debug + AnyCPU + 2.0 + {95DFC527-4DC1-495E-97D7-E94EE1F7140D} + Exe + {786C830F-07A1-408B-BD7F-6EE04809D6DB};{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC} + ..\..\ + true + + + + + + + False + + + + None + + + + + + + + + + + diff --git a/tests/src/JIT/HardwareIntrinsics/X86/Avx/StoreAlignedNonTemporal_ro.csproj b/tests/src/JIT/HardwareIntrinsics/X86/Avx/StoreAlignedNonTemporal_ro.csproj new file mode 100644 index 0000000..1170f4b --- /dev/null +++ b/tests/src/JIT/HardwareIntrinsics/X86/Avx/StoreAlignedNonTemporal_ro.csproj @@ -0,0 +1,34 @@ + + + + + Debug + AnyCPU + 2.0 + {95DFC527-4DC1-495E-97D7-E94EE1F7140D} + Exe + {786C830F-07A1-408B-BD7F-6EE04809D6DB};{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC} + ..\..\ + true + + + + + + + False + + + + None + True + + + + + + + + + + diff --git a/tests/src/JIT/HardwareIntrinsics/X86/Avx/StoreAligned_r.csproj b/tests/src/JIT/HardwareIntrinsics/X86/Avx/StoreAligned_r.csproj new file mode 100644 index 0000000..41a34bb --- /dev/null +++ b/tests/src/JIT/HardwareIntrinsics/X86/Avx/StoreAligned_r.csproj 
@@ -0,0 +1,34 @@ + + + + + Debug + AnyCPU + 2.0 + {95DFC527-4DC1-495E-97D7-E94EE1F7140D} + Exe + {786C830F-07A1-408B-BD7F-6EE04809D6DB};{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC} + ..\..\ + true + + + + + + + False + + + + None + + + + + + + + + + + diff --git a/tests/src/JIT/HardwareIntrinsics/X86/Avx/StoreAligned_ro.csproj b/tests/src/JIT/HardwareIntrinsics/X86/Avx/StoreAligned_ro.csproj new file mode 100644 index 0000000..eaa180f --- /dev/null +++ b/tests/src/JIT/HardwareIntrinsics/X86/Avx/StoreAligned_ro.csproj @@ -0,0 +1,34 @@ + + + + + Debug + AnyCPU + 2.0 + {95DFC527-4DC1-495E-97D7-E94EE1F7140D} + Exe + {786C830F-07A1-408B-BD7F-6EE04809D6DB};{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC} + ..\..\ + true + + + + + + + False + + + + None + True + + + + + + + + + + diff --git a/tests/src/JIT/HardwareIntrinsics/X86/Avx/Store_r.csproj b/tests/src/JIT/HardwareIntrinsics/X86/Avx/Store_r.csproj new file mode 100644 index 0000000..a803854 --- /dev/null +++ b/tests/src/JIT/HardwareIntrinsics/X86/Avx/Store_r.csproj @@ -0,0 +1,34 @@ + + + + + Debug + AnyCPU + 2.0 + {95DFC527-4DC1-495E-97D7-E94EE1F7140D} + Exe + {786C830F-07A1-408B-BD7F-6EE04809D6DB};{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC} + ..\..\ + true + + + + + + + False + + + + None + + + + + + + + + + + diff --git a/tests/src/JIT/HardwareIntrinsics/X86/Avx/Store_ro.csproj b/tests/src/JIT/HardwareIntrinsics/X86/Avx/Store_ro.csproj new file mode 100644 index 0000000..f0e1409 --- /dev/null +++ b/tests/src/JIT/HardwareIntrinsics/X86/Avx/Store_ro.csproj @@ -0,0 +1,34 @@ + + + + + Debug + AnyCPU + 2.0 + {95DFC527-4DC1-495E-97D7-E94EE1F7140D} + Exe + {786C830F-07A1-408B-BD7F-6EE04809D6DB};{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC} + ..\..\ + true + + + + + + + False + + + + None + True + + + + + + + + + + -- 2.7.4