From c8fc83399a7f05fae24fa4083bd5385fd06e823c Mon Sep 17 00:00:00 2001 From: Jakob Botsch Nielsen Date: Mon, 26 Aug 2019 18:16:15 +0000 Subject: [PATCH] Fix SIMD local alignment check for empty rsp-based frames Fixes dotnet/coreclr#26022 Commit migrated from https://github.com/dotnet/coreclr/commit/cd9215e39cda8846003210d6ec415211207bebc7 --- src/coreclr/src/jit/compiler.h | 47 ++++++++++++++++------ .../HardwareIntrinsics/X86/Sse2/AlignVector128.cs | 33 +++++++++++++++ .../X86/Sse2/AlignVector128_r.csproj | 12 ++++++ .../X86/Sse2/AlignVector128_ro.csproj | 12 ++++++ src/coreclr/tests/src/JIT/Stress/ABI/ABIs.cs | 4 +- 5 files changed, 93 insertions(+), 15 deletions(-) create mode 100644 src/coreclr/tests/src/JIT/HardwareIntrinsics/X86/Sse2/AlignVector128.cs create mode 100644 src/coreclr/tests/src/JIT/HardwareIntrinsics/X86/Sse2/AlignVector128_r.csproj create mode 100644 src/coreclr/tests/src/JIT/HardwareIntrinsics/X86/Sse2/AlignVector128_ro.csproj diff --git a/src/coreclr/src/jit/compiler.h b/src/coreclr/src/jit/compiler.h index 3f282f2..3687c89 100644 --- a/src/coreclr/src/jit/compiler.h +++ b/src/coreclr/src/jit/compiler.h @@ -8117,26 +8117,49 @@ private: // Returns true if the TYP_SIMD locals on stack are aligned at their // preferred byte boundary specified by getSIMDTypeAlignment(). // - // As per the Intel manual, the preferred alignment for AVX vectors is 32-bytes. On Amd64, - // RSP/EBP is aligned at 16-bytes, therefore to align SIMD types at 32-bytes we need even - // RSP/EBP to be 32-byte aligned. It is not clear whether additional stack space used in - // aligning stack is worth the benefit and for now will use 16-byte alignment for AVX - // 256-bit vectors with unaligned load/stores to/from memory. On x86, the stack frame - // is aligned to 4 bytes. We need to extend existing support for double (8-byte) alignment - // to 16 or 32 byte alignment for frames with local SIMD vars, if that is determined to be + // As per the Intel manual, the preferred alignment for AVX vectors is + // 32-bytes. It is not clear whether additional stack space used in + // aligning stack is worth the benefit and for now will use 16-byte + // alignment for AVX 256-bit vectors with unaligned load/stores to/from + // memory. On x86, the stack frame is aligned to 4 bytes. We need to extend + // existing support for double (8-byte) alignment to 16 or 32 byte + // alignment for frames with local SIMD vars, if that is determined to be // profitable. // + // On Amd64 and SysV, RSP+8 is aligned on entry to the function (before + // prolog has run). This means that in RBP-based frames RBP will be 16-byte + // aligned. For RSP-based frames these are only sometimes aligned, depending + // on the frame size. + // bool isSIMDTypeLocalAligned(unsigned varNum) { #if defined(FEATURE_SIMD) && ALIGN_SIMD_TYPES if (lclVarIsSIMDType(varNum) && lvaTable[varNum].lvType != TYP_BYREF) { - bool ebpBased; - int off = lvaFrameAddress(varNum, &ebpBased); // TODO-Cleanup: Can't this use the lvExactSize on the varDsc? - int alignment = getSIMDTypeAlignment(lvaTable[varNum].lvType); - bool isAligned = (alignment <= STACK_ALIGN) && ((off % alignment) == 0); - return isAligned; + int alignment = getSIMDTypeAlignment(lvaTable[varNum].lvType); + if (alignment <= STACK_ALIGN) + { + bool rbpBased; + int off = lvaFrameAddress(varNum, &rbpBased); + // On SysV and Winx64 ABIs RSP+8 will be 16-byte aligned at the + // first instruction of a function. If our frame is RBP based + // then RBP will always be 16 bytes aligned, so we can simply + // check the offset. + if (rbpBased) + { + return (off % alignment) == 0; + } + + // For RSP-based frame the alignment of RSP depends on our + // locals. rsp+8 is aligned on entry and we just subtract frame + // size so it is not hard to compute. Note that the compiler + // tries hard to make sure the frame size means RSP will be + // 16-byte aligned, but for leaf functions without locals (i.e. + // frameSize = 0) it will not be. + int frameSize = codeGen->genTotalFrameSize(); + return ((8 - frameSize + off) % alignment) == 0; + } } #endif // FEATURE_SIMD diff --git a/src/coreclr/tests/src/JIT/HardwareIntrinsics/X86/Sse2/AlignVector128.cs b/src/coreclr/tests/src/JIT/HardwareIntrinsics/X86/Sse2/AlignVector128.cs new file mode 100644 index 0000000..70f733e --- /dev/null +++ b/src/coreclr/tests/src/JIT/HardwareIntrinsics/X86/Sse2/AlignVector128.cs @@ -0,0 +1,33 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. +// See the LICENSE file in the project root for more information. +// + +using System; +using System.Runtime.CompilerServices; +using System.Runtime.Intrinsics; + +internal class VectorTest +{ + private static int Main() + { + s_f = default; // avoid helper in Foo below + Console.WriteLine(Foo(default, default)); + return 100; + } + + private static Vector128 s_f; + // The JIT was picking a simple rsp-based frame for this function and then + // believed the second vector is 16-byte aligned when it is not. + [MethodImpl(MethodImplOptions.NoInlining)] + private static long Foo(S24 a, Vector128 looksAligned) + { + s_f = looksAligned; + return 0; + } + + private struct S24 + { + public long A, B, C; + } +} diff --git a/src/coreclr/tests/src/JIT/HardwareIntrinsics/X86/Sse2/AlignVector128_r.csproj b/src/coreclr/tests/src/JIT/HardwareIntrinsics/X86/Sse2/AlignVector128_r.csproj new file mode 100644 index 0000000..e164f89 --- /dev/null +++ b/src/coreclr/tests/src/JIT/HardwareIntrinsics/X86/Sse2/AlignVector128_r.csproj @@ -0,0 +1,12 @@ + + + Exe + + + None + + + + + + diff --git a/src/coreclr/tests/src/JIT/HardwareIntrinsics/X86/Sse2/AlignVector128_ro.csproj b/src/coreclr/tests/src/JIT/HardwareIntrinsics/X86/Sse2/AlignVector128_ro.csproj new file mode 100644 index 0000000..1a1082d --- /dev/null +++ b/src/coreclr/tests/src/JIT/HardwareIntrinsics/X86/Sse2/AlignVector128_ro.csproj @@ -0,0 +1,12 @@ + + + Exe + + + None + True + + + + + diff --git a/src/coreclr/tests/src/JIT/Stress/ABI/ABIs.cs b/src/coreclr/tests/src/JIT/Stress/ABI/ABIs.cs index 5309371..62d45cb 100644 --- a/src/coreclr/tests/src/JIT/Stress/ABI/ABIs.cs +++ b/src/coreclr/tests/src/JIT/Stress/ABI/ABIs.cs @@ -107,9 +107,7 @@ namespace ABIStress { typeof(byte), typeof(short), typeof(int), typeof(long), typeof(float), typeof(double), - // Vector128 is disabled for now due to - // https://github.com/dotnet/coreclr/issues/26022 - typeof(Vector), /*typeof(Vector128),*/ typeof(Vector256), + typeof(Vector), typeof(Vector128), typeof(Vector256), typeof(S1P), typeof(S2P), typeof(S2U), typeof(S3U), typeof(S4P), typeof(S4U), typeof(S5U), typeof(S6U), typeof(S7U), typeof(S8P), typeof(S8U), typeof(S9U), -- 2.7.4