From 87526fb97efcf77092e04180ed9f5be10a085024 Mon Sep 17 00:00:00 2001 From: Jakob Botsch Nielsen Date: Thu, 27 Jul 2023 19:07:25 +0200 Subject: [PATCH] JIT: Handle half accesses for SIMDs in local morph (#89520) While it's not generally expected that halves can be accessed directly (ending up with LCL_FLD), it sometimes happens in some of the SW implementations of Vector256/Vector512 methods. In rare cases the JIT still falls back to these even with there is HW acceleration. In those cases we want to avoid DNER'ing the involved locals, so expand the existing recognition with these patterns. Also add a check to the existing SIMD16 -> SIMD12 to verify the source is a SIMD16. Fix #85359 Fix #89456 Some size wise regressions are expected, which come down to - A large number of similar looking tests end up now enregistering some locals that cause new upper half saves/restores to be required. This accounts for most of the size-wise regressions. - The expansions often do not result in smaller code because loading/storing the halves directly from/to stack is smaller code than the vector equivalent with extraction/insertion. Many of the regressions are in SW implementations of Vector256/Vector512 methods that we usually do not expect to see called with HW acceleration supported. --- src/coreclr/jit/lclmorph.cpp | 129 ++++++++++++++++----- .../JitBlue/Runtime_89456/Runtime_89456.cs | 36 ++++++ .../JitBlue/Runtime_89456/Runtime_89456.csproj | 8 ++ 3 files changed, 144 insertions(+), 29 deletions(-) create mode 100644 src/tests/JIT/Regression/JitBlue/Runtime_89456/Runtime_89456.cs create mode 100644 src/tests/JIT/Regression/JitBlue/Runtime_89456/Runtime_89456.csproj diff --git a/src/coreclr/jit/lclmorph.cpp b/src/coreclr/jit/lclmorph.cpp index 0f19090..c7cd158 100644 --- a/src/coreclr/jit/lclmorph.cpp +++ b/src/coreclr/jit/lclmorph.cpp @@ -936,18 +936,45 @@ private: var_types elementType = indir->TypeGet(); lclNode = indir->gtGetOp1()->BashToLclVar(m_compiler, lclNum); - if (elementType == TYP_FLOAT) + switch (elementType) { - GenTree* indexNode = m_compiler->gtNewIconNode(offset / genTypeSize(elementType)); - hwiNode = m_compiler->gtNewSimdGetElementNode(elementType, lclNode, indexNode, CORINFO_TYPE_FLOAT, - genTypeSize(varDsc)); - } - else - { - assert(elementType == TYP_SIMD12); - assert(genTypeSize(varDsc) == 16); - hwiNode = m_compiler->gtNewSimdHWIntrinsicNode(elementType, lclNode, NI_Vector128_AsVector3, - CORINFO_TYPE_FLOAT, 16); + case TYP_FLOAT: + { + GenTree* indexNode = m_compiler->gtNewIconNode(offset / genTypeSize(elementType)); + hwiNode = m_compiler->gtNewSimdGetElementNode(elementType, lclNode, indexNode, + CORINFO_TYPE_FLOAT, genTypeSize(varDsc)); + break; + } + case TYP_SIMD12: + { + assert(genTypeSize(varDsc) == 16); + hwiNode = m_compiler->gtNewSimdHWIntrinsicNode(elementType, lclNode, NI_Vector128_AsVector3, + CORINFO_TYPE_FLOAT, 16); + break; + } + case TYP_SIMD8: +#if defined(FEATURE_SIMD) && defined(TARGET_XARCH) + case TYP_SIMD16: + case TYP_SIMD32: +#endif + { + assert(genTypeSize(elementType) * 2 == genTypeSize(varDsc)); + if (offset == 0) + { + hwiNode = m_compiler->gtNewSimdGetLowerNode(elementType, lclNode, CORINFO_TYPE_FLOAT, + genTypeSize(varDsc)); + } + else + { + assert(offset == genTypeSize(elementType)); + hwiNode = m_compiler->gtNewSimdGetUpperNode(elementType, lclNode, CORINFO_TYPE_FLOAT, + genTypeSize(varDsc)); + } + + break; + } + default: + unreached(); } indir = hwiNode; @@ -962,23 +989,50 @@ private: GenTree* simdLclNode = m_compiler->gtNewLclVarNode(lclNum); GenTree* elementNode = indir->AsIndir()->Data(); - if (elementType == TYP_FLOAT) + switch (elementType) { - GenTree* indexNode = m_compiler->gtNewIconNode(offset / genTypeSize(elementType)); - hwiNode = - m_compiler->gtNewSimdWithElementNode(varDsc->TypeGet(), simdLclNode, indexNode, elementNode, - CORINFO_TYPE_FLOAT, genTypeSize(varDsc)); - } - else - { - assert(elementType == TYP_SIMD12); - assert(varDsc->TypeGet() == TYP_SIMD16); - - // We inverse the operands here and take elementNode as the main value and simdLclNode[3] as the - // new value. This gives us a new TYP_SIMD16 with all elements in the right spots - GenTree* indexNode = m_compiler->gtNewIconNode(3, TYP_INT); - hwiNode = m_compiler->gtNewSimdWithElementNode(TYP_SIMD16, elementNode, indexNode, simdLclNode, - CORINFO_TYPE_FLOAT, 16); + case TYP_FLOAT: + { + GenTree* indexNode = m_compiler->gtNewIconNode(offset / genTypeSize(elementType)); + hwiNode = + m_compiler->gtNewSimdWithElementNode(varDsc->TypeGet(), simdLclNode, indexNode, elementNode, + CORINFO_TYPE_FLOAT, genTypeSize(varDsc)); + break; + } + case TYP_SIMD12: + { + assert(varDsc->TypeGet() == TYP_SIMD16); + + // We inverse the operands here and take elementNode as the main value and simdLclNode[3] as the + // new value. This gives us a new TYP_SIMD16 with all elements in the right spots + GenTree* indexNode = m_compiler->gtNewIconNode(3, TYP_INT); + hwiNode = m_compiler->gtNewSimdWithElementNode(TYP_SIMD16, elementNode, indexNode, simdLclNode, + CORINFO_TYPE_FLOAT, 16); + break; + } + case TYP_SIMD8: +#if defined(FEATURE_SIMD) && defined(TARGET_XARCH) + case TYP_SIMD16: + case TYP_SIMD32: +#endif + { + assert(genTypeSize(elementType) * 2 == genTypeSize(varDsc)); + if (offset == 0) + { + hwiNode = m_compiler->gtNewSimdWithLowerNode(varDsc->TypeGet(), simdLclNode, elementNode, + CORINFO_TYPE_FLOAT, genTypeSize(varDsc)); + } + else + { + assert(offset == genTypeSize(elementType)); + hwiNode = m_compiler->gtNewSimdWithUpperNode(varDsc->TypeGet(), simdLclNode, elementNode, + CORINFO_TYPE_FLOAT, genTypeSize(varDsc)); + } + + break; + } + default: + unreached(); } indir->ChangeType(varDsc->TypeGet()); @@ -1112,9 +1166,10 @@ private: #ifdef FEATURE_HW_INTRINSICS if (varTypeIsSIMD(varDsc)) { - // We have two cases we want to handle: + // We have three cases we want to handle: // 1. Vector2/3/4 and Quaternion where we have 4x float fields // 2. Plane where we have 1x Vector3 and 1x float field + // 3. Accesses of halves of larger SIMD types if (indir->TypeIs(TYP_FLOAT)) { @@ -1125,11 +1180,27 @@ private: } else if (indir->TypeIs(TYP_SIMD12)) { - if ((offset == 0) && m_compiler->IsBaselineSimdIsaSupported()) + if ((offset == 0) && (varDsc->TypeGet() == TYP_SIMD16) && m_compiler->IsBaselineSimdIsaSupported()) { return isDef ? IndirTransform::WithElement : IndirTransform::GetElement; } } +#ifdef TARGET_ARM64 + else if (indir->TypeIs(TYP_SIMD8)) + { + if ((varDsc->TypeGet() == TYP_SIMD16) && ((offset % 8) == 0)) + { + return isDef ? IndirTransform::WithElement : IndirTransform::GetElement; + } + } +#endif +#if defined(FEATURE_SIMD) && defined(TARGET_XARCH) + else if (indir->TypeIs(TYP_SIMD16, TYP_SIMD32) && (genTypeSize(indir) * 2 == genTypeSize(varDsc)) && + ((offset % genTypeSize(indir)) == 0)) + { + return isDef ? IndirTransform::WithElement : IndirTransform::GetElement; + } +#endif // FEATURE_SIMD && TARGET_XARCH } #endif // FEATURE_HW_INTRINSICS diff --git a/src/tests/JIT/Regression/JitBlue/Runtime_89456/Runtime_89456.cs b/src/tests/JIT/Regression/JitBlue/Runtime_89456/Runtime_89456.cs new file mode 100644 index 0000000..252aa08 --- /dev/null +++ b/src/tests/JIT/Regression/JitBlue/Runtime_89456/Runtime_89456.cs @@ -0,0 +1,36 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. + +using System.Numerics; +using System.Runtime.CompilerServices; +using System.Runtime.Intrinsics; +using Xunit; + +public class Runtime_89456 +{ + [MethodImpl(MethodImplOptions.NoInlining)] + private static Vector3 Reinterp128(Vector128 v) + { + return Unsafe.As, Vector3>(ref v); + } + + [MethodImpl(MethodImplOptions.NoInlining)] + private static Vector3 Reinterp256(Vector256 v) + { + return Unsafe.As, Vector3>(ref v); + } + + [MethodImpl(MethodImplOptions.NoInlining)] + private static Vector3 Reinterp512(Vector512 v) + { + return Unsafe.As, Vector3>(ref v); + } + + [Fact] + public static void TestEntryPoint() + { + Reinterp128(default); + Reinterp256(default); + Reinterp512(default); + } +} diff --git a/src/tests/JIT/Regression/JitBlue/Runtime_89456/Runtime_89456.csproj b/src/tests/JIT/Regression/JitBlue/Runtime_89456/Runtime_89456.csproj new file mode 100644 index 0000000..de6d5e0 --- /dev/null +++ b/src/tests/JIT/Regression/JitBlue/Runtime_89456/Runtime_89456.csproj @@ -0,0 +1,8 @@ + + + True + + + + + -- 2.7.4