From 3f115efc404b5b8953093d6e081a721db245c83c Mon Sep 17 00:00:00 2001 From: Tanner Gooding Date: Sun, 24 Dec 2017 17:15:54 -0800 Subject: [PATCH] Adding support for the SSE Set, SetAll, and SetZero intrinsics --- src/jit/compiler.h | 13 ++++++++ src/jit/compiler.hpp | 13 ++++++++ src/jit/gentree.cpp | 59 ++++++++++++++++++++++++++++++++++++ src/jit/gentree.h | 8 +++++ src/jit/hwintrinsiccodegenxarch.cpp | 25 ++++++++++++++-- src/jit/hwintrinsicxarch.cpp | 34 +++++++++++++++++++++ src/jit/lsraxarch.cpp | 60 ++++++++++++++++++++++++++++--------- 7 files changed, 196 insertions(+), 16 deletions(-) diff --git a/src/jit/compiler.h b/src/jit/compiler.h index 0679d31..7e45ed0 100644 --- a/src/jit/compiler.h +++ b/src/jit/compiler.h @@ -2057,10 +2057,22 @@ public: #endif #if FEATURE_HW_INTRINSICS + GenTreeHWIntrinsic* gtNewSimdHWIntrinsicNode(var_types type, + NamedIntrinsic hwIntrinsicID, + var_types baseType, + unsigned size); GenTreeHWIntrinsic* gtNewSimdHWIntrinsicNode( var_types type, GenTree* op1, NamedIntrinsic hwIntrinsicID, var_types baseType, unsigned size); GenTreeHWIntrinsic* gtNewSimdHWIntrinsicNode( var_types type, GenTree* op1, GenTree* op2, NamedIntrinsic hwIntrinsicID, var_types baseType, unsigned size); + GenTreeHWIntrinsic* gtNewSimdHWIntrinsicNode(var_types type, + GenTree* op1, + GenTree* op2, + GenTree* op3, + GenTree* op4, + NamedIntrinsic hwIntrinsicID, + var_types baseType, + unsigned size); GenTreeHWIntrinsic* gtNewScalarHWIntrinsicNode(var_types type, GenTree* op1, NamedIntrinsic hwIntrinsicID); GenTreeHWIntrinsic* gtNewScalarHWIntrinsicNode(var_types type, GenTree* op1, @@ -2088,6 +2100,7 @@ public: GenTreeArgList* gtNewArgList(GenTreePtr op); GenTreeArgList* gtNewArgList(GenTreePtr op1, GenTreePtr op2); GenTreeArgList* gtNewArgList(GenTreePtr op1, GenTreePtr op2, GenTreePtr op3); + GenTreeArgList* gtNewArgList(GenTreePtr op1, GenTreePtr op2, GenTreePtr op3, GenTreePtr op4); static fgArgTabEntryPtr gtArgEntryByArgNum(GenTreeCall* call, unsigned 
argNum); static fgArgTabEntryPtr gtArgEntryByNode(GenTreeCall* call, GenTreePtr node); diff --git a/src/jit/compiler.hpp b/src/jit/compiler.hpp index ebd1894..b1dcde3 100644 --- a/src/jit/compiler.hpp +++ b/src/jit/compiler.hpp @@ -4891,6 +4891,19 @@ void GenTree::VisitOperands(TVisitor visitor) return; #endif // FEATURE_SIMD +#if FEATURE_HW_INTRINSICS + case GT_HWIntrinsic: + if ((this->AsHWIntrinsic()->gtOp1 != nullptr) && this->AsHWIntrinsic()->gtOp1->OperIsList()) + { + this->AsHWIntrinsic()->gtOp1->VisitListOperands(visitor); + } + else + { + VisitBinOpOperands(visitor); + } + return; +#endif // FEATURE_HW_INTRINSICS + // Special nodes case GT_CMPXCHG: { diff --git a/src/jit/gentree.cpp b/src/jit/gentree.cpp index 5c4d916..8a7060a 100644 --- a/src/jit/gentree.cpp +++ b/src/jit/gentree.cpp @@ -5671,6 +5671,16 @@ bool GenTree::TryGetUse(GenTree* def, GenTree*** use) return TryGetUseBinOp(def, use); #endif // FEATURE_SIMD +#ifdef FEATURE_HW_INTRINSICS + case GT_HWIntrinsic: + if ((this->AsHWIntrinsic()->gtOp1 != nullptr) && this->AsHWIntrinsic()->gtOp1->OperIsList()) + { + return this->AsHWIntrinsic()->gtOp1->TryGetUseList(def, use); + } + + return TryGetUseBinOp(def, use); +#endif // FEATURE_HW_INTRINSICS + // Special nodes case GT_CMPXCHG: { @@ -6790,6 +6800,16 @@ GenTreeArgList* Compiler::gtNewArgList(GenTreePtr arg1, GenTreePtr arg2, GenTree /***************************************************************************** * + * Create a list out of the four values. 
+ */ + +GenTreeArgList* Compiler::gtNewArgList(GenTreePtr arg1, GenTreePtr arg2, GenTreePtr arg3, GenTreePtr arg4) +{ + return new (this, GT_LIST) GenTreeArgList(arg1, gtNewArgList(arg2, arg3, arg4)); +} + +/***************************************************************************** + * * Given a GT_CALL node, access the fgArgInfo and find the entry * that has the matching argNum and return the fgArgTableEntryPtr */ @@ -9211,6 +9231,24 @@ GenTreeUseEdgeIterator::GenTreeUseEdgeIterator(GenTree* node) return; #endif // FEATURE_SIMD +#if FEATURE_HW_INTRINSICS + case GT_HWIntrinsic: + if (m_node->AsHWIntrinsic()->gtOp1 == nullptr) + { + assert(m_node->NullOp1Legal()); + m_state = -1; + } + else if (m_node->AsHWIntrinsic()->gtOp1->OperIsList()) + { + SetEntryStateForList(m_node->AsHWIntrinsic()->gtOp1); + } + else + { + SetEntryStateForBinOp(); + } + return; +#endif // FEATURE_HW_INTRINSICS + // LEA, which may have no first operand case GT_LEA: if (m_node->AsAddrMode()->gtOp1 == nullptr) @@ -17863,6 +17901,14 @@ bool GenTree::isCommutativeSIMDIntrinsic() #endif // FEATURE_SIMD #if FEATURE_HW_INTRINSICS +GenTreeHWIntrinsic* Compiler::gtNewSimdHWIntrinsicNode(var_types type, + NamedIntrinsic hwIntrinsicID, + var_types baseType, + unsigned size) +{ + return new (this, GT_HWIntrinsic) GenTreeHWIntrinsic(type, hwIntrinsicID, baseType, size); +} + GenTreeHWIntrinsic* Compiler::gtNewSimdHWIntrinsicNode( var_types type, GenTree* op1, NamedIntrinsic hwIntrinsicID, var_types baseType, unsigned size) { @@ -17875,6 +17921,19 @@ GenTreeHWIntrinsic* Compiler::gtNewSimdHWIntrinsicNode( return new (this, GT_HWIntrinsic) GenTreeHWIntrinsic(type, op1, op2, hwIntrinsicID, baseType, size); } +GenTreeHWIntrinsic* Compiler::gtNewSimdHWIntrinsicNode(var_types type, + GenTree* op1, + GenTree* op2, + GenTree* op3, + GenTree* op4, + NamedIntrinsic hwIntrinsicID, + var_types baseType, + unsigned size) +{ + return new (this, GT_HWIntrinsic) + GenTreeHWIntrinsic(type, gtNewArgList(op1, op2, op3, 
op4), hwIntrinsicID, baseType, size); +} + GenTreeHWIntrinsic* Compiler::gtNewScalarHWIntrinsicNode(var_types type, GenTree* op1, NamedIntrinsic hwIntrinsicID) { return new (this, GT_HWIntrinsic) GenTreeHWIntrinsic(type, op1, hwIntrinsicID, TYP_UNKNOWN, 0); diff --git a/src/jit/gentree.h b/src/jit/gentree.h index 7096b58..d66c835 100644 --- a/src/jit/gentree.h +++ b/src/jit/gentree.h @@ -1663,6 +1663,9 @@ public: case GT_LEA: case GT_RETFILT: case GT_NOP: +#if FEATURE_HW_INTRINSICS + case GT_HWIntrinsic: +#endif // FEATURE_HW_INTRINSICS return true; case GT_RETURN: return gtType == TYP_VOID; @@ -4225,6 +4228,11 @@ struct GenTreeHWIntrinsic : public GenTreeJitIntrinsic { NamedIntrinsic gtHWIntrinsicId; + GenTreeHWIntrinsic(var_types type, NamedIntrinsic hwIntrinsicID, var_types baseType, unsigned size) + : GenTreeJitIntrinsic(GT_HWIntrinsic, type, nullptr, nullptr, baseType, size), gtHWIntrinsicId(hwIntrinsicID) + { + } + GenTreeHWIntrinsic(var_types type, GenTree* op1, NamedIntrinsic hwIntrinsicID, var_types baseType, unsigned size) : GenTreeJitIntrinsic(GT_HWIntrinsic, type, op1, nullptr, baseType, size), gtHWIntrinsicId(hwIntrinsicID) { diff --git a/src/jit/hwintrinsiccodegenxarch.cpp b/src/jit/hwintrinsiccodegenxarch.cpp index 2b43608..b47c8da 100644 --- a/src/jit/hwintrinsiccodegenxarch.cpp +++ b/src/jit/hwintrinsiccodegenxarch.cpp @@ -193,16 +193,24 @@ void CodeGen::genSSEIntrinsic(GenTreeHWIntrinsic* node) NamedIntrinsic intrinsicID = node->gtHWIntrinsicId; GenTree* op1 = node->gtGetOp1(); GenTree* op2 = node->gtGetOp2(); + GenTree* op3 = nullptr; + GenTree* op4 = nullptr; regNumber targetReg = node->gtRegNum; var_types targetType = node->TypeGet(); var_types baseType = node->gtSIMDBaseType; instruction ins = INS_invalid; - regNumber op1Reg = op1->gtRegNum; + regNumber op1Reg = REG_NA; regNumber op2Reg = REG_NA; + regNumber op3Reg = REG_NA; + regNumber op4Reg = REG_NA; emitter* emit = getEmitter(); - genConsumeOperands(node); + if ((op1 != nullptr) && 
!op1->OperIsList()) + { + op1Reg = op1->gtRegNum; + genConsumeOperands(node); + } switch (intrinsicID) { @@ -332,6 +340,19 @@ void CodeGen::genSSEIntrinsic(GenTreeHWIntrinsic* node) emit->emitIns_SIMD_R_R(INS_rsqrtps, targetReg, op1Reg, TYP_SIMD16); break; + case NI_SSE_SetAllVector128: + assert(baseType == TYP_FLOAT); + assert(op2 == nullptr); + emit->emitIns_SIMD_R_R_R_I(INS_shufps, targetReg, op1Reg, op1Reg, 0, TYP_SIMD16); + break; + + case NI_SSE_SetZeroVector128: + assert(baseType == TYP_FLOAT); + assert(op1 == nullptr); + assert(op2 == nullptr); + emit->emitIns_SIMD_R_R_R(INS_xorps, targetReg, targetReg, targetReg, TYP_SIMD16); + break; + case NI_SSE_Sqrt: assert(baseType == TYP_FLOAT); assert(op2 == nullptr); diff --git a/src/jit/hwintrinsicxarch.cpp b/src/jit/hwintrinsicxarch.cpp index 61b3d8f..764a80a 100644 --- a/src/jit/hwintrinsicxarch.cpp +++ b/src/jit/hwintrinsicxarch.cpp @@ -450,8 +450,29 @@ GenTree* Compiler::impSSEIntrinsic(NamedIntrinsic intrinsic, GenTree* retNode = nullptr; GenTree* op1 = nullptr; GenTree* op2 = nullptr; + GenTree* op3 = nullptr; + GenTree* op4 = nullptr; + switch (intrinsic) { + case NI_SSE_SetVector128: + { + assert(sig->numArgs == 4); + assert(getBaseTypeOfSIMDType(sig->retTypeSigClass) == TYP_FLOAT); + + op4 = impPopStack().val; + op3 = impPopStack().val; + op2 = impPopStack().val; + op1 = impPopStack().val; + + GenTree* left = gtNewSimdHWIntrinsicNode(TYP_SIMD16, op4, op3, NI_SSE_UnpackLow, TYP_FLOAT, 16); + GenTree* right = gtNewSimdHWIntrinsicNode(TYP_SIMD16, op2, op1, NI_SSE_UnpackLow, TYP_FLOAT, 16); + GenTree* control = gtNewIconNode(68, TYP_UBYTE); + + retNode = gtNewSimdHWIntrinsicNode(TYP_SIMD16, left, right, control, NI_SSE_Shuffle, TYP_FLOAT, 16); + break; + } + case NI_SSE_Add: case NI_SSE_And: case NI_SSE_AndNot: @@ -485,6 +506,13 @@ GenTree* Compiler::impSSEIntrinsic(NamedIntrinsic intrinsic, retNode = gtNewSimdHWIntrinsicNode(TYP_SIMD16, op1, op2, intrinsic, TYP_FLOAT, 16); break; + case 
NI_SSE_SetAllVector128: + assert(sig->numArgs == 1); + assert(getBaseTypeOfSIMDType(sig->retTypeSigClass) == TYP_FLOAT); + op1 = impPopStack().val; + retNode = gtNewSimdHWIntrinsicNode(TYP_SIMD16, op1, intrinsic, TYP_FLOAT, 16); + break; + case NI_SSE_Reciprocal: case NI_SSE_ReciprocalSqrt: case NI_SSE_Sqrt: @@ -494,6 +522,12 @@ GenTree* Compiler::impSSEIntrinsic(NamedIntrinsic intrinsic, retNode = gtNewSimdHWIntrinsicNode(TYP_SIMD16, op1, intrinsic, TYP_FLOAT, 16); break; + case NI_SSE_SetZeroVector128: + assert(sig->numArgs == 0); + assert(getBaseTypeOfSIMDType(sig->retTypeSigClass) == TYP_FLOAT); + retNode = gtNewSimdHWIntrinsicNode(TYP_SIMD16, intrinsic, TYP_FLOAT, 16); + break; + default: JITDUMP("Not implemented hardware intrinsic"); break; diff --git a/src/jit/lsraxarch.cpp b/src/jit/lsraxarch.cpp index a80c6bd..c0c0eac 100644 --- a/src/jit/lsraxarch.cpp +++ b/src/jit/lsraxarch.cpp @@ -2505,31 +2505,63 @@ void LinearScan::TreeNodeInfoInitHWIntrinsic(GenTreeHWIntrinsic* intrinsicTree, { NamedIntrinsic intrinsicID = intrinsicTree->gtHWIntrinsicId; InstructionSet isa = compiler->isaOfHWIntrinsic(intrinsicID); + if (isa == InstructionSet_AVX || isa == InstructionSet_AVX2) { SetContainsAVXFlags(true, 32); } - info->srcCount += GetOperandInfo(intrinsicTree->gtOp.gtOp1); - if (intrinsicTree->gtGetOp2IfPresent() != nullptr) + + GenTree* op1 = intrinsicTree->gtOp.gtOp1; + GenTree* op2 = intrinsicTree->gtOp.gtOp2; + info->srcCount = 0; + + if (op1 != nullptr) + { + if (op1->OperIsList()) + { + int srcCount = 0; + + for (GenTreeArgList* list = op1->AsArgList(); list != nullptr; list = list->Rest()) + { + GenTree* listItem = list->Current(); + srcCount += GetOperandInfo(listItem); + } + + info->srcCount += srcCount; + } + else + { + info->srcCount += GetOperandInfo(op1); + } + } + if (op2 != nullptr) { - info->srcCount += GetOperandInfo(intrinsicTree->gtOp.gtOp2); + info->srcCount += GetOperandInfo(op2); } -#ifdef _TARGET_X86_ - if (intrinsicTree->gtHWIntrinsicId == 
NI_SSE42_Crc32) + switch (intrinsicID) { - // CRC32 may operate over "byte" but on x86 only RBM_BYTE_REGS can be used as byte registers. - // - // TODO - currently we use the BaseType to bring the type of the second argument - // to the code generator. May encode the overload info in other way. - var_types srcType = intrinsicTree->gtSIMDBaseType; - if (varTypeIsByte(srcType)) +#ifdef _TARGET_X86_ + case NI_SSE42_Crc32: { - LocationInfoListNode* op2Info = useList.GetSecond(INDEBUG(intrinsicTree->gtGetOp2())); - op2Info->info.setSrcCandidates(this, RBM_BYTE_REGS); + // CRC32 may operate over "byte" but on x86 only RBM_BYTE_REGS can be used as byte registers. + // + // TODO - currently we use the BaseType to bring the type of the second argument + // to the code generator. May encode the overload info in other way. + var_types srcType = intrinsicTree->gtSIMDBaseType; + if (varTypeIsByte(srcType)) + { + LocationInfoListNode* op2Info = useList.GetSecond(INDEBUG(intrinsicTree->gtGetOp2())); + op2Info->info.setSrcCandidates(this, RBM_BYTE_REGS); + } + break; } +#endif // _TARGET_X86_ + + default: + assert((intrinsicID > NI_HW_INTRINSIC_START) && (intrinsicID < NI_HW_INTRINSIC_END)); + break; } -#endif } #endif -- 2.7.4