From: Tanner Gooding
Date: Tue, 30 Jan 2018 05:20:08 +0000 (-0800)
Subject: Updating the HWIntrinsic codegen to support marking LoadVector128 and LoadAlignedVect...
X-Git-Tag: accepted/tizen/unified/20190422.045933~3088
X-Git-Url: http://review.tizen.org/git/?a=commitdiff_plain;h=41f8b17ae1741dbcb15eaf0e7456c82548672889;p=platform%2Fupstream%2Fcoreclr.git

Updating the HWIntrinsic codegen to support marking LoadVector128 and LoadAlignedVector128 as contained.
---

diff --git a/src/jit/codegenlinear.cpp b/src/jit/codegenlinear.cpp
index fb8b6b5..eddec19 100644
--- a/src/jit/codegenlinear.cpp
+++ b/src/jit/codegenlinear.cpp
@@ -1297,6 +1297,10 @@ void CodeGen::genConsumeRegs(GenTree* tree)
         {
             genConsumeReg(tree->gtGetOp1());
         }
+        else if (tree->OperIsHWIntrinsic())
+        {
+            genConsumeReg(tree->gtGetOp1());
+        }
         else
         {
 #ifdef FEATURE_SIMD
diff --git a/src/jit/emitxarch.cpp b/src/jit/emitxarch.cpp
index 99dbce1..e697b7b 100644
--- a/src/jit/emitxarch.cpp
+++ b/src/jit/emitxarch.cpp
@@ -4009,6 +4009,28 @@ void emitter::emitIns_R_A_I(instruction ins, emitAttr attr, regNumber reg1, GenT
     emitCurIGsize += sz;
 }

+void emitter::emitIns_R_AR_I(instruction ins, emitAttr attr, regNumber reg1, regNumber base, int offs, int ival)
+{
+    noway_assert(emitVerifyEncodable(ins, EA_SIZE(attr), reg1));
+    assert(IsSSEOrAVXInstruction(ins));
+
+    instrDesc* id = emitNewInstrAmdCns(attr, offs, ival);
+
+    id->idIns(ins);
+    id->idReg1(reg1);
+
+    id->idInsFmt(IF_RRW_ARD_CNS);
+    id->idAddr()->iiaAddrMode.amBaseReg = base;
+    id->idAddr()->iiaAddrMode.amIndxReg = REG_NA;
+
+    // Plus one for the 1-byte immediate (ival)
+    UNATIVE_OFFSET sz = emitInsSizeAM(id, insCodeRM(ins)) + emitGetVexPrefixAdjustedSize(ins, attr, insCodeRM(ins)) + 1;
+    id->idCodeSize(sz);
+
+    dispIns(id);
+    emitCurIGsize += sz;
+}
+
 void emitter::emitIns_R_C_I(
     instruction ins, emitAttr attr, regNumber reg1, CORINFO_FIELD_HANDLE fldHnd, int offs, int ival)
 {
@@ -4202,6 +4224,30 @@ void emitter::emitIns_R_R_A_I(
     dispIns(id);
     emitCurIGsize += sz;
 }
+
+void emitter::emitIns_R_R_AR_I(
+    instruction ins, emitAttr attr, regNumber reg1, regNumber reg2, regNumber base, int offs, int ival)
+{
+    assert(IsSSEOrAVXInstruction(ins));
+    assert(IsThreeOperandAVXInstruction(ins));
+
+    instrDesc* id = emitNewInstrAmdCns(attr, offs, ival);
+
+    id->idIns(ins);
+    id->idReg1(reg1);
+    id->idReg2(reg2);
+
+    id->idInsFmt(IF_RWR_RRD_ARD_CNS);
+    id->idAddr()->iiaAddrMode.amBaseReg = base;
+    id->idAddr()->iiaAddrMode.amIndxReg = REG_NA;
+
+    // Plus one for the 1-byte immediate (ival)
+    UNATIVE_OFFSET sz = emitInsSizeAM(id, insCodeRM(ins)) + emitGetVexPrefixAdjustedSize(ins, attr, insCodeRM(ins)) + 1;
+    id->idCodeSize(sz);
+
+    dispIns(id);
+    emitCurIGsize += sz;
+}
 #endif // !LEGACY_BACKEND

 void emitter::emitIns_R_R_C_I(
@@ -5396,6 +5442,23 @@ void emitter::emitIns_SIMD_R_R_A_I(
     }
 }

+void emitter::emitIns_SIMD_R_R_AR_I(
+    instruction ins, emitAttr attr, regNumber reg, regNumber reg1, regNumber base, int ival)
+{
+    if (UseVEXEncoding())
+    {
+        emitIns_R_R_AR_I(ins, attr, reg, reg1, base, 0, ival);
+    }
+    else
+    {
+        if (reg1 != reg)
+        {
+            emitIns_R_R(INS_movaps, attr, reg, reg1);
+        }
+        emitIns_R_AR_I(ins, attr, reg, base, 0, ival);
+    }
+}
+
 void emitter::emitIns_SIMD_R_R_C_I(
     instruction ins, emitAttr attr, regNumber reg, regNumber reg1, CORINFO_FIELD_HANDLE fldHnd, int offs, int ival)
 {
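The new emitIns_SIMD_R_R_AR_I helper above is where the two encodings diverge: VEX forms are non-destructive three-operand instructions, while legacy SSE forms are destructive read-modify-write forms, so the source register may first need to be copied into the target. A standalone sketch of that dispatch (the helper name, register names, and "<op>" placeholder below are illustrative, not JIT code):

    #include <cstdio>
    #include <cstring>

    // Model of emitIns_SIMD_R_R_AR_I's VEX/legacy split: under VEX one
    // instruction suffices; under legacy encodings, src1 must first be moved
    // into the destination register when the two differ.
    static void emitSimdOpWithMemAndImm(bool useVex, const char* dst, const char* src1, const char* base, int imm8)
    {
        if (useVex)
        {
            // One instruction: v<op> dst, src1, [base], imm8
            printf("v<op>  %s, %s, [%s], %d\n", dst, src1, base, imm8);
        }
        else
        {
            if (strcmp(dst, src1) != 0)
            {
                // Mirror of the INS_movaps fixup above: make dst hold src1 first.
                printf("movaps %s, %s\n", dst, src1);
            }
            // The legacy form reads and writes dst: <op> dst, [base], imm8
            printf("<op>   %s, [%s], %d\n", dst, base, imm8);
        }
    }

    int main()
    {
        emitSimdOpWithMemAndImm(true, "xmm1", "xmm2", "rax", 1);  // VEX: single instruction
        emitSimdOpWithMemAndImm(false, "xmm1", "xmm2", "rax", 1); // legacy: movaps + op
        return 0;
    }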
diff --git a/src/jit/emitxarch.h b/src/jit/emitxarch.h
index cca099c..8542767 100644
--- a/src/jit/emitxarch.h
+++ b/src/jit/emitxarch.h
@@ -386,6 +386,8 @@ void emitIns_R_A(instruction ins, emitAttr attr, regNumber reg1, GenTreeIndir* i
 void emitIns_R_A_I(instruction ins, emitAttr attr, regNumber reg1, GenTreeIndir* indir, int ival);

+void emitIns_R_AR_I(instruction ins, emitAttr attr, regNumber reg1, regNumber base, int offs, int ival);
+
 void emitIns_R_C_I(instruction ins, emitAttr attr, regNumber reg1, CORINFO_FIELD_HANDLE fldHnd, int offs, int ival);

 void emitIns_R_S_I(instruction ins, emitAttr attr, regNumber reg1, int varx, int offs, int ival);
@@ -405,6 +407,8 @@ void emitIns_R_R_R(instruction ins, emitAttr attr, regNumber reg1, regNumber reg
 #ifndef LEGACY_BACKEND
 void emitIns_R_R_A_I(
     instruction ins, emitAttr attr, regNumber reg1, regNumber reg2, GenTreeIndir* indir, int ival, insFormat fmt);
+void emitIns_R_R_AR_I(
+    instruction ins, emitAttr attr, regNumber reg1, regNumber reg2, regNumber base, int offs, int ival);
 #endif // !LEGACY_BACKEND

 void emitIns_R_R_C_I(
@@ -475,6 +479,7 @@ void emitIns_AX_R(instruction ins, emitAttr attr, regNumber ireg, regNumber reg,
 #ifdef FEATURE_HW_INTRINSICS
 void emitIns_SIMD_R_R_AR(instruction ins, emitAttr attr, regNumber reg, regNumber reg1, regNumber base);
 void emitIns_SIMD_R_R_A_I(instruction ins, emitAttr attr, regNumber reg, regNumber reg1, GenTreeIndir* indir, int ival);
+void emitIns_SIMD_R_R_AR_I(instruction ins, emitAttr attr, regNumber reg, regNumber reg1, regNumber base, int ival);
 void emitIns_SIMD_R_R_C_I(
     instruction ins, emitAttr attr, regNumber reg, regNumber reg1, CORINFO_FIELD_HANDLE fldHnd, int offs, int ival);
 void emitIns_SIMD_R_R_R_I(instruction ins, emitAttr attr, regNumber reg, regNumber reg1, regNumber reg2, int ival);
diff --git a/src/jit/hwintrinsiccodegenxarch.cpp b/src/jit/hwintrinsiccodegenxarch.cpp
index e3e703e..1aea1f1 100644
--- a/src/jit/hwintrinsiccodegenxarch.cpp
+++ b/src/jit/hwintrinsiccodegenxarch.cpp
@@ -215,6 +215,9 @@ void CodeGen::genHWIntrinsic_R_R_RM(GenTreeHWIntrinsic* node, instruction ins)

     if (op2->isContained() || op2->isUsedFromSpillTemp())
     {
+        assert((Compiler::flagsOfHWIntrinsic(node->gtHWIntrinsicId) & HW_Flag_NoContainment) == 0);
+        assert(compiler->m_pLowering->IsContainableHWIntrinsicOp(node, op2) || op2->IsRegOptional());
+
         TempDsc* tmpDsc = nullptr;
         unsigned varNum = BAD_VAR_NUM;
         unsigned offset = (unsigned)-1;
@@ -229,6 +232,11 @@ void CodeGen::genHWIntrinsic_R_R_RM(GenTreeHWIntrinsic* node, instruction ins)

             compiler->tmpRlsTemp(tmpDsc);
         }
+        else if (op2->OperIsHWIntrinsic())
+        {
+            emit->emitIns_SIMD_R_R_AR(ins, simdSize, targetReg, op1Reg, op2->gtGetOp1()->gtRegNum);
+            return;
+        }
         else if (op2->isIndir())
         {
             GenTreeIndir* memIndir = op2->AsIndir();
@@ -242,7 +250,6 @@ void CodeGen::genHWIntrinsic_R_R_RM(GenTreeHWIntrinsic* node, instruction ins)
                     offset = 0;

                     // Ensure that all the GenTreeIndir values are set to their defaults.
-                    assert(memBase->gtRegNum == REG_NA);
                     assert(!memIndir->HasIndex());
                     assert(memIndir->Scale() == 1);
                     assert(memIndir->Offset() == 0);
@@ -310,6 +317,7 @@ void CodeGen::genHWIntrinsic_R_R_RM_I(GenTreeHWIntrinsic* node, instruction ins)
     regNumber targetReg = node->gtRegNum;
     GenTree*  op1       = node->gtGetOp1();
     GenTree*  op2       = node->gtGetOp2();
+    emitAttr  simdSize  = (emitAttr)(node->gtSIMDSize);
     int       ival      = Compiler::ivalOfHWIntrinsic(node->gtHWIntrinsicId);
     emitter*  emit      = getEmitter();

@@ -323,6 +331,9 @@ void CodeGen::genHWIntrinsic_R_R_RM_I(GenTreeHWIntrinsic* node, instruction ins)

     if (op2->isContained() || op2->isUsedFromSpillTemp())
     {
+        assert((Compiler::flagsOfHWIntrinsic(node->gtHWIntrinsicId) & HW_Flag_NoContainment) == 0);
+        assert(compiler->m_pLowering->IsContainableHWIntrinsicOp(node, op2) || op2->IsRegOptional());
+
         TempDsc* tmpDsc = nullptr;
         unsigned varNum = BAD_VAR_NUM;
         unsigned offset = (unsigned)-1;
@@ -337,6 +348,11 @@ void CodeGen::genHWIntrinsic_R_R_RM_I(GenTreeHWIntrinsic* node, instruction ins)

             compiler->tmpRlsTemp(tmpDsc);
         }
+        else if (op2->OperIsHWIntrinsic())
+        {
+            emit->emitIns_SIMD_R_R_AR_I(ins, simdSize, targetReg, op1Reg, op2->gtGetOp1()->gtRegNum, ival);
+            return;
+        }
         else if (op2->isIndir())
         {
             GenTreeIndir* memIndir = op2->AsIndir();
@@ -350,7 +366,6 @@ void CodeGen::genHWIntrinsic_R_R_RM_I(GenTreeHWIntrinsic* node, instruction ins)
                     offset = 0;

                     // Ensure that all the GenTreeIndir values are set to their defaults.
-                    assert(memBase->gtRegNum == REG_NA);
                     assert(!memIndir->HasIndex());
                     assert(memIndir->Scale() == 1);
                     assert(memIndir->Offset() == 0);
@@ -360,14 +375,14 @@ void CodeGen::genHWIntrinsic_R_R_RM_I(GenTreeHWIntrinsic* node, instruction ins)

                 case GT_CLS_VAR_ADDR:
                 {
-                    emit->emitIns_SIMD_R_R_C_I(ins, emitTypeSize(targetType), targetReg, op1Reg,
-                                               memBase->gtClsVar.gtClsVarHnd, 0, ival);
+                    emit->emitIns_SIMD_R_R_C_I(ins, simdSize, targetReg, op1Reg, memBase->gtClsVar.gtClsVarHnd, 0,
+                                               ival);
                     return;
                 }

                 default:
                 {
-                    emit->emitIns_SIMD_R_R_A_I(ins, emitTypeSize(targetType), targetReg, op1Reg, memIndir, ival);
+                    emit->emitIns_SIMD_R_R_A_I(ins, simdSize, targetReg, op1Reg, memIndir, ival);
                     return;
                 }
             }
@@ -405,11 +420,11 @@ void CodeGen::genHWIntrinsic_R_R_RM_I(GenTreeHWIntrinsic* node, instruction ins)
         assert((varNum != BAD_VAR_NUM) || (tmpDsc != nullptr));
         assert(offset != (unsigned)-1);

-        emit->emitIns_SIMD_R_R_S_I(ins, emitTypeSize(targetType), targetReg, op1Reg, varNum, offset, ival);
+        emit->emitIns_SIMD_R_R_S_I(ins, simdSize, targetReg, op1Reg, varNum, offset, ival);
     }
     else
     {
-        emit->emitIns_SIMD_R_R_R_I(ins, emitTypeSize(targetType), targetReg, op1Reg, op2->gtRegNum, ival);
+        emit->emitIns_SIMD_R_R_R_I(ins, simdSize, targetReg, op1Reg, op2->gtRegNum, ival);
     }
 }
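These codegen changes are where containment pays off: when op2 is a contained load intrinsic, codegen reaches through to the load's address operand (op2->gtGetOp1()->gtRegNum) and folds it into the consuming instruction's memory operand. A sketch of the intended difference in emitted code, with purely illustrative register assignments:

    #include <cstdio>

    // Illustrative only: the instruction shapes this change enables for a
    // pattern like Sse.Add(left, Sse.LoadAlignedVector128(p)).
    int main()
    {
        // Without containment, the load is evaluated into its own register:
        printf("movaps xmm1, [rax]\n"); // LoadAlignedVector128
        printf("addps  xmm0, xmm1\n");  // Add

        // With the load marked contained, codegen folds the load's base-address
        // register into the consuming instruction's memory operand:
        printf("addps  xmm0, [rax]\n"); // Add with folded memory operand
        return 0;
    }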
diff --git a/src/jit/hwintrinsiclistxarch.h b/src/jit/hwintrinsiclistxarch.h
index 9237f1a..18b0bc9 100644
--- a/src/jit/hwintrinsiclistxarch.h
+++ b/src/jit/hwintrinsiclistxarch.h
@@ -176,7 +176,7 @@ HARDWARE_INTRINSIC(SSE41_BlendVariable, "BlendVaria
 // SSE42 Intrinsics
 HARDWARE_INTRINSIC(SSE42_IsSupported, "get_IsSupported", SSE42, -1, 0, 0, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_IsSupportedProperty, HW_Flag_NoFlag)
-HARDWARE_INTRINSIC(SSE42_Crc32, "Crc32", SSE42, -1, 0, 2, {INS_invalid, INS_crc32, INS_invalid, INS_crc32, INS_invalid, INS_crc32, INS_invalid, INS_crc32, INS_invalid, INS_invalid}, HW_Category_Scalar, HW_Flag_NoFlag)
+HARDWARE_INTRINSIC(SSE42_Crc32, "Crc32", SSE42, -1, 0, 2, {INS_invalid, INS_crc32, INS_invalid, INS_crc32, INS_invalid, INS_crc32, INS_invalid, INS_crc32, INS_invalid, INS_invalid}, HW_Category_Scalar, HW_Flag_NoFloatingPointUsed)

 // AVX Intrinsics
 // TODO-XArch When implementing SetZeroVector256 add case to switch table in gentree.cpp
@@ -207,14 +207,14 @@ HARDWARE_INTRINSIC(FMA_IsSupported, "get_IsSupp
 // LZCNT Intrinsics
 HARDWARE_INTRINSIC(LZCNT_IsSupported, "get_IsSupported", LZCNT, -1, 0, 0, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_IsSupportedProperty, HW_Flag_NoFlag)
-HARDWARE_INTRINSIC(LZCNT_LeadingZeroCount, "LeadingZeroCount", LZCNT, -1, 0, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_lzcnt, INS_invalid, INS_lzcnt, INS_invalid, INS_invalid}, HW_Category_Scalar, HW_Flag_NoFlag)
+HARDWARE_INTRINSIC(LZCNT_LeadingZeroCount, "LeadingZeroCount", LZCNT, -1, 0, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_lzcnt, INS_invalid, INS_lzcnt, INS_invalid, INS_invalid}, HW_Category_Scalar, HW_Flag_NoFloatingPointUsed)

 // PCLMULQDQ Intrinsics
 HARDWARE_INTRINSIC(PCLMULQDQ_IsSupported, "get_IsSupported", PCLMULQDQ, -1, 0, 0, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_IsSupportedProperty, HW_Flag_NoFlag)

 // POPCNT Intrinsics
 HARDWARE_INTRINSIC(POPCNT_IsSupported, "get_IsSupported", POPCNT, -1, 0, 0, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_IsSupportedProperty, HW_Flag_NoFlag)
-HARDWARE_INTRINSIC(POPCNT_PopCount, "PopCount", POPCNT, -1, 0, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_popcnt, INS_invalid, INS_popcnt, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Scalar, HW_Flag_NoFlag)
+HARDWARE_INTRINSIC(POPCNT_PopCount, "PopCount", POPCNT, -1, 0, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_popcnt, INS_invalid, INS_popcnt, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Scalar, HW_Flag_NoFloatingPointUsed)
 #endif // FEATURE_HW_INTRINSIC

 #undef HARDWARE_INTRINSIC
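The three table rows retagged above (Crc32, LeadingZeroCount, PopCount) are integer-only scalar intrinsics, so they can skip recording floating-point usage. A reduced model of the gate this enables in the importer (the flag value mirrors namedintrinsiclist.h further down; the helper and its callers are illustrative):

    #include <cstdio>

    enum HWIntrinsicFlag : unsigned int
    {
        HW_Flag_NoFlag              = 0x0,
        HW_Flag_NoFloatingPointUsed = 0x800,
    };

    static bool setsFloatingPointUsed(unsigned int flags)
    {
        // Intrinsics that touch XMM/YMM state must record floating-point usage;
        // integer-only intrinsics opt out via HW_Flag_NoFloatingPointUsed.
        return (flags & HW_Flag_NoFloatingPointUsed) == 0;
    }

    int main()
    {
        printf("Sse.Add -> %d\n", setsFloatingPointUsed(HW_Flag_NoFlag));              // 1
        printf("Crc32   -> %d\n", setsFloatingPointUsed(HW_Flag_NoFloatingPointUsed)); // 0
        return 0;
    }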
diff --git a/src/jit/hwintrinsicxarch.cpp b/src/jit/hwintrinsicxarch.cpp
index 88b0eda..5f16dd0 100644
--- a/src/jit/hwintrinsicxarch.cpp
+++ b/src/jit/hwintrinsicxarch.cpp
@@ -420,6 +420,7 @@ GenTree* Compiler::impHWIntrinsic(NamedIntrinsic intrinsic,
     int       numArgs  = sig->numArgs;
     var_types retType  = JITtype2varType(sig->retType);
     var_types baseType = TYP_UNKNOWN;
+
     if (retType == TYP_STRUCT && featureSIMD)
     {
         unsigned int sizeBytes;
@@ -482,6 +483,13 @@ GenTree* Compiler::impHWIntrinsic(NamedIntrinsic intrinsic,
         }
     }

+    if ((flags & HW_Flag_NoFloatingPointUsed) == 0)
+    {
+        // Set `compFloatingPointUsed` to cover the scenario where an intrinsic is being used on SIMD fields, but
+        // where no SIMD local vars are in use. This is the same logic used for FEATURE_SIMD.
+        compFloatingPointUsed = true;
+    }
+
     // table-driven importer of simple intrinsics
     if (impIsTableDrivenHWIntrinsic(category, flags))
     {
diff --git a/src/jit/lower.h b/src/jit/lower.h
index 2f7d3bd..0d298e0 100644
--- a/src/jit/lower.h
+++ b/src/jit/lower.h
@@ -319,11 +319,6 @@ private:
 public:
     static bool IndirsAreEquivalent(GenTree* pTreeA, GenTree* pTreeB);

-private:
-    static bool NodesAreEquivalentLeaves(GenTree* candidate, GenTree* storeInd);
-
-    bool AreSourcesPossiblyModifiedLocals(GenTree* addr, GenTree* base, GenTree* index);
-
     // return true if 'childNode' is an immediate that can be contained
     // by the 'parentNode' (i.e. folded into an instruction)
     // for example small enough and non-relocatable
@@ -335,6 +330,16 @@ private:
         return m_lsra->isContainableMemoryOp(node);
     }

+#ifdef FEATURE_HW_INTRINSICS
+    // Return true if 'node' is a containable HWIntrinsic op.
+    bool IsContainableHWIntrinsicOp(GenTreeHWIntrinsic* containingNode, GenTree* node);
+#endif // FEATURE_HW_INTRINSICS
+
+private:
+    static bool NodesAreEquivalentLeaves(GenTree* candidate, GenTree* storeInd);
+
+    bool AreSourcesPossiblyModifiedLocals(GenTree* addr, GenTree* base, GenTree* index);
+
     // Makes 'childNode' contained in the 'parentNode'
     void MakeSrcContained(GenTree* parentNode, GenTree* childNode);
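The policy behind the newly declared IsContainableHWIntrinsicOp follows in lowerxarch.cpp. Its core rule reduces to one alignment asymmetry: a legacy-encoded memory operand faults on an unaligned address, while a VEX-encoded one does not. A minimal model of that rule (hypothetical helper name, simplified to SimpleSIMD consumers):

    #include <cassert>

    // Folding LoadAlignedVector128 is only safe under legacy encodings, where
    // the consuming instruction also enforces alignment, so the access
    // violation movaps would have raised stays observable; folding it under
    // VEX would silently hide that fault. The inverse holds for the unaligned
    // LoadVector128.
    static bool canFoldSimpleSimdLoad(bool loadRequiresAlignment, bool useVex)
    {
        if (loadRequiresAlignment)
        {
            return !useVex; // fold only where the consumer also enforces alignment
        }
        return useVex; // fold only where the consumer tolerates misalignment
    }

    int main()
    {
        assert(canFoldSimpleSimdLoad(true, false));   // LoadAligned + legacy: fold
        assert(!canFoldSimpleSimdLoad(true, true));   // LoadAligned + VEX: don't
        assert(canFoldSimpleSimdLoad(false, true));   // Load + VEX: fold
        assert(!canFoldSimpleSimdLoad(false, false)); // Load + legacy: don't
        return 0;
    }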
diff --git a/src/jit/lowerxarch.cpp b/src/jit/lowerxarch.cpp
index d212d86..559d0c7 100644
--- a/src/jit/lowerxarch.cpp
+++ b/src/jit/lowerxarch.cpp
@@ -2297,6 +2297,65 @@ void Lowering::ContainCheckSIMD(GenTreeSIMD* simdNode)

 #ifdef FEATURE_HW_INTRINSICS
 //----------------------------------------------------------------------------------------------
+// IsContainableHWIntrinsicOp: Return true if 'node' is a containable HWIntrinsic op.
+//
+// Arguments:
+//    containingNode - The hardware intrinsic node which contains 'node'
+//    node           - The node to check
+//
+// Return Value:
+//    true if 'node' is a containable hardware intrinsic node; otherwise, false.
+//
+bool Lowering::IsContainableHWIntrinsicOp(GenTreeHWIntrinsic* containingNode, GenTree* node)
+{
+    if (!node->OperIsHWIntrinsic())
+    {
+        // Non-HWIntrinsic nodes are assumed to be unaligned loads, which are only
+        // supported by the VEX encoding.
+        return comp->canUseVexEncoding() && IsContainableMemoryOp(node);
+    }
+
+    bool isContainable = false;
+
+    // TODO-XArch: Update this to be table driven, if possible.
+
+    NamedIntrinsic      containingIntrinsicID = containingNode->gtHWIntrinsicId;
+    HWIntrinsicCategory containingCategory    = Compiler::categoryOfHWIntrinsic(containingIntrinsicID);
+    NamedIntrinsic      intrinsicID           = node->AsHWIntrinsic()->gtHWIntrinsicId;
+
+    switch (intrinsicID)
+    {
+        // Non-VEX encoded instructions require aligned memory ops, so we can fold them.
+        // However, we cannot do the same for the VEX encoding, as that changes an observable
+        // side effect and may mask an Access Violation that would otherwise occur.
+        case NI_SSE_LoadAlignedVector128:
+            isContainable = (containingCategory == HW_Category_SimpleSIMD) && !comp->canUseVexEncoding();
+            break;
+
+        // Only fold a scalar load into a SIMD scalar intrinsic to ensure the number of bits
+        // read remains the same. Likewise, we can't fold a larger load into a SIMD scalar
+        // intrinsic, as that would read fewer bits than requested.
+        case NI_SSE_LoadScalarVector128:
+            isContainable = (containingCategory == HW_Category_SIMDScalar);
+            break;
+
+        // The VEX encoding supports unaligned memory ops, so we can fold them.
+        case NI_SSE_LoadVector128:
+            isContainable = (containingCategory == HW_Category_SimpleSIMD) && comp->canUseVexEncoding();
+            break;
+
+        default:
+            return false;
+    }
+
+    // For containable nodes, the base type of the original node and the base type of the contained node
+    // should be the same. This helps ensure we aren't reading too many or too few bits.
+    assert(!isContainable || (containingNode->gtSIMDBaseType == node->AsHWIntrinsic()->gtSIMDBaseType));
+
+    return isContainable;
+}
+
+//----------------------------------------------------------------------------------------------
 // ContainCheckHWIntrinsic: Perform containment analysis for a hardware intrinsic node.
 //
 // Arguments:
@@ -2311,25 +2370,45 @@ void Lowering::ContainCheckHWIntrinsic(GenTreeHWIntrinsic* node)
     GenTree* op1 = node->gtGetOp1();
     GenTree* op2 = node->gtGetOp2();

+    if ((flags & HW_Flag_NoContainment) != 0)
+    {
+        // Exit early if containment isn't supported
+        return;
+    }
+
     // TODO-XArch-CQ: Non-VEX encoded instructions can have both ops contained
-    // TODO-XArch-CQ: Non-VEX encoded instructions require memory ops to be aligned
-    if (comp->canUseVexEncoding() && numArgs == 2 && (flags & HW_Flag_NoContainment) == 0 &&
-        category == HW_Category_SimpleSIMD)
+    if (numArgs == 2)
     {
-        if (IsContainableMemoryOp(op2))
+        switch (category)
         {
-            MakeSrcContained(node, op2);
-        }
-        else
-        {
-            // TODO-XArch-CQ: Commutative operations can have op1 be contained
-            op2->SetRegOptional();
+            case HW_Category_SimpleSIMD:
+            case HW_Category_SIMDScalar:
+                if (IsContainableHWIntrinsicOp(node, op2))
+                {
+                    MakeSrcContained(node, op2);
+                }
+                else if (((flags & HW_Flag_Commutative) != 0) && IsContainableHWIntrinsicOp(node, op1))
+                {
+                    MakeSrcContained(node, op1);
+
+                    // Swap the operands here to make the containment checks in codegen significantly simpler
+                    node->gtOp1 = op2;
+                    node->gtOp2 = op1;
+                }
+                else if (comp->canUseVexEncoding())
+                {
+                    // We can only mark op2 as reg-optional when using the VEX encoding,
+                    // since it supports unaligned memory operands and non-VEX doesn't.
+                    op2->SetRegOptional();
+                }
+                break;

+            default:
+                break;
         }
     }
-
-    // TODO - change to all IMM intrinsics
-    if (intrinsicID == NI_SSE_Shuffle)
+    else if (intrinsicID == NI_SSE_Shuffle) // TODO - change to all IMM intrinsics
     {
         assert(op1->OperIsList());
         GenTree* op3 = op1->AsArgList()->Rest()->Rest()->Current();
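The TODO-XArch comment in IsContainableHWIntrinsicOp above asks whether the switch could be table driven. A hypothetical sketch of what that might look like, using stand-in enums rather than the JIT's NamedIntrinsic and HWIntrinsicCategory types:

    #include <cassert>

    // Stand-in types; the rule table encodes the same three cases as the
    // switch in IsContainableHWIntrinsicOp.
    enum class LoadKind { Aligned, Scalar, Unaligned };
    enum class Category { SimpleSIMD, SIMDScalar };

    struct ContainmentRule
    {
        Category requiredCategory; // required category of the *containing* node
        bool     requiresVex;      // only containable when VEX is available
        bool     forbidsVex;       // only containable when VEX is NOT available
    };

    static bool isContainableLoad(LoadKind kind, Category containingCategory, bool canUseVex)
    {
        static const ContainmentRule rules[] = {
            /* Aligned   */ {Category::SimpleSIMD, false, true},
            /* Scalar    */ {Category::SIMDScalar, false, false},
            /* Unaligned */ {Category::SimpleSIMD, true, false},
        };

        const ContainmentRule& rule = rules[static_cast<int>(kind)];
        return (containingCategory == rule.requiredCategory) && !(rule.requiresVex && !canUseVex) &&
               !(rule.forbidsVex && canUseVex);
    }

    int main()
    {
        assert(isContainableLoad(LoadKind::Aligned, Category::SimpleSIMD, false));
        assert(!isContainableLoad(LoadKind::Aligned, Category::SimpleSIMD, true));
        assert(isContainableLoad(LoadKind::Scalar, Category::SIMDScalar, true));
        assert(isContainableLoad(LoadKind::Unaligned, Category::SimpleSIMD, true));
        return 0;
    }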
diff --git a/src/jit/lsrabuild.cpp b/src/jit/lsrabuild.cpp
index 0d3cccb..f3f019d 100644
--- a/src/jit/lsrabuild.cpp
+++ b/src/jit/lsrabuild.cpp
@@ -2614,6 +2614,11 @@ int LinearScan::GetOperandInfo(GenTree* node)
         const unsigned srcCount = GetIndirInfo(node->AsIndir());
         return srcCount;
     }
+    if (node->OperIsHWIntrinsic())
+    {
+        appendLocationInfoToList(node->gtGetOp1());
+        return 1;
+    }

     return 0;
 }
diff --git a/src/jit/namedintrinsiclist.h b/src/jit/namedintrinsiclist.h
index a6edd86..3bc8597 100644
--- a/src/jit/namedintrinsiclist.h
+++ b/src/jit/namedintrinsiclist.h
@@ -77,6 +77,9 @@ enum HWIntrinsicFlag : unsigned int

     // Select base type using argument type
     HW_Flag_BaseTypeFromArg = 0x400,
+
+    // Indicates compFloatingPointUsed does not need to be set.
+    HW_Flag_NoFloatingPointUsed = 0x800
 };

 inline HWIntrinsicFlag operator|(HWIntrinsicFlag c1, HWIntrinsicFlag c2)
diff --git a/src/jit/rationalize.cpp b/src/jit/rationalize.cpp
index 3696f40..b40abd6 100644
--- a/src/jit/rationalize.cpp
+++ b/src/jit/rationalize.cpp
@@ -848,7 +848,7 @@ Compiler::fgWalkResult Rationalizer::RewriteNode(GenTree** useEdge, ArrayStack<GenTree*>& parentStack)
             ...comp->isAddrOfSIMDType(node->AsBlk()->Addr()))
             {
                 GenTree* dataSrc = parent->gtGetOp2();
-                if (!dataSrc->IsLocal() && (dataSrc->OperGet() != GT_SIMD))
+                if (!dataSrc->IsLocal() && (dataSrc->OperGet() != GT_SIMD) && (!dataSrc->OperIsHWIntrinsic()))
                 {
                     noway_assert(dataSrc->OperIsIndir());
                     keepBlk = !comp->isAddrOfSIMDType(dataSrc->AsIndir()->Addr());
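Taken together, the effect of the commit is easiest to see from the consumer's side. The managed pattern being optimized is, in C# terms, Sse.Add(left, Sse.LoadAlignedVector128(p)); the native analog below shows the same folding opportunity that marking the load as contained gives RyuJIT, since an optimizing C++ compiler can merge the aligned load into the add the same way:

    #include <xmmintrin.h>
    #include <cstdio>

    // An optimizing compiler can emit "addps xmm0, [mem]" for this body
    // instead of a separate movaps followed by addps.
    static __m128 add_from_memory(__m128 left, const float* p)
    {
        return _mm_add_ps(left, _mm_load_ps(p)); // aligned load feeding an add
    }

    int main()
    {
        alignas(16) float data[4] = {1.0f, 2.0f, 3.0f, 4.0f};
        __m128 left = _mm_set1_ps(10.0f);
        __m128 sum  = add_from_memory(left, data);

        float out[4];
        _mm_storeu_ps(out, sum);
        printf("%g %g %g %g\n", out[0], out[1], out[2], out[3]); // 11 12 13 14
        return 0;
    }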