From 76c9ccfa1a2f3aea757ab3851c35443c5b60ec90 Mon Sep 17 00:00:00 2001 From: Tanner Gooding Date: Tue, 27 Feb 2018 20:40:19 -0800 Subject: [PATCH] Set isInternalRegDelayFree for several of the x86 hwintrinsics --- src/jit/hwintrinsiccodegenxarch.cpp | 27 +++++++++++++++++++++------ src/jit/lsraxarch.cpp | 2 ++ 2 files changed, 23 insertions(+), 6 deletions(-) diff --git a/src/jit/hwintrinsiccodegenxarch.cpp b/src/jit/hwintrinsiccodegenxarch.cpp index 2c6a184..24829ea 100644 --- a/src/jit/hwintrinsiccodegenxarch.cpp +++ b/src/jit/hwintrinsiccodegenxarch.cpp @@ -608,10 +608,13 @@ void CodeGen::genSSEIntrinsic(GenTreeHWIntrinsic* node) case NI_SSE_CompareEqualUnorderedScalar: { assert(baseType == TYP_FLOAT); - op2Reg = op2->gtRegNum; - regNumber tmpReg = node->GetSingleTempReg(); + op2Reg = op2->gtRegNum; + regNumber tmpReg = node->GetSingleTempReg(); + instruction ins = Compiler::insOfHWIntrinsic(intrinsicID, node->gtSIMDBaseType); + + // Ensure we aren't overwriting targetReg + assert(tmpReg != targetReg); - instruction ins = Compiler::insOfHWIntrinsic(intrinsicID, node->gtSIMDBaseType); emit->emitIns_R_R(ins, emitTypeSize(TYP_SIMD16), op1Reg, op2Reg); emit->emitIns_R(INS_setpo, EA_1BYTE, targetReg); emit->emitIns_R(INS_sete, EA_1BYTE, tmpReg); @@ -677,11 +680,13 @@ void CodeGen::genSSEIntrinsic(GenTreeHWIntrinsic* node) case NI_SSE_CompareNotEqualUnorderedScalar: { assert(baseType == TYP_FLOAT); - op2Reg = op2->gtRegNum; + op2Reg = op2->gtRegNum; + regNumber tmpReg = node->GetSingleTempReg(); + instruction ins = Compiler::insOfHWIntrinsic(intrinsicID, node->gtSIMDBaseType); - regNumber tmpReg = node->GetSingleTempReg(); + // Ensure we aren't overwriting targetReg + assert(tmpReg != targetReg); - instruction ins = Compiler::insOfHWIntrinsic(intrinsicID, node->gtSIMDBaseType); emit->emitIns_R_R(ins, emitTypeSize(TYP_SIMD16), op1Reg, op2Reg); emit->emitIns_R(INS_setpe, EA_1BYTE, targetReg); emit->emitIns_R(INS_setne, EA_1BYTE, tmpReg); @@ -752,6 +757,10 @@ void CodeGen::genSSEIntrinsic(GenTreeHWIntrinsic* node) if (op1Reg == targetReg) { regNumber tmpReg = node->GetSingleTempReg(); + + // Ensure we aren't overwriting targetReg + assert(tmpReg != targetReg); + emit->emitIns_R_R(INS_movaps, emitTypeSize(TYP_SIMD16), tmpReg, op1Reg); op1Reg = tmpReg; } @@ -837,6 +846,9 @@ void CodeGen::genSSE2Intrinsic(GenTreeHWIntrinsic* node) regNumber tmpReg = node->GetSingleTempReg(); instruction ins = Compiler::insOfHWIntrinsic(intrinsicID, baseType); + // Ensure we aren't overwriting targetReg + assert(tmpReg != targetReg); + emit->emitIns_R_R(ins, emitTypeSize(TYP_SIMD16), op1Reg, op2Reg); emit->emitIns_R(INS_setpo, EA_1BYTE, targetReg); emit->emitIns_R(INS_sete, EA_1BYTE, tmpReg); @@ -906,6 +918,9 @@ void CodeGen::genSSE2Intrinsic(GenTreeHWIntrinsic* node) instruction ins = Compiler::insOfHWIntrinsic(intrinsicID, baseType); regNumber tmpReg = node->GetSingleTempReg(); + // Ensure we aren't overwriting targetReg + assert(tmpReg != targetReg); + emit->emitIns_R_R(ins, emitTypeSize(TYP_SIMD16), op1Reg, op2Reg); emit->emitIns_R(INS_setpe, EA_1BYTE, targetReg); emit->emitIns_R(INS_setne, EA_1BYTE, tmpReg); diff --git a/src/jit/lsraxarch.cpp b/src/jit/lsraxarch.cpp index d60d213..c2c1992 100644 --- a/src/jit/lsraxarch.cpp +++ b/src/jit/lsraxarch.cpp @@ -2344,12 +2344,14 @@ void LinearScan::BuildHWIntrinsic(GenTreeHWIntrinsic* intrinsicTree) case NI_SSE2_CompareNotEqualUnorderedScalar: info->internalIntCount = 1; info->setInternalCandidates(this, RBM_BYTE_REGS); + info->isInternalRegDelayFree = true; break; case NI_SSE_SetScalarVector128: // Need an internal register to stitch together all the values into a single vector in a SIMD reg. info->internalFloatCount = 1; info->setInternalCandidates(this, allSIMDRegs()); + info->isInternalRegDelayFree = true; break; case NI_SSE_ConvertToSingle: -- 2.7.4