From: Bruce Forstall Date: Thu, 1 Dec 2016 08:15:53 +0000 (-0800) Subject: Fix recent x86 SIMD regressions X-Git-Tag: submit/tizen/20210909.063632~11030^2~8747^2 X-Git-Url: http://review.tizen.org/git/?a=commitdiff_plain;h=f4576dec5e6f252d6a728e09cfba7ad4b1e55123;p=platform%2Fupstream%2Fdotnet%2Fruntime.git Fix recent x86 SIMD regressions 1. Recent PUTARG_STK work didn't consider SIMD arguments. 2. SSE3_4 work caused underestimation of instruction sizes for SSE4 instructions (e.g., pmulld). Commit migrated from https://github.com/dotnet/coreclr/commit/76390e45acb3aa5379d56cceb6f54f9555f96181 --- diff --git a/src/coreclr/src/jit/codegenxarch.cpp b/src/coreclr/src/jit/codegenxarch.cpp index 8c85126..aa2484c 100644 --- a/src/coreclr/src/jit/codegenxarch.cpp +++ b/src/coreclr/src/jit/codegenxarch.cpp @@ -7498,6 +7498,17 @@ unsigned CodeGen::getBaseVarForPutArgStk(GenTreePtr treeNode) // bool CodeGen::genAdjustStackForPutArgStk(GenTreePutArgStk* putArgStk) { +#ifdef FEATURE_SIMD + if (varTypeIsSIMD(putArgStk)) + { + const unsigned argSize = genTypeSize(putArgStk); + inst_RV_IV(INS_sub, REG_SPBASE, argSize, EA_PTRSIZE); + genStackLevel += argSize; + m_pushStkArg = false; + return true; + } +#endif // FEATURE_SIMD + const unsigned argSize = putArgStk->getArgSize(); // If the gtPutArgStkKind is one of the push types, we do not pre-adjust the stack. @@ -7532,10 +7543,13 @@ bool CodeGen::genAdjustStackForPutArgStk(GenTreePutArgStk* putArgStk) m_pushStkArg = true; return false; } - m_pushStkArg = false; - inst_RV_IV(INS_sub, REG_SPBASE, argSize, EA_PTRSIZE); - genStackLevel += argSize; - return true; + else + { + m_pushStkArg = false; + inst_RV_IV(INS_sub, REG_SPBASE, argSize, EA_PTRSIZE); + genStackLevel += argSize; + return true; + } } //--------------------------------------------------------------------- @@ -7736,6 +7750,7 @@ void CodeGen::genPutArgStkFieldList(GenTreePutArgStk* putArgStk) void CodeGen::genPutArgStk(GenTreePutArgStk* putArgStk) { var_types targetType = putArgStk->TypeGet(); + #ifdef _TARGET_X86_ if (varTypeIsStruct(targetType)) { diff --git a/src/coreclr/src/jit/emitxarch.cpp b/src/coreclr/src/jit/emitxarch.cpp index 1a15175..b6bacfa 100644 --- a/src/coreclr/src/jit/emitxarch.cpp +++ b/src/coreclr/src/jit/emitxarch.cpp @@ -1494,16 +1494,15 @@ inline UNATIVE_OFFSET emitter::emitInsSizeRR(instruction ins, regNumber reg1, re emitAttr size = EA_SIZE(attr); UNATIVE_OFFSET sz; -#ifdef _TARGET_AMD64_ - // If Byte 4 (which is 0xFF00) is non-zero, that's where the RM encoding goes. + + // If Byte 4 (which is 0xFF00) is zero, that's where the RM encoding goes. // Otherwise, it will be placed after the 4 byte encoding, making the total 5 bytes. // This would probably be better expressed as a different format or something? - if (insCodeRM(ins) & 0xFF00) + if ((insCodeRM(ins) & 0xFF00) != 0) { sz = 5; } else -#endif // _TARGET_AMD64_ { code_t code = insCodeRM(ins); sz = emitInsSize(insEncodeRMreg(ins, code)); @@ -9230,12 +9229,12 @@ BYTE* emitter::emitOutputRR(BYTE* dst, instrDesc* id) // now we use the single source as source1 and source2. if (IsThreeOperandBinaryAVXInstruction(ins)) { - // encode source/dest operand reg in 'vvvv' bits in 1's compliement form + // encode source/dest operand reg in 'vvvv' bits in 1's complement form code = insEncodeReg3456(ins, reg1, size, code); } else if (IsThreeOperandMoveAVXInstruction(ins)) { - // encode source operand reg in 'vvvv' bits in 1's compliement form + // encode source operand reg in 'vvvv' bits in 1's complement form code = insEncodeReg3456(ins, reg2, size, code); }