From 271f008f0fe8168dacac45cd3ce1952e6e32bfbb Mon Sep 17 00:00:00 2001 From: Egor Bogatov Date: Sat, 3 Oct 2020 02:30:22 +0300 Subject: [PATCH] RyuJIT: Optimize -X and MathF.Abs(X) for floats (#42164) * Optimize -X and MathF.Abs(X) for floats Co-authored-by: Tanner Gooding --- src/coreclr/src/jit/codegenxarch.cpp | 120 +++++++++-------------------------- src/coreclr/src/jit/emit.cpp | 2 +- 2 files changed, 32 insertions(+), 90 deletions(-) diff --git a/src/coreclr/src/jit/codegenxarch.cpp b/src/coreclr/src/jit/codegenxarch.cpp index dff21ae..627e1db 100644 --- a/src/coreclr/src/jit/codegenxarch.cpp +++ b/src/coreclr/src/jit/codegenxarch.cpp @@ -6790,105 +6790,47 @@ int CodeGenInterface::genCallerSPtoInitialSPdelta() const void CodeGen::genSSE2BitwiseOp(GenTree* treeNode) { regNumber targetReg = treeNode->GetRegNum(); - var_types targetType = treeNode->TypeGet(); - assert(varTypeIsFloating(targetType)); - - float f; - double d; - CORINFO_FIELD_HANDLE* bitMask = nullptr; - instruction ins = INS_invalid; - void* cnsAddr = nullptr; - bool dblAlign = false; - - switch (treeNode->OperGet()) - { - case GT_NEG: - // Neg(x) = flip the sign bit. - // Neg(f) = f ^ 0x80000000 - // Neg(d) = d ^ 0x8000000000000000 - ins = INS_xorps; - if (targetType == TYP_FLOAT) - { - bitMask = &negBitmaskFlt; - - static_assert_no_msg(sizeof(float) == sizeof(int)); - *((int*)&f) = 0x80000000; - cnsAddr = &f; - } - else - { - bitMask = &negBitmaskDbl; - - static_assert_no_msg(sizeof(double) == sizeof(__int64)); - *((__int64*)&d) = 0x8000000000000000LL; - cnsAddr = &d; - dblAlign = true; - } - break; - - case GT_INTRINSIC: - assert(treeNode->AsIntrinsic()->gtIntrinsicName == NI_System_Math_Abs); - - // Abs(x) = set sign-bit to zero - // Abs(f) = f & 0x7fffffff - // Abs(d) = d & 0x7fffffffffffffff - ins = INS_andps; - if (targetType == TYP_FLOAT) - { - bitMask = &absBitmaskFlt; + regNumber operandReg = genConsumeReg(treeNode->gtGetOp1()); + emitAttr size = emitTypeSize(treeNode); - static_assert_no_msg(sizeof(float) == sizeof(int)); - *((int*)&f) = 0x7fffffff; - cnsAddr = &f; - } - else - { - bitMask = &absBitmaskDbl; + assert(varTypeIsFloating(treeNode->TypeGet())); + assert(treeNode->gtGetOp1()->isUsedFromReg()); - static_assert_no_msg(sizeof(double) == sizeof(__int64)); - *((__int64*)&d) = 0x7fffffffffffffffLL; - cnsAddr = &d; - dblAlign = true; - } - break; + CORINFO_FIELD_HANDLE* maskFld = nullptr; + UINT64 mask = 0; + instruction ins = INS_invalid; - default: - assert(!"genSSE2: unsupported oper"); - unreached(); - break; + if (treeNode->OperIs(GT_NEG)) + { + // Neg(x) = flip the sign bit. + // Neg(f) = f ^ 0x80000000 x4 (packed) + // Neg(d) = d ^ 0x8000000000000000 x2 (packed) + ins = INS_xorps; + mask = treeNode->TypeIs(TYP_FLOAT) ? 0x8000000080000000UL : 0x8000000000000000UL; + maskFld = treeNode->TypeIs(TYP_FLOAT) ? &negBitmaskFlt : &negBitmaskDbl; } - - if (*bitMask == nullptr) + else if (treeNode->OperIs(GT_INTRINSIC)) { - assert(cnsAddr != nullptr); - - UNATIVE_OFFSET cnsSize = genTypeSize(targetType); - UNATIVE_OFFSET cnsAlign = (compiler->compCodeOpt() != Compiler::SMALL_CODE) ? cnsSize : 1; - - *bitMask = GetEmitter()->emitAnyConst(cnsAddr, cnsSize, cnsAlign); + assert(treeNode->AsIntrinsic()->gtIntrinsicName == NI_System_Math_Abs); + // Abs(x) = set sign-bit to zero + // Abs(f) = f & 0x7fffffff x4 (packed) + // Abs(d) = d & 0x7fffffffffffffff x2 (packed) + ins = INS_andps; + mask = treeNode->TypeIs(TYP_FLOAT) ? 0x7fffffff7fffffffUL : 0x7fffffffffffffffUL; + maskFld = treeNode->TypeIs(TYP_FLOAT) ? &absBitmaskFlt : &absBitmaskDbl; } - - // We need an additional register for bitmask. - regNumber tmpReg = treeNode->GetSingleTempReg(); - - // Move operand into targetReg only if the reg reserved for - // internal purpose is not the same as targetReg. - GenTree* op1 = treeNode->AsOp()->gtOp1; - assert(op1->isUsedFromReg()); - regNumber operandReg = genConsumeReg(op1); - if (tmpReg != targetReg) + else { - if (operandReg != targetReg) - { - inst_RV_RV(ins_Copy(targetType), targetReg, operandReg, targetType); - } + assert(!"genSSE2BitwiseOp: unsupported oper"); + } - operandReg = tmpReg; + if (*maskFld == nullptr) + { + UINT64 maskPack[] = {mask, mask}; + *maskFld = GetEmitter()->emitAnyConst(&maskPack, 16, 16); } - GetEmitter()->emitIns_R_C(ins_Load(targetType, false), emitTypeSize(targetType), tmpReg, *bitMask, 0); - assert(ins != INS_invalid); - inst_RV_RV(ins, targetReg, operandReg, targetType); + GetEmitter()->emitIns_SIMD_R_R_C(ins, size, targetReg, operandReg, *maskFld, 0); } //----------------------------------------------------------------------------------------- diff --git a/src/coreclr/src/jit/emit.cpp b/src/coreclr/src/jit/emit.cpp index 21e2f78..51b52b9 100644 --- a/src/coreclr/src/jit/emit.cpp +++ b/src/coreclr/src/jit/emit.cpp @@ -5525,7 +5525,7 @@ UNATIVE_OFFSET emitter::emitDataGenBeg(UNATIVE_OFFSET size, UNATIVE_OFFSET align /* Get hold of the current offset */ secOffs = emitConsDsc.dsdOffs; - if (alignment > 4) + if (((secOffs % alignment) != 0) && (alignment > 4)) { // As per the above comment, the minimum alignment is actually 4 // bytes so we don't need to make any adjustments if the requested -- 2.7.4